Address comments for PR #9988 and #9996 (#10020)

Summary:
1. The latest change of DecideRateLimiterPriority in https://github.com/facebook/rocksdb/pull/9988 is reverted.
2. For https://github.com/facebook/rocksdb/blob/main/db/builder.cc#L345-L349
  2.1. Remove `we will regrad this verification as user reads` from the comments.
  2.2. `Do not set` the read_options.rate_limiter_priority to Env::IO_USER . Flush should be a background job.
  2.3. Update db_rate_limiter_test.cc.
3. In IOOptions, mark `prio` as deprecated for future removal.
4. In `file_system.h`, mark `IOPriority` as deprecated for future removal.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10020

Test Plan: Unit tests.

Reviewed By: ajkr

Differential Revision: D36525317

Pulled By: gitbw95

fbshipit-source-id: 011ba421822f8a124e6d25a2661c4e242df6ad36
main
Bo Wang 2 years ago committed by Facebook GitHub Bot
parent 280b9f371a
commit 5be1579ead
  1. 2
      HISTORY.md
  2. 6
      db/builder.cc
  3. 29
      db/db_rate_limiter_test.cc
  4. 50
      file/writable_file_writer.cc
  5. 5
      file/writable_file_writer.h
  6. 2
      include/rocksdb/file_system.h

@ -23,6 +23,8 @@
* Added a new immutable db options, enforce_single_del_contracts. If set to false (default is true), compaction will NOT fail due to a single delete followed by a delete for the same key. The purpose of this temporay option is to help existing use cases migrate. * Added a new immutable db options, enforce_single_del_contracts. If set to false (default is true), compaction will NOT fail due to a single delete followed by a delete for the same key. The purpose of this temporay option is to help existing use cases migrate.
* Introduce `BlockBasedTableOptions::cache_usage_options` and use that to replace `BlockBasedTableOptions::reserve_table_builder_memory` and `BlockBasedTableOptions::reserve_table_reader_memory`. * Introduce `BlockBasedTableOptions::cache_usage_options` and use that to replace `BlockBasedTableOptions::reserve_table_builder_memory` and `BlockBasedTableOptions::reserve_table_reader_memory`.
* Changed `GetUniqueIdFromTableProperties` to return a 128-bit unique identifier, which will be the standard size now. The old functionality (192-bit) is available from `GetExtendedUniqueIdFromTableProperties`. Both functions are no longer "experimental" and are ready for production use. * Changed `GetUniqueIdFromTableProperties` to return a 128-bit unique identifier, which will be the standard size now. The old functionality (192-bit) is available from `GetExtendedUniqueIdFromTableProperties`. Both functions are no longer "experimental" and are ready for production use.
* In IOOptions, mark `prio` as deprecated for future removal.
* In `file_system.h`, mark `IOPriority` as deprecated for future removal.
### Bug Fixes ### Bug Fixes
* RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requets completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue. * RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requets completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue.

@ -341,12 +341,10 @@ Status BuildTable(
if (s.ok() && !empty) { if (s.ok() && !empty) {
// Verify that the table is usable // Verify that the table is usable
// We set for_compaction to false and don't OptimizeForCompactionTableRead // We set for_compaction to false and don't OptimizeForCompactionTableRead
// here because this is a special case after we finish the table building // here because this is a special case after we finish the table building.
// No matter whether use_direct_io_for_flush_and_compaction is true, // No matter whether use_direct_io_for_flush_and_compaction is true,
// we will regrad this verification as user reads since the goal is // the goal is to cache it here for further user reads.
// to cache it here for further user reads
ReadOptions read_options; ReadOptions read_options;
read_options.rate_limiter_priority = Env::IO_USER;
std::unique_ptr<InternalIterator> it(table_cache->NewIterator( std::unique_ptr<InternalIterator> it(table_cache->NewIterator(
read_options, file_options, tboptions.internal_comparator, *meta, read_options, file_options, tboptions.internal_comparator, *meta,
nullptr /* range_del_agg */, mutable_cf_options.prefix_extractor, nullptr /* range_del_agg */, mutable_cf_options.prefix_extractor,

@ -116,8 +116,9 @@ TEST_P(DBRateLimiterOnReadTest, Get) {
} }
Init(); Init();
// In Init(), compaction may request tokens for `Env::IO_USER`. ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
int expected = 0;
for (int i = 0; i < kNumFiles; ++i) { for (int i = 0; i < kNumFiles; ++i) {
{ {
std::string value; std::string value;
@ -145,8 +146,7 @@ TEST_P(DBRateLimiterOnReadTest, NewMultiGet) {
} }
Init(); Init();
// In Init(), compaction may request tokens for `Env::IO_USER`. ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
const int kNumKeys = kNumFiles * kNumKeysPerFile; const int kNumKeys = kNumFiles * kNumKeysPerFile;
{ {
@ -166,7 +166,7 @@ TEST_P(DBRateLimiterOnReadTest, NewMultiGet) {
ASSERT_TRUE(statuses[i].IsNotSupported()); ASSERT_TRUE(statuses[i].IsNotSupported());
} }
} }
ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
} }
TEST_P(DBRateLimiterOnReadTest, OldMultiGet) { TEST_P(DBRateLimiterOnReadTest, OldMultiGet) {
@ -177,10 +177,10 @@ TEST_P(DBRateLimiterOnReadTest, OldMultiGet) {
} }
Init(); Init();
// In Init(), compaction may request tokens for `Env::IO_USER`. ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
const int kNumKeys = kNumFiles * kNumKeysPerFile; const int kNumKeys = kNumFiles * kNumKeysPerFile;
int expected = 0;
{ {
std::vector<std::string> key_bufs; std::vector<std::string> key_bufs;
key_bufs.reserve(kNumKeys); key_bufs.reserve(kNumKeys);
@ -207,10 +207,10 @@ TEST_P(DBRateLimiterOnReadTest, Iterator) {
} }
Init(); Init();
// In Init(), compaction may request tokens for `Env::IO_USER`.
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
std::unique_ptr<Iterator> iter(db_->NewIterator(GetReadOptions())); std::unique_ptr<Iterator> iter(db_->NewIterator(GetReadOptions()));
ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
int expected = 0;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
++expected; ++expected;
ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
@ -236,12 +236,12 @@ TEST_P(DBRateLimiterOnReadTest, VerifyChecksum) {
return; return;
} }
Init(); Init();
// In Init(), compaction may request tokens for `Env::IO_USER`.
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER); ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
ASSERT_OK(db_->VerifyChecksum(GetReadOptions())); ASSERT_OK(db_->VerifyChecksum(GetReadOptions()));
// The files are tiny so there should have just been one read per file. // The files are tiny so there should have just been one read per file.
expected += kNumFiles; int expected = kNumFiles;
ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
} }
@ -251,12 +251,11 @@ TEST_P(DBRateLimiterOnReadTest, VerifyFileChecksums) {
} }
Init(); Init();
// In Init(), compaction may request tokens for `Env::IO_USER`. ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
ASSERT_OK(db_->VerifyFileChecksums(GetReadOptions())); ASSERT_OK(db_->VerifyFileChecksums(GetReadOptions()));
// The files are tiny so there should have just been one read per file. // The files are tiny so there should have just been one read per file.
expected += kNumFiles; int expected = kNumFiles;
ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
} }

@ -55,9 +55,9 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
{ {
IOOptions io_options; IOOptions io_options;
WritableFileWriter::DecideRateLimiterPriority( io_options.rate_limiter_priority =
writable_file_->GetIOPriority(), op_rate_limiter_priority, WritableFileWriter::DecideRateLimiterPriority(
io_options.rate_limiter_priority); writable_file_->GetIOPriority(), op_rate_limiter_priority);
IOSTATS_TIMER_GUARD(prepare_write_nanos); IOSTATS_TIMER_GUARD(prepare_write_nanos);
TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite"); TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite");
writable_file_->PrepareWrite(static_cast<size_t>(GetFileSize()), left, writable_file_->PrepareWrite(static_cast<size_t>(GetFileSize()), left,
@ -338,9 +338,9 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
} }
#endif #endif
IOOptions io_options; IOOptions io_options;
WritableFileWriter::DecideRateLimiterPriority( io_options.rate_limiter_priority =
writable_file_->GetIOPriority(), op_rate_limiter_priority, WritableFileWriter::DecideRateLimiterPriority(
io_options.rate_limiter_priority); writable_file_->GetIOPriority(), op_rate_limiter_priority);
s = writable_file_->Flush(io_options, nullptr); s = writable_file_->Flush(io_options, nullptr);
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
@ -507,11 +507,11 @@ IOStatus WritableFileWriter::WriteBuffered(
size_t left = size; size_t left = size;
DataVerificationInfo v_info; DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)]; char checksum_buf[sizeof(uint32_t)];
IOOptions io_options;
Env::IOPriority rate_limiter_priority_used = Env::IOPriority rate_limiter_priority_used =
WritableFileWriter::DecideRateLimiterPriority( WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority, writable_file_->GetIOPriority(), op_rate_limiter_priority);
io_options.rate_limiter_priority); IOOptions io_options;
io_options.rate_limiter_priority = rate_limiter_priority_used;
while (left > 0) { while (left > 0) {
size_t allowed = left; size_t allowed = left;
@ -596,11 +596,11 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
size_t left = size; size_t left = size;
DataVerificationInfo v_info; DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)]; char checksum_buf[sizeof(uint32_t)];
IOOptions io_options;
Env::IOPriority rate_limiter_priority_used = Env::IOPriority rate_limiter_priority_used =
WritableFileWriter::DecideRateLimiterPriority( WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority, writable_file_->GetIOPriority(), op_rate_limiter_priority);
io_options.rate_limiter_priority); IOOptions io_options;
io_options.rate_limiter_priority = rate_limiter_priority_used;
// Check how much is allowed. Here, we loop until the rate limiter allows to // Check how much is allowed. Here, we loop until the rate limiter allows to
// write the entire buffer. // write the entire buffer.
// TODO: need to be improved since it sort of defeats the purpose of the rate // TODO: need to be improved since it sort of defeats the purpose of the rate
@ -726,11 +726,11 @@ IOStatus WritableFileWriter::WriteDirect(
size_t left = buf_.CurrentSize(); size_t left = buf_.CurrentSize();
DataVerificationInfo v_info; DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)]; char checksum_buf[sizeof(uint32_t)];
IOOptions io_options;
Env::IOPriority rate_limiter_priority_used = Env::IOPriority rate_limiter_priority_used =
WritableFileWriter::DecideRateLimiterPriority( WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority, writable_file_->GetIOPriority(), op_rate_limiter_priority);
io_options.rate_limiter_priority); IOOptions io_options;
io_options.rate_limiter_priority = rate_limiter_priority_used;
while (left > 0) { while (left > 0) {
// Check how much is allowed // Check how much is allowed
@ -827,11 +827,11 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
DataVerificationInfo v_info; DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)]; char checksum_buf[sizeof(uint32_t)];
IOOptions io_options;
Env::IOPriority rate_limiter_priority_used = Env::IOPriority rate_limiter_priority_used =
WritableFileWriter::DecideRateLimiterPriority( WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority, writable_file_->GetIOPriority(), op_rate_limiter_priority);
io_options.rate_limiter_priority); IOOptions io_options;
io_options.rate_limiter_priority = rate_limiter_priority_used;
// Check how much is allowed. Here, we loop until the rate limiter allows to // Check how much is allowed. Here, we loop until the rate limiter allows to
// write the entire buffer. // write the entire buffer.
// TODO: need to be improved since it sort of defeats the purpose of the rate // TODO: need to be improved since it sort of defeats the purpose of the rate
@ -901,21 +901,17 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
#endif // !ROCKSDB_LITE #endif // !ROCKSDB_LITE
Env::IOPriority WritableFileWriter::DecideRateLimiterPriority( Env::IOPriority WritableFileWriter::DecideRateLimiterPriority(
Env::IOPriority writable_file_io_priority, Env::IOPriority writable_file_io_priority,
Env::IOPriority op_rate_limiter_priority, Env::IOPriority op_rate_limiter_priority) {
Env::IOPriority& iooptions_io_priority) {
Env::IOPriority rate_limiter_priority{Env::IO_TOTAL};
if (writable_file_io_priority == Env::IO_TOTAL && if (writable_file_io_priority == Env::IO_TOTAL &&
op_rate_limiter_priority == Env::IO_TOTAL) { op_rate_limiter_priority == Env::IO_TOTAL) {
rate_limiter_priority = Env::IO_TOTAL; return Env::IO_TOTAL;
} else if (writable_file_io_priority == Env::IO_TOTAL) { } else if (writable_file_io_priority == Env::IO_TOTAL) {
rate_limiter_priority = op_rate_limiter_priority; return op_rate_limiter_priority;
} else if (op_rate_limiter_priority == Env::IO_TOTAL) { } else if (op_rate_limiter_priority == Env::IO_TOTAL) {
rate_limiter_priority = writable_file_io_priority; return writable_file_io_priority;
} else { } else {
rate_limiter_priority = op_rate_limiter_priority; return op_rate_limiter_priority;
} }
iooptions_io_priority = rate_limiter_priority;
return rate_limiter_priority;
} }
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -277,11 +277,10 @@ class WritableFileWriter {
const char* GetFileChecksumFuncName() const; const char* GetFileChecksumFuncName() const;
private: private:
// Decide the Rate Limiter priority and update io_options.io_priority. // Decide the Rate Limiter priority.
static Env::IOPriority DecideRateLimiterPriority( static Env::IOPriority DecideRateLimiterPriority(
Env::IOPriority writable_file_io_priority, Env::IOPriority writable_file_io_priority,
Env::IOPriority op_rate_limiter_priority, Env::IOPriority op_rate_limiter_priority);
Env::IOPriority& iooptions_io_priority);
// Used when os buffering is OFF and we are writing // Used when os buffering is OFF and we are writing
// DMA such as in Direct I/O mode // DMA such as in Direct I/O mode

@ -53,6 +53,7 @@ struct ConfigOptions;
using AccessPattern = RandomAccessFile::AccessPattern; using AccessPattern = RandomAccessFile::AccessPattern;
using FileAttributes = Env::FileAttributes; using FileAttributes = Env::FileAttributes;
// DEPRECATED
// Priority of an IO request. This is a hint and does not guarantee any // Priority of an IO request. This is a hint and does not guarantee any
// particular QoS. // particular QoS.
// IO_LOW - Typically background reads/writes such as compaction/flush // IO_LOW - Typically background reads/writes such as compaction/flush
@ -86,6 +87,7 @@ struct IOOptions {
// Timeout for the operation in microseconds // Timeout for the operation in microseconds
std::chrono::microseconds timeout; std::chrono::microseconds timeout;
// DEPRECATED
// Priority - high or low // Priority - high or low
IOPriority prio; IOPriority prio;

Loading…
Cancel
Save