Address comments for PR #9988 and #9996 (#10020)

Summary:
1. The latest change of DecideRateLimiterPriority in https://github.com/facebook/rocksdb/pull/9988 is reverted.
2. For https://github.com/facebook/rocksdb/blob/main/db/builder.cc#L345-L349
  2.1. Remove `we will regrad this verification as user reads` from the comments.
  2.2. `Do not set` the read_options.rate_limiter_priority to Env::IO_USER . Flush should be a background job.
  2.3. Update db_rate_limiter_test.cc.
3. In IOOptions, mark `prio` as deprecated for future removal.
4. In `file_system.h`, mark `IOPriority` as deprecated for future removal.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10020

Test Plan: Unit tests.

Reviewed By: ajkr

Differential Revision: D36525317

Pulled By: gitbw95

fbshipit-source-id: 011ba421822f8a124e6d25a2661c4e242df6ad36
main
Bo Wang 2 years ago committed by Facebook GitHub Bot
parent 280b9f371a
commit 5be1579ead
  1. 2
      HISTORY.md
  2. 6
      db/builder.cc
  3. 29
      db/db_rate_limiter_test.cc
  4. 50
      file/writable_file_writer.cc
  5. 5
      file/writable_file_writer.h
  6. 2
      include/rocksdb/file_system.h

@ -23,6 +23,8 @@
* Added a new immutable db options, enforce_single_del_contracts. If set to false (default is true), compaction will NOT fail due to a single delete followed by a delete for the same key. The purpose of this temporay option is to help existing use cases migrate.
* Introduce `BlockBasedTableOptions::cache_usage_options` and use that to replace `BlockBasedTableOptions::reserve_table_builder_memory` and `BlockBasedTableOptions::reserve_table_reader_memory`.
* Changed `GetUniqueIdFromTableProperties` to return a 128-bit unique identifier, which will be the standard size now. The old functionality (192-bit) is available from `GetExtendedUniqueIdFromTableProperties`. Both functions are no longer "experimental" and are ready for production use.
* In IOOptions, mark `prio` as deprecated for future removal.
* In `file_system.h`, mark `IOPriority` as deprecated for future removal.
### Bug Fixes
* RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requets completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue.

@ -341,12 +341,10 @@ Status BuildTable(
if (s.ok() && !empty) {
// Verify that the table is usable
// We set for_compaction to false and don't OptimizeForCompactionTableRead
// here because this is a special case after we finish the table building
// here because this is a special case after we finish the table building.
// No matter whether use_direct_io_for_flush_and_compaction is true,
// we will regrad this verification as user reads since the goal is
// to cache it here for further user reads
// the goal is to cache it here for further user reads.
ReadOptions read_options;
read_options.rate_limiter_priority = Env::IO_USER;
std::unique_ptr<InternalIterator> it(table_cache->NewIterator(
read_options, file_options, tboptions.internal_comparator, *meta,
nullptr /* range_del_agg */, mutable_cf_options.prefix_extractor,

@ -116,8 +116,9 @@ TEST_P(DBRateLimiterOnReadTest, Get) {
}
Init();
// In Init(), compaction may request tokens for `Env::IO_USER`.
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
int expected = 0;
for (int i = 0; i < kNumFiles; ++i) {
{
std::string value;
@ -145,8 +146,7 @@ TEST_P(DBRateLimiterOnReadTest, NewMultiGet) {
}
Init();
// In Init(), compaction may request tokens for `Env::IO_USER`.
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
const int kNumKeys = kNumFiles * kNumKeysPerFile;
{
@ -166,7 +166,7 @@ TEST_P(DBRateLimiterOnReadTest, NewMultiGet) {
ASSERT_TRUE(statuses[i].IsNotSupported());
}
}
ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
}
TEST_P(DBRateLimiterOnReadTest, OldMultiGet) {
@ -177,10 +177,10 @@ TEST_P(DBRateLimiterOnReadTest, OldMultiGet) {
}
Init();
// In Init(), compaction may request tokens for `Env::IO_USER`.
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
const int kNumKeys = kNumFiles * kNumKeysPerFile;
int expected = 0;
{
std::vector<std::string> key_bufs;
key_bufs.reserve(kNumKeys);
@ -207,10 +207,10 @@ TEST_P(DBRateLimiterOnReadTest, Iterator) {
}
Init();
// In Init(), compaction may request tokens for `Env::IO_USER`.
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
std::unique_ptr<Iterator> iter(db_->NewIterator(GetReadOptions()));
ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
int expected = 0;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
++expected;
ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
@ -236,12 +236,12 @@ TEST_P(DBRateLimiterOnReadTest, VerifyChecksum) {
return;
}
Init();
// In Init(), compaction may request tokens for `Env::IO_USER`.
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
ASSERT_OK(db_->VerifyChecksum(GetReadOptions()));
// The files are tiny so there should have just been one read per file.
expected += kNumFiles;
int expected = kNumFiles;
ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
}
@ -251,12 +251,11 @@ TEST_P(DBRateLimiterOnReadTest, VerifyFileChecksums) {
}
Init();
// In Init(), compaction may request tokens for `Env::IO_USER`.
int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER);
ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
ASSERT_OK(db_->VerifyFileChecksums(GetReadOptions()));
// The files are tiny so there should have just been one read per file.
expected += kNumFiles;
int expected = kNumFiles;
ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER));
}

@ -55,9 +55,9 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
{
IOOptions io_options;
WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority,
io_options.rate_limiter_priority);
io_options.rate_limiter_priority =
WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority);
IOSTATS_TIMER_GUARD(prepare_write_nanos);
TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite");
writable_file_->PrepareWrite(static_cast<size_t>(GetFileSize()), left,
@ -338,9 +338,9 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
}
#endif
IOOptions io_options;
WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority,
io_options.rate_limiter_priority);
io_options.rate_limiter_priority =
WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority);
s = writable_file_->Flush(io_options, nullptr);
#ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) {
@ -507,11 +507,11 @@ IOStatus WritableFileWriter::WriteBuffered(
size_t left = size;
DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)];
IOOptions io_options;
Env::IOPriority rate_limiter_priority_used =
WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority,
io_options.rate_limiter_priority);
writable_file_->GetIOPriority(), op_rate_limiter_priority);
IOOptions io_options;
io_options.rate_limiter_priority = rate_limiter_priority_used;
while (left > 0) {
size_t allowed = left;
@ -596,11 +596,11 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
size_t left = size;
DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)];
IOOptions io_options;
Env::IOPriority rate_limiter_priority_used =
WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority,
io_options.rate_limiter_priority);
writable_file_->GetIOPriority(), op_rate_limiter_priority);
IOOptions io_options;
io_options.rate_limiter_priority = rate_limiter_priority_used;
// Check how much is allowed. Here, we loop until the rate limiter allows to
// write the entire buffer.
// TODO: need to be improved since it sort of defeats the purpose of the rate
@ -726,11 +726,11 @@ IOStatus WritableFileWriter::WriteDirect(
size_t left = buf_.CurrentSize();
DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)];
IOOptions io_options;
Env::IOPriority rate_limiter_priority_used =
WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority,
io_options.rate_limiter_priority);
writable_file_->GetIOPriority(), op_rate_limiter_priority);
IOOptions io_options;
io_options.rate_limiter_priority = rate_limiter_priority_used;
while (left > 0) {
// Check how much is allowed
@ -827,11 +827,11 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)];
IOOptions io_options;
Env::IOPriority rate_limiter_priority_used =
WritableFileWriter::DecideRateLimiterPriority(
writable_file_->GetIOPriority(), op_rate_limiter_priority,
io_options.rate_limiter_priority);
writable_file_->GetIOPriority(), op_rate_limiter_priority);
IOOptions io_options;
io_options.rate_limiter_priority = rate_limiter_priority_used;
// Check how much is allowed. Here, we loop until the rate limiter allows to
// write the entire buffer.
// TODO: need to be improved since it sort of defeats the purpose of the rate
@ -901,21 +901,17 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
#endif // !ROCKSDB_LITE
Env::IOPriority WritableFileWriter::DecideRateLimiterPriority(
Env::IOPriority writable_file_io_priority,
Env::IOPriority op_rate_limiter_priority,
Env::IOPriority& iooptions_io_priority) {
Env::IOPriority rate_limiter_priority{Env::IO_TOTAL};
Env::IOPriority op_rate_limiter_priority) {
if (writable_file_io_priority == Env::IO_TOTAL &&
op_rate_limiter_priority == Env::IO_TOTAL) {
rate_limiter_priority = Env::IO_TOTAL;
return Env::IO_TOTAL;
} else if (writable_file_io_priority == Env::IO_TOTAL) {
rate_limiter_priority = op_rate_limiter_priority;
return op_rate_limiter_priority;
} else if (op_rate_limiter_priority == Env::IO_TOTAL) {
rate_limiter_priority = writable_file_io_priority;
return writable_file_io_priority;
} else {
rate_limiter_priority = op_rate_limiter_priority;
return op_rate_limiter_priority;
}
iooptions_io_priority = rate_limiter_priority;
return rate_limiter_priority;
}
} // namespace ROCKSDB_NAMESPACE

@ -277,11 +277,10 @@ class WritableFileWriter {
const char* GetFileChecksumFuncName() const;
private:
// Decide the Rate Limiter priority and update io_options.io_priority.
// Decide the Rate Limiter priority.
static Env::IOPriority DecideRateLimiterPriority(
Env::IOPriority writable_file_io_priority,
Env::IOPriority op_rate_limiter_priority,
Env::IOPriority& iooptions_io_priority);
Env::IOPriority op_rate_limiter_priority);
// Used when os buffering is OFF and we are writing
// DMA such as in Direct I/O mode

@ -53,6 +53,7 @@ struct ConfigOptions;
using AccessPattern = RandomAccessFile::AccessPattern;
using FileAttributes = Env::FileAttributes;
// DEPRECATED
// Priority of an IO request. This is a hint and does not guarantee any
// particular QoS.
// IO_LOW - Typically background reads/writes such as compaction/flush
@ -86,6 +87,7 @@ struct IOOptions {
// Timeout for the operation in microseconds
std::chrono::microseconds timeout;
// DEPRECATED
// Priority - high or low
IOPriority prio;

Loading…
Cancel
Save