diff --git a/HISTORY.md b/HISTORY.md index 99351eaba..bb1a063bf 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -23,6 +23,8 @@ * Added a new immutable db options, enforce_single_del_contracts. If set to false (default is true), compaction will NOT fail due to a single delete followed by a delete for the same key. The purpose of this temporay option is to help existing use cases migrate. * Introduce `BlockBasedTableOptions::cache_usage_options` and use that to replace `BlockBasedTableOptions::reserve_table_builder_memory` and `BlockBasedTableOptions::reserve_table_reader_memory`. * Changed `GetUniqueIdFromTableProperties` to return a 128-bit unique identifier, which will be the standard size now. The old functionality (192-bit) is available from `GetExtendedUniqueIdFromTableProperties`. Both functions are no longer "experimental" and are ready for production use. +* In IOOptions, mark `prio` as deprecated for future removal. +* In `file_system.h`, mark `IOPriority` as deprecated for future removal. ### Bug Fixes * RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requets completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue. diff --git a/db/builder.cc b/db/builder.cc index 312ae6181..30d3bbad8 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -341,12 +341,10 @@ Status BuildTable( if (s.ok() && !empty) { // Verify that the table is usable // We set for_compaction to false and don't OptimizeForCompactionTableRead - // here because this is a special case after we finish the table building + // here because this is a special case after we finish the table building. // No matter whether use_direct_io_for_flush_and_compaction is true, - // we will regrad this verification as user reads since the goal is - // to cache it here for further user reads + // the goal is to cache it here for further user reads. ReadOptions read_options; - read_options.rate_limiter_priority = Env::IO_USER; std::unique_ptr it(table_cache->NewIterator( read_options, file_options, tboptions.internal_comparator, *meta, nullptr /* range_del_agg */, mutable_cf_options.prefix_extractor, diff --git a/db/db_rate_limiter_test.cc b/db/db_rate_limiter_test.cc index 0c2bca224..f30af1974 100644 --- a/db/db_rate_limiter_test.cc +++ b/db/db_rate_limiter_test.cc @@ -116,8 +116,9 @@ TEST_P(DBRateLimiterOnReadTest, Get) { } Init(); - // In Init(), compaction may request tokens for `Env::IO_USER`. - int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER); + ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); + + int expected = 0; for (int i = 0; i < kNumFiles; ++i) { { std::string value; @@ -145,8 +146,7 @@ TEST_P(DBRateLimiterOnReadTest, NewMultiGet) { } Init(); - // In Init(), compaction may request tokens for `Env::IO_USER`. - int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER); + ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); const int kNumKeys = kNumFiles * kNumKeysPerFile; { @@ -166,7 +166,7 @@ TEST_P(DBRateLimiterOnReadTest, NewMultiGet) { ASSERT_TRUE(statuses[i].IsNotSupported()); } } - ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); + ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); } TEST_P(DBRateLimiterOnReadTest, OldMultiGet) { @@ -177,10 +177,10 @@ TEST_P(DBRateLimiterOnReadTest, OldMultiGet) { } Init(); - // In Init(), compaction may request tokens for `Env::IO_USER`. - int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER); + ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); const int kNumKeys = kNumFiles * kNumKeysPerFile; + int expected = 0; { std::vector key_bufs; key_bufs.reserve(kNumKeys); @@ -207,10 +207,10 @@ TEST_P(DBRateLimiterOnReadTest, Iterator) { } Init(); - // In Init(), compaction may request tokens for `Env::IO_USER`. - int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER); std::unique_ptr iter(db_->NewIterator(GetReadOptions())); + ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); + int expected = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ++expected; ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); @@ -236,12 +236,12 @@ TEST_P(DBRateLimiterOnReadTest, VerifyChecksum) { return; } Init(); - // In Init(), compaction may request tokens for `Env::IO_USER`. - int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER); + + ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); ASSERT_OK(db_->VerifyChecksum(GetReadOptions())); // The files are tiny so there should have just been one read per file. - expected += kNumFiles; + int expected = kNumFiles; ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); } @@ -251,12 +251,11 @@ TEST_P(DBRateLimiterOnReadTest, VerifyFileChecksums) { } Init(); - // In Init(), compaction may request tokens for `Env::IO_USER`. - int64_t expected = options_.rate_limiter->GetTotalRequests(Env::IO_USER); + ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); ASSERT_OK(db_->VerifyFileChecksums(GetReadOptions())); // The files are tiny so there should have just been one read per file. - expected += kNumFiles; + int expected = kNumFiles; ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); } diff --git a/file/writable_file_writer.cc b/file/writable_file_writer.cc index 87c08fed4..e56742f7f 100644 --- a/file/writable_file_writer.cc +++ b/file/writable_file_writer.cc @@ -55,9 +55,9 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum, { IOOptions io_options; - WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority, - io_options.rate_limiter_priority); + io_options.rate_limiter_priority = + WritableFileWriter::DecideRateLimiterPriority( + writable_file_->GetIOPriority(), op_rate_limiter_priority); IOSTATS_TIMER_GUARD(prepare_write_nanos); TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite"); writable_file_->PrepareWrite(static_cast(GetFileSize()), left, @@ -338,9 +338,9 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) { } #endif IOOptions io_options; - WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority, - io_options.rate_limiter_priority); + io_options.rate_limiter_priority = + WritableFileWriter::DecideRateLimiterPriority( + writable_file_->GetIOPriority(), op_rate_limiter_priority); s = writable_file_->Flush(io_options, nullptr); #ifndef ROCKSDB_LITE if (ShouldNotifyListeners()) { @@ -507,11 +507,11 @@ IOStatus WritableFileWriter::WriteBuffered( size_t left = size; DataVerificationInfo v_info; char checksum_buf[sizeof(uint32_t)]; - IOOptions io_options; Env::IOPriority rate_limiter_priority_used = WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority, - io_options.rate_limiter_priority); + writable_file_->GetIOPriority(), op_rate_limiter_priority); + IOOptions io_options; + io_options.rate_limiter_priority = rate_limiter_priority_used; while (left > 0) { size_t allowed = left; @@ -596,11 +596,11 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum( size_t left = size; DataVerificationInfo v_info; char checksum_buf[sizeof(uint32_t)]; - IOOptions io_options; Env::IOPriority rate_limiter_priority_used = WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority, - io_options.rate_limiter_priority); + writable_file_->GetIOPriority(), op_rate_limiter_priority); + IOOptions io_options; + io_options.rate_limiter_priority = rate_limiter_priority_used; // Check how much is allowed. Here, we loop until the rate limiter allows to // write the entire buffer. // TODO: need to be improved since it sort of defeats the purpose of the rate @@ -726,11 +726,11 @@ IOStatus WritableFileWriter::WriteDirect( size_t left = buf_.CurrentSize(); DataVerificationInfo v_info; char checksum_buf[sizeof(uint32_t)]; - IOOptions io_options; Env::IOPriority rate_limiter_priority_used = WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority, - io_options.rate_limiter_priority); + writable_file_->GetIOPriority(), op_rate_limiter_priority); + IOOptions io_options; + io_options.rate_limiter_priority = rate_limiter_priority_used; while (left > 0) { // Check how much is allowed @@ -827,11 +827,11 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum( DataVerificationInfo v_info; char checksum_buf[sizeof(uint32_t)]; - IOOptions io_options; Env::IOPriority rate_limiter_priority_used = WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority, - io_options.rate_limiter_priority); + writable_file_->GetIOPriority(), op_rate_limiter_priority); + IOOptions io_options; + io_options.rate_limiter_priority = rate_limiter_priority_used; // Check how much is allowed. Here, we loop until the rate limiter allows to // write the entire buffer. // TODO: need to be improved since it sort of defeats the purpose of the rate @@ -901,21 +901,17 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum( #endif // !ROCKSDB_LITE Env::IOPriority WritableFileWriter::DecideRateLimiterPriority( Env::IOPriority writable_file_io_priority, - Env::IOPriority op_rate_limiter_priority, - Env::IOPriority& iooptions_io_priority) { - Env::IOPriority rate_limiter_priority{Env::IO_TOTAL}; + Env::IOPriority op_rate_limiter_priority) { if (writable_file_io_priority == Env::IO_TOTAL && op_rate_limiter_priority == Env::IO_TOTAL) { - rate_limiter_priority = Env::IO_TOTAL; + return Env::IO_TOTAL; } else if (writable_file_io_priority == Env::IO_TOTAL) { - rate_limiter_priority = op_rate_limiter_priority; + return op_rate_limiter_priority; } else if (op_rate_limiter_priority == Env::IO_TOTAL) { - rate_limiter_priority = writable_file_io_priority; + return writable_file_io_priority; } else { - rate_limiter_priority = op_rate_limiter_priority; + return op_rate_limiter_priority; } - iooptions_io_priority = rate_limiter_priority; - return rate_limiter_priority; } } // namespace ROCKSDB_NAMESPACE diff --git a/file/writable_file_writer.h b/file/writable_file_writer.h index a1391f976..000159faa 100644 --- a/file/writable_file_writer.h +++ b/file/writable_file_writer.h @@ -277,11 +277,10 @@ class WritableFileWriter { const char* GetFileChecksumFuncName() const; private: - // Decide the Rate Limiter priority and update io_options.io_priority. + // Decide the Rate Limiter priority. static Env::IOPriority DecideRateLimiterPriority( Env::IOPriority writable_file_io_priority, - Env::IOPriority op_rate_limiter_priority, - Env::IOPriority& iooptions_io_priority); + Env::IOPriority op_rate_limiter_priority); // Used when os buffering is OFF and we are writing // DMA such as in Direct I/O mode diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index 9eca00aac..27bab5833 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -53,6 +53,7 @@ struct ConfigOptions; using AccessPattern = RandomAccessFile::AccessPattern; using FileAttributes = Env::FileAttributes; +// DEPRECATED // Priority of an IO request. This is a hint and does not guarantee any // particular QoS. // IO_LOW - Typically background reads/writes such as compaction/flush @@ -86,6 +87,7 @@ struct IOOptions { // Timeout for the operation in microseconds std::chrono::microseconds timeout; + // DEPRECATED // Priority - high or low IOPriority prio;