diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index a55484bf0..4bbc36247 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1689,8 +1689,9 @@ Status CompactionJob::OpenCompactionOutputFile( // Pass temperature of botommost files to FileSystem. FileOptions fo_copy = file_options_; + Temperature temperature = Temperature::kUnknown; if (bottommost_level_) { - fo_copy.temperature = + fo_copy.temperature = temperature = sub_compact->compaction->mutable_cf_options()->bottommost_temperature; } @@ -1742,6 +1743,7 @@ Status CompactionJob::OpenCompactionOutputFile( sub_compact->compaction->output_path_id(), 0); meta.oldest_ancester_time = oldest_ancester_time; meta.file_creation_time = current_time; + meta.temperature = temperature; sub_compact->outputs.emplace_back( std::move(meta), cfd->internal_comparator(), /*enable_order_check=*/ diff --git a/db/db_test2.cc b/db/db_test2.cc index 42ec2d103..8a3724307 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -5531,6 +5531,28 @@ TEST_P(RenameCurrentTest, Compaction) { ASSERT_EQ("NOT_FOUND", Get("foo")); ASSERT_EQ("d_value", Get("d")); } + +TEST_F(DBTest2, BottommostTemperature) { + Options options = CurrentOptions(); + options.bottommost_temperature = Temperature::kWarm; + options.level0_file_num_compaction_trigger = 2; + Reopen(options); + + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(Put("bar", "bar")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(Put("bar", "bar")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + + Reopen(options); + + ColumnFamilyMetaData metadata; + db_->GetColumnFamilyMetaData(&metadata); + ASSERT_EQ(1, metadata.file_count); + ASSERT_EQ(Temperature::kWarm, metadata.levels[1].files[0].temperature); +} #endif // ROCKSDB_LITE // WAL recovery mode is WALRecoveryMode::kPointInTimeRecovery. @@ -5586,7 +5608,6 @@ TEST_F(DBTest2, PointInTimeRecoveryWithSyncFailureInCFCreation) { options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; ReopenWithColumnFamilies({"default", "test1", "test2"}, options); } - } // namespace ROCKSDB_NAMESPACE #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS diff --git a/db/version_edit.cc b/db/version_edit.cc index d9246367c..8cb173a2d 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -191,6 +191,11 @@ bool VersionEdit::EncodeTo(std::string* dst) const { char p = static_cast(f.fd.GetPathId()); PutLengthPrefixedSlice(dst, Slice(&p, 1)); } + if (f.temperature != Temperature::kUnknown) { + PutVarint32(dst, NewFileCustomTag::kTemperature); + char p = static_cast(f.temperature); + PutLengthPrefixedSlice(dst, Slice(&p, 1)); + } if (f.marked_for_compaction) { PutVarint32(dst, NewFileCustomTag::kNeedCompaction); char p = static_cast(1); @@ -360,6 +365,16 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) { return "invalid oldest blob file number"; } break; + case kTemperature: + if (field.size() != 1) { + return "temperature field wrong size"; + } else { + Temperature casted_field = static_cast(field[0]); + if (casted_field <= Temperature::kCold) { + f.temperature = casted_field; + } + } + break; default: if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) { // Should not proceed if cannot understand it @@ -774,6 +789,12 @@ std::string VersionEdit::DebugString(bool hex_key) const { r.append(f.file_checksum); r.append(" file_checksum_func_name: "); r.append(f.file_checksum_func_name); + if (f.temperature != Temperature::kUnknown) { + r.append(" temperature: "); + // Maybe change to human readable format whenthe feature becomes + // permanent + r.append(ToString(static_cast(f.temperature))); + } } for (const auto& blob_file_addition : blob_file_additions_) { @@ -876,6 +897,11 @@ std::string VersionEdit::DebugJSON(int edit_num, bool hex_key) const { if (f.oldest_blob_file_number != kInvalidBlobFileNumber) { jw << "OldestBlobFile" << f.oldest_blob_file_number; } + if (f.temperature != Temperature::kUnknown) { + // Maybe change to human readable format whenthe feature becomes + // permanent + jw << "Temperature" << static_cast(f.temperature); + } jw.EndArrayedObject(); } diff --git a/db/version_edit.h b/db/version_edit.h index 7b6884793..600634927 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -81,6 +81,7 @@ enum NewFileCustomTag : uint32_t { kFileCreationTime = 6, kFileChecksum = 7, kFileChecksumFuncName = 8, + kTemperature = 9, // If this bit for the custom tag is set, opening DB should fail if // we don't know this field. @@ -188,6 +189,7 @@ struct FileMetaData { bool marked_for_compaction = false; // True if client asked us nicely to // compact this file. + Temperature temperature = Temperature::kUnknown; // Used only in BlobDB. The file number of the oldest blob file this SST file // refers to. 0 is an invalid value; BlobDB numbers the files starting from 1. diff --git a/db/version_set.cc b/db/version_set.cc index c9042d727..97eb3d11f 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1485,15 +1485,16 @@ void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) { file_path = ioptions->cf_paths.back().path; } const uint64_t file_number = file->fd.GetNumber(); - files.emplace_back(SstFileMetaData{ + files.emplace_back( MakeTableFileName("", file_number), file_number, file_path, static_cast(file->fd.GetFileSize()), file->fd.smallest_seqno, file->fd.largest_seqno, file->smallest.user_key().ToString(), file->largest.user_key().ToString(), file->stats.num_reads_sampled.load(std::memory_order_relaxed), - file->being_compacted, file->oldest_blob_file_number, - file->TryGetOldestAncesterTime(), file->TryGetFileCreationTime(), - file->file_checksum, file->file_checksum_func_name}); + file->being_compacted, file->temperature, + file->oldest_blob_file_number, file->TryGetOldestAncesterTime(), + file->TryGetFileCreationTime(), file->file_checksum, + file->file_checksum_func_name); files.back().num_entries = file->num_entries; files.back().num_deletions = file->num_deletions; level_size += file->fd.GetFileSize(); diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index c682003a4..7804ec46b 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -188,12 +188,13 @@ struct CompressionOptions { // Temperature of a file. Used to pass to FileSystem for a different // placement and/or coding. +// Reserve some numbers in the middle, in case we need to insert new tier +// there. enum class Temperature : uint8_t { - kHot, - kWarm, - kCold, - kTotal, - kUnknown = kTotal, + kUnknown = 0, + kHot = 0x04, + kWarm = 0x08, + kCold = 0x0C, }; enum UpdateStatus { // Return status For inplace update callback diff --git a/include/rocksdb/metadata.h b/include/rocksdb/metadata.h index a789c75de..b515c51a1 100644 --- a/include/rocksdb/metadata.h +++ b/include/rocksdb/metadata.h @@ -11,6 +11,7 @@ #include #include +#include "rocksdb/options.h" #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { @@ -62,6 +63,7 @@ struct SstFileMetaData { being_compacted(false), num_entries(0), num_deletions(0), + temperature(Temperature::kUnknown), oldest_blob_file_number(0), oldest_ancester_time(0), file_creation_time(0) {} @@ -71,7 +73,8 @@ struct SstFileMetaData { SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno, const std::string& _smallestkey, const std::string& _largestkey, uint64_t _num_reads_sampled, - bool _being_compacted, uint64_t _oldest_blob_file_number, + bool _being_compacted, Temperature _temperature, + uint64_t _oldest_blob_file_number, uint64_t _oldest_ancester_time, uint64_t _file_creation_time, std::string& _file_checksum, std::string& _file_checksum_func_name) @@ -87,6 +90,7 @@ struct SstFileMetaData { being_compacted(_being_compacted), num_entries(0), num_deletions(0), + temperature(_temperature), oldest_blob_file_number(_oldest_blob_file_number), oldest_ancester_time(_oldest_ancester_time), file_creation_time(_file_creation_time), @@ -112,6 +116,9 @@ struct SstFileMetaData { uint64_t num_entries; uint64_t num_deletions; + // This feature is experimental and subject to change. + Temperature temperature; + uint64_t oldest_blob_file_number; // The id of the oldest blob file // referenced by the file. // An SST file may be generated by compactions whose input files may diff --git a/tools/simulated_hybrid_file_system.cc b/tools/simulated_hybrid_file_system.cc index 771b2e0db..59b4654be 100644 --- a/tools/simulated_hybrid_file_system.cc +++ b/tools/simulated_hybrid_file_system.cc @@ -7,6 +7,7 @@ #include "tools/simulated_hybrid_file_system.h" +#include #include #include @@ -110,8 +111,7 @@ IOStatus SimulatedHybridRaf::Read(uint64_t offset, size_t n, char* scratch, IODebugContext* dbg) const { if (temperature_ == Temperature::kWarm) { Env::Default()->SleepForMicroseconds(kLatencyAddedPerRequestUs); - rate_limiter_->Request(kDummyBytesPerRequest, Env::IOPriority::IO_LOW, - nullptr); + RequestRateLimit(1); } return target()->Read(offset, n, options, result, scratch, dbg); } @@ -120,11 +120,9 @@ IOStatus SimulatedHybridRaf::MultiRead(FSReadRequest* reqs, size_t num_reqs, const IOOptions& options, IODebugContext* dbg) { if (temperature_ == Temperature::kWarm) { + RequestRateLimit(static_cast(num_reqs)); Env::Default()->SleepForMicroseconds(kLatencyAddedPerRequestUs * static_cast(num_reqs)); - rate_limiter_->Request( - static_cast(num_reqs) * kDummyBytesPerRequest, - Env::IOPriority::IO_LOW, nullptr); } return target()->MultiRead(reqs, num_reqs, options, dbg); } @@ -133,13 +131,22 @@ IOStatus SimulatedHybridRaf::Prefetch(uint64_t offset, size_t n, const IOOptions& options, IODebugContext* dbg) { if (temperature_ == Temperature::kWarm) { - rate_limiter_->Request(kDummyBytesPerRequest, Env::IOPriority::IO_LOW, - nullptr); + RequestRateLimit(1); Env::Default()->SleepForMicroseconds(kLatencyAddedPerRequestUs); } return target()->Prefetch(offset, n, options, dbg); } +void SimulatedHybridRaf::RequestRateLimit(int64_t num_requests) const { + int64_t left = num_requests * kDummyBytesPerRequest; + const int64_t kMaxToRequest = kDummyBytesPerRequest / 100; + while (left > 0) { + int64_t to_request = std::min(kMaxToRequest, left); + rate_limiter_->Request(to_request, Env::IOPriority::IO_LOW, nullptr); + left -= to_request; + } +} + } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE diff --git a/tools/simulated_hybrid_file_system.h b/tools/simulated_hybrid_file_system.h index 40e68575a..a3d16c4e7 100644 --- a/tools/simulated_hybrid_file_system.h +++ b/tools/simulated_hybrid_file_system.h @@ -83,6 +83,8 @@ class SimulatedHybridRaf : public FSRandomAccessFileWrapper { private: std::shared_ptr rate_limiter_; Temperature temperature_; + + void RequestRateLimit(int64_t num_requests) const; }; } // namespace ROCKSDB_NAMESPACE