From c55460c73485e2e57edcced982d2383c137b1273 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Sun, 15 Aug 2021 14:16:43 -0700 Subject: [PATCH] Add property `LiveSstFilesSizeAtTemperature` for tiered storage (#8644) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/8644 Reviewed By: siying, zhichao-cao Differential Revision: D30236535 Pulled By: jay-zhuang fbshipit-source-id: 1758d1c46d83a5087560fb63d53a016bf999da81 --- HISTORY.md | 1 + db/db_test2.cc | 81 ++++++++++++++++++++++++++++++++++++++++++++ db/internal_stats.cc | 29 ++++++++++++++++ db/internal_stats.h | 1 + include/rocksdb/db.h | 4 +++ 5 files changed, 116 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index e68d6dc1f..8fe057683 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -16,6 +16,7 @@ * Add CompactionOptionsFIFO.age_for_warm, which allows RocksDB to move old files to warm tier in FIFO compactions. Note that file temperature is still an experimental feature. * Add a comment to suggest btrfs user to disable file preallocation by setting `options.allow_fallocate=false`. * Fast forward option in Trace replay changed to double type to allow replaying at a lower speed, by settings the value between 0 and 1. This option can be set via `ReplayOptions` in `Replayer::Replay()`, or via `--trace_replay_fast_forward` in db_bench. +* Add property `LiveSstFilesSizeAtTemperature` to retrieve the total size of SST files at a given temperature. ## Public API change * Added APIs to decode and replay trace file via Replayer class. Added `DB::NewDefaultReplayer()` to create a default Replayer instance. Added `TraceReader::Reset()` to restart reading a trace file. Created trace_record.h and utilities/replayer.h files to access decoded Trace records and replay them. 
diff --git a/db/db_test2.cc b/db/db_test2.cc index d55f4a449..a7f82db1b 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -27,6 +27,19 @@ namespace ROCKSDB_NAMESPACE { class DBTest2 : public DBTestBase { public: DBTest2() : DBTestBase("db_test2", /*env_do_fsync=*/true) {} + + protected: +#ifndef ROCKSDB_LITE + // Returns the property value for `temperature`, parsed as unsigned 64-bit. + uint64_t GetSstSizeHelper(Temperature temperature) { + std::string prop; + EXPECT_TRUE(dbfull()->GetProperty( + DB::Properties::kLiveSstFilesSizeAtTemperature + + ToString(static_cast<uint64_t>(temperature)), + &prop)); // still checked when assert() is compiled out + return std::strtoull(prop.c_str(), nullptr, 10); // atoi is limited to int + } +#endif // ROCKSDB_LITE }; #ifndef ROCKSDB_LITE @@ -6127,6 +6140,13 @@ TEST_F(DBTest2, BottommostTemperature) { options.level0_file_num_compaction_trigger = 2; Reopen(options); + auto size = GetSstSizeHelper(Temperature::kUnknown); + ASSERT_EQ(size, 0); + size = GetSstSizeHelper(Temperature::kWarm); + ASSERT_EQ(size, 0); + size = GetSstSizeHelper(Temperature::kHot); + ASSERT_EQ(size, 0); + ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("bar", "bar")); ASSERT_OK(Flush()); @@ -6139,6 +6159,10 @@ TEST_F(DBTest2, BottommostTemperature) { db_->GetColumnFamilyMetaData(&metadata); ASSERT_EQ(1, metadata.file_count); ASSERT_EQ(Temperature::kWarm, metadata.levels[1].files[0].temperature); + size = GetSstSizeHelper(Temperature::kUnknown); + ASSERT_EQ(size, 0); + size = GetSstSizeHelper(Temperature::kWarm); + ASSERT_GT(size, 0); // non-bottommost file still has unknown temperature ASSERT_OK(Put("foo", "bar")); @@ -6147,6 +6171,10 @@ TEST_F(DBTest2, BottommostTemperature) { db_->GetColumnFamilyMetaData(&metadata); ASSERT_EQ(2, metadata.file_count); ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); + size = GetSstSizeHelper(Temperature::kUnknown); + ASSERT_GT(size, 0); + size = GetSstSizeHelper(Temperature::kWarm); + ASSERT_GT(size, 0); // reopen and check the information is persisted Reopen(options); @@ -6154,6 +6182,21 @@ TEST_F(DBTest2, BottommostTemperature) { ASSERT_EQ(2, 
metadata.file_count); ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); ASSERT_EQ(Temperature::kWarm, metadata.levels[1].files[0].temperature); + size = GetSstSizeHelper(Temperature::kUnknown); + ASSERT_GT(size, 0); + size = GetSstSizeHelper(Temperature::kWarm); + ASSERT_GT(size, 0); + + // check other non-exist temperatures + size = GetSstSizeHelper(Temperature::kHot); + ASSERT_EQ(size, 0); + size = GetSstSizeHelper(Temperature::kCold); + ASSERT_EQ(size, 0); + std::string prop; + ASSERT_TRUE(dbfull()->GetProperty( + DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22), + &prop)); + ASSERT_EQ(std::atoi(prop.c_str()), 0); } TEST_F(DBTest2, BottommostTemperatureUniversal) { @@ -6167,6 +6210,13 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) { DestroyAndReopen(options); + auto size = GetSstSizeHelper(Temperature::kUnknown); + ASSERT_EQ(size, 0); + size = GetSstSizeHelper(Temperature::kWarm); + ASSERT_EQ(size, 0); + size = GetSstSizeHelper(Temperature::kHot); + ASSERT_EQ(size, 0); + for (int i = 0; i < kTriggerNum; i++) { ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("bar", "bar")); @@ -6179,6 +6229,10 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) { ASSERT_EQ(1, metadata.file_count); ASSERT_EQ(Temperature::kUnknown, metadata.levels[kBottommostLevel].files[0].temperature); + size = GetSstSizeHelper(Temperature::kUnknown); + ASSERT_GT(size, 0); + size = GetSstSizeHelper(Temperature::kWarm); + ASSERT_EQ(size, 0); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("bar", "bar")); @@ -6187,6 +6241,10 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) { db_->GetColumnFamilyMetaData(&metadata); ASSERT_EQ(2, metadata.file_count); ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); + size = GetSstSizeHelper(Temperature::kUnknown); + ASSERT_GT(size, 0); + size = GetSstSizeHelper(Temperature::kWarm); + ASSERT_EQ(size, 0); // Update bottommost temperature options.bottommost_temperature = Temperature::kWarm; @@ -6195,6 
+6253,10 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) { // Should not impact existing ones ASSERT_EQ(Temperature::kUnknown, metadata.levels[kBottommostLevel].files[0].temperature); + size = GetSstSizeHelper(Temperature::kUnknown); + ASSERT_GT(size, 0); + size = GetSstSizeHelper(Temperature::kWarm); + ASSERT_EQ(size, 0); // new generated file should have the new settings ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); @@ -6202,6 +6264,10 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) { ASSERT_EQ(1, metadata.file_count); ASSERT_EQ(Temperature::kWarm, metadata.levels[kBottommostLevel].files[0].temperature); + size = GetSstSizeHelper(Temperature::kUnknown); + ASSERT_EQ(size, 0); + size = GetSstSizeHelper(Temperature::kWarm); + ASSERT_GT(size, 0); // non-bottommost file still has unknown temperature ASSERT_OK(Put("foo", "bar")); @@ -6211,6 +6277,21 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) { db_->GetColumnFamilyMetaData(&metadata); ASSERT_EQ(2, metadata.file_count); ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); + size = GetSstSizeHelper(Temperature::kUnknown); + ASSERT_GT(size, 0); + size = GetSstSizeHelper(Temperature::kWarm); + ASSERT_GT(size, 0); + + // check other non-exist temperatures + size = GetSstSizeHelper(Temperature::kHot); + ASSERT_EQ(size, 0); + size = GetSstSizeHelper(Temperature::kCold); + ASSERT_EQ(size, 0); + std::string prop; + ASSERT_TRUE(dbfull()->GetProperty( + DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22), + &prop)); + ASSERT_EQ(std::atoi(prop.c_str()), 0); } #endif // ROCKSDB_LITE diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 2fe2e644a..a91883975 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -262,6 +262,8 @@ static const std::string min_obsolete_sst_number_to_keep_str = static const std::string base_level_str = "base-level"; static const std::string total_sst_files_size = "total-sst-files-size"; static const std::string 
live_sst_files_size = "live-sst-files-size"; +static const std::string live_sst_files_size_at_temperature = + "live-sst-files-size-at-temperature"; static const std::string estimate_pending_comp_bytes = "estimate-pending-compaction-bytes"; static const std::string aggregated_table_properties = @@ -369,6 +371,8 @@ const std::string DB::Properties::kBlockCachePinnedUsage = rocksdb_prefix + block_cache_pinned_usage; const std::string DB::Properties::kOptionsStatistics = rocksdb_prefix + options_statistics; +const std::string DB::Properties::kLiveSstFilesSizeAtTemperature = + rocksdb_prefix + live_sst_files_size_at_temperature; const std::unordered_map InternalStats::ppt_name_to_info = { @@ -482,6 +486,9 @@ const std::unordered_map {DB::Properties::kLiveSstFilesSize, {false, nullptr, &InternalStats::HandleLiveSstFilesSize, nullptr, nullptr}}, + {DB::Properties::kLiveSstFilesSizeAtTemperature, + {true, &InternalStats::HandleLiveSstFilesSizeAtTemperature, nullptr, + nullptr, nullptr}}, {DB::Properties::kEstimatePendingCompactionBytes, {false, nullptr, &InternalStats::HandleEstimatePendingCompactionBytes, nullptr, nullptr}}, @@ -682,6 +689,28 @@ bool InternalStats::HandleBlockCacheEntryStatsMap( return true; } +// Sums the sizes of current-version SST files whose temperature is `suffix`. +// NOTE(review): registered with first DBPropertyInfo field `true`, unlike +// kLiveSstFilesSize (`false`); confirm reading cfd_->current() is safe. +bool InternalStats::HandleLiveSstFilesSizeAtTemperature(std::string* value, + Slice suffix) { + uint64_t temperature = 0; + if (!ConsumeDecimalNumber(&suffix, &temperature) || !suffix.empty()) { + return false; // suffix must be exactly one decimal number + } + uint64_t size = 0; + const auto* vstorage = cfd_->current()->storage_info(); + for (int level = 0; level < vstorage->num_levels(); level++) { + for (const auto& file_meta : vstorage->LevelFiles(level)) { + if (static_cast<uint64_t>(file_meta->temperature) == temperature) { + size += file_meta->fd.GetFileSize(); + } + } + } + *value = ToString(size); + return true; +} + const DBPropertyInfo* GetPropertyInfo(const Slice& property) { std::string ppt_name = GetPropertyNameAndArg(property).first.ToString(); auto ppt_info_iter = 
InternalStats::ppt_name_to_info.find(ppt_name); diff --git a/db/internal_stats.h b/db/internal_stats.h index e3a869a63..b4cc6d8bd 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -675,6 +675,7 @@ class InternalStats { bool HandleBlockCacheEntryStats(std::string* value, Slice suffix); bool HandleBlockCacheEntryStatsMap(std::map* values, Slice suffix); + bool HandleLiveSstFilesSizeAtTemperature(std::string* value, Slice suffix); // Total number of background errors encountered. Every time a flush task // or compaction task fails, this counter is incremented. The failure can // be caused by any possible reason, including file system errors, out of diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 1f715b3cf..264ff06f4 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -914,6 +914,10 @@ class DB { // files belong to the latest LSM tree. static const std::string kLiveSstFilesSize; + // "rocksdb.live-sst-files-size-at-temperature" - returns total size (bytes) + // of SST files whose temperature equals the number appended to the name. + static const std::string kLiveSstFilesSizeAtTemperature; + // "rocksdb.base-level" - returns number of level to which L0 data will be // compacted. static const std::string kBaseLevel;