From 06aebca59235ca086fd275778aa0e255625df0f4 Mon Sep 17 00:00:00 2001
From: Andres Notzli
Date: Tue, 21 Jul 2015 21:33:20 -0700
Subject: [PATCH] Report live data size estimate

Summary:
Fixes T6548822. Added a new function for estimating the size of the live data
as proposed in the task. The value can be accessed through the property
rocksdb.estimate-live-data-size.

Test Plan:
There are two unit tests in version_set_test and a simple test in db_test.
make version_set_test && ./version_set_test;
make db_test && ./db_test --gtest_filter=GetProperty

Reviewers: rven, igor, yhchiang, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D41493
---
 db/db_test.cc          |  3 +++
 db/internal_stats.cc   | 28 ++++++++++++++++++++--------
 db/internal_stats.h    |  1 +
 db/version_set.cc      | 34 ++++++++++++++++++++++++++++++++++
 db/version_set.h       |  3 +++
 db/version_set_test.cc | 23 +++++++++++++++++++++++
 include/rocksdb/db.h   |  4 ++++
 7 files changed, 88 insertions(+), 8 deletions(-)

diff --git a/db/db_test.cc b/db/db_test.cc
index f9e160ce7..1b5707dd0 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -2098,6 +2098,9 @@ TEST_F(DBTest, GetProperty) {
   ASSERT_TRUE(
       dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
   ASSERT_EQ(int_num, 0U);
+  ASSERT_TRUE(
+      dbfull()->GetIntProperty("rocksdb.estimate-live-data-size", &int_num));
+  ASSERT_EQ(int_num, 0U);
 
   ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value));
   ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
diff --git a/db/internal_stats.cc b/db/internal_stats.cc
index 319232c41..68da0ed72 100644
--- a/db/internal_stats.cc
+++ b/db/internal_stats.cc
@@ -118,6 +118,7 @@ static const std::string is_file_deletions_enabled =
 static const std::string num_snapshots = "num-snapshots";
 static const std::string oldest_snapshot_time = "oldest-snapshot-time";
 static const std::string num_live_versions = "num-live-versions";
+static const std::string estimate_live_data_size = "estimate-live-data-size";
 static const std::string base_level = "base-level";
 
 const std::string DB::Properties::kNumFilesAtLevelPrefix =
@@ -158,6 +159,8 @@ const std::string DB::Properties::kOldestSnapshotTime =
     rocksdb_prefix + oldest_snapshot_time;
 const std::string DB::Properties::kNumLiveVersions =
     rocksdb_prefix + num_live_versions;
+const std::string DB::Properties::kEstimateLiveDataSize =
+    rocksdb_prefix + estimate_live_data_size;
 
 DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property,
                                bool* need_out_of_mutex) {
@@ -222,6 +225,9 @@ DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property,
     return kOldestSnapshotTime;
   } else if (in == num_live_versions) {
     return kNumLiveVersions;
+  } else if (in == estimate_live_data_size) {
+    *need_out_of_mutex = true;
+    return kEstimateLiveDataSize;
   } else if (in == base_level) {
     return kBaseLevel;
   }
@@ -232,15 +238,21 @@ bool InternalStats::GetIntPropertyOutOfMutex(DBPropertyType property_type,
                                              Version* version,
                                              uint64_t* value) const {
   assert(value != nullptr);
-  if (property_type != kEstimatedUsageByTableReaders) {
-    return false;
-  }
-  if (version == nullptr) {
-    *value = 0;
-  } else {
-    *value = version->GetMemoryUsageByTableReaders();
+  // Answer from the |version| handed in by the caller, not cfd_->current().
+  // This path serves properties flagged need_out_of_mutex, so (presumably) the
+  // DB mutex is not held here. A null |version| yields 0 for every property.
+  switch (property_type) {
+    case kEstimatedUsageByTableReaders:
+      *value = (version == nullptr) ?
+        0 : version->GetMemoryUsageByTableReaders();
+      return true;
+    case kEstimateLiveDataSize:
+      *value = (version == nullptr) ?
+        0 : version->storage_info()->EstimateLiveDataSize();
+      return true;
+    default:
+      return false;
   }
-  return true;
 }
 
 bool InternalStats::GetStringProperty(DBPropertyType property_type,
diff --git a/db/internal_stats.h b/db/internal_stats.h
index 392b5ddae..c91bb2523 100644
--- a/db/internal_stats.h
+++ b/db/internal_stats.h
@@ -58,6 +58,7 @@ enum DBPropertyType : uint32_t {
   kNumSnapshots,          // Number of snapshots in the system
   kOldestSnapshotTime,    // Unix timestamp of the first snapshot
   kNumLiveVersions,
+  kEstimateLiveDataSize,  // Estimated amount of live data in bytes
   kBaseLevel,  // The level that L0 data is compacted to
 };
 
diff --git a/db/version_set.cc b/db/version_set.cc
index c3f090ba1..29599170c 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -1700,6 +1700,40 @@ void VersionStorageInfo::CalculateBaseBytes(const ImmutableCFOptions& ioptions,
   }
 }
 
+uint64_t VersionStorageInfo::EstimateLiveDataSize() const {
+  // Estimate the live data size by adding up the size of the last level for all
+  // key ranges. Note: Estimate depends on the ordering of files in level 0
+  // because files in level 0 can be overlapping.
+  uint64_t size = 0;
+
+  auto ikey_lt = [this](InternalKey* x, InternalKey* y) {
+    return internal_comparator_->Compare(*x, *y) < 0;
+  };
+  // (Ordered) map of largest keys in non-overlapping files
+  std::map<InternalKey*, FileMetaData*, decltype(ikey_lt)> ranges(ikey_lt);
+
+  for (int l = num_levels_ - 1; l >= 0; l--) {
+    bool found_end = false;
+    for (auto file : files_[l]) {
+      // Find the first file where the largest key is larger than the smallest
+      // key of the current file. If this file does not overlap with the
+      // current file, none of the files in the map does. If there is
+      // no potential overlap, we can safely insert the rest of this level
+      // (if the level is not 0) into the map without checking again because
+      // the elements in the level are sorted and non-overlapping.
+      auto lb = (found_end && l != 0) ?
+        ranges.end() : ranges.lower_bound(&file->smallest);
+      found_end = (lb == ranges.end());
+      if (found_end || internal_comparator_->Compare(
+            file->largest, (*lb).second->smallest) < 0) {
+        ranges.emplace_hint(lb, &file->largest, file);
+        size += file->fd.file_size;
+      }
+    }
+  }
+  return size;
+}
+
 void Version::AddLiveFiles(std::vector<FileDescriptor>* live) {
   for (int level = 0; level < storage_info_.num_levels(); level++) {
     const std::vector<FileMetaData*>& files = storage_info_.files_[level];
diff --git a/db/version_set.h b/db/version_set.h
index 3d68de267..425925a05 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -312,6 +312,9 @@ class VersionStorageInfo {
   void CalculateBaseBytes(const ImmutableCFOptions& ioptions,
                           const MutableCFOptions& options);
 
+  // Returns an estimate of the amount of live data in bytes.
+  uint64_t EstimateLiveDataSize() const;
+
  private:
   const InternalKeyComparator* internal_comparator_;
   const Comparator* user_comparator_;
diff --git a/db/version_set_test.cc b/db/version_set_test.cc
index 202bb1cfc..6e513828b 100644
--- a/db/version_set_test.cc
+++ b/db/version_set_test.cc
@@ -234,6 +234,29 @@ TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicLargeLevel) {
   ASSERT_EQ(0, logger_->log_count);
 }
 
+TEST_F(VersionStorageInfoTest, EstimateLiveDataSize) {
+  // Test whether the overlaps are detected as expected
+  Add(1, 1U, "4", "7", 1U);  // Perfect overlap with last level
+  Add(2, 2U, "3", "5", 1U);  // Partial overlap with last level
+  Add(2, 3U, "6", "8", 1U);  // Partial overlap with last level
+  Add(3, 4U, "1", "9", 1U);  // Contains range of last level
+  Add(4, 5U, "4", "5", 1U);  // Inside range of last level
+  Add(4, 5U, "6", "7", 1U);  // Inside range of last level
+  Add(5, 6U, "4", "7", 10U);
+  ASSERT_EQ(10U, vstorage_.EstimateLiveDataSize());
+}
+
+TEST_F(VersionStorageInfoTest, EstimateLiveDataSize2) {
+  Add(0, 1U, "9", "9", 1U);  // Level 0 is not ordered
+  Add(0, 1U, "5", "6", 1U);  // Ignored because of [5,6] in l1
+  Add(1, 1U, "1", "2", 1U);  // Ignored because of [2,3] in l2
+  Add(1, 2U, "3", "4", 1U);  // Ignored because of [2,3] in l2
+  Add(1, 3U, "5", "6", 1U);
+  Add(2, 4U, "2", "3", 1U);
+  Add(3, 5U, "7", "8", 1U);
+  ASSERT_EQ(4U, vstorage_.EstimateLiveDataSize());
+}
+
 class FindLevelFileTest : public testing::Test {
  public:
   LevelFilesBrief file_level_;
diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h
index d11e95318..5ad1a390f 100644
--- a/include/rocksdb/db.h
+++ b/include/rocksdb/db.h
@@ -338,6 +338,8 @@ class DB {
   //      See version_set.h for details. More live versions often mean more SST
   //      files are held from being deleted, by iterators or unfinished
   //      compactions.
+  //  "rocksdb.estimate-live-data-size" - returns an estimate of the amount of
+  //      live data in bytes.
 #ifndef ROCKSDB_LITE
   struct Properties {
     static const std::string kNumFilesAtLevelPrefix;
@@ -361,6 +363,7 @@ class DB {
     static const std::string kNumSnapshots;
     static const std::string kOldestSnapshotTime;
     static const std::string kNumLiveVersions;
+    static const std::string kEstimateLiveDataSize;
   };
 #endif /* ROCKSDB_LITE */
 
@@ -389,6 +392,7 @@ class DB {
   //  "rocksdb.num-snapshots"
   //  "rocksdb.oldest-snapshot-time"
   //  "rocksdb.num-live-versions"
+  //  "rocksdb.estimate-live-data-size"
   virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
                               const Slice& property, uint64_t* value) = 0;
   virtual bool GetIntProperty(const Slice& property, uint64_t* value) {