diff --git a/HISTORY.md b/HISTORY.md index 5326ba0e0..475dc2919 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,7 @@ ## Unreleased ### Public API Change * Delete deprecated classes for creating backups (BackupableDB) and restoring from backups (RestoreBackupableDB). Now, BackupEngine should be used for creating backups, and BackupEngineReadOnly should be used for restorations. For more details, see https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F +* Expose estimate of per-level compression ratio via DB property: "rocksdb.compression-ratio-at-levelN". ## 4.7.0 (4/8/2016) ### Public API Change diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index 262987a56..ad1afc661 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -19,6 +19,7 @@ #include "rocksdb/perf_level.h" #include "rocksdb/table.h" #include "util/random.h" +#include "util/string_util.h" namespace rocksdb { @@ -898,6 +899,48 @@ TEST_F(DBPropertiesTest, EstimatePendingCompBytes) { "rocksdb.estimate-pending-compaction-bytes", &int_num)); ASSERT_EQ(int_num, 0U); } + +TEST_F(DBPropertiesTest, EstimateCompressionRatio) { + if (!Snappy_Supported()) { + return; + } + const int kNumL0Files = 3; + const int kNumEntriesPerFile = 1000; + + Options options = CurrentOptions(); + options.compression_per_level = {kNoCompression, kSnappyCompression}; + options.disable_auto_compactions = true; + options.max_background_flushes = 0; + options.num_levels = 2; + Reopen(options); + + // compression ratio is -1.0 when no open files at level + ASSERT_EQ(CompressionRatioAtLevel(0), -1.0); + + const std::string kVal(100, 'a'); + for (int i = 0; i < kNumL0Files; ++i) { + for (int j = 0; j < kNumEntriesPerFile; ++j) { + // Put common data ("key") at end to prevent delta encoding from + // compressing the key effectively + std::string key = ToString(i) + ToString(j) + "key"; + ASSERT_OK(dbfull()->Put(WriteOptions(), key, kVal)); + } + Flush(); + } + + // no compression at L0, so ratio is 
less than one + ASSERT_LT(CompressionRatioAtLevel(0), 1.0); + ASSERT_GT(CompressionRatioAtLevel(0), 0.0); + ASSERT_EQ(CompressionRatioAtLevel(1), -1.0); + + dbfull()->TEST_CompactRange(0, nullptr, nullptr); + + ASSERT_EQ(CompressionRatioAtLevel(0), -1.0); + // Data at L1 should be highly compressed thanks to Snappy and redundant data + // in values (ratio is 12.846 as of 4/19/2016). + ASSERT_GT(CompressionRatioAtLevel(1), 10.0); +} + #endif // ROCKSDB_LITE class CountingUserTblPropCollector : public TablePropertiesCollector { diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 7e2adaa78..41da85f34 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -714,6 +714,22 @@ int DBTestBase::NumTableFilesAtLevel(int level, int cf) { return atoi(property.c_str()); } +double DBTestBase::CompressionRatioAtLevel(int level, int cf) { + std::string property; + if (cf == 0) { + // default cfd + EXPECT_TRUE(db_->GetProperty( + "rocksdb.compression-ratio-at-level" + NumberToString(level), + &property)); + } else { + EXPECT_TRUE(db_->GetProperty( + handles_[cf], + "rocksdb.compression-ratio-at-level" + NumberToString(level), + &property)); + } + return std::stod(property); +} + int DBTestBase::TotalTableFiles(int cf, int levels) { if (levels == -1) { levels = CurrentOptions().num_levels; diff --git a/db/db_test_util.h b/db/db_test_util.h index 0ece8a766..dffcc9037 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -700,6 +700,8 @@ class DBTestBase : public testing::Test { int NumTableFilesAtLevel(int level, int cf = 0); + double CompressionRatioAtLevel(int level, int cf = 0); + int TotalTableFiles(int cf = 0, int levels = -1); // Return spread of files per level diff --git a/db/internal_stats.cc b/db/internal_stats.cc index a554f0b85..453cb813a 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -101,6 +101,8 @@ std::pair GetPropertyNameAndArg(const Slice& property) { static const std::string rocksdb_prefix = "rocksdb."; static const std::string 
num_files_at_level_prefix = "num-files-at-level"; +static const std::string compression_ratio_at_level_prefix = + "compression-ratio-at-level"; static const std::string allstats = "stats"; static const std::string sstables = "sstables"; static const std::string cfstats = "cfstats"; @@ -148,6 +150,8 @@ static const std::string num_running_flushes = "num-running-flushes"; const std::string DB::Properties::kNumFilesAtLevelPrefix = rocksdb_prefix + num_files_at_level_prefix; +const std::string DB::Properties::kCompressionRatioAtLevelPrefix = + rocksdb_prefix + compression_ratio_at_level_prefix; const std::string DB::Properties::kStats = rocksdb_prefix + allstats; const std::string DB::Properties::kSSTables = rocksdb_prefix + sstables; const std::string DB::Properties::kCFStats = rocksdb_prefix + cfstats; @@ -211,6 +215,8 @@ const std::unordered_map InternalStats::ppt_name_to_info = { {DB::Properties::kNumFilesAtLevelPrefix, {false, &InternalStats::HandleNumFilesAtLevel, nullptr}}, + {DB::Properties::kCompressionRatioAtLevelPrefix, + {false, &InternalStats::HandleCompressionRatioAtLevelPrefix, nullptr}}, {DB::Properties::kLevelStats, {false, &InternalStats::HandleLevelStats, nullptr}}, {DB::Properties::kStats, {false, &InternalStats::HandleStats, nullptr}}, @@ -324,6 +330,19 @@ bool InternalStats::HandleNumFilesAtLevel(std::string* value, Slice suffix) { } } +bool InternalStats::HandleCompressionRatioAtLevelPrefix(std::string* value, + Slice suffix) { + uint64_t level; + const auto* vstorage = cfd_->current()->storage_info(); + bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty(); + if (!ok || level >= static_cast<uint64_t>(number_levels_)) { + return false; + } + *value = ToString( + vstorage->GetEstimatedCompressionRatioAtLevel(static_cast<int>(level))); + return true; +} + bool InternalStats::HandleLevelStats(std::string* value, Slice suffix) { char buf[1000]; const auto* vstorage = cfd_->current()->storage_info(); diff --git a/db/internal_stats.h b/db/internal_stats.h 
index 03b2bd882..9aad82e7d 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -294,6 +294,7 @@ class InternalStats { // Handler functions for getting property values. They use "value" as a value- // result argument, and return true upon successfully setting "value". bool HandleNumFilesAtLevel(std::string* value, Slice suffix); + bool HandleCompressionRatioAtLevelPrefix(std::string* value, Slice suffix); bool HandleLevelStats(std::string* value, Slice suffix); bool HandleStats(std::string* value, Slice suffix); bool HandleCFStats(std::string* value, Slice suffix); diff --git a/db/version_set.cc b/db/version_set.cc index 4fb2f75ef..3ea22cf79 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -779,6 +779,21 @@ uint64_t VersionStorageInfo::GetEstimatedActiveKeys() const { } } +double VersionStorageInfo::GetEstimatedCompressionRatioAtLevel( + int level) const { + assert(level < num_levels_); + uint64_t sum_file_size_bytes = 0; + uint64_t sum_data_size_bytes = 0; + for (auto* file_meta : files_[level]) { + sum_file_size_bytes += file_meta->fd.GetFileSize(); + sum_data_size_bytes += file_meta->raw_key_size + file_meta->raw_value_size; + } + if (sum_file_size_bytes == 0) { + return -1.0; + } + return static_cast<double>(sum_data_size_bytes) / sum_file_size_bytes; +} + void Version::AddIterators(const ReadOptions& read_options, const EnvOptions& soptions, MergeIteratorBuilder* merge_iter_builder) { diff --git a/db/version_set.h b/db/version_set.h index 5aa7d56fa..b30b37400 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -301,6 +301,8 @@ class VersionStorageInfo { uint64_t GetEstimatedActiveKeys() const; + double GetEstimatedCompressionRatioAtLevel(int level) const; + // re-initializes the index that is used to offset into // files_by_compaction_pri_ // to find the next compaction candidate file. 
diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index d05c04ea1..4b9ff8fc2 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -338,6 +338,13 @@ class DB { // level number (e.g., "0"). static const std::string kNumFilesAtLevelPrefix; + // "rocksdb.compression-ratio-at-level<N>" - returns string containing the + // compression ratio of data at level <N>, where <N> is an ASCII + // representation of a level number (e.g., "0"). Here, compression + // ratio is defined as uncompressed data size / compressed file size. + // Returns "-1.0" if no open files at level <N>. + static const std::string kCompressionRatioAtLevelPrefix; + // "rocksdb.stats" - returns a multi-line string containing the data // described by kCFStats followed by the data described by kDBStats. static const std::string kStats;