From 6996de87af39a61a4509edc855379302a6422f80 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Tue, 25 Aug 2015 12:03:54 -0700 Subject: [PATCH] Expose per-level aggregated table properties via GetProperty() Summary: This patch adds "rocksdb.aggregated-table-properties" and "rocksdb.aggregated-table-properties-at-levelN". The former returns the aggregated table properties of a column family, while the latter returns the aggregated table properties of the specified level N. Test Plan: Added tests in db_test Reviewers: igor, sdong, IslamAbdelRahman, anthony Reviewed By: anthony Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D45087 --- HISTORY.md | 3 + db/db_test.cc | 200 +++++++++++++++++++++++++++++ db/internal_stats.cc | 39 ++++++ db/internal_stats.h | 5 + db/version_set.cc | 58 +++++++-- db/version_set.h | 8 ++ include/rocksdb/db.h | 5 + include/rocksdb/table_properties.h | 4 + table/table_properties.cc | 10 ++ 9 files changed, 319 insertions(+), 13 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index ea53e4ea5..61809025f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,9 @@ ## Unreleased +### New Features +* DB::GetProperty() now accept "rocksdb.aggregated-table-properties" and "rocksdb.aggregated-table-properties-at-levelN", in which case it returns aggregated table properties of the target column family, or the aggregated table properties of the specified level N if the "at-level" version is used. + ### Public API Changes * Removed class Env::RandomRWFile and Env::NewRandomRWFile(). * Renamed DBOptions.num_subcompactions to DBOptions.max_subcompactions to make the name better match the actual funcionality of the option. 
diff --git a/db/db_test.cc b/db/db_test.cc index e56b0aca3..e9a17a470 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -468,6 +468,206 @@ TEST_F(DBTest, GetPropertiesOfAllTablesTest) { VerifyTableProperties(db_, 10 + 11 + 12 + 13); } +namespace { +void ResetTableProperties(TableProperties* tp) { + tp->data_size = 0; + tp->index_size = 0; + tp->filter_size = 0; + tp->raw_key_size = 0; + tp->raw_value_size = 0; + tp->num_data_blocks = 0; + tp->num_entries = 0; +} + +void ParseTablePropertiesString(std::string tp_string, TableProperties* tp) { + double dummy_double; + std::replace(tp_string.begin(), tp_string.end(), ';', ' '); + std::replace(tp_string.begin(), tp_string.end(), '=', ' '); + ResetTableProperties(tp); + + sscanf(tp_string.c_str(), "# data blocks %" SCNu64 + " # entries %" SCNu64 + " raw key size %" SCNu64 + " raw average key size %lf " + " raw value size %" SCNu64 + " raw average value size %lf " + " data block size %" SCNu64 + " index block size %" SCNu64 + " filter block size %" SCNu64, + &tp->num_data_blocks, &tp->num_entries, &tp->raw_key_size, + &dummy_double, &tp->raw_value_size, &dummy_double, &tp->data_size, + &tp->index_size, &tp->filter_size); +} + +void VerifySimilar(uint64_t a, uint64_t b, double bias) { + ASSERT_EQ(a == 0U, b == 0U); + if (a == 0) { + return; + } + double dbl_a = static_cast(a); + double dbl_b = static_cast(b); + if (dbl_a > dbl_b) { + ASSERT_LT(static_cast(dbl_a - dbl_b) / (dbl_a + dbl_b), bias); + } else { + ASSERT_LT(static_cast(dbl_b - dbl_a) / (dbl_a + dbl_b), bias); + } +} + +void VerifyTableProperties(const TableProperties& base_tp, + const TableProperties& new_tp, + double filter_size_bias = 0.1, + double index_size_bias = 0.1, + double data_size_bias = 0.1, + double num_data_blocks_bias = 0.05) { + VerifySimilar(base_tp.data_size, new_tp.data_size, data_size_bias); + VerifySimilar(base_tp.index_size, new_tp.index_size, index_size_bias); + VerifySimilar(base_tp.filter_size, new_tp.filter_size, filter_size_bias); + 
VerifySimilar(base_tp.num_data_blocks, new_tp.num_data_blocks, + num_data_blocks_bias); + ASSERT_EQ(base_tp.raw_key_size, new_tp.raw_key_size); + ASSERT_EQ(base_tp.raw_value_size, new_tp.raw_value_size); + ASSERT_EQ(base_tp.num_entries, new_tp.num_entries); +} + +void GetExpectedTableProperties(TableProperties* expected_tp, + const int kKeySize, const int kValueSize, + const int kKeysPerTable, const int kTableCount, + const int kBloomBitsPerKey, + const size_t kBlockSize) { + const int kKeyCount = kTableCount * kKeysPerTable; + const int kAvgSuccessorSize = kKeySize / 2; + const int kEncodingSavePerKey = kKeySize / 4; + expected_tp->raw_key_size = kKeyCount * (kKeySize + 8); + expected_tp->raw_value_size = kKeyCount * kValueSize; + expected_tp->num_entries = kKeyCount; + expected_tp->num_data_blocks = + kTableCount * + (kKeysPerTable * (kKeySize - kEncodingSavePerKey + kValueSize)) / + kBlockSize; + expected_tp->data_size = + kTableCount * (kKeysPerTable * (kKeySize + 8 + kValueSize)); + expected_tp->index_size = + expected_tp->num_data_blocks * (kAvgSuccessorSize + 12); + expected_tp->filter_size = + kTableCount * (kKeysPerTable * kBloomBitsPerKey / 8); +} +} // namespace + +TEST_F(DBTest, AggregatedTableProperties) { + for (int kTableCount = 40; kTableCount <= 100; kTableCount += 30) { + const int kKeysPerTable = 100; + const int kKeySize = 80; + const int kValueSize = 200; + const int kBloomBitsPerKey = 20; + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 8; + options.compression = kNoCompression; + options.create_if_missing = true; + + BlockBasedTableOptions table_options; + table_options.filter_policy.reset( + NewBloomFilterPolicy(kBloomBitsPerKey, false)); + table_options.block_size = 1024; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + DestroyAndReopen(options); + + Random rnd(5632); + for (int table = 1; table <= kTableCount; ++table) { + for (int i = 0; i < kKeysPerTable; ++i) { + 
db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), + RandomString(&rnd, kValueSize)); + } + db_->Flush(FlushOptions()); + } + std::string property; + db_->GetProperty(DB::Properties::kAggregatedTableProperties, &property); + + TableProperties expected_tp; + GetExpectedTableProperties(&expected_tp, kKeySize, kValueSize, + kKeysPerTable, kTableCount, kBloomBitsPerKey, + table_options.block_size); + + TableProperties output_tp; + ParseTablePropertiesString(property, &output_tp); + + VerifyTableProperties(expected_tp, output_tp); + } +} + +TEST_F(DBTest, AggregatedTablePropertiesAtLevel) { + const int kTableCount = 100; + const int kKeysPerTable = 10; + const int kKeySize = 50; + const int kValueSize = 400; + const int kMaxLevel = 7; + const int kBloomBitsPerKey = 20; + Random rnd(301); + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 8; + options.compression = kNoCompression; + options.create_if_missing = true; + options.level0_file_num_compaction_trigger = 2; + options.target_file_size_base = 8192; + options.max_bytes_for_level_base = 10000; + options.max_bytes_for_level_multiplier = 2; + // This ensures there no compaction happening when we call GetProperty(). 
+ options.disable_auto_compactions = true; + + BlockBasedTableOptions table_options; + table_options.filter_policy.reset( + NewBloomFilterPolicy(kBloomBitsPerKey, false)); + table_options.block_size = 1024; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + DestroyAndReopen(options); + + std::string level_tp_strings[kMaxLevel]; + std::string tp_string; + TableProperties level_tps[kMaxLevel]; + TableProperties tp, sum_tp, expected_tp; + for (int table = 1; table <= kTableCount; ++table) { + for (int i = 0; i < kKeysPerTable; ++i) { + db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), + RandomString(&rnd, kValueSize)); + } + db_->Flush(FlushOptions()); + db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ResetTableProperties(&sum_tp); + for (int level = 0; level < kMaxLevel; ++level) { + db_->GetProperty( + DB::Properties::kAggregatedTablePropertiesAtLevel + ToString(level), + &level_tp_strings[level]); + ParseTablePropertiesString(level_tp_strings[level], &level_tps[level]); + sum_tp.data_size += level_tps[level].data_size; + sum_tp.index_size += level_tps[level].index_size; + sum_tp.filter_size += level_tps[level].filter_size; + sum_tp.raw_key_size += level_tps[level].raw_key_size; + sum_tp.raw_value_size += level_tps[level].raw_value_size; + sum_tp.num_data_blocks += level_tps[level].num_data_blocks; + sum_tp.num_entries += level_tps[level].num_entries; + } + db_->GetProperty(DB::Properties::kAggregatedTableProperties, &tp_string); + ParseTablePropertiesString(tp_string, &tp); + ASSERT_EQ(sum_tp.data_size, tp.data_size); + ASSERT_EQ(sum_tp.index_size, tp.index_size); + ASSERT_EQ(sum_tp.filter_size, tp.filter_size); + ASSERT_EQ(sum_tp.raw_key_size, tp.raw_key_size); + ASSERT_EQ(sum_tp.raw_value_size, tp.raw_value_size); + ASSERT_EQ(sum_tp.num_data_blocks, tp.num_data_blocks); + ASSERT_EQ(sum_tp.num_entries, tp.num_entries); + if (table > 3) { + GetExpectedTableProperties(&expected_tp, kKeySize, kValueSize, + kKeysPerTable, 
table, kBloomBitsPerKey, + table_options.block_size); + // Gives larger bias here as index block size, filter block size, + // and data block size become much harder to estimate in this test. + VerifyTableProperties(tp, expected_tp, 0.5, 0.4, 0.4, 0.25); + } + } +} + class CoutingUserTblPropCollector : public TablePropertiesCollector { public: const char* Name() const override { return "CoutingUserTblPropCollector"; } diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 4fc404e5c..10aae13dc 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -13,6 +13,7 @@ #endif #include +#include #include #include #include "db/column_family.h" @@ -125,6 +126,10 @@ static const std::string base_level = "base-level"; static const std::string total_sst_files_size = "total-sst-files-size"; static const std::string estimate_pending_comp_bytes = "estimate-pending-compaction-bytes"; +static const std::string aggregated_table_properties = + "aggregated-table-properties"; +static const std::string aggregated_table_properties_at_level = + aggregated_table_properties + "-at-level"; const std::string DB::Properties::kNumFilesAtLevelPrefix = rocksdb_prefix + num_files_at_level_prefix; @@ -172,6 +177,10 @@ const std::string DB::Properties::kTotalSstFilesSize = rocksdb_prefix + total_sst_files_size; const std::string DB::Properties::kEstimatePendingCompactionBytes = rocksdb_prefix + estimate_pending_comp_bytes; +const std::string DB::Properties::kAggregatedTableProperties = + rocksdb_prefix + aggregated_table_properties; +const std::string DB::Properties::kAggregatedTablePropertiesAtLevel = + rocksdb_prefix + aggregated_table_properties_at_level; DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property, bool* need_out_of_mutex) { @@ -198,6 +207,10 @@ DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property, return kDBStats; } else if (in == sstables) { return kSsTables; + } else if (in == aggregated_table_properties) { + return 
kAggregatedTableProperties; + } else if (in.starts_with(aggregated_table_properties_at_level)) { + return kAggregatedTablePropertiesAtLevel; } *is_int_property = true; @@ -328,6 +341,32 @@ bool InternalStats::GetStringProperty(DBPropertyType property_type, case kSsTables: *value = current->DebugString(); return true; + case kAggregatedTableProperties: { + std::shared_ptr tp; + auto s = cfd_->current()->GetAggregatedTableProperties(&tp); + if (!s.ok()) { + return false; + } + *value = tp->ToString(); + return true; + } + case kAggregatedTablePropertiesAtLevel: { + in.remove_prefix( + DB::Properties::kAggregatedTablePropertiesAtLevel.length()); + uint64_t level; + bool ok = ConsumeDecimalNumber(&in, &level) && in.empty(); + if (!ok || static_cast(level) >= number_levels_) { + return false; + } + std::shared_ptr tp; + auto s = cfd_->current()->GetAggregatedTableProperties( + &tp, static_cast(level)); + if (!s.ok()) { + return false; + } + *value = tp->ToString(); + return true; + } default: return false; } diff --git a/db/internal_stats.h b/db/internal_stats.h index c746f397f..1d5512c94 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -64,6 +64,11 @@ enum DBPropertyType : uint32_t { kTotalSstFilesSize, // Total size of all sst files. kBaseLevel, // The level that L0 data is compacted to kEstimatePendingCompactionBytes, // Estimated bytes to compaction + kAggregatedTableProperties, // Return a string that contains the aggregated + // table properties. + kAggregatedTablePropertiesAtLevel, // Return a string that contains the + // aggregated + // table properties at the specified level. 
}; extern DBPropertyType GetPropertyType(const Slice& property, diff --git a/db/version_set.cc b/db/version_set.cc index 2740e45ea..9e84fa161 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -592,26 +592,58 @@ Status Version::GetTableProperties(std::shared_ptr* tp, } Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) { + Status s; for (int level = 0; level < storage_info_.num_levels_; level++) { - for (const auto& file_meta : storage_info_.files_[level]) { - auto fname = - TableFileName(vset_->db_options_->db_paths, file_meta->fd.GetNumber(), - file_meta->fd.GetPathId()); - // 1. If the table is already present in table cache, load table - // properties from there. - std::shared_ptr table_properties; - Status s = GetTableProperties(&table_properties, file_meta, &fname); - if (s.ok()) { - props->insert({fname, table_properties}); - } else { - return s; - } + s = GetPropertiesOfAllTables(props, level); + if (!s.ok()) { + return s; + } + } + + return Status::OK(); +} + +Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props, + int level) { + for (const auto& file_meta : storage_info_.files_[level]) { + auto fname = + TableFileName(vset_->db_options_->db_paths, file_meta->fd.GetNumber(), + file_meta->fd.GetPathId()); + // 1. If the table is already present in table cache, load table + // properties from there. 
+ std::shared_ptr table_properties; + Status s = GetTableProperties(&table_properties, file_meta, &fname); + if (s.ok()) { + props->insert({fname, table_properties}); + } else { + return s; } } return Status::OK(); } +Status Version::GetAggregatedTableProperties( + std::shared_ptr* tp, int level) { + TablePropertiesCollection props; + Status s; + if (level < 0) { + s = GetPropertiesOfAllTables(&props); + } else { + s = GetPropertiesOfAllTables(&props, level); + } + if (!s.ok()) { + return s; + } + + auto* new_tp = new TableProperties(); + for (const auto& item : props) { + new_tp->Add(*item.second); + } + tp->reset(new_tp); + return Status::OK(); +} + size_t Version::GetMemoryUsageByTableReaders() { size_t total_usage = 0; for (auto& file_level : storage_info_.level_files_brief_) { diff --git a/db/version_set.h b/db/version_set.h index 6702144b0..069342da9 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -462,6 +462,14 @@ class Version { // tables' propertis, represented as shared_ptr. Status GetPropertiesOfAllTables(TablePropertiesCollection* props); + Status GetPropertiesOfAllTables(TablePropertiesCollection* props, int level); + + // REQUIRES: lock is held + // On success, "tp" will contains the aggregated table property amoug + // the table properties of all sst files in this version. 
+ Status GetAggregatedTableProperties( + std::shared_ptr* tp, int level = -1); + uint64_t GetEstimatedActiveKeys() { return storage_info_.GetEstimatedActiveKeys(); } diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 0fb11dee8..0e2ffc43c 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -361,6 +361,8 @@ class DB { static const std::string kEstimateLiveDataSize; static const std::string kTotalSstFilesSize; static const std::string kEstimatePendingCompactionBytes; + static const std::string kAggregatedTableProperties; + static const std::string kAggregatedTablePropertiesAtLevel; }; #endif /* ROCKSDB_LITE */ @@ -394,6 +396,9 @@ class DB { // "rocksdb.total-sst-files-size" // "rocksdb.base-level" // "rocksdb.estimate-pending-compaction-bytes" + // "rocksdb.aggregated-table-properties" + // "rocksdb.aggregated-table-properties-at-levelN", where "N" should be + // replaced by the target level. virtual bool GetIntProperty(ColumnFamilyHandle* column_family, const Slice& property, uint64_t* value) = 0; virtual bool GetIntProperty(const Slice& property, uint64_t* value) { diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 5a4096d01..9b3c8148e 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -61,6 +61,10 @@ struct TableProperties { // @prop_delim: delimiter for each property. std::string ToString(const std::string& prop_delim = "; ", const std::string& kv_delim = "=") const; + + // Aggregate the numerical member variables of the specified + // TableProperties. + void Add(const TableProperties& tp); }; // table properties' human-readable names in the property block. 
diff --git a/table/table_properties.cc b/table/table_properties.cc index 1ee34a671..86c084385 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -74,6 +74,16 @@ std::string TableProperties::ToString( return result; } +void TableProperties::Add(const TableProperties& tp) { + data_size += tp.data_size; + index_size += tp.index_size; + filter_size += tp.filter_size; + raw_key_size += tp.raw_key_size; + raw_value_size += tp.raw_value_size; + num_data_blocks += tp.num_data_blocks; + num_entries += tp.num_entries; +} + const std::string TablePropertiesNames::kDataSize = "rocksdb.data.size"; const std::string TablePropertiesNames::kIndexSize =