From f6784766db165188613f581afd40ba8fb90aa2cd Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 28 Jul 2014 14:50:16 -0700 Subject: [PATCH] Add DB property estimated number of keys Summary: Add a DB property of estimated number of live keys, by adding number of entries of all mem tables and all files, subtracted by all deletions in all files. Test Plan: Add the case in unit tests Reviewers: hobbymanyp, ljin Reviewed By: ljin Subscribers: MarkCallaghan, yoshinorim, leveldb, igor, dhruba Differential Revision: https://reviews.facebook.net/D20631 --- HISTORY.md | 1 + db/db_test.cc | 7 +++++++ db/internal_stats.cc | 11 +++++++++-- db/internal_stats.h | 1 + db/version_set.cc | 18 ++++++++++++++---- db/version_set.h | 5 ++++- 6 files changed, 36 insertions(+), 7 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index d09b6f905..02aa01695 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,6 +10,7 @@ * NewPlainTableFactory instead of bunch of parameters now accepts PlainTableOptions, which is defined in include/rocksdb/table.h * Moved include/utilities/*.h to include/rocksdb/utilities/*.h * Statistics APIs now take uint32_t as type instead of Tickers. Also make two access functions getTickerCount and histogramData const +* Add DB property rocksdb.estimate-num-keys, estimated number of live keys in DB. 
## 3.3.0 (7/10/2014) ### New Features diff --git a/db/db_test.cc b/db/db_test.cc index 89b8a380f..ae5145266 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -2495,11 +2495,14 @@ TEST(DBTest, GetProperty) { ASSERT_EQ(num, "0"); ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); + ASSERT_EQ(num, "1"); perf_context.Reset(); ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value)); ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); ASSERT_EQ(num, "1"); + ASSERT_OK(dbfull()->Delete(writeOpt, "k-non-existing")); ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value)); ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); ASSERT_EQ(num, "2"); @@ -2507,6 +2510,8 @@ TEST(DBTest, GetProperty) { ASSERT_EQ(num, "1"); ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); + ASSERT_EQ(num, "4"); sleeping_task_high.WakeUp(); sleeping_task_high.WaitUntilDone(); @@ -2519,6 +2524,8 @@ TEST(DBTest, GetProperty) { ASSERT_EQ(num, "0"); ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); ASSERT_EQ(num, "1"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); + ASSERT_EQ(num, "4"); sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); } diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 1392d13f1..bc5799245 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -12,8 +12,6 @@ #include #include "db/column_family.h" -#include "db/column_family.h" - namespace rocksdb { namespace { @@ -120,6 +118,8 @@ DBPropertyType GetPropertyType(const Slice& property) { return kNumEntriesInMutableMemtable; } else if (in == "num-entries-imm-mem-tables") { return kNumEntriesInImmutableMemtable; + } else if (in == "estimate-num-keys") { + return kEstimatedNumKeys; } return 
kUnknown; } @@ -207,6 +207,13 @@ bool InternalStats::GetProperty(DBPropertyType property_type, // Current size of the active memtable *value = std::to_string(cfd_->imm()->current()->GetTotalNumEntries()); return true; + case kEstimatedNumKeys: + // Estimate number of entries in the column family: + // Use estimated entries in tables + total entries in memtables. + *value = std::to_string(cfd_->mem()->GetNumEntries() + + cfd_->imm()->current()->GetTotalNumEntries() + + current->GetEstimatedActiveKeys()); + return true; default: return false; } diff --git a/db/internal_stats.h b/db/internal_stats.h index c853a97ad..25ed871b4 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -38,6 +38,7 @@ enum DBPropertyType { // memtable. kNumEntriesInImmutableMemtable, // Return sum of number of entries in all // the immutable mem tables. + kEstimatedNumKeys, // Estimated total number of keys in the database. kUnknown, }; diff --git a/db/version_set.cc b/db/version_set.cc index e9c8d4d61..a8df5f860 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -593,6 +593,14 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) { return Status::OK(); } +uint64_t Version::GetEstimatedActiveKeys() { + // Estimate is inaccurate with (1) merge keys, (2) overwritten keys and + // (3) deletions of non-existing keys; (3) can make deletions exceed live + // entries, so clamp at zero rather than let the unsigned subtraction wrap. + return (num_non_deletions_ > num_deletions_) + ? num_non_deletions_ - num_deletions_ : 0; +} + void Version::AddIterators(const ReadOptions& read_options, const EnvOptions& soptions, std::vector* iters) { @@ -749,9 +757,8 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset, : cfd_(cfd), internal_comparator_((cfd == nullptr) ? nullptr : &cfd->internal_comparator()), - user_comparator_((cfd == nullptr) - ? nullptr - : internal_comparator_->user_comparator()), + user_comparator_( + (cfd == nullptr) ? nullptr : internal_comparator_->user_comparator()), table_cache_((cfd == nullptr) ? nullptr : cfd->table_cache()), merge_operator_((cfd == nullptr) ? 
nullptr : cfd->options()->merge_operator.get()), @@ -777,12 +784,14 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset, total_file_size_(0), total_raw_key_size_(0), total_raw_value_size_(0), - num_non_deletions_(0) { + num_non_deletions_(0), + num_deletions_(0) { if (cfd != nullptr && cfd->current() != nullptr) { total_file_size_ = cfd->current()->total_file_size_; total_raw_key_size_ = cfd->current()->total_raw_key_size_; total_raw_value_size_ = cfd->current()->total_raw_value_size_; num_non_deletions_ = cfd->current()->num_non_deletions_; + num_deletions_ = cfd->current()->num_deletions_; } } @@ -902,6 +911,7 @@ void Version::UpdateTemporaryStats() { total_raw_value_size_ += file_meta->raw_value_size; num_non_deletions_ += file_meta->num_entries - file_meta->num_deletions; + num_deletions_ += file_meta->num_deletions; init_count++; } total_count++; diff --git a/db/version_set.h b/db/version_set.h index c73a1d60f..89fd2bd49 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -232,6 +232,8 @@ class Version { // tables' propertis, represented as shared_ptr. Status GetPropertiesOfAllTables(TablePropertiesCollection* props); + uint64_t GetEstimatedActiveKeys(); + // used to sort files by size struct Fsize { int index; @@ -336,7 +338,8 @@ class Version { uint64_t total_raw_value_size_; // total number of non-deletion entries uint64_t num_non_deletions_; - + // total number of deletion entries + uint64_t num_deletions_; ~Version();