From cc87075d63ad99db1219240d5ff41d7cf01debc9 Mon Sep 17 00:00:00 2001 From: Aaron Gao Date: Fri, 1 Apr 2016 16:19:12 -0700 Subject: [PATCH] No need to limit to 20 files in UpdateAccumulatedStats() if options.max_open_files=-1 Summary: There is a hardcoded constraint in our statistics collection that prevents reading properties from more than 20 SST files. This means our statistics will be very inaccurate for databases with > 20 files since additional files are just ignored. The purpose of constraining the number of files used is to bound the I/O performed during statistics collection, since these statistics need to be recomputed every time the database reopened. However, this constraint doesn't take into account the case where option "max_open_files" is -1. In that case, all the file metadata has already been read, so MaybeInitializeFileMetaData() won't incur any I/O cost. so this diff gets rid of the 20-file constraint in case max_open_files == -1. Test Plan: write into unit test db/db_properties_test.cc - "ValidateSampleNumber". We generate 20 files with 2 rows and 10 files with 1 row. If max_open_files !=-1, the `rocksdb.estimate-num-keys` should be (10*1 + 10*2)/20 * 30 = 45. Otherwise, it should be the ground truth, 50. {F1089153} Reviewers: andrewkr Reviewed By: andrewkr Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D56253 --- db/db_properties_test.cc | 29 +++++++++++++++++++++++++++++ db/version_set.cc | 6 ++++++ 2 files changed, 35 insertions(+) diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index 60e04cfad..262987a56 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -253,6 +253,35 @@ TEST_F(DBPropertiesTest, ValidatePropertyInfo) { } } +TEST_F(DBPropertiesTest, ValidateSampleNumber) { + // When "max_open_files" is -1, we read all the files for + // "rocksdb.estimate-num-keys" computation, which is the ground truth. + // Otherwise, we sample 20 newest files to make an estimation. + // Formula: lastest_20_files_active_key_ratio * total_files + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + options.level0_stop_writes_trigger = 1000; + DestroyAndReopen(options); + int key = 0; + for (int files = 20; files >= 10; files -= 10) { + for (int i = 0; i < files; i++) { + int rows = files / 10; + for (int j = 0; j < rows; j++) { + db_->Put(WriteOptions(), std::to_string(++key), "foo"); + } + db_->Flush(FlushOptions()); + } + } + std::string num; + Reopen(options); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); + ASSERT_EQ("45", num); + options.max_open_files = -1; + Reopen(options); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); + ASSERT_EQ("50", num); +} + TEST_F(DBPropertiesTest, AggregatedTableProperties) { for (int kTableCount = 40; kTableCount <= 100; kTableCount += 30) { const int kKeysPerTable = 100; diff --git a/db/version_set.cc b/db/version_set.cc index f1dbb94e0..27b326ec5 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1079,6 +1079,12 @@ void Version::UpdateAccumulatedStats(bool update_stats) { if (MaybeInitializeFileMetaData(file_meta)) { // each FileMeta will be initialized only once. storage_info_.UpdateAccumulatedStats(file_meta); + // when option "max_open_files" is -1, all the file metadata has + // already been read, so MaybeInitializeFileMetaData() won't incur + // any I/O cost. + if (vset_->db_options_->max_open_files == -1) { + continue; + } if (++init_count >= kMaxInitCount) { break; }