From 983c93d731290687a9dcfe0d48bea91708b2a035 Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 13 Jun 2014 15:06:10 -0700 Subject: [PATCH] VersionSet::Get(): Bring back the logic of skipping key range check when there are <=3 level 0 files Summary: https://reviews.facebook.net/D17205 removed the logic of skipping file key range check when there are 3 or fewer level 0 files. This patch brings it back. Other than that, add another small optimization to avoid checking all the levels if most higher levels don't have any files. Test Plan: make all check Reviewers: ljin Reviewed By: ljin Subscribers: yhchiang, igor, haobo, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D19035 --- db/version_set.cc | 97 ++++++++++++++++++++++++++++++----------------- db/version_set.h | 11 +++++- 2 files changed, 73 insertions(+), 35 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index aee19dd1d..b9243196c 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -498,12 +498,13 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset, info_log_((cfd == nullptr) ? nullptr : cfd->options()->info_log.get()), db_statistics_((cfd == nullptr) ? nullptr : cfd->options()->statistics.get()), + // cfd is nullptr if Version is dummy + num_levels_(cfd == nullptr ? 0 : cfd->NumberLevels()), + num_non_empty_levels_(num_levels_), vset_(vset), next_(this), prev_(this), refs_(0), - // cfd is nullptr if Version is dummy - num_levels_(cfd == nullptr ?
0 : cfd->NumberLevels()), files_(new std::vector<FileMetaData*>[num_levels_]), files_by_size_(num_levels_), next_file_to_compact_by_size_(num_levels_), @@ -551,7 +552,7 @@ void Version::Get(const ReadOptions& options, int32_t search_left_bound = 0; int32_t search_right_bound = FileIndexer::kLevelMaxIndex; - for (int level = 0; level < num_levels_; ++level) { + for (int level = 0; level < num_non_empty_levels_; ++level) { int num_files = files_[level].size(); if (num_files == 0) { // When current level is empty, the search bound generated from upper @@ -617,31 +618,46 @@ void Version::Get(const ReadOptions& options, for (int32_t i = start_index; i < num_files;) { FileMetaData* f = files[i]; - - // Check if key is within a file's range. If search left bound and right - // bound point to the same find, we are sure key falls in range. - assert(level == 0 || i == start_index || - user_comparator_->Compare(user_key, f->smallest.user_key()) <= 0); - - int cmp_smallest = user_comparator_->Compare(user_key, f->smallest.user_key()); int cmp_largest = -1; - if (cmp_smallest >= 0) { - cmp_largest = user_comparator_->Compare(user_key, f->largest.user_key()); - } - // Setup file search bound for the next level based on the comparison - // results - if (level > 0) { - file_indexer_.GetNextLevelIndex(level, i, cmp_smallest, cmp_largest, - &search_left_bound, &search_right_bound); - } - // Key falls out of current file's range - if (cmp_smallest < 0 || cmp_largest > 0) { - if (level == 0) { - ++i; - continue; - } else { - break; + // Do key range filtering of files or/and fractional cascading if: + // (1) not all the files are in level 0, or + // (2) there are more than 3 Level 0 files + // If there are only 3 or fewer level 0 files in the system, we skip the + // key range filtering. In this case, more likely, the system is highly + // tuned to minimize number of tables queried by each query, so it is + // unlikely that key range filtering is more efficient than querying the + // files.
+ if (num_non_empty_levels_ > 1 || num_files > 3) { + // Check if key is within a file's range. If search left bound and right + // bound point to the same file, we are sure key falls in range. + assert( + level == 0 || i == start_index + || user_comparator_->Compare(user_key, f->smallest.user_key()) + <= 0); + + int cmp_smallest = user_comparator_->Compare(user_key, + f->smallest.user_key()); + if (cmp_smallest >= 0) { + cmp_largest = user_comparator_->Compare(user_key, + f->largest.user_key()); + } + + // Setup file search bound for the next level based on the comparison + // results + if (level > 0) { + file_indexer_.GetNextLevelIndex(level, i, cmp_smallest, cmp_largest, + &search_left_bound, + &search_right_bound); + } + // Key falls out of current file's range + if (cmp_smallest < 0 || cmp_largest > 0) { + if (level == 0) { + ++i; + continue; + } else { + break; + } } } @@ -742,6 +758,12 @@ bool Version::UpdateStats(const GetStats& stats) { return false; } +void Version::PrepareApply(std::vector<uint64_t>& size_being_compacted) { + ComputeCompactionScore(size_being_compacted); + UpdateFilesBySize(); + UpdateNumNonEmptyLevels(); +} + void Version::ComputeCompactionScore( std::vector<uint64_t>& size_being_compacted) { double max_score = 0; @@ -844,6 +866,17 @@ bool CompareSeqnoDescending(const Version::Fsize& first, } // anonymous namespace +void Version::UpdateNumNonEmptyLevels() { + num_non_empty_levels_ = num_levels_; + for (int i = num_levels_ - 1; i >= 0; i--) { + if (files_[i].size() != 0) { + return; + } else { + num_non_empty_levels_ = i; + } + } +} + void Version::UpdateFilesBySize() { if (cfd_->options()->compaction_style == kCompactionStyleFIFO) { // don't need this @@ -1735,10 +1768,8 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, } if (!edit->IsColumnFamilyManipulation()) { - // The calls to ComputeCompactionScore and UpdateFilesBySize are cpu-heavy - // and is best called outside the mutex.
- v->ComputeCompactionScore(size_being_compacted); - v->UpdateFilesBySize(); + // These are cpu-heavy operations, which should be called outside the mutex. + v->PrepareApply(size_being_compacted); } // Write new record to MANIFEST log @@ -2155,8 +2186,7 @@ Status VersionSet::Recover( // Install recovered version std::vector<uint64_t> size_being_compacted(v->NumberLevels() - 1); cfd->compaction_picker()->SizeBeingCompacted(size_being_compacted); - v->ComputeCompactionScore(size_being_compacted); - v->UpdateFilesBySize(); + v->PrepareApply(size_being_compacted); AppendVersion(cfd, v); } @@ -2489,8 +2519,7 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname, builder->SaveTo(v); std::vector<uint64_t> size_being_compacted(v->NumberLevels() - 1); cfd->compaction_picker()->SizeBeingCompacted(size_being_compacted); - v->ComputeCompactionScore(size_being_compacted); - v->UpdateFilesBySize(); + v->PrepareApply(size_being_compacted); delete builder; printf("--------------- Column family \"%s\" (ID %u) --------------\n", diff --git a/db/version_set.h b/db/version_set.h index 446dcf133..07dca8b9d 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -108,6 +108,10 @@ class Version { // a lock. Once a version is saved to current_, call only with mutex held void ComputeCompactionScore(std::vector<uint64_t>& size_being_compacted); + // Update scores, pre-calculated variables. It needs to be called before + // applying the version to the version set. + void PrepareApply(std::vector<uint64_t>& size_being_compacted); + // Reference count management (so Versions do not disappear out from // under live iterators) void Ref(); @@ -231,6 +235,9 @@ class Version { bool PrefixMayMatch(const ReadOptions& options, Iterator* level_iter, const Slice& internal_prefix) const; + // Update num_non_empty_levels_. + void UpdateNumNonEmptyLevels(); + // Sort all files for this version based on their file size and // record results in files_by_size_. The largest files are listed first.
void UpdateFilesBySize(); @@ -242,11 +249,13 @@ class Version { const MergeOperator* merge_operator_; Logger* info_log_; Statistics* db_statistics_; + int num_levels_; // Number of levels + int num_non_empty_levels_; // Number of levels that may contain files. Any level at or + // beyond this index is guaranteed to be empty. VersionSet* vset_; // VersionSet to which this Version belongs Version* next_; // Next version in linked list Version* prev_; // Previous version in linked list int refs_; // Number of live refs to this version - int num_levels_; // Number of levels // List of files per level, files in each level are arranged // in increasing order of keys