diff --git a/db/version_edit.h b/db/version_edit.h index db133402c..ef883297a 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -8,6 +8,7 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once +#include #include #include #include @@ -74,7 +75,7 @@ struct FileMetaData { // Stats for compensating deletion entries during compaction // File size compensated by deletion entry. - // This is updated in Version::UpdateTemporaryStats() first time when the + // This is updated in Version::UpdateAccumulatedStats() first time when the // file is created or loaded. After it is updated, it is immutable. uint64_t compensated_file_size; uint64_t num_entries; // the number of entries. diff --git a/db/version_set.cc b/db/version_set.cc index 78241d1f0..0819196fb 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -597,7 +597,19 @@ uint64_t Version::GetEstimatedActiveKeys() { // (1) there is merge keys // (2) keys are directly overwritten // (3) deletion on non-existing keys - return num_non_deletions_ - num_deletions_; + // (4) low number of samples + if (num_samples_ == 0) { + return 0; + } + + if (num_samples_ < files_->size()) { + // casting to avoid overflowing + return static_cast(static_cast( + accumulated_num_non_deletions_ - accumulated_num_deletions_) * + files_->size() / num_samples_); + } else { + return accumulated_num_non_deletions_ - accumulated_num_deletions_; + } } void Version::AddIterators(const ReadOptions& read_options, @@ -658,17 +670,21 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset, compaction_score_(num_levels_), compaction_level_(num_levels_), version_number_(version_number), - total_file_size_(0), - total_raw_key_size_(0), - total_raw_value_size_(0), - num_non_deletions_(0), - num_deletions_(0) { + accumulated_file_size_(0), + accumulated_raw_key_size_(0), + accumulated_raw_value_size_(0), + accumulated_num_non_deletions_(0), + accumulated_num_deletions_(0), + num_samples_(0) { if (cfd != 
nullptr && cfd->current() != nullptr) { - total_file_size_ = cfd->current()->total_file_size_; - total_raw_key_size_ = cfd->current()->total_raw_key_size_; - total_raw_value_size_ = cfd->current()->total_raw_value_size_; - num_non_deletions_ = cfd->current()->num_non_deletions_; - num_deletions_ = cfd->current()->num_deletions_; + accumulated_file_size_ = cfd->current()->accumulated_file_size_; + accumulated_raw_key_size_ = cfd->current()->accumulated_raw_key_size_; + accumulated_raw_value_size_ = + cfd->current()->accumulated_raw_value_size_; + accumulated_num_non_deletions_ = + cfd->current()->accumulated_num_non_deletions_; + accumulated_num_deletions_ = cfd->current()->accumulated_num_deletions_; + num_samples_ = cfd->current()->num_samples_; } } @@ -748,7 +764,7 @@ void Version::GenerateFileLevels() { void Version::PrepareApply(const MutableCFOptions& mutable_cf_options, std::vector& size_being_compacted) { - UpdateTemporaryStats(); + UpdateAccumulatedStats(); ComputeCompactionScore(mutable_cf_options, size_being_compacted); UpdateFilesBySize(); UpdateNumNonEmptyLevels(); @@ -757,7 +773,8 @@ void Version::PrepareApply(const MutableCFOptions& mutable_cf_options, } bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) { - if (file_meta->init_stats_from_file) { + if (file_meta->init_stats_from_file || + file_meta->compensated_file_size > 0) { return false; } std::shared_ptr tp; @@ -778,26 +795,55 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) { return true; } -void Version::UpdateTemporaryStats() { +void Version::UpdateAccumulatedStats(FileMetaData* file_meta) { + assert(file_meta->init_stats_from_file); + accumulated_file_size_ += file_meta->fd.GetFileSize(); + accumulated_raw_key_size_ += file_meta->raw_key_size; + accumulated_raw_value_size_ += file_meta->raw_value_size; + accumulated_num_non_deletions_ += + file_meta->num_entries - file_meta->num_deletions; + accumulated_num_deletions_ += file_meta->num_deletions; + 
num_samples_++; +} + +void Version::UpdateAccumulatedStats() { static const int kDeletionWeightOnCompaction = 2; - // incrementally update the average value size by - // including newly added files into the global stats + // maximum number of table properties loaded from files. + const int kMaxInitCount = 20; int init_count = 0; - int total_count = 0; - for (int level = 0; level < num_levels_; level++) { + // here only the first kMaxInitCount files which haven't been + // initialized from file will be updated with num_deletions. + // The motivation here is to cap the maximum I/O per Version creation. + // The reason for choosing files from lower-level instead of higher-level + // is that such design is able to propagate the initialization from + // lower-level to higher-level: When the num_deletions of lower-level + // files are updated, it will make the lower-level files have accurate + // compensated_file_size, so that lower-level to higher-level compaction + // will be triggered, which creates higher-level files whose num_deletions + // will be updated here. + for (int level = 0; + level < num_levels_ && init_count < kMaxInitCount; ++level) { for (auto* file_meta : files_[level]) { if (MaybeInitializeFileMetaData(file_meta)) { // each FileMeta will be initialized only once. - total_file_size_ += file_meta->fd.GetFileSize(); - total_raw_key_size_ += file_meta->raw_key_size; - total_raw_value_size_ += file_meta->raw_value_size; - num_non_deletions_ += - file_meta->num_entries - file_meta->num_deletions; - num_deletions_ += file_meta->num_deletions; - init_count++; - } - total_count++; + UpdateAccumulatedStats(file_meta); + if (++init_count >= kMaxInitCount) { + break; + } + } + } + } + // In case all sampled-files contain only deletion entries, then we + // load the table-property of a file in higher-level to initialize + // that value.
+ for (int level = num_levels_ - 1; + accumulated_raw_value_size_ == 0 && level >= 0; --level) { + for (int i = static_cast(files_[level].size()) - 1; + accumulated_raw_value_size_ == 0 && i >= 0; --i) { + if (MaybeInitializeFileMetaData(files_[level][i])) { + UpdateAccumulatedStats(files_[level][i]); + } } } diff --git a/db/version_set.h b/db/version_set.h index 05e6e9a65..93e9e0c9d 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -212,13 +212,15 @@ class Version { uint64_t GetVersionNumber() const { return version_number_; } uint64_t GetAverageValueSize() const { - if (num_non_deletions_ == 0) { + if (accumulated_num_non_deletions_ == 0) { return 0; } - assert(total_raw_key_size_ + total_raw_value_size_ > 0); - assert(total_file_size_ > 0); - return total_raw_value_size_ / num_non_deletions_ * total_file_size_ / - (total_raw_key_size_ + total_raw_value_size_); + assert(accumulated_raw_key_size_ + accumulated_raw_value_size_ > 0); + assert(accumulated_file_size_ > 0); + return accumulated_raw_value_size_ / + accumulated_num_non_deletions_ * + accumulated_file_size_ / + (accumulated_raw_key_size_ + accumulated_raw_value_size_); } // REQUIRES: lock is held @@ -268,14 +270,17 @@ class Version { // Update num_non_empty_levels_. void UpdateNumNonEmptyLevels(); - // The helper function of UpdateTemporaryStats, which may fill the missing + // The helper function of UpdateAccumulatedStats, which may fill the missing // fields of file_mata from its associated TableProperties. // Returns true if it does initialize FileMetaData. bool MaybeInitializeFileMetaData(FileMetaData* file_meta); - // Update the temporary stats associated with the current version. - // This temporary stats will be used in compaction. - void UpdateTemporaryStats(); + // Update the accumulated stats from a file-meta. + void UpdateAccumulatedStats(FileMetaData* file_meta); + + // Update the accumulated stats associated with the current version. 
+ // These accumulated stats will be used in compaction. + void UpdateAccumulatedStats(); // Sort all files for this version based on their file size and // record results in files_by_size_. The largest files are listed first. @@ -337,16 +342,19 @@ class Version { Version(ColumnFamilyData* cfd, VersionSet* vset, uint64_t version_number = 0); - // total file size - uint64_t total_file_size_; - // the total size of all raw keys. - uint64_t total_raw_key_size_; - // the total size of all raw values. - uint64_t total_raw_value_size_; + // the following are the sampled temporary stats. + // the current accumulated size of sampled files. + uint64_t accumulated_file_size_; + // the current accumulated size of all raw keys based on the sampled files. + uint64_t accumulated_raw_key_size_; + // the current accumulated size of all raw values based on the sampled files. + uint64_t accumulated_raw_value_size_; // total number of non-deletion entries - uint64_t num_non_deletions_; + uint64_t accumulated_num_non_deletions_; // total number of deletion entries - uint64_t num_deletions_; + uint64_t accumulated_num_deletions_; + // the number of samples + uint64_t num_samples_; ~Version();