From 6c66918645d1a2fc693b79d7a580659407cedac9 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Fri, 17 Oct 2014 14:58:30 -0700 Subject: [PATCH] Speed up DB::Open() and Version creation by limiting the number of FileMetaData initialization. Summary: This diff speeds up DB::Open() and Version creation by limiting the number of FileMetaData initializations. The behavior of Version::UpdateAccumulatedStats() is changed as follows: * It only initializes the first 20 uninitialized FileMetaData from file. This guarantees the size of the latest 20 files will always be compensated when they have any deletion entries. Previously it might initialize all FileMetaData by loading all files at DB::Open(). * In case none of the first 20 files has any data entry, UpdateAccumulatedStats() will initialize the FileMetaData of the oldest file. Test Plan: db_test Reviewers: igor, sdong, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D24255 --- db/version_edit.h | 3 +- db/version_set.cc | 100 +++++++++++++++++++++++++++++++++------------- db/version_set.h | 42 +++++++++++-------- 3 files changed, 100 insertions(+), 45 deletions(-) diff --git a/db/version_edit.h b/db/version_edit.h index db133402c..ef883297a 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -8,6 +8,7 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once +#include #include #include #include @@ -74,7 +75,7 @@ struct FileMetaData { // Stats for compensating deletion entries during compaction // File size compensated by deletion entry. - // This is updated in Version::UpdateTemporaryStats() first time when the + // This is updated in Version::UpdateAccumulatedStats() first time when the // file is created or loaded. After it is updated, it is immutable. uint64_t compensated_file_size; uint64_t num_entries; // the number of entries. 
diff --git a/db/version_set.cc b/db/version_set.cc index 78241d1f0..0819196fb 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -597,7 +597,19 @@ uint64_t Version::GetEstimatedActiveKeys() { // (1) there is merge keys // (2) keys are directly overwritten // (3) deletion on non-existing keys - return num_non_deletions_ - num_deletions_; + // (4) low number of samples + if (num_samples_ == 0) { + return 0; + } + + if (num_samples_ < files_->size()) { + // casting to avoid overflowing + return static_cast(static_cast( + accumulated_num_non_deletions_ - accumulated_num_deletions_) * + files_->size() / num_samples_); + } else { + return accumulated_num_non_deletions_ - accumulated_num_deletions_; + } } void Version::AddIterators(const ReadOptions& read_options, @@ -658,17 +670,21 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset, compaction_score_(num_levels_), compaction_level_(num_levels_), version_number_(version_number), - total_file_size_(0), - total_raw_key_size_(0), - total_raw_value_size_(0), - num_non_deletions_(0), - num_deletions_(0) { + accumulated_file_size_(0), + accumulated_raw_key_size_(0), + accumulated_raw_value_size_(0), + accumulated_num_non_deletions_(0), + accumulated_num_deletions_(0), + num_samples_(0) { if (cfd != nullptr && cfd->current() != nullptr) { - total_file_size_ = cfd->current()->total_file_size_; - total_raw_key_size_ = cfd->current()->total_raw_key_size_; - total_raw_value_size_ = cfd->current()->total_raw_value_size_; - num_non_deletions_ = cfd->current()->num_non_deletions_; - num_deletions_ = cfd->current()->num_deletions_; + accumulated_file_size_ = cfd->current()->accumulated_file_size_; + accumulated_raw_key_size_ = cfd->current()->accumulated_raw_key_size_; + accumulated_raw_value_size_ = + cfd->current()->accumulated_raw_value_size_; + accumulated_num_non_deletions_ = + cfd->current()->accumulated_num_non_deletions_; + accumulated_num_deletions_ = cfd->current()->accumulated_num_deletions_; + num_samples_ = 
cfd->current()->num_samples_; } } @@ -748,7 +764,7 @@ void Version::GenerateFileLevels() { void Version::PrepareApply(const MutableCFOptions& mutable_cf_options, std::vector& size_being_compacted) { - UpdateTemporaryStats(); + UpdateAccumulatedStats(); ComputeCompactionScore(mutable_cf_options, size_being_compacted); UpdateFilesBySize(); UpdateNumNonEmptyLevels(); @@ -757,7 +773,8 @@ void Version::PrepareApply(const MutableCFOptions& mutable_cf_options, } bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) { - if (file_meta->init_stats_from_file) { + if (file_meta->init_stats_from_file || + file_meta->compensated_file_size > 0) { return false; } std::shared_ptr tp; @@ -778,26 +795,55 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) { return true; } -void Version::UpdateTemporaryStats() { +void Version::UpdateAccumulatedStats(FileMetaData* file_meta) { + assert(file_meta->init_stats_from_file); + accumulated_file_size_ += file_meta->fd.GetFileSize(); + accumulated_raw_key_size_ += file_meta->raw_key_size; + accumulated_raw_value_size_ += file_meta->raw_value_size; + accumulated_num_non_deletions_ += + file_meta->num_entries - file_meta->num_deletions; + accumulated_num_deletions_ += file_meta->num_deletions; + num_samples_++; +} + +void Version::UpdateAccumulatedStats() { static const int kDeletionWeightOnCompaction = 2; - // incrementally update the average value size by - // including newly added files into the global stats + // maximum number of table properties loaded from files. + const int kMaxInitCount = 20; int init_count = 0; - int total_count = 0; - for (int level = 0; level < num_levels_; level++) { + // here only the first kMaxInitCount files which haven't been + // initialized from file will be updated with num_deletions. + // The motivation here is to cap the maximum I/O per Version creation. 
+ // The reason for choosing files from lower-level instead of higher-level + // is that such design is able to propagate the initialization from + // lower-level to higher-level: When the num_deletions of lower-level + // files are updated, it will make the lower-level files have accurate + // compensated_file_size, so that lower-level to higher-level compaction + // will be triggered, which creates higher-level files whose num_deletions + // will be updated here. + for (int level = 0; + level < num_levels_ && init_count < kMaxInitCount; ++level) { for (auto* file_meta : files_[level]) { if (MaybeInitializeFileMetaData(file_meta)) { // each FileMeta will be initialized only once. - total_file_size_ += file_meta->fd.GetFileSize(); - total_raw_key_size_ += file_meta->raw_key_size; - total_raw_value_size_ += file_meta->raw_value_size; - num_non_deletions_ += - file_meta->num_entries - file_meta->num_deletions; - num_deletions_ += file_meta->num_deletions; - init_count++; - } - total_count++; + UpdateAccumulatedStats(file_meta); + if (++init_count >= kMaxInitCount) { + break; + } + } + } + } + // In case all sampled-files contain only deletion entries, then we + // load the table-property of a file in higher-level to initialize + // that value. 
+ for (int level = num_levels_ - 1; + accumulated_raw_value_size_ == 0 && level >= 0; --level) { + for (int i = static_cast(files_[level].size()) - 1; + accumulated_raw_value_size_ == 0 && i >= 0; --i) { + if (MaybeInitializeFileMetaData(files_[level][i])) { + UpdateAccumulatedStats(files_[level][i]); + } } } diff --git a/db/version_set.h b/db/version_set.h index 05e6e9a65..93e9e0c9d 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -212,13 +212,15 @@ class Version { uint64_t GetVersionNumber() const { return version_number_; } uint64_t GetAverageValueSize() const { - if (num_non_deletions_ == 0) { + if (accumulated_num_non_deletions_ == 0) { return 0; } - assert(total_raw_key_size_ + total_raw_value_size_ > 0); - assert(total_file_size_ > 0); - return total_raw_value_size_ / num_non_deletions_ * total_file_size_ / - (total_raw_key_size_ + total_raw_value_size_); + assert(accumulated_raw_key_size_ + accumulated_raw_value_size_ > 0); + assert(accumulated_file_size_ > 0); + return accumulated_raw_value_size_ / + accumulated_num_non_deletions_ * + accumulated_file_size_ / + (accumulated_raw_key_size_ + accumulated_raw_value_size_); } // REQUIRES: lock is held @@ -268,14 +270,17 @@ class Version { // Update num_non_empty_levels_. void UpdateNumNonEmptyLevels(); - // The helper function of UpdateTemporaryStats, which may fill the missing + // The helper function of UpdateAccumulatedStats, which may fill the missing // fields of file_mata from its associated TableProperties. // Returns true if it does initialize FileMetaData. bool MaybeInitializeFileMetaData(FileMetaData* file_meta); - // Update the temporary stats associated with the current version. - // This temporary stats will be used in compaction. - void UpdateTemporaryStats(); + // Update the accumulated stats from a file-meta. + void UpdateAccumulatedStats(FileMetaData* file_meta); + + // Update the accumulated stats associated with the current version. 
+ // These accumulated stats will be used in compaction. + void UpdateAccumulatedStats(); // Sort all files for this version based on their file size and // record results in files_by_size_. The largest files are listed first. @@ -337,16 +342,19 @@ class Version { Version(ColumnFamilyData* cfd, VersionSet* vset, uint64_t version_number = 0); - // total file size - uint64_t total_file_size_; - // the total size of all raw keys. - uint64_t total_raw_key_size_; - // the total size of all raw values. - uint64_t total_raw_value_size_; + // the following are the sampled temporary stats. + // the current accumulated size of sampled files. + uint64_t accumulated_file_size_; + // the current accumulated size of all raw keys based on the sampled files. + uint64_t accumulated_raw_key_size_; + // the current accumulated size of all raw values based on the sampled files. + uint64_t accumulated_raw_value_size_; // total number of non-deletion entries - uint64_t num_non_deletions_; + uint64_t accumulated_num_non_deletions_; // total number of deletion entries - uint64_t num_deletions_; + uint64_t accumulated_num_deletions_; + // the number of samples + uint64_t num_samples_; ~Version();