Speed up DB::Open() and Version creation by limiting the number of FileMetaData initializations.

Summary:
This diff speeds up DB::Open() and Version creation by limiting the number of FileMetaData initialization. The behavior of Version::UpdateAccumulatedStats() is changed as follows:

* It only initializes the first 20 uninitialized FileMetaData from file.  This guarantees the size of the latest 20 files will always be compensated when they have any deletion entries.  Previously, it might initialize all FileMetaData by loading all files at DB::Open().
* In case none of the first 20 files has any data entry, UpdateAccumulatedStats() will initialize the FileMetaData of the oldest file.

Test Plan: db_test

Reviewers: igor, sdong, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D24255
main
Yueh-Hsuan Chiang 10 years ago
parent 5db9e76644
commit 6c66918645
  1. 3
      db/version_edit.h
  2. 100
      db/version_set.cc
  3. 42
      db/version_set.h

@ -8,6 +8,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once #pragma once
#include <algorithm>
#include <set> #include <set>
#include <utility> #include <utility>
#include <vector> #include <vector>
@ -74,7 +75,7 @@ struct FileMetaData {
// Stats for compensating deletion entries during compaction // Stats for compensating deletion entries during compaction
// File size compensated by deletion entry. // File size compensated by deletion entry.
// This is updated in Version::UpdateTemporaryStats() first time when the // This is updated in Version::UpdateAccumulatedStats() first time when the
// file is created or loaded. After it is updated, it is immutable. // file is created or loaded. After it is updated, it is immutable.
uint64_t compensated_file_size; uint64_t compensated_file_size;
uint64_t num_entries; // the number of entries. uint64_t num_entries; // the number of entries.

@ -597,7 +597,19 @@ uint64_t Version::GetEstimatedActiveKeys() {
// (1) there is merge keys // (1) there is merge keys
// (2) keys are directly overwritten // (2) keys are directly overwritten
// (3) deletion on non-existing keys // (3) deletion on non-existing keys
return num_non_deletions_ - num_deletions_; // (4) low number of samples
if (num_samples_ == 0) {
return 0;
}
if (num_samples_ < files_->size()) {
// casting to avoid overflowing
return static_cast<uint64_t>(static_cast<double>(
accumulated_num_non_deletions_ - accumulated_num_deletions_) *
files_->size() / num_samples_);
} else {
return accumulated_num_non_deletions_ - accumulated_num_deletions_;
}
} }
void Version::AddIterators(const ReadOptions& read_options, void Version::AddIterators(const ReadOptions& read_options,
@ -658,17 +670,21 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset,
compaction_score_(num_levels_), compaction_score_(num_levels_),
compaction_level_(num_levels_), compaction_level_(num_levels_),
version_number_(version_number), version_number_(version_number),
total_file_size_(0), accumulated_file_size_(0),
total_raw_key_size_(0), accumulated_raw_key_size_(0),
total_raw_value_size_(0), accumulated_raw_value_size_(0),
num_non_deletions_(0), accumulated_num_non_deletions_(0),
num_deletions_(0) { accumulated_num_deletions_(0),
num_samples_(0) {
if (cfd != nullptr && cfd->current() != nullptr) { if (cfd != nullptr && cfd->current() != nullptr) {
total_file_size_ = cfd->current()->total_file_size_; accumulated_file_size_ = cfd->current()->accumulated_file_size_;
total_raw_key_size_ = cfd->current()->total_raw_key_size_; accumulated_raw_key_size_ = cfd->current()->accumulated_raw_key_size_;
total_raw_value_size_ = cfd->current()->total_raw_value_size_; accumulated_raw_value_size_ =
num_non_deletions_ = cfd->current()->num_non_deletions_; cfd->current()->accumulated_raw_value_size_;
num_deletions_ = cfd->current()->num_deletions_; accumulated_num_non_deletions_ =
cfd->current()->accumulated_num_non_deletions_;
accumulated_num_deletions_ = cfd->current()->accumulated_num_deletions_;
num_samples_ = cfd->current()->num_samples_;
} }
} }
@ -748,7 +764,7 @@ void Version::GenerateFileLevels() {
void Version::PrepareApply(const MutableCFOptions& mutable_cf_options, void Version::PrepareApply(const MutableCFOptions& mutable_cf_options,
std::vector<uint64_t>& size_being_compacted) { std::vector<uint64_t>& size_being_compacted) {
UpdateTemporaryStats(); UpdateAccumulatedStats();
ComputeCompactionScore(mutable_cf_options, size_being_compacted); ComputeCompactionScore(mutable_cf_options, size_being_compacted);
UpdateFilesBySize(); UpdateFilesBySize();
UpdateNumNonEmptyLevels(); UpdateNumNonEmptyLevels();
@ -757,7 +773,8 @@ void Version::PrepareApply(const MutableCFOptions& mutable_cf_options,
} }
bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) { bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) {
if (file_meta->init_stats_from_file) { if (file_meta->init_stats_from_file ||
file_meta->compensated_file_size > 0) {
return false; return false;
} }
std::shared_ptr<const TableProperties> tp; std::shared_ptr<const TableProperties> tp;
@ -778,26 +795,55 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) {
return true; return true;
} }
void Version::UpdateTemporaryStats() { void Version::UpdateAccumulatedStats(FileMetaData* file_meta) {
assert(file_meta->init_stats_from_file);
accumulated_file_size_ += file_meta->fd.GetFileSize();
accumulated_raw_key_size_ += file_meta->raw_key_size;
accumulated_raw_value_size_ += file_meta->raw_value_size;
accumulated_num_non_deletions_ +=
file_meta->num_entries - file_meta->num_deletions;
accumulated_num_deletions_ += file_meta->num_deletions;
num_samples_++;
}
void Version::UpdateAccumulatedStats() {
static const int kDeletionWeightOnCompaction = 2; static const int kDeletionWeightOnCompaction = 2;
// incrementally update the average value size by // maximum number of table properties loaded from files.
// including newly added files into the global stats const int kMaxInitCount = 20;
int init_count = 0; int init_count = 0;
int total_count = 0; // here only the first kMaxInitCount files which haven't been
for (int level = 0; level < num_levels_; level++) { // initialized from file will be updated with num_deletions.
// The motivation here is to cap the maximum I/O per Version creation.
// The reason for choosing files from lower-level instead of higher-level
// is that such design is able to propagate the initialization from
// lower-level to higher-level: When the num_deletions of lower-level
// files are updated, it will make the lower-level files have accurate
// compensated_file_size, making lower-level to higher-level compaction
// will be triggered, which creates higher-level files whose num_deletions
// will be updated here.
for (int level = 0;
level < num_levels_ && init_count < kMaxInitCount; ++level) {
for (auto* file_meta : files_[level]) { for (auto* file_meta : files_[level]) {
if (MaybeInitializeFileMetaData(file_meta)) { if (MaybeInitializeFileMetaData(file_meta)) {
// each FileMeta will be initialized only once. // each FileMeta will be initialized only once.
total_file_size_ += file_meta->fd.GetFileSize(); UpdateAccumulatedStats(file_meta);
total_raw_key_size_ += file_meta->raw_key_size; if (++init_count >= kMaxInitCount) {
total_raw_value_size_ += file_meta->raw_value_size; break;
num_non_deletions_ += }
file_meta->num_entries - file_meta->num_deletions; }
num_deletions_ += file_meta->num_deletions; }
init_count++; }
} // In case all sampled-files contain only deletion entries, then we
total_count++; // load the table-property of a file in higher-level to initialize
// that value.
for (int level = num_levels_ - 1;
accumulated_raw_value_size_ == 0 && level >= 0; --level) {
for (int i = static_cast<int>(files_[level].size()) - 1;
accumulated_raw_value_size_ == 0 && i >= 0; --i) {
if (MaybeInitializeFileMetaData(files_[level][i])) {
UpdateAccumulatedStats(files_[level][i]);
}
} }
} }

@ -212,13 +212,15 @@ class Version {
uint64_t GetVersionNumber() const { return version_number_; } uint64_t GetVersionNumber() const { return version_number_; }
uint64_t GetAverageValueSize() const { uint64_t GetAverageValueSize() const {
if (num_non_deletions_ == 0) { if (accumulated_num_non_deletions_ == 0) {
return 0; return 0;
} }
assert(total_raw_key_size_ + total_raw_value_size_ > 0); assert(accumulated_raw_key_size_ + accumulated_raw_value_size_ > 0);
assert(total_file_size_ > 0); assert(accumulated_file_size_ > 0);
return total_raw_value_size_ / num_non_deletions_ * total_file_size_ / return accumulated_raw_value_size_ /
(total_raw_key_size_ + total_raw_value_size_); accumulated_num_non_deletions_ *
accumulated_file_size_ /
(accumulated_raw_key_size_ + accumulated_raw_value_size_);
} }
// REQUIRES: lock is held // REQUIRES: lock is held
@ -268,14 +270,17 @@ class Version {
// Update num_non_empty_levels_. // Update num_non_empty_levels_.
void UpdateNumNonEmptyLevels(); void UpdateNumNonEmptyLevels();
// The helper function of UpdateTemporaryStats, which may fill the missing // The helper function of UpdateAccumulatedStats, which may fill the missing
// fields of file_mata from its associated TableProperties. // fields of file_mata from its associated TableProperties.
// Returns true if it does initialize FileMetaData. // Returns true if it does initialize FileMetaData.
bool MaybeInitializeFileMetaData(FileMetaData* file_meta); bool MaybeInitializeFileMetaData(FileMetaData* file_meta);
// Update the temporary stats associated with the current version. // Update the accumulated stats from a file-meta.
// This temporary stats will be used in compaction. void UpdateAccumulatedStats(FileMetaData* file_meta);
void UpdateTemporaryStats();
// Update the accumulated stats associated with the current version.
// This accumulated stats will be used in compaction.
void UpdateAccumulatedStats();
// Sort all files for this version based on their file size and // Sort all files for this version based on their file size and
// record results in files_by_size_. The largest files are listed first. // record results in files_by_size_. The largest files are listed first.
@ -337,16 +342,19 @@ class Version {
Version(ColumnFamilyData* cfd, VersionSet* vset, uint64_t version_number = 0); Version(ColumnFamilyData* cfd, VersionSet* vset, uint64_t version_number = 0);
// total file size // the following are the sampled temporary stats.
uint64_t total_file_size_; // the current accumulated size of sampled files.
// the total size of all raw keys. uint64_t accumulated_file_size_;
uint64_t total_raw_key_size_; // the current accumulated size of all raw keys based on the sampled files.
// the total size of all raw values. uint64_t accumulated_raw_key_size_;
uint64_t total_raw_value_size_; // the current accumulated size of all raw keys based on the sampled files.
uint64_t accumulated_raw_value_size_;
// total number of non-deletion entries // total number of non-deletion entries
uint64_t num_non_deletions_; uint64_t accumulated_num_non_deletions_;
// total number of deletion entries // total number of deletion entries
uint64_t num_deletions_; uint64_t accumulated_num_deletions_;
// the number of samples
uint64_t num_samples_;
~Version(); ~Version();

Loading…
Cancel
Save