Speed up DB::Open() and Version creation by limiting the number of FileMetaData initializations.

Summary:
This diff speeds up DB::Open() and Version creation by limiting the number of FileMetaData initializations. The behavior of Version::UpdateAccumulatedStats() is changed as follows (a sketch of the resulting sampling scheme appears after the list):

* It only initializes the first 20 uninitialized FileMetaData from file.  This guarantees the sizes of the latest 20 files will always be compensated when they have any deletion entries.  Previously, it might initialize all FileMetaData by loading all files at DB::Open().
* In case none of the first 20 files has any data entry, UpdateAccumulatedStats() will initialize the FileMetaData of the oldest file.
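To make the scheme concrete, here is a minimal, self-contained C++ sketch of the idea. The names (FileStats, SampledStats, Update, EstimatedActiveKeys) are hypothetical, not the actual RocksDB classes: initialization is capped at 20 files per Version creation, and the active-key estimate is extrapolated from the sampled files to the full file count, as GetEstimatedActiveKeys() does in the diff below.

    #include <cstdint>
    #include <vector>

    // Per-file stats; in RocksDB these live in FileMetaData and are
    // filled from the table's properties block on first use.
    struct FileStats {
      bool initialized = false;
      uint64_t num_entries = 0;
      uint64_t num_deletions = 0;
    };

    class SampledStats {
     public:
      static constexpr int kMaxInitCount = 20;  // cap on table-property loads

      // Sample at most kMaxInitCount uninitialized files per call,
      // accumulating their entry/deletion counts.
      void Update(const std::vector<FileStats*>& files) {
        int init_count = 0;
        for (FileStats* f : files) {
          if (init_count >= kMaxInitCount) {
            break;
          }
          if (!f->initialized) {
            f->initialized = true;  // stands in for reading TableProperties
            accumulated_num_non_deletions_ += f->num_entries - f->num_deletions;
            accumulated_num_deletions_ += f->num_deletions;
            ++num_samples_;
            ++init_count;
          }
        }
      }

      // Extrapolate from the sampled files to all files: if only some
      // files were sampled, scale the sampled count by
      // total_files / num_samples_.
      uint64_t EstimatedActiveKeys(uint64_t total_files) const {
        if (num_samples_ == 0) {
          return 0;
        }
        uint64_t sampled_active =
            accumulated_num_non_deletions_ - accumulated_num_deletions_;
        if (num_samples_ < total_files) {
          // cast to double so the multiplication cannot overflow
          return static_cast<uint64_t>(
              static_cast<double>(sampled_active) * total_files / num_samples_);
        }
        return sampled_active;
      }

     private:
      uint64_t accumulated_num_non_deletions_ = 0;
      uint64_t accumulated_num_deletions_ = 0;
      uint64_t num_samples_ = 0;
    };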

Test Plan: db_test

Reviewers: igor, sdong, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D24255
Branch: main
Author: Yueh-Hsuan Chiang
Commit: 6c66918645
Parent: 5db9e76644
Changed files:
  db/version_edit.h   (3 changed lines)
  db/version_set.cc   (100 changed lines)
  db/version_set.h    (42 changed lines)

db/version_edit.h

@@ -8,6 +8,7 @@
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 #pragma once
+#include <algorithm>
 #include <set>
 #include <utility>
 #include <vector>
@@ -74,7 +75,7 @@ struct FileMetaData {
   // Stats for compensating deletion entries during compaction

   // File size compensated by deletion entry.
-  // This is updated in Version::UpdateTemporaryStats() first time when the
+  // This is updated in Version::UpdateAccumulatedStats() first time when the
   // file is created or loaded. After it is updated, it is immutable.
   uint64_t compensated_file_size;
   uint64_t num_entries;            // the number of entries.

db/version_set.cc

@@ -597,7 +597,19 @@ uint64_t Version::GetEstimatedActiveKeys() {
   // (1) there is merge keys
   // (2) keys are directly overwritten
   // (3) deletion on non-existing keys
-  return num_non_deletions_ - num_deletions_;
+  // (4) low number of samples
+  if (num_samples_ == 0) {
+    return 0;
+  }
+
+  if (num_samples_ < files_->size()) {
+    // casting to avoid overflowing
+    return static_cast<uint64_t>(static_cast<double>(
+        accumulated_num_non_deletions_ - accumulated_num_deletions_) *
+        files_->size() / num_samples_);
+  } else {
+    return accumulated_num_non_deletions_ - accumulated_num_deletions_;
+  }
 }

 void Version::AddIterators(const ReadOptions& read_options,
@@ -658,17 +670,21 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset,
       compaction_score_(num_levels_),
       compaction_level_(num_levels_),
       version_number_(version_number),
-      total_file_size_(0),
-      total_raw_key_size_(0),
-      total_raw_value_size_(0),
-      num_non_deletions_(0),
-      num_deletions_(0) {
+      accumulated_file_size_(0),
+      accumulated_raw_key_size_(0),
+      accumulated_raw_value_size_(0),
+      accumulated_num_non_deletions_(0),
+      accumulated_num_deletions_(0),
+      num_samples_(0) {
   if (cfd != nullptr && cfd->current() != nullptr) {
-    total_file_size_ = cfd->current()->total_file_size_;
-    total_raw_key_size_ = cfd->current()->total_raw_key_size_;
-    total_raw_value_size_ = cfd->current()->total_raw_value_size_;
-    num_non_deletions_ = cfd->current()->num_non_deletions_;
-    num_deletions_ = cfd->current()->num_deletions_;
+    accumulated_file_size_ = cfd->current()->accumulated_file_size_;
+    accumulated_raw_key_size_ = cfd->current()->accumulated_raw_key_size_;
+    accumulated_raw_value_size_ =
+        cfd->current()->accumulated_raw_value_size_;
+    accumulated_num_non_deletions_ =
+        cfd->current()->accumulated_num_non_deletions_;
+    accumulated_num_deletions_ = cfd->current()->accumulated_num_deletions_;
+    num_samples_ = cfd->current()->num_samples_;
   }
 }
@@ -748,7 +764,7 @@ void Version::GenerateFileLevels() {

 void Version::PrepareApply(const MutableCFOptions& mutable_cf_options,
                            std::vector<uint64_t>& size_being_compacted) {
-  UpdateTemporaryStats();
+  UpdateAccumulatedStats();
   ComputeCompactionScore(mutable_cf_options, size_being_compacted);
   UpdateFilesBySize();
   UpdateNumNonEmptyLevels();
@@ -757,7 +773,8 @@ void Version::PrepareApply(const MutableCFOptions& mutable_cf_options,
 }

 bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) {
-  if (file_meta->init_stats_from_file) {
+  if (file_meta->init_stats_from_file ||
+      file_meta->compensated_file_size > 0) {
     return false;
   }
   std::shared_ptr<const TableProperties> tp;
@@ -778,26 +795,55 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) {
   return true;
 }

-void Version::UpdateTemporaryStats() {
+void Version::UpdateAccumulatedStats(FileMetaData* file_meta) {
+  assert(file_meta->init_stats_from_file);
+  accumulated_file_size_ += file_meta->fd.GetFileSize();
+  accumulated_raw_key_size_ += file_meta->raw_key_size;
+  accumulated_raw_value_size_ += file_meta->raw_value_size;
+  accumulated_num_non_deletions_ +=
+      file_meta->num_entries - file_meta->num_deletions;
+  accumulated_num_deletions_ += file_meta->num_deletions;
+  num_samples_++;
+}
+
+void Version::UpdateAccumulatedStats() {
   static const int kDeletionWeightOnCompaction = 2;

-  // incrementally update the average value size by
-  // including newly added files into the global stats
+  // maximum number of table properties loaded from files.
+  const int kMaxInitCount = 20;
   int init_count = 0;
-  int total_count = 0;
-  for (int level = 0; level < num_levels_; level++) {
+  // here only the first kMaxInitCount files which haven't been
+  // initialized from file will be updated with num_deletions.
+  // The motivation here is to cap the maximum I/O per Version creation.
+  // The reason for choosing files from lower-level instead of higher-level
+  // is that such design is able to propagate the initialization from
+  // lower-level to higher-level: When the num_deletions of lower-level
+  // files are updated, it will make the lower-level files have accurate
+  // compensated_file_size, making lower-level to higher-level compaction
+  // will be triggered, which creates higher-level files whose num_deletions
+  // will be updated here.
+  for (int level = 0;
+       level < num_levels_ && init_count < kMaxInitCount; ++level) {
     for (auto* file_meta : files_[level]) {
       if (MaybeInitializeFileMetaData(file_meta)) {
         // each FileMeta will be initialized only once.
-        total_file_size_ += file_meta->fd.GetFileSize();
-        total_raw_key_size_ += file_meta->raw_key_size;
-        total_raw_value_size_ += file_meta->raw_value_size;
-        num_non_deletions_ +=
-            file_meta->num_entries - file_meta->num_deletions;
-        num_deletions_ += file_meta->num_deletions;
-        init_count++;
+        UpdateAccumulatedStats(file_meta);
+        if (++init_count >= kMaxInitCount) {
+          break;
+        }
       }
-      total_count++;
     }
   }

+  // In case all sampled-files contain only deletion entries, then we
+  // load the table-property of a file in higher-level to initialize
+  // that value.
+  for (int level = num_levels_ - 1;
+       accumulated_raw_value_size_ == 0 && level >= 0; --level) {
+    for (int i = static_cast<int>(files_[level].size()) - 1;
+         accumulated_raw_value_size_ == 0 && i >= 0; --i) {
+      if (MaybeInitializeFileMetaData(files_[level][i])) {
+        UpdateAccumulatedStats(files_[level][i]);
+      }
+    }
+  }

db/version_set.h

@@ -212,13 +212,15 @@ class Version {
   uint64_t GetVersionNumber() const { return version_number_; }

   uint64_t GetAverageValueSize() const {
-    if (num_non_deletions_ == 0) {
+    if (accumulated_num_non_deletions_ == 0) {
       return 0;
     }
-    assert(total_raw_key_size_ + total_raw_value_size_ > 0);
-    assert(total_file_size_ > 0);
-    return total_raw_value_size_ / num_non_deletions_ * total_file_size_ /
-           (total_raw_key_size_ + total_raw_value_size_);
+    assert(accumulated_raw_key_size_ + accumulated_raw_value_size_ > 0);
+    assert(accumulated_file_size_ > 0);
+    return accumulated_raw_value_size_ /
+           accumulated_num_non_deletions_ *
+           accumulated_file_size_ /
+           (accumulated_raw_key_size_ + accumulated_raw_value_size_);
   }

   // REQUIRES: lock is held
@@ -268,14 +270,17 @@ class Version {
   // Update num_non_empty_levels_.
   void UpdateNumNonEmptyLevels();

-  // The helper function of UpdateTemporaryStats, which may fill the missing
+  // The helper function of UpdateAccumulatedStats, which may fill the missing
   // fields of file_mata from its associated TableProperties.
   // Returns true if it does initialize FileMetaData.
   bool MaybeInitializeFileMetaData(FileMetaData* file_meta);

-  // Update the temporary stats associated with the current version.
-  // This temporary stats will be used in compaction.
-  void UpdateTemporaryStats();
+  // Update the accumulated stats from a file-meta.
+  void UpdateAccumulatedStats(FileMetaData* file_meta);
+
+  // Update the accumulated stats associated with the current version.
+  // This accumulated stats will be used in compaction.
+  void UpdateAccumulatedStats();

   // Sort all files for this version based on their file size and
   // record results in files_by_size_. The largest files are listed first.
@@ -337,16 +342,19 @@ class Version {
   Version(ColumnFamilyData* cfd, VersionSet* vset, uint64_t version_number = 0);

-  // total file size
-  uint64_t total_file_size_;
-  // the total size of all raw keys.
-  uint64_t total_raw_key_size_;
-  // the total size of all raw values.
-  uint64_t total_raw_value_size_;
+  // the following are the sampled temporary stats.
+  // the current accumulated size of sampled files.
+  uint64_t accumulated_file_size_;
+  // the current accumulated size of all raw keys based on the sampled files.
+  uint64_t accumulated_raw_key_size_;
+  // the current accumulated size of all raw keys based on the sampled files.
+  uint64_t accumulated_raw_value_size_;
   // total number of non-deletion entries
-  uint64_t num_non_deletions_;
+  uint64_t accumulated_num_non_deletions_;
   // total number of deletion entries
-  uint64_t num_deletions_;
+  uint64_t accumulated_num_deletions_;
+  // the number of samples
+  uint64_t num_samples_;

   ~Version();
