diff --git a/db/version_set.cc b/db/version_set.cc index 05e7c7053..64ebb1427 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -590,6 +590,159 @@ bool Version::UpdateStats(const GetStats& stats) { return false; } +void Version::Finalize(std::vector& size_being_compacted) { + // Pre-sort level0 for Get() + if (vset_->options_->compaction_style == kCompactionStyleUniversal) { + std::sort(files_[0].begin(), files_[0].end(), NewestFirstBySeqNo); + } else { + std::sort(files_[0].begin(), files_[0].end(), NewestFirst); + } + + double max_score = 0; + int max_score_level = 0; + + int num_levels_to_check = + (vset_->options_->compaction_style != kCompactionStyleUniversal) + ? NumberLevels() - 1 + : 1; + + for (int level = 0; level < num_levels_to_check; level++) { + double score; + if (level == 0) { + // We treat level-0 specially by bounding the number of files + // instead of number of bytes for two reasons: + // + // (1) With larger write-buffer sizes, it is nice not to do too + // many level-0 compactions. + // + // (2) The files in level-0 are merged on every read and + // therefore we wish to avoid too many files when the individual + // file size is small (perhaps because of a small write-buffer + // setting, or very high compression ratios, or lots of + // overwrites/deletions). 
+ int numfiles = 0; + for (unsigned int i = 0; i < files_[level].size(); i++) { + if (!files_[level][i]->being_compacted) { + numfiles++; + } + } + + // If we are slowing down writes, then we better compact that first + if (numfiles >= vset_->options_->level0_stop_writes_trigger) { + score = 1000000; + // Log(options_->info_log, "XXX score l0 = 1000000000 max"); + } else if (numfiles >= vset_->options_->level0_slowdown_writes_trigger) { + score = 10000; + // Log(options_->info_log, "XXX score l0 = 1000000 medium"); + } else { + score = static_cast(numfiles) / + vset_->options_->level0_file_num_compaction_trigger; + if (score >= 1) { + // Log(options_->info_log, "XXX score l0 = %d least", (int)score); + } + } + } else { + // Compute the ratio of current size to size limit. + const uint64_t level_bytes = + TotalFileSize(files_[level]) - size_being_compacted[level]; + score = static_cast(level_bytes) / vset_->MaxBytesForLevel(level); + if (score > 1) { + // Log(options_->info_log, "XXX score l%d = %d ", level, (int)score); + } + if (max_score < score) { + max_score = score; + max_score_level = level; + } + } + compaction_level_[level] = level; + compaction_score_[level] = score; + } + + // update the max compaction score in levels 1 to n-1 + max_compaction_score_ = max_score; + max_compaction_score_level_ = max_score_level; + + // sort all the levels based on their score. Higher scores get listed + // first. Use bubble sort because the number of entries is small. 
+ for (int i = 0; i < NumberLevels() - 2; i++) { + for (int j = i + 1; j < NumberLevels() - 1; j++) { + if (compaction_score_[i] < compaction_score_[j]) { + double score = compaction_score_[i]; + int level = compaction_level_[i]; + compaction_score_[i] = compaction_score_[j]; + compaction_level_[i] = compaction_level_[j]; + compaction_score_[j] = score; + compaction_level_[j] = level; + } + } + } +} + +namespace { + +// Comparator that is used to sort files based on their size +// In normal mode: descending size +bool CompareSizeDescending(const Version::Fsize& first, + const Version::Fsize& second) { + return (first.file->file_size > second.file->file_size); +} +// A static comparator used to sort files based on their seqno +// In universal style : descending seqno +bool CompareSeqnoDescending(const Version::Fsize& first, + const Version::Fsize& second) { + if (first.file->smallest_seqno > second.file->smallest_seqno) { + assert(first.file->largest_seqno > second.file->largest_seqno); + return true; + } + assert(first.file->largest_seqno <= second.file->largest_seqno); + return false; +} + +} // anonymous namespace + +void Version::UpdateFilesBySize() { + // No need to sort the highest level because it is never compacted. + int max_level = + (vset_->options_->compaction_style == kCompactionStyleUniversal) + ? 
NumberLevels() + : NumberLevels() - 1; + + for (int level = 0; level < max_level; level++) { + const std::vector& files = files_[level]; + std::vector& files_by_size = files_by_size_[level]; + assert(files_by_size.size() == 0); + + // populate a temp vector for sorting based on size + std::vector temp(files.size()); + for (unsigned int i = 0; i < files.size(); i++) { + temp[i].index = i; + temp[i].file = files[i]; + } + + // sort the top number_of_files_to_sort_ based on file size + if (vset_->options_->compaction_style == kCompactionStyleUniversal) { + int num = temp.size(); + std::partial_sort(temp.begin(), temp.begin() + num, temp.end(), + CompareSeqnoDescending); + } else { + int num = Version::number_of_files_to_sort_; + if (num > (int)temp.size()) { + num = temp.size(); + } + std::partial_sort(temp.begin(), temp.begin() + num, temp.end(), + CompareSizeDescending); + } + assert(temp.size() == files.size()); + + // initialize files_by_size_ + for (unsigned int i = 0; i < temp.size(); i++) { + files_by_size.push_back(temp[i].index); + } + next_file_to_compact_by_size_[level] = 0; + assert(files_[level].size() == files_by_size_[level].size()); + } +} + void Version::Ref() { ++refs_; } @@ -1344,8 +1497,8 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu, // The calls to Finalize and UpdateFilesBySize are cpu-heavy // and is best called outside the mutex. 
- Finalize(v, size_being_compacted); - UpdateFilesBySize(v); + v->Finalize(size_being_compacted); + v->UpdateFilesBySize(); // Write new record to MANIFEST log if (s.ok()) { @@ -1580,7 +1733,7 @@ Status VersionSet::Recover() { // Install recovered version std::vector size_being_compacted(v->NumberLevels() - 1); SizeBeingCompacted(size_being_compacted); - Finalize(v, size_being_compacted); + v->Finalize(size_being_compacted); manifest_file_size_ = manifest_file_size; AppendVersion(v); @@ -1712,7 +1865,7 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname, // Install recovered version std::vector size_being_compacted(v->NumberLevels() - 1); SizeBeingCompacted(size_being_compacted); - Finalize(v, size_being_compacted); + v->Finalize(size_being_compacted); AppendVersion(v); manifest_file_number_ = next_file; @@ -1740,158 +1893,6 @@ void VersionSet::MarkFileNumberUsed(uint64_t number) { } } -void VersionSet::Finalize(Version* v, - std::vector& size_being_compacted) { - // Pre-sort level0 for Get() - if (options_->compaction_style == kCompactionStyleUniversal) { - std::sort(v->files_[0].begin(), v->files_[0].end(), NewestFirstBySeqNo); - } else { - std::sort(v->files_[0].begin(), v->files_[0].end(), NewestFirst); - } - - double max_score = 0; - int max_score_level = 0; - - int num_levels_to_check = - (options_->compaction_style != kCompactionStyleUniversal) ? - v->NumberLevels() - 1 : 1; - - for (int level = 0; level < num_levels_to_check; level++) { - - double score; - if (level == 0) { - // We treat level-0 specially by bounding the number of files - // instead of number of bytes for two reasons: - // - // (1) With larger write-buffer sizes, it is nice not to do too - // many level-0 compactions. 
- // - // (2) The files in level-0 are merged on every read and - // therefore we wish to avoid too many files when the individual - // file size is small (perhaps because of a small write-buffer - // setting, or very high compression ratios, or lots of - // overwrites/deletions). - int numfiles = 0; - for (unsigned int i = 0; i < v->files_[level].size(); i++) { - if (!v->files_[level][i]->being_compacted) { - numfiles++; - } - } - - // If we are slowing down writes, then we better compact that first - if (numfiles >= options_->level0_stop_writes_trigger) { - score = 1000000; - // Log(options_->info_log, "XXX score l0 = 1000000000 max"); - } else if (numfiles >= options_->level0_slowdown_writes_trigger) { - score = 10000; - // Log(options_->info_log, "XXX score l0 = 1000000 medium"); - } else { - score = numfiles / - static_cast(options_->level0_file_num_compaction_trigger); - if (score >= 1) { - // Log(options_->info_log, "XXX score l0 = %d least", (int)score); - } - } - } else { - // Compute the ratio of current size to size limit. - const uint64_t level_bytes = TotalFileSize(v->files_[level]) - - size_being_compacted[level]; - score = static_cast(level_bytes) / MaxBytesForLevel(level); - if (score > 1) { - // Log(options_->info_log, "XXX score l%d = %d ", level, (int)score); - } - if (max_score < score) { - max_score = score; - max_score_level = level; - } - } - v->compaction_level_[level] = level; - v->compaction_score_[level] = score; - } - - // update the max compaction score in levels 1 to n-1 - v->max_compaction_score_ = max_score; - v->max_compaction_score_level_ = max_score_level; - - // sort all the levels based on their score. Higher scores get listed - // first. Use bubble sort because the number of entries are small. 
- for (int i = 0; i < v->NumberLevels() - 2; i++) { - for (int j = i + 1; j < v->NumberLevels() - 1; j++) { - if (v->compaction_score_[i] < v->compaction_score_[j]) { - double score = v->compaction_score_[i]; - int level = v->compaction_level_[i]; - v->compaction_score_[i] = v->compaction_score_[j]; - v->compaction_level_[i] = v->compaction_level_[j]; - v->compaction_score_[j] = score; - v->compaction_level_[j] = level; - } - } - } -} - -// A static compator used to sort files based on their size -// In normal mode: descending size -static bool compareSizeDescending(const VersionSet::Fsize& first, - const VersionSet::Fsize& second) { - return (first.file->file_size > second.file->file_size); -} -// A static compator used to sort files based on their seqno -// In universal style : descending seqno -static bool compareSeqnoDescending(const VersionSet::Fsize& first, - const VersionSet::Fsize& second) { - if (first.file->smallest_seqno > second.file->smallest_seqno) { - assert(first.file->largest_seqno > second.file->largest_seqno); - return true; - } - assert(first.file->largest_seqno <= second.file->largest_seqno); - return false; -} - -// sort all files in level1 to level(n-1) based on file size -void VersionSet::UpdateFilesBySize(Version* v) { - - // No need to sort the highest level because it is never compacted. - int max_level = (options_->compaction_style == kCompactionStyleUniversal) - ? 
v->NumberLevels() - : v->NumberLevels() - 1; - - for (int level = 0; level < max_level; level++) { - - const std::vector& files = v->files_[level]; - std::vector& files_by_size = v->files_by_size_[level]; - assert(files_by_size.size() == 0); - - // populate a temp vector for sorting based on size - std::vector temp(files.size()); - for (unsigned int i = 0; i < files.size(); i++) { - temp[i].index = i; - temp[i].file = files[i]; - } - - // sort the top number_of_files_to_sort_ based on file size - if (options_->compaction_style == kCompactionStyleUniversal) { - int num = temp.size(); - std::partial_sort(temp.begin(), temp.begin() + num, - temp.end(), compareSeqnoDescending); - } else { - int num = Version::number_of_files_to_sort_; - if (num > (int)temp.size()) { - num = temp.size(); - } - std::partial_sort(temp.begin(), temp.begin() + num, - temp.end(), compareSizeDescending); - } - assert(temp.size() == files.size()); - - // initialize files_by_size_ - for (unsigned int i = 0; i < temp.size(); i++) { - files_by_size.push_back(temp[i].index); - } - v->next_file_to_compact_by_size_[level] = 0; - assert(v->files_[level].size() == v->files_by_size_[level].size()); - } -} - Status VersionSet::WriteSnapshot(log::Writer* log) { // TODO: Break up into multiple records to reduce memory usage on recovery? @@ -2586,7 +2587,7 @@ Compaction* VersionSet::PickCompaction() { // and also in LogAndApply(), otherwise the values could be stale. std::vector size_being_compacted(NumberLevels()-1); current_->vset_->SizeBeingCompacted(size_being_compacted); - Finalize(current_, size_being_compacted); + current_->Finalize(size_being_compacted); // In universal style of compaction, compact L0 files back into L0. 
if (options_->compaction_style == kCompactionStyleUniversal) { diff --git a/db/version_set.h b/db/version_set.h index 319067d1a..8651a6eb3 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -87,6 +87,11 @@ class Version { // REQUIRES: lock is held bool UpdateStats(const GetStats& stats); + // Updates internal structures that keep track of compaction scores + // We use compaction scores to figure out which compaction to do next + // Also pre-sorts level0 files for Get() + void Finalize(std::vector& size_being_compacted); + // Reference count management (so Versions do not disappear out from // under live iterators) void Ref(); @@ -170,6 +175,12 @@ class Version { // Returns the version nuber of this version uint64_t GetVersionNumber() const { return version_number_; } + // used to sort files by size + struct Fsize { + int index; + FileMetaData* file; + }; + private: friend class Compaction; friend class VersionSet; @@ -182,6 +193,10 @@ class Version { bool PrefixMayMatch(const ReadOptions& options, const EnvOptions& soptions, const Slice& internal_prefix, Iterator* level_iter) const; + // Sort all files for this version based on their file size and + // record results in files_by_size_. The largest files are listed first. + void UpdateFilesBySize(); + VersionSet* vset_; // VersionSet to which this Version belongs Version* next_; // Next version in linked list Version* prev_; // Previous version in linked list @@ -417,16 +432,6 @@ class VersionSet { // pick the same files to compact. bool VerifyCompactionFileConsistency(Compaction* c); - // used to sort files by size - typedef struct fsize { - int index; - FileMetaData* file; - } Fsize; - - // Sort all files for this version based on their file size and - // record results in files_by_size_. The largest files are listed first. - void UpdateFilesBySize(Version *v); - // Get the max file size in a given level. 
uint64_t MaxFileSizeForLevel(int level); @@ -449,8 +454,6 @@ class VersionSet { void Init(int num_levels); - void Finalize(Version* v, std::vector&); - void GetRange(const std::vector& inputs, InternalKey* smallest, InternalKey* largest);