From ab8f6c01a6c48fd7b8c752a3ef0ef8640065dd48 Mon Sep 17 00:00:00 2001 From: Zhongyi Xie Date: Thu, 30 May 2019 21:30:41 -0700 Subject: [PATCH] move LevelCompactionPicker to a separate file (#5369) Summary: In order to improve code readability, this PR moves LevelCompactionBuilder and LevelCompactionPicker to compaction_picker_level.h and .cc Pull Request resolved: https://github.com/facebook/rocksdb/pull/5369 Differential Revision: D15540172 Pulled By: miasantreble fbshipit-source-id: c1a578b93f127cd63661b53f32b356e6edd349af --- CMakeLists.txt | 3 +- TARGETS | 1 + db/column_family.cc | 1 + db/compaction_picker.cc | 534 -------------------------------- db/compaction_picker.h | 17 -- db/compaction_picker_level.cc | 558 ++++++++++++++++++++++++++++++++++ db/compaction_picker_level.h | 32 ++ db/compaction_picker_test.cc | 1 + src.mk | 1 + 9 files changed, 596 insertions(+), 552 deletions(-) create mode 100644 db/compaction_picker_level.cc create mode 100644 db/compaction_picker_level.h diff --git a/CMakeLists.txt b/CMakeLists.txt index c4dc2500f..3ddea95de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -485,6 +485,7 @@ set(SOURCES db/compaction_job.cc db/compaction_picker.cc db/compaction_picker_fifo.cc + db/compaction_picker_level.cc db/compaction_picker_universal.cc db/convenience.cc db/db_filesnapshot.cc @@ -945,7 +946,7 @@ if(WITH_TESTS) table/block_based/block_test.cc table/block_based/data_block_hash_index_test.cc table/block_based/full_filter_block_test.cc - table/block_based/partitioned_filter_block_test.cc + table/block_based/partitioned_filter_block_test.cc table/cleanable_test.cc table/cuckoo/cuckoo_table_builder_test.cc table/cuckoo/cuckoo_table_reader_test.cc diff --git a/TARGETS b/TARGETS index a59af2fa6..dc39f87bc 100644 --- a/TARGETS +++ b/TARGETS @@ -88,6 +88,7 @@ cpp_library( "db/compaction_job.cc", "db/compaction_picker.cc", "db/compaction_picker_fifo.cc", + "db/compaction_picker_level.cc", "db/compaction_picker_universal.cc", "db/convenience.cc", "db/db_filesnapshot.cc", diff --git a/db/column_family.cc b/db/column_family.cc index 84f521cd7..fde1996ae 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -21,6 +21,7 @@ #include "db/compaction_picker.h" #include "db/compaction_picker_fifo.h" +#include "db/compaction_picker_level.h" #include "db/compaction_picker_universal.h" #include "db/db_impl.h" #include "db/internal_stats.h" diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index b25f6cb08..bfe13828b 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -1111,538 +1111,4 @@ bool CompactionPicker::GetOverlappingL0Files( return true; } -bool LevelCompactionPicker::NeedsCompaction( - const VersionStorageInfo* vstorage) const { - if (!vstorage->ExpiredTtlFiles().empty()) { - return true; - } - if (!vstorage->FilesMarkedForPeriodicCompaction().empty()) { - return true; - } - if (!vstorage->BottommostFilesMarkedForCompaction().empty()) { - return true; - } - if (!vstorage->FilesMarkedForCompaction().empty()) { - return true; - } - for (int i = 0; i <= vstorage->MaxInputLevel(); i++) { - if (vstorage->CompactionScore(i) >= 1) { - return true; - } - } - return false; -} - -namespace { -// A class to build a leveled compaction step-by-step. -class LevelCompactionBuilder { - public: - LevelCompactionBuilder(const std::string& cf_name, - VersionStorageInfo* vstorage, - CompactionPicker* compaction_picker, - LogBuffer* log_buffer, - const MutableCFOptions& mutable_cf_options, - const ImmutableCFOptions& ioptions) - : cf_name_(cf_name), - vstorage_(vstorage), - compaction_picker_(compaction_picker), - log_buffer_(log_buffer), - mutable_cf_options_(mutable_cf_options), - ioptions_(ioptions) {} - - // Pick and return a compaction. - Compaction* PickCompaction(); - - // Pick the initial files to compact to the next level. (or together - // in Intra-L0 compactions) - void SetupInitialFiles(); - - // If the initial files are from L0 level, pick other L0 - // files if needed. - bool SetupOtherL0FilesIfNeeded(); - - // Based on initial files, setup other files need to be compacted - // in this compaction, accordingly. - bool SetupOtherInputsIfNeeded(); - - Compaction* GetCompaction(); - - // For the specfied level, pick a file that we want to compact. - // Returns false if there is no file to compact. - // If it returns true, inputs->files.size() will be exactly one. - // If level is 0 and there is already a compaction on that level, this - // function will return false. - bool PickFileToCompact(); - - // For L0->L0, picks the longest span of files that aren't currently - // undergoing compaction for which work-per-deleted-file decreases. The span - // always starts from the newest L0 file. - // - // Intra-L0 compaction is independent of all other files, so it can be - // performed even when L0->base_level compactions are blocked. - // - // Returns true if `inputs` is populated with a span of files to be compacted; - // otherwise, returns false. - bool PickIntraL0Compaction(); - - void PickExpiredTtlFiles(); - - void PickFilesMarkedForPeriodicCompaction(); - - const std::string& cf_name_; - VersionStorageInfo* vstorage_; - CompactionPicker* compaction_picker_; - LogBuffer* log_buffer_; - int start_level_ = -1; - int output_level_ = -1; - int parent_index_ = -1; - int base_index_ = -1; - double start_level_score_ = 0; - bool is_manual_ = false; - CompactionInputFiles start_level_inputs_; - std::vector compaction_inputs_; - CompactionInputFiles output_level_inputs_; - std::vector grandparents_; - CompactionReason compaction_reason_ = CompactionReason::kUnknown; - - const MutableCFOptions& mutable_cf_options_; - const ImmutableCFOptions& ioptions_; - // Pick a path ID to place a newly generated file, with its level - static uint32_t GetPathId(const ImmutableCFOptions& ioptions, - const MutableCFOptions& mutable_cf_options, - int level); - - static const int kMinFilesForIntraL0Compaction = 4; -}; - -void LevelCompactionBuilder::PickExpiredTtlFiles() { - if (vstorage_->ExpiredTtlFiles().empty()) { - return; - } - - auto continuation = [&](std::pair level_file) { - // If it's being compacted it has nothing to do here. - // If this assert() fails that means that some function marked some - // files as being_compacted, but didn't call ComputeCompactionScore() - assert(!level_file.second->being_compacted); - start_level_ = level_file.first; - output_level_ = - (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1; - - if ((start_level_ == vstorage_->num_non_empty_levels() - 1) || - (start_level_ == 0 && - !compaction_picker_->level0_compactions_in_progress()->empty())) { - return false; - } - - start_level_inputs_.files = {level_file.second}; - start_level_inputs_.level = start_level_; - return compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, - &start_level_inputs_); - }; - - for (auto& level_file : vstorage_->ExpiredTtlFiles()) { - if (continuation(level_file)) { - // found the compaction! - return; - } - } - - start_level_inputs_.files.clear(); -} - -void LevelCompactionBuilder::PickFilesMarkedForPeriodicCompaction() { - if (vstorage_->FilesMarkedForPeriodicCompaction().empty()) { - return; - } - - auto continuation = [&](std::pair level_file) { - // If it's being compacted it has nothing to do here. - // If this assert() fails that means that some function marked some - // files as being_compacted, but didn't call ComputeCompactionScore() - assert(!level_file.second->being_compacted); - output_level_ = start_level_ = level_file.first; - - if (start_level_ == 0 && - !compaction_picker_->level0_compactions_in_progress()->empty()) { - return false; - } - - start_level_inputs_.files = {level_file.second}; - start_level_inputs_.level = start_level_; - return compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, - &start_level_inputs_); - }; - - for (auto& level_file : vstorage_->FilesMarkedForPeriodicCompaction()) { - if (continuation(level_file)) { - // found the compaction! - return; - } - } - - start_level_inputs_.files.clear(); -} - -void LevelCompactionBuilder::SetupInitialFiles() { - // Find the compactions by size on all levels. - bool skipped_l0_to_base = false; - for (int i = 0; i < compaction_picker_->NumberLevels() - 1; i++) { - start_level_score_ = vstorage_->CompactionScore(i); - start_level_ = vstorage_->CompactionScoreLevel(i); - assert(i == 0 || start_level_score_ <= vstorage_->CompactionScore(i - 1)); - if (start_level_score_ >= 1) { - if (skipped_l0_to_base && start_level_ == vstorage_->base_level()) { - // If L0->base_level compaction is pending, don't schedule further - // compaction from base level. Otherwise L0->base_level compaction - // may starve. - continue; - } - output_level_ = - (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1; - if (PickFileToCompact()) { - // found the compaction! - if (start_level_ == 0) { - // L0 score = `num L0 files` / `level0_file_num_compaction_trigger` - compaction_reason_ = CompactionReason::kLevelL0FilesNum; - } else { - // L1+ score = `Level files size` / `MaxBytesForLevel` - compaction_reason_ = CompactionReason::kLevelMaxLevelSize; - } - break; - } else { - // didn't find the compaction, clear the inputs - start_level_inputs_.clear(); - if (start_level_ == 0) { - skipped_l0_to_base = true; - // L0->base_level may be blocked due to ongoing L0->base_level - // compactions. It may also be blocked by an ongoing compaction from - // base_level downwards. - // - // In these cases, to reduce L0 file count and thus reduce likelihood - // of write stalls, we can attempt compacting a span of files within - // L0. - if (PickIntraL0Compaction()) { - output_level_ = 0; - compaction_reason_ = CompactionReason::kLevelL0FilesNum; - break; - } - } - } - } - } - - // if we didn't find a compaction, check if there are any files marked for - // compaction - if (start_level_inputs_.empty()) { - parent_index_ = base_index_ = -1; - - compaction_picker_->PickFilesMarkedForCompaction( - cf_name_, vstorage_, &start_level_, &output_level_, &start_level_inputs_); - if (!start_level_inputs_.empty()) { - is_manual_ = true; - compaction_reason_ = CompactionReason::kFilesMarkedForCompaction; - return; - } - } - - // Bottommost Files Compaction on deleting tombstones - if (start_level_inputs_.empty()) { - size_t i; - for (i = 0; i < vstorage_->BottommostFilesMarkedForCompaction().size(); - ++i) { - auto& level_and_file = vstorage_->BottommostFilesMarkedForCompaction()[i]; - assert(!level_and_file.second->being_compacted); - start_level_inputs_.level = output_level_ = start_level_ = - level_and_file.first; - start_level_inputs_.files = {level_and_file.second}; - if (compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, - &start_level_inputs_)) { - break; - } - } - if (i == vstorage_->BottommostFilesMarkedForCompaction().size()) { - start_level_inputs_.clear(); - } else { - assert(!start_level_inputs_.empty()); - compaction_reason_ = CompactionReason::kBottommostFiles; - return; - } - } - - // TTL Compaction - if (start_level_inputs_.empty()) { - PickExpiredTtlFiles(); - if (!start_level_inputs_.empty()) { - compaction_reason_ = CompactionReason::kTtl; - return; - } - } - - // Periodic Compaction - if (start_level_inputs_.empty()) { - PickFilesMarkedForPeriodicCompaction(); - if (!start_level_inputs_.empty()) { - compaction_reason_ = CompactionReason::kPeriodicCompaction; - return; - } - } -} - -bool LevelCompactionBuilder::SetupOtherL0FilesIfNeeded() { - if (start_level_ == 0 && output_level_ != 0) { - return compaction_picker_->GetOverlappingL0Files( - vstorage_, &start_level_inputs_, output_level_, &parent_index_); - } - return true; -} - -bool LevelCompactionBuilder::SetupOtherInputsIfNeeded() { - // Setup input files from output level. For output to L0, we only compact - // spans of files that do not interact with any pending compactions, so don't - // need to consider other levels. - if (output_level_ != 0) { - output_level_inputs_.level = output_level_; - if (!compaction_picker_->SetupOtherInputs( - cf_name_, mutable_cf_options_, vstorage_, &start_level_inputs_, - &output_level_inputs_, &parent_index_, base_index_)) { - return false; - } - - compaction_inputs_.push_back(start_level_inputs_); - if (!output_level_inputs_.empty()) { - compaction_inputs_.push_back(output_level_inputs_); - } - - // In some edge cases we could pick a compaction that will be compacting - // a key range that overlap with another running compaction, and both - // of them have the same output level. This could happen if - // (1) we are running a non-exclusive manual compaction - // (2) AddFile ingest a new file into the LSM tree - // We need to disallow this from happening. - if (compaction_picker_->FilesRangeOverlapWithCompaction(compaction_inputs_, - output_level_)) { - // This compaction output could potentially conflict with the output - // of a currently running compaction, we cannot run it. - return false; - } - compaction_picker_->GetGrandparents(vstorage_, start_level_inputs_, - output_level_inputs_, &grandparents_); - } else { - compaction_inputs_.push_back(start_level_inputs_); - } - return true; -} - -Compaction* LevelCompactionBuilder::PickCompaction() { - // Pick up the first file to start compaction. It may have been extended - // to a clean cut. - SetupInitialFiles(); - if (start_level_inputs_.empty()) { - return nullptr; - } - assert(start_level_ >= 0 && output_level_ >= 0); - - // If it is a L0 -> base level compaction, we need to set up other L0 - // files if needed. - if (!SetupOtherL0FilesIfNeeded()) { - return nullptr; - } - - // Pick files in the output level and expand more files in the start level - // if needed. - if (!SetupOtherInputsIfNeeded()) { - return nullptr; - } - - // Form a compaction object containing the files we picked. - Compaction* c = GetCompaction(); - - TEST_SYNC_POINT_CALLBACK("LevelCompactionPicker::PickCompaction:Return", c); - - return c; -} - -Compaction* LevelCompactionBuilder::GetCompaction() { - auto c = new Compaction( - vstorage_, ioptions_, mutable_cf_options_, std::move(compaction_inputs_), - output_level_, - MaxFileSizeForLevel(mutable_cf_options_, output_level_, - ioptions_.compaction_style, vstorage_->base_level(), - ioptions_.level_compaction_dynamic_level_bytes), - mutable_cf_options_.max_compaction_bytes, - GetPathId(ioptions_, mutable_cf_options_, output_level_), - GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, - output_level_, vstorage_->base_level()), - GetCompressionOptions(ioptions_, vstorage_, output_level_), - /* max_subcompactions */ 0, std::move(grandparents_), is_manual_, - start_level_score_, false /* deletion_compaction */, compaction_reason_); - - // If it's level 0 compaction, make sure we don't execute any other level 0 - // compactions in parallel - compaction_picker_->RegisterCompaction(c); - - // Creating a compaction influences the compaction score because the score - // takes running compactions into account (by skipping files that are already - // being compacted). Since we just changed compaction score, we recalculate it - // here - vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_); - return c; -} - -/* - * Find the optimal path to place a file - * Given a level, finds the path where levels up to it will fit in levels - * up to and including this path - */ -uint32_t LevelCompactionBuilder::GetPathId( - const ImmutableCFOptions& ioptions, - const MutableCFOptions& mutable_cf_options, int level) { - uint32_t p = 0; - assert(!ioptions.cf_paths.empty()); - - // size remaining in the most recent path - uint64_t current_path_size = ioptions.cf_paths[0].target_size; - - uint64_t level_size; - int cur_level = 0; - - // max_bytes_for_level_base denotes L1 size. - // We estimate L0 size to be the same as L1. - level_size = mutable_cf_options.max_bytes_for_level_base; - - // Last path is the fallback - while (p < ioptions.cf_paths.size() - 1) { - if (level_size <= current_path_size) { - if (cur_level == level) { - // Does desired level fit in this path? - return p; - } else { - current_path_size -= level_size; - if (cur_level > 0) { - if (ioptions.level_compaction_dynamic_level_bytes) { - // Currently, level_compaction_dynamic_level_bytes is ignored when - // multiple db paths are specified. https://github.com/facebook/ - // rocksdb/blob/master/db/column_family.cc. - // Still, adding this check to avoid accidentally using - // max_bytes_for_level_multiplier_additional - level_size = static_cast( - level_size * mutable_cf_options.max_bytes_for_level_multiplier); - } else { - level_size = static_cast( - level_size * mutable_cf_options.max_bytes_for_level_multiplier * - mutable_cf_options.MaxBytesMultiplerAdditional(cur_level)); - } - } - cur_level++; - continue; - } - } - p++; - current_path_size = ioptions.cf_paths[p].target_size; - } - return p; -} - -bool LevelCompactionBuilder::PickFileToCompact() { - // level 0 files are overlapping. So we cannot pick more - // than one concurrent compactions at this level. This - // could be made better by looking at key-ranges that are - // being compacted at level 0. - if (start_level_ == 0 && - !compaction_picker_->level0_compactions_in_progress()->empty()) { - TEST_SYNC_POINT("LevelCompactionPicker::PickCompactionBySize:0"); - return false; - } - - start_level_inputs_.clear(); - - assert(start_level_ >= 0); - - // Pick the largest file in this level that is not already - // being compacted - const std::vector& file_size = - vstorage_->FilesByCompactionPri(start_level_); - const std::vector& level_files = - vstorage_->LevelFiles(start_level_); - - unsigned int cmp_idx; - for (cmp_idx = vstorage_->NextCompactionIndex(start_level_); - cmp_idx < file_size.size(); cmp_idx++) { - int index = file_size[cmp_idx]; - auto* f = level_files[index]; - - // do not pick a file to compact if it is being compacted - // from n-1 level. - if (f->being_compacted) { - continue; - } - - start_level_inputs_.files.push_back(f); - start_level_inputs_.level = start_level_; - if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, - &start_level_inputs_) || - compaction_picker_->FilesRangeOverlapWithCompaction( - {start_level_inputs_}, output_level_)) { - // A locked (pending compaction) input-level file was pulled in due to - // user-key overlap. - start_level_inputs_.clear(); - continue; - } - - // Now that input level is fully expanded, we check whether any output files - // are locked due to pending compaction. - // - // Note we rely on ExpandInputsToCleanCut() to tell us whether any output- - // level files are locked, not just the extra ones pulled in for user-key - // overlap. - InternalKey smallest, largest; - compaction_picker_->GetRange(start_level_inputs_, &smallest, &largest); - CompactionInputFiles output_level_inputs; - output_level_inputs.level = output_level_; - vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest, - &output_level_inputs.files); - if (!output_level_inputs.empty() && - !compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, - &output_level_inputs)) { - start_level_inputs_.clear(); - continue; - } - base_index_ = index; - break; - } - - // store where to start the iteration in the next call to PickCompaction - vstorage_->SetNextCompactionIndex(start_level_, cmp_idx); - - return start_level_inputs_.size() > 0; -} - -bool LevelCompactionBuilder::PickIntraL0Compaction() { - start_level_inputs_.clear(); - const std::vector& level_files = - vstorage_->LevelFiles(0 /* level */); - if (level_files.size() < - static_cast( - mutable_cf_options_.level0_file_num_compaction_trigger + 2) || - level_files[0]->being_compacted) { - // If L0 isn't accumulating much files beyond the regular trigger, don't - // resort to L0->L0 compaction yet. - return false; - } - return FindIntraL0Compaction(level_files, kMinFilesForIntraL0Compaction, - port::kMaxUint64, - mutable_cf_options_.max_compaction_bytes, - &start_level_inputs_); -} -} // namespace - -Compaction* LevelCompactionPicker::PickCompaction( - const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, LogBuffer* log_buffer) { - LevelCompactionBuilder builder(cf_name, vstorage, this, log_buffer, - mutable_cf_options, ioptions_); - return builder.PickCompaction(); -} - } // namespace rocksdb diff --git a/db/compaction_picker.h b/db/compaction_picker.h index 05895a267..437c8d304 100644 --- a/db/compaction_picker.h +++ b/db/compaction_picker.h @@ -236,23 +236,6 @@ class CompactionPicker { const InternalKeyComparator* const icmp_; }; -// Picking compactions for leveled compaction. See wiki page -// https://github.com/facebook/rocksdb/wiki/Leveled-Compaction -// for description of Leveled compaction. -class LevelCompactionPicker : public CompactionPicker { - public: - LevelCompactionPicker(const ImmutableCFOptions& ioptions, - const InternalKeyComparator* icmp) - : CompactionPicker(ioptions, icmp) {} - virtual Compaction* PickCompaction(const std::string& cf_name, - const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, - LogBuffer* log_buffer) override; - - virtual bool NeedsCompaction( - const VersionStorageInfo* vstorage) const override; -}; - #ifndef ROCKSDB_LITE // A dummy compaction that never triggers any automatic // compaction. diff --git a/db/compaction_picker_level.cc b/db/compaction_picker_level.cc new file mode 100644 index 000000000..70fe46c5b --- /dev/null +++ b/db/compaction_picker_level.cc @@ -0,0 +1,558 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/compaction_picker_level.h" + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#include +#include +#include + +#include "test_util/sync_point.h" +#include "util/log_buffer.h" + +namespace rocksdb { + +bool LevelCompactionPicker::NeedsCompaction( + const VersionStorageInfo* vstorage) const { + if (!vstorage->ExpiredTtlFiles().empty()) { + return true; + } + if (!vstorage->FilesMarkedForPeriodicCompaction().empty()) { + return true; + } + if (!vstorage->BottommostFilesMarkedForCompaction().empty()) { + return true; + } + if (!vstorage->FilesMarkedForCompaction().empty()) { + return true; + } + for (int i = 0; i <= vstorage->MaxInputLevel(); i++) { + if (vstorage->CompactionScore(i) >= 1) { + return true; + } + } + return false; +} + +namespace { +// A class to build a leveled compaction step-by-step. +class LevelCompactionBuilder { + public: + LevelCompactionBuilder(const std::string& cf_name, + VersionStorageInfo* vstorage, + CompactionPicker* compaction_picker, + LogBuffer* log_buffer, + const MutableCFOptions& mutable_cf_options, + const ImmutableCFOptions& ioptions) + : cf_name_(cf_name), + vstorage_(vstorage), + compaction_picker_(compaction_picker), + log_buffer_(log_buffer), + mutable_cf_options_(mutable_cf_options), + ioptions_(ioptions) {} + + // Pick and return a compaction. + Compaction* PickCompaction(); + + // Pick the initial files to compact to the next level. (or together + // in Intra-L0 compactions) + void SetupInitialFiles(); + + // If the initial files are from L0 level, pick other L0 + // files if needed. + bool SetupOtherL0FilesIfNeeded(); + + // Based on initial files, setup other files need to be compacted + // in this compaction, accordingly. + bool SetupOtherInputsIfNeeded(); + + Compaction* GetCompaction(); + + // For the specfied level, pick a file that we want to compact. + // Returns false if there is no file to compact. + // If it returns true, inputs->files.size() will be exactly one. + // If level is 0 and there is already a compaction on that level, this + // function will return false. + bool PickFileToCompact(); + + // For L0->L0, picks the longest span of files that aren't currently + // undergoing compaction for which work-per-deleted-file decreases. The span + // always starts from the newest L0 file. + // + // Intra-L0 compaction is independent of all other files, so it can be + // performed even when L0->base_level compactions are blocked. + // + // Returns true if `inputs` is populated with a span of files to be compacted; + // otherwise, returns false. + bool PickIntraL0Compaction(); + + void PickExpiredTtlFiles(); + + void PickFilesMarkedForPeriodicCompaction(); + + const std::string& cf_name_; + VersionStorageInfo* vstorage_; + CompactionPicker* compaction_picker_; + LogBuffer* log_buffer_; + int start_level_ = -1; + int output_level_ = -1; + int parent_index_ = -1; + int base_index_ = -1; + double start_level_score_ = 0; + bool is_manual_ = false; + CompactionInputFiles start_level_inputs_; + std::vector compaction_inputs_; + CompactionInputFiles output_level_inputs_; + std::vector grandparents_; + CompactionReason compaction_reason_ = CompactionReason::kUnknown; + + const MutableCFOptions& mutable_cf_options_; + const ImmutableCFOptions& ioptions_; + // Pick a path ID to place a newly generated file, with its level + static uint32_t GetPathId(const ImmutableCFOptions& ioptions, + const MutableCFOptions& mutable_cf_options, + int level); + + static const int kMinFilesForIntraL0Compaction = 4; +}; + +void LevelCompactionBuilder::PickExpiredTtlFiles() { + if (vstorage_->ExpiredTtlFiles().empty()) { + return; + } + + auto continuation = [&](std::pair level_file) { + // If it's being compacted it has nothing to do here. + // If this assert() fails that means that some function marked some + // files as being_compacted, but didn't call ComputeCompactionScore() + assert(!level_file.second->being_compacted); + start_level_ = level_file.first; + output_level_ = + (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1; + + if ((start_level_ == vstorage_->num_non_empty_levels() - 1) || + (start_level_ == 0 && + !compaction_picker_->level0_compactions_in_progress()->empty())) { + return false; + } + + start_level_inputs_.files = {level_file.second}; + start_level_inputs_.level = start_level_; + return compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &start_level_inputs_); + }; + + for (auto& level_file : vstorage_->ExpiredTtlFiles()) { + if (continuation(level_file)) { + // found the compaction! + return; + } + } + + start_level_inputs_.files.clear(); +} + +void LevelCompactionBuilder::PickFilesMarkedForPeriodicCompaction() { + if (vstorage_->FilesMarkedForPeriodicCompaction().empty()) { + return; + } + + auto continuation = [&](std::pair level_file) { + // If it's being compacted it has nothing to do here. + // If this assert() fails that means that some function marked some + // files as being_compacted, but didn't call ComputeCompactionScore() + assert(!level_file.second->being_compacted); + output_level_ = start_level_ = level_file.first; + + if (start_level_ == 0 && + !compaction_picker_->level0_compactions_in_progress()->empty()) { + return false; + } + + start_level_inputs_.files = {level_file.second}; + start_level_inputs_.level = start_level_; + return compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &start_level_inputs_); + }; + + for (auto& level_file : vstorage_->FilesMarkedForPeriodicCompaction()) { + if (continuation(level_file)) { + // found the compaction! + return; + } + } + + start_level_inputs_.files.clear(); +} + +void LevelCompactionBuilder::SetupInitialFiles() { + // Find the compactions by size on all levels. + bool skipped_l0_to_base = false; + for (int i = 0; i < compaction_picker_->NumberLevels() - 1; i++) { + start_level_score_ = vstorage_->CompactionScore(i); + start_level_ = vstorage_->CompactionScoreLevel(i); + assert(i == 0 || start_level_score_ <= vstorage_->CompactionScore(i - 1)); + if (start_level_score_ >= 1) { + if (skipped_l0_to_base && start_level_ == vstorage_->base_level()) { + // If L0->base_level compaction is pending, don't schedule further + // compaction from base level. Otherwise L0->base_level compaction + // may starve. + continue; + } + output_level_ = + (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1; + if (PickFileToCompact()) { + // found the compaction! + if (start_level_ == 0) { + // L0 score = `num L0 files` / `level0_file_num_compaction_trigger` + compaction_reason_ = CompactionReason::kLevelL0FilesNum; + } else { + // L1+ score = `Level files size` / `MaxBytesForLevel` + compaction_reason_ = CompactionReason::kLevelMaxLevelSize; + } + break; + } else { + // didn't find the compaction, clear the inputs + start_level_inputs_.clear(); + if (start_level_ == 0) { + skipped_l0_to_base = true; + // L0->base_level may be blocked due to ongoing L0->base_level + // compactions. It may also be blocked by an ongoing compaction from + // base_level downwards. + // + // In these cases, to reduce L0 file count and thus reduce likelihood + // of write stalls, we can attempt compacting a span of files within + // L0. + if (PickIntraL0Compaction()) { + output_level_ = 0; + compaction_reason_ = CompactionReason::kLevelL0FilesNum; + break; + } + } + } + } + } + + // if we didn't find a compaction, check if there are any files marked for + // compaction + if (start_level_inputs_.empty()) { + parent_index_ = base_index_ = -1; + + compaction_picker_->PickFilesMarkedForCompaction( + cf_name_, vstorage_, &start_level_, &output_level_, + &start_level_inputs_); + if (!start_level_inputs_.empty()) { + is_manual_ = true; + compaction_reason_ = CompactionReason::kFilesMarkedForCompaction; + return; + } + } + + // Bottommost Files Compaction on deleting tombstones + if (start_level_inputs_.empty()) { + size_t i; + for (i = 0; i < vstorage_->BottommostFilesMarkedForCompaction().size(); + ++i) { + auto& level_and_file = vstorage_->BottommostFilesMarkedForCompaction()[i]; + assert(!level_and_file.second->being_compacted); + start_level_inputs_.level = output_level_ = start_level_ = + level_and_file.first; + start_level_inputs_.files = {level_and_file.second}; + if (compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &start_level_inputs_)) { + break; + } + } + if (i == vstorage_->BottommostFilesMarkedForCompaction().size()) { + start_level_inputs_.clear(); + } else { + assert(!start_level_inputs_.empty()); + compaction_reason_ = CompactionReason::kBottommostFiles; + return; + } + } + + // TTL Compaction + if (start_level_inputs_.empty()) { + PickExpiredTtlFiles(); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kTtl; + return; + } + } + + // Periodic Compaction + if (start_level_inputs_.empty()) { + PickFilesMarkedForPeriodicCompaction(); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kPeriodicCompaction; + return; + } + } +} + +bool LevelCompactionBuilder::SetupOtherL0FilesIfNeeded() { + if (start_level_ == 0 && output_level_ != 0) { + return compaction_picker_->GetOverlappingL0Files( + vstorage_, &start_level_inputs_, output_level_, &parent_index_); + } + return true; +} + +bool LevelCompactionBuilder::SetupOtherInputsIfNeeded() { + // Setup input files from output level. For output to L0, we only compact + // spans of files that do not interact with any pending compactions, so don't + // need to consider other levels. + if (output_level_ != 0) { + output_level_inputs_.level = output_level_; + if (!compaction_picker_->SetupOtherInputs( + cf_name_, mutable_cf_options_, vstorage_, &start_level_inputs_, + &output_level_inputs_, &parent_index_, base_index_)) { + return false; + } + + compaction_inputs_.push_back(start_level_inputs_); + if (!output_level_inputs_.empty()) { + compaction_inputs_.push_back(output_level_inputs_); + } + + // In some edge cases we could pick a compaction that will be compacting + // a key range that overlap with another running compaction, and both + // of them have the same output level. This could happen if + // (1) we are running a non-exclusive manual compaction + // (2) AddFile ingest a new file into the LSM tree + // We need to disallow this from happening. + if (compaction_picker_->FilesRangeOverlapWithCompaction(compaction_inputs_, + output_level_)) { + // This compaction output could potentially conflict with the output + // of a currently running compaction, we cannot run it. + return false; + } + compaction_picker_->GetGrandparents(vstorage_, start_level_inputs_, + output_level_inputs_, &grandparents_); + } else { + compaction_inputs_.push_back(start_level_inputs_); + } + return true; +} + +Compaction* LevelCompactionBuilder::PickCompaction() { + // Pick up the first file to start compaction. It may have been extended + // to a clean cut. + SetupInitialFiles(); + if (start_level_inputs_.empty()) { + return nullptr; + } + assert(start_level_ >= 0 && output_level_ >= 0); + + // If it is a L0 -> base level compaction, we need to set up other L0 + // files if needed. + if (!SetupOtherL0FilesIfNeeded()) { + return nullptr; + } + + // Pick files in the output level and expand more files in the start level + // if needed. + if (!SetupOtherInputsIfNeeded()) { + return nullptr; + } + + // Form a compaction object containing the files we picked. + Compaction* c = GetCompaction(); + + TEST_SYNC_POINT_CALLBACK("LevelCompactionPicker::PickCompaction:Return", c); + + return c; +} + +Compaction* LevelCompactionBuilder::GetCompaction() { + auto c = new Compaction( + vstorage_, ioptions_, mutable_cf_options_, std::move(compaction_inputs_), + output_level_, + MaxFileSizeForLevel(mutable_cf_options_, output_level_, + ioptions_.compaction_style, vstorage_->base_level(), + ioptions_.level_compaction_dynamic_level_bytes), + mutable_cf_options_.max_compaction_bytes, + GetPathId(ioptions_, mutable_cf_options_, output_level_), + GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, + output_level_, vstorage_->base_level()), + GetCompressionOptions(ioptions_, vstorage_, output_level_), + /* max_subcompactions */ 0, std::move(grandparents_), is_manual_, + start_level_score_, false /* deletion_compaction */, compaction_reason_); + + // If it's level 0 compaction, make sure we don't execute any other level 0 + // compactions in parallel + compaction_picker_->RegisterCompaction(c); + + // Creating a compaction influences the compaction score because the score + // takes running compactions into account (by skipping files that are already + // being compacted). Since we just changed compaction score, we recalculate it + // here + vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_); + return c; +} + +/* + * Find the optimal path to place a file + * Given a level, finds the path where levels up to it will fit in levels + * up to and including this path + */ +uint32_t LevelCompactionBuilder::GetPathId( + const ImmutableCFOptions& ioptions, + const MutableCFOptions& mutable_cf_options, int level) { + uint32_t p = 0; + assert(!ioptions.cf_paths.empty()); + + // size remaining in the most recent path + uint64_t current_path_size = ioptions.cf_paths[0].target_size; + + uint64_t level_size; + int cur_level = 0; + + // max_bytes_for_level_base denotes L1 size. + // We estimate L0 size to be the same as L1. + level_size = mutable_cf_options.max_bytes_for_level_base; + + // Last path is the fallback + while (p < ioptions.cf_paths.size() - 1) { + if (level_size <= current_path_size) { + if (cur_level == level) { + // Does desired level fit in this path? + return p; + } else { + current_path_size -= level_size; + if (cur_level > 0) { + if (ioptions.level_compaction_dynamic_level_bytes) { + // Currently, level_compaction_dynamic_level_bytes is ignored when + // multiple db paths are specified. https://github.com/facebook/ + // rocksdb/blob/master/db/column_family.cc. + // Still, adding this check to avoid accidentally using + // max_bytes_for_level_multiplier_additional + level_size = static_cast( + level_size * mutable_cf_options.max_bytes_for_level_multiplier); + } else { + level_size = static_cast( + level_size * mutable_cf_options.max_bytes_for_level_multiplier * + mutable_cf_options.MaxBytesMultiplerAdditional(cur_level)); + } + } + cur_level++; + continue; + } + } + p++; + current_path_size = ioptions.cf_paths[p].target_size; + } + return p; +} + +bool LevelCompactionBuilder::PickFileToCompact() { + // level 0 files are overlapping. So we cannot pick more + // than one concurrent compactions at this level. This + // could be made better by looking at key-ranges that are + // being compacted at level 0. + if (start_level_ == 0 && + !compaction_picker_->level0_compactions_in_progress()->empty()) { + TEST_SYNC_POINT("LevelCompactionPicker::PickCompactionBySize:0"); + return false; + } + + start_level_inputs_.clear(); + + assert(start_level_ >= 0); + + // Pick the largest file in this level that is not already + // being compacted + const std::vector& file_size = + vstorage_->FilesByCompactionPri(start_level_); + const std::vector& level_files = + vstorage_->LevelFiles(start_level_); + + unsigned int cmp_idx; + for (cmp_idx = vstorage_->NextCompactionIndex(start_level_); + cmp_idx < file_size.size(); cmp_idx++) { + int index = file_size[cmp_idx]; + auto* f = level_files[index]; + + // do not pick a file to compact if it is being compacted + // from n-1 level. + if (f->being_compacted) { + continue; + } + + start_level_inputs_.files.push_back(f); + start_level_inputs_.level = start_level_; + if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &start_level_inputs_) || + compaction_picker_->FilesRangeOverlapWithCompaction( + {start_level_inputs_}, output_level_)) { + // A locked (pending compaction) input-level file was pulled in due to + // user-key overlap. + start_level_inputs_.clear(); + continue; + } + + // Now that input level is fully expanded, we check whether any output files + // are locked due to pending compaction. + // + // Note we rely on ExpandInputsToCleanCut() to tell us whether any output- + // level files are locked, not just the extra ones pulled in for user-key + // overlap. + InternalKey smallest, largest; + compaction_picker_->GetRange(start_level_inputs_, &smallest, &largest); + CompactionInputFiles output_level_inputs; + output_level_inputs.level = output_level_; + vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest, + &output_level_inputs.files); + if (!output_level_inputs.empty() && + !compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &output_level_inputs)) { + start_level_inputs_.clear(); + continue; + } + base_index_ = index; + break; + } + + // store where to start the iteration in the next call to PickCompaction + vstorage_->SetNextCompactionIndex(start_level_, cmp_idx); + + return start_level_inputs_.size() > 0; +} + +bool LevelCompactionBuilder::PickIntraL0Compaction() { + start_level_inputs_.clear(); + const std::vector& level_files = + vstorage_->LevelFiles(0 /* level */); + if (level_files.size() < + static_cast( + mutable_cf_options_.level0_file_num_compaction_trigger + 2) || + level_files[0]->being_compacted) { + // If L0 isn't accumulating much files beyond the regular trigger, don't + // resort to L0->L0 compaction yet. + return false; + } + return FindIntraL0Compaction( + level_files, kMinFilesForIntraL0Compaction, port::kMaxUint64, + mutable_cf_options_.max_compaction_bytes, &start_level_inputs_); +} +} // namespace + +Compaction* LevelCompactionPicker::PickCompaction( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + VersionStorageInfo* vstorage, LogBuffer* log_buffer) { + LevelCompactionBuilder builder(cf_name, vstorage, this, log_buffer, + mutable_cf_options, ioptions_); + return builder.PickCompaction(); +} +} // namespace rocksdb diff --git a/db/compaction_picker_level.h b/db/compaction_picker_level.h new file mode 100644 index 000000000..1d37fe50e --- /dev/null +++ b/db/compaction_picker_level.h @@ -0,0 +1,32 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include "db/compaction_picker.h" + +namespace rocksdb { +// Picking compactions for leveled compaction. See wiki page +// https://github.com/facebook/rocksdb/wiki/Leveled-Compaction +// for description of Leveled compaction. +class LevelCompactionPicker : public CompactionPicker { + public: + LevelCompactionPicker(const ImmutableCFOptions& ioptions, + const InternalKeyComparator* icmp) + : CompactionPicker(ioptions, icmp) {} + virtual Compaction* PickCompaction(const std::string& cf_name, + const MutableCFOptions& mutable_cf_options, + VersionStorageInfo* vstorage, + LogBuffer* log_buffer) override; + + virtual bool NeedsCompaction( + const VersionStorageInfo* vstorage) const override; +}; + +} // namespace rocksdb diff --git a/db/compaction_picker_test.cc b/db/compaction_picker_test.cc index dd33009eb..c3e9e450f 100644 --- a/db/compaction_picker_test.cc +++ b/db/compaction_picker_test.cc @@ -10,6 +10,7 @@ #include #include "db/compaction.h" #include "db/compaction_picker_fifo.h" +#include "db/compaction_picker_level.h" #include "db/compaction_picker_universal.h" #include "test_util/testharness.h" diff --git a/src.mk b/src.mk index c1ab36b8a..44013bc2e 100644 --- a/src.mk +++ b/src.mk @@ -12,6 +12,7 @@ LIB_SOURCES = \ db/compaction_job.cc \ db/compaction_picker.cc \ db/compaction_picker_fifo.cc \ + db/compaction_picker_level.cc \ db/compaction_picker_universal.cc \ db/convenience.cc \ db/db_filesnapshot.cc \