From 2297769b387af3c8f70ec12c10f17589f5daaf47 Mon Sep 17 00:00:00 2001
From: sdong
Date: Thu, 11 Aug 2022 17:10:36 -0700
Subject: [PATCH] Fix regression issue of too large score (#10518)

Summary:
https://github.com/facebook/rocksdb/pull/10057 caused a regression bug:
since the base level size is no longer adjusted based on L0 size, the
L0 score can become very large. This makes compaction heavily favor
L0->L1 compaction over L1->L2 compaction, and in some cases data gets
stuck in L1 without being moved down. We fix this by calculating the L0
score as size(L0)/size(L1) in the case where L0 is large.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10518

Test Plan: Run db_bench against data on tmpfs and confirm that the
behavior of data getting stuck in L1 goes away.

Reviewed By: ajkr

Differential Revision: D38603145

fbshipit-source-id: 4949e52dc28b54aacfe08417c6e6cc7e40a27225
---
 db/version_set.cc | 55 +++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/db/version_set.cc b/db/version_set.cc
index 74040270f..9d37db84f 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -2758,24 +2758,43 @@ void VersionStorageInfo::ComputeCompactionScore(
         // Level-based involves L0->L0 compactions that can lead to oversized
         // L0 files. Take into account size as well to avoid later giant
         // compactions to the base level.
-        uint64_t l0_target_size = mutable_cf_options.max_bytes_for_level_base;
-        if (immutable_options.level_compaction_dynamic_level_bytes &&
-            level_multiplier_ != 0.0) {
-          // Prevent L0 to Lbase fanout from growing larger than
-          // `level_multiplier_`. This prevents us from getting stuck picking
-          // L0 forever even when it is hurting write-amp. That could happen
-          // in dynamic level compaction's write-burst mode where the base
-          // level's target size can grow to be enormous.
-          l0_target_size =
-              std::max(l0_target_size,
-                       static_cast<uint64_t>(level_max_bytes_[base_level_] /
-                                             level_multiplier_));
-        }
-        score =
-            std::max(score, static_cast<double>(total_size) / l0_target_size);
-        if (immutable_options.level_compaction_dynamic_level_bytes &&
-            score > 1.0) {
-          score *= kScoreScale;
+        // If the L0 score is always too high, L0->L1 will always be
+        // prioritized over L1->L2 compaction and L1 will grow too large.
+        // But if the L0 score isn't high enough, L0 will accumulate and
+        // data is not moved to L1 fast enough. With potential L0->L0
+        // compaction, the number of L0 files isn't always an indication
+        // of L0 oversizing, so we also need to consider the total size of L0.
+        if (immutable_options.level_compaction_dynamic_level_bytes) {
+          if (total_size >= mutable_cf_options.max_bytes_for_level_base) {
+            // When calculating estimated_compaction_needed_bytes, we assume
+            // L0 qualifies as a pending compaction, so we need to make
+            // sure that it actually qualifies for compaction here.
+            // It might be guaranteed by the logic below anyway, but we are
+            // explicit here to make sure we don't stop writes with no
+            // compaction scheduled.
+            score = std::max(score, 1.01);
+          }
+          if (total_size > level_max_bytes_[base_level_]) {
+            // In this case, we compare L0 size with the actual L1 size and
+            // make sure the score is more than 1.0 (10.0 after scaling) if
+            // L0 is larger than L1. Since in this case the L1 score is
+            // lower than 10.0, L0->L1 is prioritized over L1->L2.
+            uint64_t base_level_size = 0;
+            for (auto f : files_[base_level_]) {
+              base_level_size += f->compensated_file_size;
+            }
+            score = std::max(score, static_cast<double>(total_size) /
+                                        static_cast<double>(std::max(
+                                            base_level_size,
+                                            level_max_bytes_[base_level_])));
+          }
+          if (score > 1.0) {
+            score *= kScoreScale;
+          }
+        } else {
+          score = std::max(score,
+                           static_cast<double>(total_size) /
+                               mutable_cf_options.max_bytes_for_level_base);
         }
       }
     }
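
Note: as an illustration of the new scoring rule (not part of the patch and
not the RocksDB API), below is a minimal standalone C++ sketch. The function
name, the parameter names, and the kScoreScale value of 10.0 (inferred from
the "10.0 after scaling" comment in the hunk above) are all assumptions made
for this example.

// Standalone sketch of the patched L0 scoring logic. Illustrative only;
// names and kScoreScale are assumptions, not RocksDB definitions.
#include <algorithm>
#include <cstdint>
#include <iostream>

// Assumed value, per the patch's "10.0 after scaling" comment.
constexpr double kScoreScale = 10.0;

// file_count_score: the usual score from the number of L0 files, computed
// before the size-based adjustment shown in the hunk above.
double ComputeL0SizeScore(double file_count_score, uint64_t total_l0_size,
                          uint64_t max_bytes_for_level_base,
                          uint64_t base_level_target_size,
                          uint64_t actual_base_level_size,
                          bool dynamic_level_bytes) {
  double score = file_count_score;
  if (dynamic_level_bytes) {
    if (total_l0_size >= max_bytes_for_level_base) {
      // Make sure an oversized L0 qualifies for compaction, so writes are
      // never stalled with no compaction scheduled.
      score = std::max(score, 1.01);
    }
    if (total_l0_size > base_level_target_size) {
      // Compare L0 against the *actual* base level (L1) size, so an
      // oversized L0 outranks L1->L2, whose score stays below kScoreScale.
      score = std::max(score,
                       static_cast<double>(total_l0_size) /
                           static_cast<double>(std::max(
                               actual_base_level_size,
                               base_level_target_size)));
    }
    if (score > 1.0) {
      score *= kScoreScale;
    }
  } else {
    // Non-dynamic levels: size relative to the configured base level size.
    score = std::max(score,
                     static_cast<double>(total_l0_size) /
                         static_cast<double>(max_bytes_for_level_base));
  }
  return score;
}

int main() {
  // Example: a 4 GB L0 over a 1 GB L1 target, where L1 actually holds 2 GB.
  const uint64_t GB = 1ull << 30;
  double s = ComputeL0SizeScore(/*file_count_score=*/0.5,
                                /*total_l0_size=*/4 * GB,
                                /*max_bytes_for_level_base=*/256ull << 20,
                                /*base_level_target_size=*/1 * GB,
                                /*actual_base_level_size=*/2 * GB,
                                /*dynamic_level_bytes=*/true);
  // size(L0)/size(L1) = 4/2 = 2.0, then scaled by 10 -> 20.0, which
  // outranks any L1->L2 score (those stay below 10.0).
  std::cout << "L0 score: " << s << "\n";  // prints "L0 score: 20"
  return 0;
}

Dividing by max(actual L1 size, L1 target size) keeps the score bounded when
L1 is itself oversized, so an L0 backlog only outranks L1->L2 compaction when
L0 is genuinely larger than L1.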