From 2297769b387af3c8f70ec12c10f17589f5daaf47 Mon Sep 17 00:00:00 2001
From: sdong
Date: Thu, 11 Aug 2022 17:10:36 -0700
Subject: [PATCH] Fix regression issue of too large score (#10518)

Summary:
https://github.com/facebook/rocksdb/pull/10057 caused a regression bug:
since the base level size is no longer adjusted based on L0 size, the
L0 score can become very large. This makes compaction heavily favor
L0->L1 compaction over L1->L2 compaction, and in some cases data gets
stuck in L1 without being moved down. We fix this by calculating the L0
score as size(L0)/size(L1) in the case where L0 is large.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10518

Test Plan: Run db_bench against data on tmpfs and confirm that the
behavior of data getting stuck in L1 goes away.

Reviewed By: ajkr

Differential Revision: D38603145

fbshipit-source-id: 4949e52dc28b54aacfe08417c6e6cc7e40a27225
---
 db/version_set.cc | 55 +++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/db/version_set.cc b/db/version_set.cc
index 74040270f..9d37db84f 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -2758,24 +2758,43 @@ void VersionStorageInfo::ComputeCompactionScore(
         // Level-based involves L0->L0 compactions that can lead to oversized
         // L0 files. Take into account size as well to avoid later giant
         // compactions to the base level.
-        uint64_t l0_target_size = mutable_cf_options.max_bytes_for_level_base;
-        if (immutable_options.level_compaction_dynamic_level_bytes &&
-            level_multiplier_ != 0.0) {
-          // Prevent L0 to Lbase fanout from growing larger than
-          // `level_multiplier_`. This prevents us from getting stuck picking
-          // L0 forever even when it is hurting write-amp. That could happen
-          // in dynamic level compaction's write-burst mode where the base
-          // level's target size can grow to be enormous.
-          l0_target_size =
-              std::max(l0_target_size,
-                       static_cast<uint64_t>(level_max_bytes_[base_level_] /
-                                             level_multiplier_));
-        }
-        score =
-            std::max(score, static_cast<double>(total_size) / l0_target_size);
-        if (immutable_options.level_compaction_dynamic_level_bytes &&
-            score > 1.0) {
-          score *= kScoreScale;
+        // If the L0 score is always too high, L0->L1 will always be
+        // prioritized over L1->L2 compaction and L1 will grow too large.
+        // But if the L0 score isn't high enough, L0 will accumulate and
+        // data is not moved to L1 fast enough. With potential L0->L0
+        // compaction, the number of L0 files isn't always an indication
+        // of L0 oversizing, so we also need to consider the total size of L0.
+        if (immutable_options.level_compaction_dynamic_level_bytes) {
+          if (total_size >= mutable_cf_options.max_bytes_for_level_base) {
+            // When calculating estimated_compaction_needed_bytes, we assume
+            // L0 qualifies as a pending compaction, so we need to make
+            // sure that it actually qualifies for compaction here.
+            // It might be guaranteed by the logic below anyway, but we are
+            // explicit here to make sure we don't stop writes with no
+            // compaction scheduled.
+            score = std::max(score, 1.01);
+          }
+          if (total_size > level_max_bytes_[base_level_]) {
+            // In this case, we compare L0 size with the actual L1 size and
+            // make sure the score is more than 1.0 (10.0 after scaling) if
+            // L0 is larger than L1. Since in this case the L1 score is
+            // lower than 10.0, L0->L1 is prioritized over L1->L2.
+            uint64_t base_level_size = 0;
+            for (auto f : files_[base_level_]) {
+              base_level_size += f->compensated_file_size;
+            }
+            score = std::max(score, static_cast<double>(total_size) /
+                                        static_cast<double>(std::max(
+                                            base_level_size,
+                                            level_max_bytes_[base_level_])));
+          }
+          if (score > 1.0) {
+            score *= kScoreScale;
+          }
+        } else {
+          score = std::max(score,
+                           static_cast<double>(total_size) /
+                               mutable_cf_options.max_bytes_for_level_base);
         }
       }
     }
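
Note: as an illustration of the new scoring rule (not part of the patch and
not the RocksDB API), below is a minimal standalone C++ sketch. The function
name, the parameter names, and the kScoreScale value of 10.0 (inferred from
the "10.0 after scaling" comment in the hunk above) are all assumptions made
for this example.

// Standalone sketch of the patched L0 scoring logic. Illustrative only;
// names and kScoreScale are assumptions, not RocksDB definitions.
#include <algorithm>
#include <cstdint>
#include <iostream>

// Assumed value, per the patch's "10.0 after scaling" comment.
constexpr double kScoreScale = 10.0;

// file_count_score: the usual score from the number of L0 files, computed
// before the size-based adjustment shown in the hunk above.
double ComputeL0SizeScore(double file_count_score, uint64_t total_l0_size,
                          uint64_t max_bytes_for_level_base,
                          uint64_t base_level_target_size,
                          uint64_t actual_base_level_size,
                          bool dynamic_level_bytes) {
  double score = file_count_score;
  if (dynamic_level_bytes) {
    if (total_l0_size >= max_bytes_for_level_base) {
      // Make sure an oversized L0 qualifies for compaction, so writes are
      // never stalled with no compaction scheduled.
      score = std::max(score, 1.01);
    }
    if (total_l0_size > base_level_target_size) {
      // Compare L0 against the *actual* base level (L1) size, so an
      // oversized L0 outranks L1->L2, whose score stays below kScoreScale.
      score = std::max(score,
                       static_cast<double>(total_l0_size) /
                           static_cast<double>(std::max(
                               actual_base_level_size,
                               base_level_target_size)));
    }
    if (score > 1.0) {
      score *= kScoreScale;
    }
  } else {
    // Non-dynamic levels: size relative to the configured base level size.
    score = std::max(score,
                     static_cast<double>(total_l0_size) /
                         static_cast<double>(max_bytes_for_level_base));
  }
  return score;
}

int main() {
  // Example: a 4 GB L0 over a 1 GB L1 target, where L1 actually holds 2 GB.
  const uint64_t GB = 1ull << 30;
  double s = ComputeL0SizeScore(/*file_count_score=*/0.5,
                                /*total_l0_size=*/4 * GB,
                                /*max_bytes_for_level_base=*/256ull << 20,
                                /*base_level_target_size=*/1 * GB,
                                /*actual_base_level_size=*/2 * GB,
                                /*dynamic_level_bytes=*/true);
  // size(L0)/size(L1) = 4/2 = 2.0, then scaled by 10 -> 20.0, which
  // outranks any L1->L2 score (those stay below 10.0).
  std::cout << "L0 score: " << s << "\n";  // prints "L0 score: 20"
  return 0;
}

Dividing by max(actual L1 size, L1 target size) keeps the score bounded when
L1 is itself oversized, so an L0 backlog only outranks L1->L2 compaction when
L0 is genuinely larger than L1.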