From bfb6b1b8a892d93ad1f41e5c571b848d1b9ea11d Mon Sep 17 00:00:00 2001
From: sdong
Date: Thu, 5 May 2016 16:50:32 -0700
Subject: [PATCH] Estimate pending compaction bytes more accurately

Summary: Currently we estimate the bytes needed for compaction by assuming the
fan-out is always the level multiplier. This overestimates when a level's size
exceeds its target by a large margin. Instead, estimate the fan-out from the
ratio of the actual sizes of the two levels involved.

Test Plan: Fix existing test cases and add a new one.

Reviewers: IslamAbdelRahman, igor, yhchiang

Reviewed By: yhchiang

Subscribers: MarkCallaghan, leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D57789
---
 db/compaction_picker_test.cc | 54 ++++++++++++++++++++++++++++--------
 db/db_test.cc                | 26 +++++++++++++++--
 db/version_set.cc            | 39 +++++++++++++++++++++-----
 3 files changed, 98 insertions(+), 21 deletions(-)

diff --git a/db/compaction_picker_test.cc b/db/compaction_picker_test.cc
index 4b4ee87bd..cdad513ea 100644
--- a/db/compaction_picker_test.cc
+++ b/db/compaction_picker_test.cc
@@ -776,19 +776,22 @@ TEST_F(CompactionPickerTest, EstimateCompactionBytesNeeded1) {
   Add(1, 4U, "400", "500", 600);
   Add(1, 5U, "600", "700", 600);
   // Level 2 is less than target 10000 even added size of level 1
+  // Size ratio of L2/L1 is 9600 / 1200 = 8
   Add(2, 6U, "150", "200", 2500);
   Add(2, 7U, "201", "210", 2000);
-  Add(2, 8U, "300", "310", 2500);
+  Add(2, 8U, "300", "310", 2600);
   Add(2, 9U, "400", "500", 2500);
   // Level 3 exceeds target 100,000 of 1000
   Add(3, 10U, "400", "500", 101000);
-  // Level 4 exceeds target 1,000,000 of 500 after adding size from level 3
-  Add(4, 11U, "400", "500", 999500);
-  Add(5, 11U, "400", "500", 8000000);
+  // Level 4 exceeds target 1,000,000 by 900 after adding size from level 3
+  // Size ratio L4/L3 is 9.9
+  // After merge from L3, L4 size is 1000900
+  Add(4, 11U, "400", "500", 999900);
+  Add(5, 11U, "400", "500", 8007200);
 
   UpdateVersionStorageInfo();
 
-  ASSERT_EQ(2200u + 11000u + 5500u,
+  ASSERT_EQ(200u * 9u + 10900u + 900u * 9,
             vstorage_->estimated_compaction_needed_bytes());
 }
 
@@ -804,17 +807,42 @@ TEST_F(CompactionPickerTest, EstimateCompactionBytesNeeded2) {
   Add(0, 4U, "150", "200", 200);
   Add(0, 5U, "150", "200", 200);
   Add(0, 6U, "150", "200", 200);
-  // Level 1 is over target by
+  // Level 1 size will be 1400 after merging with L0
   Add(1, 7U, "400", "500", 200);
   Add(1, 8U, "600", "700", 200);
   // Level 2 is less than target 10000 even added size of level 1
-  Add(2, 9U, "150", "200", 9500);
+  Add(2, 9U, "150", "200", 9100);
+  // Level 3 is over the target, but since level 4 is empty, we assume it will
+  // be a trivial move.
   Add(3, 10U, "400", "500", 101000);
 
   UpdateVersionStorageInfo();
 
-  ASSERT_EQ(1400u + 4400u + 11000u,
-            vstorage_->estimated_compaction_needed_bytes());
+  // estimated L1->L2 merge: 400 * (9100.0 / 1400.0 + 1.0)
+  ASSERT_EQ(1400u + 3000u, vstorage_->estimated_compaction_needed_bytes());
+}
+
+TEST_F(CompactionPickerTest, EstimateCompactionBytesNeeded3) {
+  int num_levels = ioptions_.num_levels;
+  ioptions_.level_compaction_dynamic_level_bytes = false;
+  mutable_cf_options_.level0_file_num_compaction_trigger = 3;
+  mutable_cf_options_.max_bytes_for_level_base = 1000;
+  mutable_cf_options_.max_bytes_for_level_multiplier = 10;
+  NewVersionStorage(num_levels, kCompactionStyleLevel);
+  Add(0, 1U, "150", "200", 2000);
+  Add(0, 2U, "150", "200", 2000);
+  Add(0, 4U, "150", "200", 2000);
+  Add(0, 5U, "150", "200", 2000);
+  Add(0, 6U, "150", "200", 1000);
+  // Level 1 size will be 10000 after merging with L0
+  Add(1, 7U, "400", "500", 500);
+  Add(1, 8U, "600", "700", 500);
+
+  Add(2, 9U, "150", "200", 10000);
+
+  UpdateVersionStorageInfo();
+
+  ASSERT_EQ(10000u + 18000u, vstorage_->estimated_compaction_needed_bytes());
 }
 
 TEST_F(CompactionPickerTest, EstimateCompactionBytesNeededDynamicLevel) {
@@ -838,12 +866,14 @@ TEST_F(CompactionPickerTest, EstimateCompactionBytesNeededDynamicLevel) {
   // num_levels - 3 is over target by 100 + 1000
   Add(num_levels - 3, 7U, "400", "500", 300);
   Add(num_levels - 3, 8U, "600", "700", 300);
-  // Level 2 is over target by 1100 + 100
-  Add(num_levels - 2, 9U, "150", "200", 5100);
+  // num_levels - 2 is over target by 1100 + 200
+  Add(num_levels - 2, 9U, "150", "200", 5200);
 
   UpdateVersionStorageInfo();
 
-  ASSERT_EQ(1600u + 12100u + 13200u,
+  // Merging to the second last level: (5200 / 1600 + 1) * 1100
+  // Merging to the last level: (50000 / 6300 + 1) * 1300
+  ASSERT_EQ(1600u + 4675u + 11617u,
             vstorage_->estimated_compaction_needed_bytes());
 }
 
diff --git a/db/db_test.cc b/db/db_test.cc
index 4d466c1e1..38ab6b794 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -5675,7 +5675,7 @@ TEST_F(DBTest, SoftLimit) {
   options.level0_slowdown_writes_trigger = 3;
   options.level0_stop_writes_trigger = 999999;
   options.delayed_write_rate = 20000;  // About 200KB/s limited rate
-  options.soft_pending_compaction_bytes_limit = 200000;
+  options.soft_pending_compaction_bytes_limit = 160000;
   options.target_file_size_base = 99999999;  // All into one file
   options.max_bytes_for_level_base = 50000;
   options.max_bytes_for_level_multiplier = 10;
@@ -5683,6 +5683,27 @@ TEST_F(DBTest, SoftLimit) {
   options.compression = kNoCompression;
 
   Reopen(options);
+
+  // Generating 360KB in Level 3
+  for (int i = 0; i < 72; i++) {
+    Put(Key(i), std::string(5000, 'x'));
+    if (i % 10 == 0) {
+      Flush();
+    }
+  }
+  dbfull()->TEST_WaitForCompact();
+  MoveFilesToLevel(3);
+
+  // Generating 360KB in Level 2
+  for (int i = 0; i < 72; i++) {
+    Put(Key(i), std::string(5000, 'x'));
+    if (i % 10 == 0) {
+      Flush();
+    }
+  }
+  dbfull()->TEST_WaitForCompact();
+  MoveFilesToLevel(2);
+
   Put(Key(0), "");
 
   test::SleepingBackgroundTask sleeping_task_low;
@@ -5758,7 +5779,8 @@ TEST_F(DBTest, SoftLimit) {
   sleeping_task_low.WaitUntilSleeping();
 
   // Now there is one L1 file (around 90KB) which exceeds 50KB base by 40KB
-  // Given level multiplier 10, estimated pending compaction is around 400KB
+  // L2 size is 360KB, so the estimated level fanout is 4; estimated pending
+  // compaction is around 200KB
   // triggerring soft_pending_compaction_bytes_limit
   ASSERT_EQ(NumTableFilesAtLevel(1), 1);
   ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
diff --git a/db/version_set.cc b/db/version_set.cc
index 3ea22cf79..be3359563 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -1195,10 +1195,23 @@ void VersionStorageInfo::EstimateCompactionBytesNeeded(
   }
 
   // Level 1 and up.
+  uint64_t bytes_next_level = 0;
   for (int level = base_level(); level <= MaxInputLevel(); level++) {
     uint64_t level_size = 0;
-    for (auto* f : files_[level]) {
-      level_size += f->fd.GetFileSize();
+    if (bytes_next_level > 0) {
+#ifndef NDEBUG
+      uint64_t level_size2 = 0;
+      for (auto* f : files_[level]) {
+        level_size2 += f->fd.GetFileSize();
+      }
+      assert(level_size2 == bytes_next_level);
+#endif
+      level_size = bytes_next_level;
+      bytes_next_level = 0;
+    } else {
+      for (auto* f : files_[level]) {
+        level_size += f->fd.GetFileSize();
+      }
     }
     if (level == base_level() && level0_compact_triggered) {
       // Add base level size to compaction if level0 compaction triggered.
@@ -1210,11 +1223,23 @@ void VersionStorageInfo::EstimateCompactionBytesNeeded(
     uint64_t level_target = MaxBytesForLevel(level);
     if (level_size > level_target) {
       bytes_compact_to_next_level = level_size - level_target;
-      // Simplify to assume the actual compaction fan-out ratio is always
-      // mutable_cf_options.max_bytes_for_level_multiplier.
-      estimated_compaction_needed_bytes_ +=
-          bytes_compact_to_next_level *
-          (1 + mutable_cf_options.max_bytes_for_level_multiplier);
+      // Estimate the actual compaction fan-out ratio as the size ratio
+      // between the two levels.
+
+      assert(bytes_next_level == 0);
+      if (level + 1 < num_levels_) {
+        for (auto* f : files_[level + 1]) {
+          bytes_next_level += f->fd.GetFileSize();
+        }
+      }
+      if (bytes_next_level > 0) {
+        assert(level_size > 0);
+        estimated_compaction_needed_bytes_ += static_cast<uint64_t>(
+            static_cast<double>(bytes_compact_to_next_level) *
+            (static_cast<double>(bytes_next_level) /
+                 static_cast<double>(level_size) +
+             1));
+      }
     }
   }
 }
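
For reference, the arithmetic that EstimateCompactionBytesNeeded1 expects can
be reproduced outside of RocksDB. The sketch below is an illustration only,
not RocksDB code: it hard-codes the level sizes and per-level targets used in
that test and applies the new estimate, multiplying each level's overflow by
(next level size / this level size + 1) rather than
(1 + max_bytes_for_level_multiplier). Variable names such as carried_in are
invented for the example.

// Standalone sketch of the new pending-compaction estimate (illustration only).
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Actual level sizes from EstimateCompactionBytesNeeded1: L1..L5.
  std::vector<uint64_t> level_size_raw = {1200, 9600, 101000, 999900, 8007200};
  // Per-level targets: base 1000, multiplier 10.
  std::vector<uint64_t> level_target = {1000, 10000, 100000, 1000000, 10000000};

  uint64_t estimated = 0;
  uint64_t carried_in = 0;  // bytes compacted down from the previous level
  for (size_t i = 0; i < level_size_raw.size(); i++) {
    // Size of this level after receiving the previous level's overflow.
    uint64_t level_size = level_size_raw[i] + carried_in;
    carried_in = 0;
    if (level_size <= level_target[i]) {
      continue;
    }
    uint64_t excess = level_size - level_target[i];
    carried_in = excess;
    if (i + 1 < level_size_raw.size()) {
      // New estimate: fan-out is the actual size ratio between the two
      // levels, not the configured multiplier.
      double fanout = static_cast<double>(level_size_raw[i + 1]) /
                      static_cast<double>(level_size);
      estimated += static_cast<uint64_t>(excess * (fanout + 1));
    }
  }

  // Matches the expectation in the test: 200 * 9 + 10900 + 900 * 9 = 20800.
  assert(estimated == 20800u);
  std::printf("estimated pending compaction bytes: %llu\n",
              static_cast<unsigned long long>(estimated));
  return 0;
}

Running the sketch prints 20800, which is the 200u * 9u + 10900u + 900u * 9
asserted in the updated test.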