Set lower-bound on dynamic level sizes

Summary:
Changed dynamic leveling to stop setting the base level's size bound below `max_bytes_for_level_base`.

Behavior for config where `max_bytes_for_level_base == level0_file_num_compaction_trigger * write_buffer_size` and same amount of data in L0 and base-level:

- Before #2027, compaction scoring would favor base-level due to dividing by size smaller than `max_bytes_for_level_base`.
- After #2027, L0 and Lbase get equal scores. The disadvantage is L0 is often compacted before reaching the num files trigger since `write_buffer_size` can be bigger than the dynamically chosen base-level size. This increases write-amp.
- After this diff, L0 and Lbase still get equal scores. Now it takes `level0_file_num_compaction_trigger` files of size `write_buffer_size` to trigger L0 compaction by size, fixing the write-amp problem above.
Closes https://github.com/facebook/rocksdb/pull/2123

Differential Revision: D4861570

Pulled By: ajkr

fbshipit-source-id: 467ddef56ed1f647c14d86bb018bcb044c39b964
main
Andrew Kryczka 7 years ago committed by Facebook Github Bot
parent 7c1c8ce5ac
commit 8c3a180e83
  1. 11
      db/compaction_picker_test.cc
  2. 6
      db/version_set.cc
  3. 10
      db/version_set_test.cc

@ -1013,7 +1013,8 @@ TEST_F(CompactionPickerTest, EstimateCompactionBytesNeededDynamicLevel) {
// Set Last level size 50000
// num_levels - 1 target 5000
// num_levels - 2 is base level with taret 500
// num_levels - 2 is base level with target 1000 (rounded up to
// max_bytes_for_level_base).
Add(num_levels - 1, 10U, "400", "500", 50000);
Add(0, 1U, "150", "200", 200);
@ -1022,16 +1023,16 @@ TEST_F(CompactionPickerTest, EstimateCompactionBytesNeededDynamicLevel) {
Add(0, 5U, "150", "200", 200);
Add(0, 6U, "150", "200", 200);
// num_levels - 3 is over target by 100 + 1000
Add(num_levels - 3, 7U, "400", "500", 300);
Add(num_levels - 3, 8U, "600", "700", 300);
Add(num_levels - 3, 7U, "400", "500", 550);
Add(num_levels - 3, 8U, "600", "700", 550);
// num_levels - 2 is over target by 1100 + 200
Add(num_levels - 2, 9U, "150", "200", 5200);
UpdateVersionStorageInfo();
// Merging to the second last level: (5200 / 1600 + 1) * 1100
// Merging to the second last level: (5200 / 2100 + 1) * 1100
// Merging to the last level: (50000 / 6300 + 1) * 1300
ASSERT_EQ(1600u + 4675u + 11617u,
ASSERT_EQ(2100u + 3823u + 11617u,
vstorage_->estimated_compaction_needed_bytes());
}

@ -2113,7 +2113,11 @@ void VersionStorageInfo::CalculateBaseBytes(const ImmutableCFOptions& ioptions,
level_size = MultiplyCheckOverflow(
level_size, options.max_bytes_for_level_multiplier);
}
level_max_bytes_[i] = level_size;
// Don't set any level below base_bytes_max. Otherwise, the LSM can
// assume an hourglass shape where L1+ sizes are smaller than L0. This
// causes compaction scoring, which depends on level sizes, to favor L1+
// at the expense of L0, which may fill up and stall.
level_max_bytes_[i] = std::max(level_size, base_bytes_max);
}
}
}

@ -168,13 +168,13 @@ TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic) {
Add(5, 2U, "3", "4", 550U);
vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_);
ASSERT_EQ(0, logger_->log_count);
ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 210U);
ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1000U);
ASSERT_EQ(vstorage_.base_level(), 4);
Add(4, 3U, "3", "4", 550U);
vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_);
ASSERT_EQ(0, logger_->log_count);
ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 210U);
ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1000U);
ASSERT_EQ(vstorage_.base_level(), 4);
Add(3, 4U, "3", "4", 250U);
@ -182,7 +182,7 @@ TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic) {
vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_);
ASSERT_EQ(1, logger_->log_count);
ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1005U);
ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 201U);
ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 1000U);
ASSERT_EQ(vstorage_.base_level(), 3);
Add(1, 6U, "3", "4", 5U);
@ -193,7 +193,7 @@ TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic) {
ASSERT_GT(vstorage_.MaxBytesForLevel(4), 1005U);
ASSERT_GT(vstorage_.MaxBytesForLevel(3), 1005U);
ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 1005U);
ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 201U);
ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 1000U);
ASSERT_EQ(vstorage_.base_level(), 1);
}
@ -231,7 +231,7 @@ TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicLargeLevel) {
ASSERT_EQ(vstorage_.MaxBytesForLevel(5), 3000U * kOneGB);
ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 300U * kOneGB);
ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 30U * kOneGB);
ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 3U * kOneGB);
ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 10U * kOneGB);
ASSERT_EQ(vstorage_.base_level(), 2);
ASSERT_EQ(0, logger_->log_count);
}

Loading…
Cancel
Save