diff --git a/db/column_family_test.cc b/db/column_family_test.cc
index 9c92707d3..08393c350 100644
--- a/db/column_family_test.cc
+++ b/db/column_family_test.cc
@@ -1255,6 +1255,7 @@ TEST_P(ColumnFamilyTest, DifferentCompactionStyles) {
   ColumnFamilyOptions default_cf, one, two;
   db_options_.max_open_files = 20;  // only 10 files in file cache
+  default_cf.level_compaction_dynamic_level_bytes = false;
   default_cf.compaction_style = kCompactionStyleLevel;
   default_cf.num_levels = 3;
   default_cf.write_buffer_size = 64 << 10;  // 64KB
@@ -1272,6 +1273,7 @@ TEST_P(ColumnFamilyTest, DifferentCompactionStyles) {
   one.level0_file_num_compaction_trigger = 4;
   one.write_buffer_size = 120000;
+  two.level_compaction_dynamic_level_bytes = false;
   two.compaction_style = kCompactionStyleLevel;
   two.num_levels = 4;
   two.level0_file_num_compaction_trigger = 3;
@@ -1326,6 +1328,7 @@ TEST_P(ColumnFamilyTest, MultipleManualCompactions) {
   db_options_.max_open_files = 20;  // only 10 files in file cache
   db_options_.max_background_compactions = 3;
+  default_cf.level_compaction_dynamic_level_bytes = false;
   default_cf.compaction_style = kCompactionStyleLevel;
   default_cf.num_levels = 3;
   default_cf.write_buffer_size = 64 << 10;  // 64KB
@@ -1342,6 +1345,7 @@ TEST_P(ColumnFamilyTest, MultipleManualCompactions) {
   one.level0_file_num_compaction_trigger = 4;
   one.write_buffer_size = 120000;
+  two.level_compaction_dynamic_level_bytes = false;
   two.compaction_style = kCompactionStyleLevel;
   two.num_levels = 4;
   two.level0_file_num_compaction_trigger = 3;
@@ -1424,13 +1428,14 @@ TEST_P(ColumnFamilyTest, AutomaticAndManualCompactions) {
   db_options_.max_open_files = 20;  // only 10 files in file cache
   db_options_.max_background_compactions = 3;
+  default_cf.level_compaction_dynamic_level_bytes = false;
   default_cf.compaction_style = kCompactionStyleLevel;
   default_cf.num_levels = 3;
   default_cf.write_buffer_size = 64 << 10;  // 64KB
   default_cf.target_file_size_base = 30 << 10;
   default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100;
   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
-  ;
+  table_options.no_block_cache = true;
   default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options));
@@ -1441,6 +1446,7 @@ TEST_P(ColumnFamilyTest, AutomaticAndManualCompactions) {
   one.level0_file_num_compaction_trigger = 4;
   one.write_buffer_size = 120000;
+  two.level_compaction_dynamic_level_bytes = false;
   two.compaction_style = kCompactionStyleLevel;
   two.num_levels = 4;
   two.level0_file_num_compaction_trigger = 3;
@@ -1519,13 +1525,14 @@ TEST_P(ColumnFamilyTest, ManualAndAutomaticCompactions) {
   db_options_.max_open_files = 20;  // only 10 files in file cache
   db_options_.max_background_compactions = 3;
+  default_cf.level_compaction_dynamic_level_bytes = false;
   default_cf.compaction_style = kCompactionStyleLevel;
   default_cf.num_levels = 3;
   default_cf.write_buffer_size = 64 << 10;  // 64KB
   default_cf.target_file_size_base = 30 << 10;
   default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100;
   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
-  ;
+  table_options.no_block_cache = true;
   default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options));
@@ -1536,6 +1543,7 @@ TEST_P(ColumnFamilyTest, ManualAndAutomaticCompactions) {
   one.level0_file_num_compaction_trigger = 4;
   one.write_buffer_size = 120000;
+  two.level_compaction_dynamic_level_bytes = false;
   two.compaction_style = kCompactionStyleLevel;
   two.num_levels = 4;
   two.level0_file_num_compaction_trigger = 3;
diff --git a/db/compact_files_test.cc b/db/compact_files_test.cc
index eecab196f..5ca80d9aa 100644
--- a/db/compact_files_test.cc
+++ b/db/compact_files_test.cc
@@ -66,6 +66,7 @@ TEST_F(CompactFilesTest, L0ConflictsFiles) {
   const int kWriteBufferSize = 10000;
   const int kLevel0Trigger = 2;
   options.create_if_missing = true;
+  options.level_compaction_dynamic_level_bytes = false;
   options.compaction_style = kCompactionStyleLevel;
   // Small slowdown and stop trigger for experimental purpose.
   options.level0_slowdown_writes_trigger = 20;
@@ -359,6 +360,7 @@ TEST_F(CompactFilesTest, CompactionFilterWithGetSv) {
   std::shared_ptr<FilterWithGet> cf(new FilterWithGet());

   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.create_if_missing = true;
   options.compaction_filter = cf.get();
@@ -401,6 +403,7 @@ TEST_F(CompactFilesTest, SentinelCompressionType) {
        CompactionStyle::kCompactionStyleNone}) {
     ASSERT_OK(DestroyDB(db_name_, Options()));
     Options options;
+    options.level_compaction_dynamic_level_bytes = false;
     options.compaction_style = compaction_style;
     // L0: Snappy, L1: ZSTD, L2: Snappy
     options.compression_per_level = {CompressionType::kSnappyCompression,
diff --git a/db/compaction/compaction_job_stats_test.cc b/db/compaction/compaction_job_stats_test.cc
index 785841201..56fc51d05 100644
--- a/db/compaction/compaction_job_stats_test.cc
+++ b/db/compaction/compaction_job_stats_test.cc
@@ -616,6 +616,7 @@ TEST_P(CompactionJobStatsTest, CompactionJobStatsTest) {
   // via AddExpectedStats().
   auto* stats_checker = new CompactionJobStatsChecker();
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.listeners.emplace_back(stats_checker);
   options.create_if_missing = true;
   // just enough setting to hold off auto-compaction.
@@ -815,6 +816,7 @@ TEST_P(CompactionJobStatsTest, DeletionStatsTest) {
   // what we expect.
   auto* stats_checker = new CompactionJobDeletionStatsChecker();
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.listeners.emplace_back(stats_checker);
   options.create_if_missing = true;
   options.level0_file_num_compaction_trigger = kTestScale + 1;
diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc
index 799f288ca..6b04f4295 100644
--- a/db/compaction/compaction_picker_test.cc
+++ b/db/compaction/compaction_picker_test.cc
@@ -70,6 +70,11 @@ class CompactionPickerTestBase : public testing::Test {
     mutable_cf_options_.RefreshDerivedOptions(ioptions_);
     ioptions_.cf_paths.emplace_back("dummy",
                                     std::numeric_limits<uint64_t>::max());
+    // When the default value of this option is true, universal compaction
+    // tests can encounter an assertion failure since SanitizeOptions() is
+    // not run to set this option to false. So we do the sanitization
+    // here. Tests that exercise this option set it to true explicitly.
+    ioptions_.level_compaction_dynamic_level_bytes = false;
   }

  ~CompactionPickerTestBase() override {}
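The comment added to CompactionPickerTestBase above is easier to picture in isolation: the tests construct option structs directly, so the clean-up that DB open would normally perform never runs. A minimal sketch of the kind of sanitization it refers to — simplified, illustrative types, not RocksDB's actual SanitizeOptions() signature:

    // Simplified sketch of the sanitization described above: dynamic level
    // sizing only makes sense for leveled compaction, so a sanitizer clears
    // the flag for other styles. Types here are illustrative, not RocksDB's.
    enum class CompactionStyle { kLevel, kUniversal, kFIFO, kNone };

    struct CfOptions {
      CompactionStyle compaction_style = CompactionStyle::kLevel;
      bool level_compaction_dynamic_level_bytes = true;  // new default
    };

    void SanitizeCompactionOptions(CfOptions* opts) {
      if (opts->compaction_style != CompactionStyle::kLevel) {
        // Universal/FIFO code paths would otherwise trip assertions that
        // assume the flag is only set for leveled compaction.
        opts->level_compaction_dynamic_level_bytes = false;
      }
    }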
diff --git a/db/corruption_test.cc b/db/corruption_test.cc
index 7027181eb..d1cb02258 100644
--- a/db/corruption_test.cc
+++ b/db/corruption_test.cc
@@ -450,6 +450,7 @@ TEST_F(CorruptionTest, TableFile) {

 TEST_F(CorruptionTest, VerifyChecksumReadahead) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   SpecialEnv senv(base_env_);
   options.env = &senv;
   // Disable block cache as we are going to check checksum for
@@ -503,6 +504,7 @@ TEST_F(CorruptionTest, VerifyChecksumReadahead) {

 TEST_F(CorruptionTest, TableFileIndexData) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   // very big, we'll trigger flushes manually
   options.write_buffer_size = 100 * 1024 * 1024;
   Reopen(&options);
@@ -659,6 +661,7 @@ TEST_F(CorruptionTest, CorruptedDescriptor) {

 TEST_F(CorruptionTest, CompactionInputError) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   Reopen(&options);
   Build(10);
@@ -680,6 +683,7 @@ TEST_F(CorruptionTest, CompactionInputError) {

 TEST_F(CorruptionTest, CompactionInputErrorParanoid) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   options.paranoid_checks = true;
   options.write_buffer_size = 131072;
@@ -777,6 +781,7 @@ TEST_F(CorruptionTest, RangeDeletionCorrupted) {
 TEST_F(CorruptionTest, FileSystemStateCorrupted) {
   for (int iter = 0; iter < 2; ++iter) {
     Options options;
+    options.level_compaction_dynamic_level_bytes = false;
     options.env = env_.get();
     options.paranoid_checks = true;
     options.create_if_missing = true;
@@ -816,6 +821,7 @@ static const auto& corruption_modes = {

 TEST_F(CorruptionTest, ParanoidFileChecksOnFlush) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   options.check_flush_compaction_key_order = false;
   options.paranoid_file_checks = true;
@@ -844,6 +850,7 @@ TEST_F(CorruptionTest, ParanoidFileChecksOnFlush) {

 TEST_F(CorruptionTest, ParanoidFileChecksOnCompact) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   options.paranoid_file_checks = true;
   options.create_if_missing = true;
@@ -877,6 +884,7 @@ TEST_F(CorruptionTest, ParanoidFileChecksOnCompact) {

 TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeFirst) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   options.check_flush_compaction_key_order = false;
   options.paranoid_file_checks = true;
@@ -913,6 +921,7 @@ TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeFirst) {

 TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRange) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   options.check_flush_compaction_key_order = false;
   options.paranoid_file_checks = true;
@@ -952,6 +961,7 @@ TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRange) {

 TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeLast) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   options.check_flush_compaction_key_order = false;
   options.paranoid_file_checks = true;
@@ -988,6 +998,7 @@ TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeLast) {

 TEST_F(CorruptionTest, LogCorruptionErrorsInCompactionIterator) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   options.create_if_missing = true;
   options.allow_data_in_errors = true;
@@ -1017,6 +1028,7 @@ TEST_F(CorruptionTest, LogCorruptionErrorsInCompactionIterator) {

 TEST_F(CorruptionTest, CompactionKeyOrderCheck) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   options.paranoid_file_checks = false;
   options.create_if_missing = true;
@@ -1044,6 +1056,7 @@ TEST_F(CorruptionTest, CompactionKeyOrderCheck) {

 TEST_F(CorruptionTest, FlushKeyOrderCheck) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   options.paranoid_file_checks = false;
   options.create_if_missing = true;
@@ -1097,6 +1110,7 @@ TEST_F(CorruptionTest, DisableKeyOrderCheck) {

 TEST_F(CorruptionTest, VerifyWholeTableChecksum) {
   CloseDb();
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_.get();
   ASSERT_OK(DestroyDB(dbname_, options));
   options.create_if_missing = true;
@@ -1182,6 +1196,7 @@ INSTANTIATE_TEST_CASE_P(CorruptionTest, CrashDuringRecoveryWithCorruptionTest,

 TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecovery) {
   CloseDb();
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.track_and_verify_wals_in_manifest =
       track_and_verify_wals_in_manifest_;
   options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
@@ -1354,6 +1369,7 @@ TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecovery) {

 TEST_P(CrashDuringRecoveryWithCorruptionTest, TxnDbCrashDuringRecovery) {
   CloseDb();
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
   options.track_and_verify_wals_in_manifest =
       track_and_verify_wals_in_manifest_;
@@ -1551,6 +1567,7 @@ TEST_P(CrashDuringRecoveryWithCorruptionTest, TxnDbCrashDuringRecovery) {

 TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecoveryWithFlush) {
   CloseDb();
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
   options.avoid_flush_during_recovery = false;
   options.env = env_.get();
diff --git a/db/cuckoo_table_db_test.cc b/db/cuckoo_table_db_test.cc
index 7bd4dfda4..dec5c05a3 100644
--- a/db/cuckoo_table_db_test.cc
+++ b/db/cuckoo_table_db_test.cc
@@ -39,6 +39,7 @@ class CuckooTableDBTest : public testing::Test {

   Options CurrentOptions() {
     Options options;
+    options.level_compaction_dynamic_level_bytes = false;
     options.table_factory.reset(NewCuckooTableFactory());
     options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true));
     options.allow_mmap_reads = true;
diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc
index 46ea1fcae..34b2a6f88 100644
--- a/db/db_compaction_test.cc
+++ b/db/db_compaction_test.cc
@@ -8001,10 +8001,8 @@ TEST_F(DBCompactionTest, ChangeLevelErrorPathTest) {
 }

 TEST_F(DBCompactionTest, CompactionWithBlob) {
-  Options options;
-  options.env = env_;
+  Options options = CurrentOptions();
   options.disable_auto_compactions = true;
-
   Reopen(options);

   constexpr char first_key[] = "first_key";
@@ -8096,10 +8094,8 @@ INSTANTIATE_TEST_CASE_P(DBCompactionTestBlobError, DBCompactionTestBlobError,
                             "BlobFileBuilder::WriteBlobToFile:AppendFooter"}));

 TEST_P(DBCompactionTestBlobError, CompactionError) {
-  Options options;
+  Options options = CurrentOptions();
   options.disable_auto_compactions = true;
-  options.env = env_;
-
   Reopen(options);

   constexpr char first_key[] = "first_key";
@@ -8265,8 +8261,7 @@ TEST_P(DBCompactionTestBlobGC, CompactionWithBlobGCOverrides) {
 }

 TEST_P(DBCompactionTestBlobGC, CompactionWithBlobGC) {
-  Options options;
-  options.env = env_;
+  Options options = CurrentOptions();
   options.disable_auto_compactions = true;
   options.enable_blob_files = true;
   options.blob_file_size = 32;  // one blob per file
diff --git a/db/db_merge_operand_test.cc b/db/db_merge_operand_test.cc
index 774ae4a96..b6b9ff2af 100644
--- a/db/db_merge_operand_test.cc
+++ b/db/db_merge_operand_test.cc
@@ -49,10 +49,8 @@ TEST_F(DBMergeOperandTest, CacheEvictedMergeOperandReadAfterFreeBug) {
   // There was a bug of reading merge operands after they are mistakenly freed
   // in DB::GetMergeOperands, which is surfaced by cache full.
   // See PR#9507 for more.
-  Options options;
-  options.create_if_missing = true;
+  Options options = CurrentOptions();
   options.merge_operator = MergeOperators::CreateStringAppendOperator();
-  options.env = env_;
   BlockBasedTableOptions table_options;

   // Small cache to simulate cache full
@@ -121,11 +119,9 @@ TEST_F(DBMergeOperandTest, FlushedMergeOperandReadAfterFreeBug) {
 }

 TEST_F(DBMergeOperandTest, GetMergeOperandsBasic) {
-  Options options;
-  options.create_if_missing = true;
+  Options options = CurrentOptions();
   // Use only the latest two merge operands.
   options.merge_operator =
       std::make_shared<LimitedStringAppendMergeOp>(2, ',');
-  options.env = env_;
   Reopen(options);
   int num_records = 4;
   int number_of_operands = 0;
@@ -309,13 +305,11 @@ TEST_F(DBMergeOperandTest, GetMergeOperandsBasic) {
 }

 TEST_F(DBMergeOperandTest, BlobDBGetMergeOperandsBasic) {
-  Options options;
-  options.create_if_missing = true;
+  Options options = CurrentOptions();
   options.enable_blob_files = true;
   options.min_blob_size = 0;
   // Use only the latest two merge operands.
   options.merge_operator =
       std::make_shared<LimitedStringAppendMergeOp>(2, ',');
-  options.env = env_;
   Reopen(options);
   int num_records = 4;
   int number_of_operands = 0;
@@ -401,8 +395,7 @@ TEST_F(DBMergeOperandTest, GetMergeOperandsLargeResultOptimization) {
   const int kNumOperands = 1024;
   const int kOperandLen = 1024;

-  Options options;
-  options.create_if_missing = true;
+  Options options = CurrentOptions();
   options.merge_operator = MergeOperators::CreateStringAppendOperator();
   DestroyAndReopen(options);
diff --git a/db/db_merge_operator_test.cc b/db/db_merge_operator_test.cc
index 0d2687ca5..aa1253a0b 100644
--- a/db/db_merge_operator_test.cc
+++ b/db/db_merge_operator_test.cc
@@ -81,7 +81,7 @@ TEST_F(DBMergeOperatorTest, LimitMergeOperands) {
     size_t limit_ = 0;
   };

-  Options options;
+  Options options = CurrentOptions();
   options.create_if_missing = true;
   // Use only the latest two merge operands.
   options.merge_operator =
       std::make_shared<LimitedStringAppendMergeOp>(2, ',');
@@ -134,7 +134,7 @@ TEST_F(DBMergeOperatorTest, LimitMergeOperands) {
 }

 TEST_F(DBMergeOperatorTest, MergeErrorOnRead) {
-  Options options;
+  Options options = CurrentOptions();
   options.create_if_missing = true;
   options.merge_operator.reset(new TestPutOperator());
   options.env = env_;
@@ -147,7 +147,7 @@ TEST_F(DBMergeOperatorTest, MergeErrorOnRead) {
 }

 TEST_F(DBMergeOperatorTest, MergeErrorOnWrite) {
-  Options options;
+  Options options = CurrentOptions();
   options.create_if_missing = true;
   options.merge_operator.reset(new TestPutOperator());
   options.max_successive_merges = 3;
@@ -163,7 +163,7 @@ TEST_F(DBMergeOperatorTest, MergeErrorOnWrite) {
 }

 TEST_F(DBMergeOperatorTest, MergeErrorOnIteration) {
-  Options options;
+  Options options = CurrentOptions();
   options.create_if_missing = true;
   options.merge_operator.reset(new TestPutOperator());
   options.env = env_;
@@ -221,7 +221,7 @@ TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) {
   // expect "k0" and "k2" to always be readable. "k1" is expected to be readable
   // only by APIs that do not require merging, such as `GetMergeOperands()`.
   const int kNumOperands = 3;
-  Options options;
+  Options options = CurrentOptions();
   options.merge_operator.reset(new TestPutOperator());
   options.env = env_;
   Reopen(options);
@@ -361,7 +361,7 @@ TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) {
 TEST_F(DBMergeOperatorTest, DataBlockBinaryAndHash) {
   // Basic test to check that merge operator works with data block index type
   // DataBlockBinaryAndHash.
-  Options options;
+  Options options = CurrentOptions();
   options.create_if_missing = true;
   options.merge_operator.reset(new TestPutOperator());
   options.env = env_;
diff --git a/db/db_options_test.cc b/db/db_options_test.cc
index 729afdf3d..533103f3c 100644
--- a/db/db_options_test.cc
+++ b/db/db_options_test.cc
@@ -582,6 +582,7 @@ TEST_F(DBOptionsTest, EnableAutoCompactionAndTriggerStall) {

 TEST_F(DBOptionsTest, SetOptionsMayTriggerCompaction) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.create_if_missing = true;
   options.level0_file_num_compaction_trigger = 1000;
   options.env = env_;
diff --git a/db/db_table_properties_test.cc b/db/db_table_properties_test.cc
index 7be05e93c..61dcf3c1e 100644
--- a/db/db_table_properties_test.cc
+++ b/db/db_table_properties_test.cc
@@ -282,6 +282,7 @@ TEST_F(DBTablePropertiesTest, GetPropertiesOfTablesInRange) {

   Random rnd(301);
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.create_if_missing = true;
   options.write_buffer_size = 4096;
   options.max_write_buffer_number = 2;
diff --git a/db/db_test.cc b/db/db_test.cc
index f2e664846..780b1c6ab 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -5272,6 +5272,7 @@ TEST_F(DBTest, DynamicCompactionOptions) {
   const uint64_t k1MB = 1 << 20;
   const uint64_t k4KB = 1 << 12;
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_;
   options.create_if_missing = true;
   options.compression = kNoCompression;
diff --git a/db/db_test_util.cc b/db/db_test_util.cc
index 6c4a1db94..5a64b2f3f 100644
--- a/db/db_test_util.cc
+++ b/db/db_test_util.cc
@@ -324,6 +324,12 @@ Options DBTestBase::GetDefaultOptions() const {
   options.max_open_files = 5000;
   options.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;
   options.compaction_pri = CompactionPri::kByCompensatedSize;
+  // The original default value for this option is false,
+  // and many unit tests assume this value.
+  // It also makes it easier to create the desired LSM shape in unit tests.
+  // Unit tests for this option set level_compaction_dynamic_level_bytes=true
+  // explicitly.
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = env_;
   if (!env_->skip_fsync_) {
     options.track_and_verify_wals_in_manifest = true;
@@ -569,6 +575,8 @@ Options DBTestBase::GetOptions(
   if (set_block_based_table_factory) {
     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
   }
+  options.level_compaction_dynamic_level_bytes =
+      options_override.level_compaction_dynamic_level_bytes;
   options.env = env_;
   options.create_if_missing = true;
   options.fail_if_options_file_error = true;
diff --git a/db/db_test_util.h b/db/db_test_util.h
index 28dd23160..52e856cb3 100644
--- a/db/db_test_util.h
+++ b/db/db_test_util.h
@@ -114,6 +114,12 @@ struct OptionsOverride {

   // Used as a bit mask of individual enums in which to skip an XF test point
   int skip_policy = 0;
+
+  // The default value for this option is changed from false to true.
+  // Keeping the default at false for unit tests, as old unit tests assume
+  // this behavior. Tests for level_compaction_dynamic_level_bytes
+  // will set the option to true explicitly.
+  bool level_compaction_dynamic_level_bytes = false;
 };

 }  // namespace anon
diff --git a/db/listener_test.cc b/db/listener_test.cc
index d62768a42..206dba973 100644
--- a/db/listener_test.cc
+++ b/db/listener_test.cc
@@ -551,6 +551,7 @@ class TestCompactionReasonListener : public EventListener {

 TEST_F(EventListenerTest, CompactionReasonLevel) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.env = CurrentOptions().env;
   options.create_if_missing = true;
   options.memtable_factory.reset(test::NewSpecialSkipListFactory(
diff --git a/db/manual_compaction_test.cc b/db/manual_compaction_test.cc
index 2214ece0d..e9767ab99 100644
--- a/db/manual_compaction_test.cc
+++ b/db/manual_compaction_test.cc
@@ -190,6 +190,7 @@ TEST_F(ManualCompactionTest, Test) {
 TEST_F(ManualCompactionTest, SkipLevel) {
   DB* db;
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   options.num_levels = 3;
   // Initially, flushed L0 files won't exceed 100.
   options.level0_file_num_compaction_trigger = 100;
diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc
index cc28b9f19..d117639a4 100644
--- a/db/plain_table_db_test.cc
+++ b/db/plain_table_db_test.cc
@@ -123,6 +123,7 @@ class PlainTableDBTest : public testing::Test,
   // Return the current option configuration.
   Options CurrentOptions() {
     Options options;
+    options.level_compaction_dynamic_level_bytes = false;
     PlainTableOptions plain_table_options;
     plain_table_options.user_key_len = 0;
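With the OptionsOverride hook added in db_test_util.h above, a test that wants the new behavior opts in explicitly. A hypothetical sketch of such a test — the test name is made up, but DBTestBase::CurrentOptions() taking an anon::OptionsOverride is the existing test-util machinery the diff builds on:

    // Hypothetical test: opt one test into dynamic level sizing while
    // GetDefaultOptions() keeps level_compaction_dynamic_level_bytes = false.
    TEST_F(DBCompactionTest, DynamicLevelBytesExample) {
      anon::OptionsOverride options_override;
      options_override.level_compaction_dynamic_level_bytes = true;  // opt in
      Options options = CurrentOptions(options_override);
      DestroyAndReopen(options);
      // ... assertions about the resulting LSM shape would go here ...
    }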
diff --git a/db/version_set.cc b/db/version_set.cc
index 7b070892c..f8b5ff6ce 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -3460,12 +3460,11 @@ void VersionStorageInfo::ComputeCompactionScore(
         // Level-based involves L0->L0 compactions that can lead to oversized
         // L0 files. Take into account size as well to avoid later giant
        // compactions to the base level.
-        // If score in L0 is always too high, L0->L1 will always be
-        // prioritized over L1->L2 compaction and L1 will accumulate to
-        // too large. But if L0 score isn't high enough, L0 will accumulate
-        // and data is not moved to L1 fast enough. With potential L0->L0
-        // compaction, number of L0 files aren't always an indication of
-        // L0 oversizing, and we also need to consider total size of L0.
+        // If the L0 score is always too high, L0->LBase will always be
+        // prioritized over LBase->LBase+1 compaction, and LBase will grow
+        // too large. But if the L0 score isn't high enough, L0 will
+        // accumulate and data is not moved to LBase fast enough. The score
+        // calculation below takes into account L0 size vs LBase size.
        if (immutable_options.level_compaction_dynamic_level_bytes) {
          if (total_size >= mutable_cf_options.max_bytes_for_level_base) {
            // When calculating estimated_compaction_needed_bytes, we assume
@@ -3477,10 +3476,13 @@ void VersionStorageInfo::ComputeCompactionScore(
            score = std::max(score, 1.01);
          }
          if (total_size > level_max_bytes_[base_level_]) {
-          // In this case, we compare L0 size with actual L1 size and make
-          // sure score is more than 1.0 (10.0 after scaled) if L0 is larger
-          // than L1. Since in this case L1 score is lower than 10.0, L0->L1
-          // is prioritized over L1->L2.
+          // In this case, we compare L0 size with actual LBase size and
+          // make sure score is more than 1.0 (10.0 after scaled) if L0 is
+          // larger than LBase. Since LBase score = LBase size /
+          // (target size + total_downcompact_bytes), where
+          // total_downcompact_bytes here equals L0's total_size, which is
+          // larger than LBase size, LBase score is lower than 10.0. So
+          // L0->LBase is prioritized over LBase->LBase+1.
            uint64_t base_level_size = 0;
            for (auto f : files_[base_level_]) {
              base_level_size += f->compensated_file_size;
@@ -4703,7 +4705,7 @@ void VersionStorageInfo::CalculateBaseBytes(const ImmutableOptions& ioptions,
       assert(base_level_ == 1);
       base_level_size = base_bytes_max;
     } else {
-      base_level_size = cur_level_size;
+      base_level_size = std::max(static_cast<uint64_t>(1), cur_level_size);
     }
   }
diff --git a/db/version_set_test.cc b/db/version_set_test.cc
index c0f6d1340..c7925749b 100644
--- a/db/version_set_test.cc
+++ b/db/version_set_test.cc
@@ -3547,6 +3547,7 @@ INSTANTIATE_TEST_CASE_P(

 TEST_P(ChargeFileMetadataTestWithParam, Basic) {
   Options options;
+  options.level_compaction_dynamic_level_bytes = false;
   BlockBasedTableOptions table_options;
   CacheEntryRoleOptions::Decision charge_file_metadata = GetParam();
   table_options.cache_usage_options.options_overrides.insert(
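To make the ComputeCompactionScore() comments above concrete, here is a small standalone sketch — not RocksDB's actual implementation; names and the 10.0 scale factor simply follow the comments — of how an oversized L0 outranks LBase:

    #include <algorithm>
    #include <cstdint>

    // Illustrative sketch of the L0-vs-LBase scoring described in the
    // version_set.cc comments above. A score above 1.0 (10.0 after scaling)
    // means the level needs compaction.
    double L0CompactionScore(int num_l0_files, int level0_trigger,
                             uint64_t l0_total_size, uint64_t lbase_size,
                             uint64_t max_bytes_for_level_base) {
      // File-count based score, as in non-dynamic leveled compaction.
      double score = static_cast<double>(num_l0_files) / level0_trigger;
      if (l0_total_size >= max_bytes_for_level_base) {
        // L0 holds at least a full LBase worth of data: force score above 1.
        score = std::max(score, 1.01);
      }
      if (lbase_size > 0 && l0_total_size > lbase_size) {
        // L0 larger than LBase: the score grows with the size ratio, so
        // L0->LBase stays prioritized over LBase->LBase+1 (whose score stays
        // below 10.0 because total_downcompact_bytes pads its denominator).
        score = std::max(score,
                         static_cast<double>(l0_total_size) / lbase_size);
      }
      return score;  // the caller applies the 10.0 scale factor
    }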
It is // useful to limit worse case space amplification. - // - // - // If the compaction from L0 is lagged behind, a special mode will be turned - // on to prioritize write amplification against max_bytes_for_level_multiplier - // or max_bytes_for_level_base. The L0 compaction is lagged behind by looking - // at number of L0 files and total L0 size. If number of L0 files is at least - // the double of level0_file_num_compaction_trigger, or the total size is - // at least max_bytes_for_level_base, this mode is on. The target of L1 grows - // to the actual data size in L0, and then determine the target for each level - // so that each level will have the same level multiplier. - // - // For example, when L0 size is 100MB, the size of last level is 1600MB, - // max_bytes_for_level_base = 80MB, and max_bytes_for_level_multiplier = 10. - // Since L0 size is larger than max_bytes_for_level_base, this is a L0 - // compaction backlogged mode. So that the L1 size is determined to be 100MB. - // Based on max_bytes_for_level_multiplier = 10, at least 3 non-0 levels will - // be needed. The level multiplier will be calculated to be 4 and the three - // levels' target to be [100MB, 400MB, 1600MB]. - // - // In this mode, The number of levels will be no more than the normal mode, - // and the level multiplier will be lower. The write amplification will - // likely to be reduced. - // - // - // max_bytes_for_level_multiplier_additional is ignored with this flag on. - // - // To make the migration easier, when turning this feature on, files in the - // LSM will be trivially moved down to fill the LSM starting from the - // bottommost level during DB open. For example, if the LSM looks like: - // L0: f0, f1 - // L1: f2, f3 - // L2: f4 - // L3: - // L4: f5 - // and the DB is opened with num_levels = 7 with this feature turned on, - // new LSM after DB open looks like the following: - // L0: f0, f1, (and possibly data flushed from WAL) - // L4: f2, f3 - // L5: f4 - // L6: f5 - // // If `allow_ingest_behind=true` or `preclude_last_level_data_seconds > 0`, // then the last level is reserved, and we will start filling LSM from the - // second last level (L5 in the above example). + // second last level. // + // With this option on, compaction is more adaptive to write traffic: + // Compaction priority will take into account estimated bytes to be compacted + // down to a level and favors compacting lower levels when there is a write + // traffic spike (and hence more compaction debt). Refer to + // https://github.com/facebook/rocksdb/wiki/Leveled-Compactio#option-level_compaction_dynamic_level_bytes-and-levels-target-size + // for more detailed description. See more implementation detail in: + // VersionStorageInfo::ComputeCompactionScore(). + // + // With this option on, unneeded levels will be drained automatically: // Note that there may be excessive levels (where target level size is 0 when - // computed based on this feature) in the LSM after a user migrates to turn - // this feature on. This is especially likely when a user migrates from - // leveled compaction with a smaller multiplier or from universal compaction. - // RocksDB will gradually drain these unnecessary levels by compacting files - // down the LSM. + // computed based on this feature) in the LSM. This can happen after a user + // migrates to turn this feature on or deletes a lot of data. This is + // especially likely when a user migrates from leveled compaction with a + // smaller multiplier or from universal compaction. 
+  // drain these unnecessary levels by compacting files down the LSM. A
+  // smaller number of levels should help reduce read amplification.
+  //
+  // Migration to turn on this option:
+  // - Before RocksDB v8.2, users are expected to do a full manual compaction
+  //   and then restart DB to turn on this option.
+  // - Since RocksDB v8.2, users can just restart DB with this option on, as
+  //   long as num_levels is no smaller than the number of non-empty levels in
+  //   the LSM. Migration will be done automatically by RocksDB. See more in
+  //   https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#migrating-from-level_compaction_dynamic_level_bytesfalse-to-level_compaction_dynamic_level_bytestrue
   //
-  // Default: false
-  bool level_compaction_dynamic_level_bytes = false;
+  // Default: true
+  bool level_compaction_dynamic_level_bytes = true;

   // Allows RocksDB to generate files that are not exactly the target_file_size
   // only for the non-bottommost files. Which can reduce the write-amplification
@@ -714,6 +691,8 @@ struct AdvancedColumnFamilyOptions {
   // Different max-size multipliers for different levels.
   // These are multiplied by max_bytes_for_level_multiplier to arrive
   // at the max-size of each level.
+  // This option only applies to leveled compaction with
+  // `level_compaction_dynamic_level_bytes = false`.
   //
   // Default: 1
   //
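The target-size rule spelled out in the option comment can be illustrated with a small standalone calculation — a sketch based only on the documented behavior (the last level is sized by its actual data; upper levels shrink by the multiplier until the base level's target lands in (max_bytes_for_level_base / multiplier, max_bytes_for_level_base]); this is not RocksDB's actual CalculateBaseBytes():

    #include <cstdint>
    #include <vector>

    // Illustrative sketch of dynamic level target sizing. Returns target
    // sizes indexed by level; 0 means the level is unused.
    std::vector<uint64_t> DynamicLevelTargets(uint64_t last_level_data_size,
                                              uint64_t max_bytes_for_level_base,
                                              double multiplier,
                                              int num_levels) {
      std::vector<uint64_t> targets(num_levels, 0);
      uint64_t cur = last_level_data_size;
      for (int level = num_levels - 1; level >= 1; --level) {
        targets[level] = cur;
        if (cur <= max_bytes_for_level_base) {
          break;  // this level becomes the base level; levels above stay unused
        }
        cur = static_cast<uint64_t>(cur / multiplier);
      }
      return targets;
    }

For example, 1600MB of data in the last level with max_bytes_for_level_base = 80MB, multiplier = 10 and num_levels = 7 yields targets {0, 0, 0, 0, 16MB, 160MB, 1600MB}: L4 becomes the base level, and its 16MB target falls in the documented (8MB, 80MB] range.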
diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java
index d0e7b4f38..3da65a848 100644
--- a/java/src/test/java/org/rocksdb/RocksDBTest.java
+++ b/java/src/test/java/org/rocksdb/RocksDBTest.java
@@ -818,25 +818,27 @@ public class RocksDBTest {
     final int NUM_L0_FILES = 10;
     final int TEST_SCALE = 5;
     final int KEY_INTERVAL = 100;
-    try (final Options opt = new Options().
-        setCreateIfMissing(true).
-        setCompactionStyle(CompactionStyle.LEVEL).
-        setNumLevels(5).
-        // a slightly bigger write buffer than L0 file
-        // so that we can ensure manual flush always
-        // go before background flush happens.
-        setWriteBufferSize(L0_FILE_SIZE * 2).
-        // Disable auto L0 -> L1 compaction
-        setLevelZeroFileNumCompactionTrigger(20).
-        setTargetFileSizeBase(L0_FILE_SIZE * 100).
-        setTargetFileSizeMultiplier(1).
-        // To disable auto compaction
-        setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100).
-        setMaxBytesForLevelMultiplier(2).
-        setDisableAutoCompactions(true);
-        final RocksDB db = RocksDB.open(opt,
-            dbFolder.getRoot().getAbsolutePath())
-    ) {
+    try (final Options opt = new Options()
+             .setCreateIfMissing(true)
+             .setCompactionStyle(CompactionStyle.LEVEL)
+             .setLevelCompactionDynamicLevelBytes(false)
+             .setNumLevels(5)
+             .
+             // a slightly bigger write buffer than L0 file
+             // so that we can ensure manual flush always
+             // go before background flush happens.
+             setWriteBufferSize(L0_FILE_SIZE * 2)
+             .
+             // Disable auto L0 -> L1 compaction
+             setLevelZeroFileNumCompactionTrigger(20)
+             .setTargetFileSizeBase(L0_FILE_SIZE * 100)
+             .setTargetFileSizeMultiplier(1)
+             .
+             // To disable auto compaction
+             setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100)
+             .setMaxBytesForLevelMultiplier(2)
+             .setDisableAutoCompactions(true);
+         final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) {
       // fill database with key/value pairs
       final byte[] value = new byte[VALUE_SIZE];
       int int_key = 0;
@@ -904,7 +906,8 @@ public class RocksDBTest {
             .setCompressionType(CompressionType.NO_COMPRESSION)
             .setTargetFileSizeBase(FILE_SIZE)
             .setWriteBufferSize(FILE_SIZE / 2)
-            .setDisableAutoCompactions(true);
+            .setDisableAutoCompactions(true)
+            .setLevelCompactionDynamicLevelBytes(false);
         final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) {
       final int records = FILE_SIZE / (KEY_SIZE + VALUE_SIZE);
@@ -954,25 +957,28 @@ public class RocksDBTest {
     final int TEST_SCALE = 5;
     final int KEY_INTERVAL = 100;

-    try (final DBOptions opt = new DBOptions().
-        setCreateIfMissing(true).
-        setCreateMissingColumnFamilies(true);
-        final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions().
-        setCompactionStyle(CompactionStyle.LEVEL).
-        setNumLevels(5).
+    try (final DBOptions opt =
+             new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true);
+         final ColumnFamilyOptions new_cf_opts =
+             new ColumnFamilyOptions()
+                 .setCompactionStyle(CompactionStyle.LEVEL)
+                 .setLevelCompactionDynamicLevelBytes(false)
+                 .setNumLevels(5)
+                 .
         // a slightly bigger write buffer than L0 file
         // so that we can ensure manual flush always
        // go before background flush happens.
-        setWriteBufferSize(L0_FILE_SIZE * 2).
+        setWriteBufferSize(L0_FILE_SIZE * 2)
+                 .
         // Disable auto L0 -> L1 compaction
-        setLevelZeroFileNumCompactionTrigger(20).
-        setTargetFileSizeBase(L0_FILE_SIZE * 100).
-        setTargetFileSizeMultiplier(1).
+        setLevelZeroFileNumCompactionTrigger(20)
+                 .setTargetFileSizeBase(L0_FILE_SIZE * 100)
+                 .setTargetFileSizeMultiplier(1)
+                 .
         // To disable auto compaction
-        setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100).
-        setMaxBytesForLevelMultiplier(2).
-        setDisableAutoCompactions(true)
-    ) {
+                 setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100)
+                 .setMaxBytesForLevelMultiplier(2)
+                 .setDisableAutoCompactions(true)) {
       final List<ColumnFamilyDescriptor> columnFamilyDescriptors = Arrays.asList(
           new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
@@ -1267,15 +1273,16 @@ public class RocksDBTest {
     final byte[] cfName = "pikachu".getBytes(UTF_8);

     try (final Options options = new Options()
-        .setCreateIfMissing(true)
-        .setWriteBufferSize(writeBufferSize)
-        .setCompactionStyle(CompactionStyle.LEVEL)
-        .setTargetFileSizeBase(writeBufferSize)
-        .setMaxBytesForLevelBase(writeBufferSize * 2)
-        .setLevel0StopWritesTrigger(2)
-        .setMaxBytesForLevelMultiplier(2)
-        .setCompressionType(CompressionType.NO_COMPRESSION)
-        .setMaxSubcompactions(4)) {
+             .setCreateIfMissing(true)
+             .setWriteBufferSize(writeBufferSize)
+             .setCompactionStyle(CompactionStyle.LEVEL)
+             .setLevelCompactionDynamicLevelBytes(false)
+             .setTargetFileSizeBase(writeBufferSize)
+             .setMaxBytesForLevelBase(writeBufferSize * 2)
+             .setLevel0StopWritesTrigger(2)
+             .setMaxBytesForLevelMultiplier(2)
+             .setCompressionType(CompressionType.NO_COMPRESSION)
+             .setMaxSubcompactions(4)) {
       final String dbPath = dbFolder.getRoot().getAbsolutePath();
       try (final RocksDB db = RocksDB.open(options, dbPath);
            final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(options)) {
diff --git a/tools/reduce_levels_test.cc b/tools/reduce_levels_test.cc
index 97f8030b7..229911eaa 100644
--- a/tools/reduce_levels_test.cc
+++ b/tools/reduce_levels_test.cc
@@ -81,6 +81,7 @@ class ReduceLevelTest : public testing::Test {

 Status ReduceLevelTest::OpenDB(bool create_if_missing, int num_levels) {
   ROCKSDB_NAMESPACE::Options opt;
+  opt.level_compaction_dynamic_level_bytes = false;
   opt.num_levels = num_levels;
   opt.create_if_missing = create_if_missing;
   ROCKSDB_NAMESPACE::Status st =
diff --git a/unreleased_history/behavior_changes/010_default_level_dynamic.md b/unreleased_history/behavior_changes/010_default_level_dynamic.md
new file mode 100644
index 000000000..3382a7e4b
--- /dev/null
+++ b/unreleased_history/behavior_changes/010_default_level_dynamic.md
@@ -0,0 +1 @@
+Change the default value for option `level_compaction_dynamic_level_bytes` to true. This affects users who use leveled compaction and do not set this option explicitly. These users may see additional background compactions following DB open. These compactions help to shape the LSM according to `level_compaction_dynamic_level_bytes`, such that the size of each level Ln is approximately the size of Ln-1 times `max_bytes_for_level_multiplier`. Turning on this option has other benefits too: see more detail in the wiki: https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#option-level_compaction_dynamic_level_bytes-and-levels-target-size and in the option comment in advanced_options.h (#11525).
\ No newline at end of file
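As the changelog entry notes, the new default only affects applications that leave the option unset. A minimal sketch of explicitly opting out via the public C++ API (the DB path here is just an example):

    #include <cassert>

    #include "rocksdb/db.h"
    #include "rocksdb/options.h"

    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;
      // Explicitly keep the old behavior; leaving this at the new default of
      // true enables dynamic level sizing (and a one-time automatic migration
      // of the LSM on DB open, as described above).
      options.level_compaction_dynamic_level_bytes = false;

      rocksdb::DB* db = nullptr;
      rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/rocksdb_example", &db);
      assert(s.ok());
      delete db;
      return 0;
    }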