diff --git a/HISTORY.md b/HISTORY.md index c03180849..694b90a53 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,7 +1,8 @@ # Rocksdb Change Log ## Unreleased -### Behavior Changes * When retryable IO error occurs during compaction, it is mapped to soft error and set the BG error. However, auto resume is not called to clean the soft error since compaction will reschedule by itself. In this change, When retryable IO error occurs during compaction, BG error is not set. User will be informed the error via EventHelper. +### Default Option Change +* Change the default of `memtable_prefix_bloom_size_ratio` from 0 to 0.015 and the default of `memtable_whole_key_filtering` from false to true. The memtable bloom filter is therefore enabled by default and uses up to 1.5% of the memtable's space. Call `OldDefaults()` with a version earlier than 6.18 to restore the previous values (0 and false). ### New Features * Add support for key-value integrity protection in live updates from the user buffers provided to `WriteBatch` through the write to RocksDB's in-memory update buffer (memtable). This is intended to detect some cases of in-memory data corruption, due to either software or hardware errors. Users can enable protection by constructing their `WriteBatch` with `protection_bytes_per_key == 8`. 
diff --git a/db/cuckoo_table_db_test.cc b/db/cuckoo_table_db_test.cc index 9b76c03d5..87120d147 100644 --- a/db/cuckoo_table_db_test.cc +++ b/db/cuckoo_table_db_test.cc @@ -44,6 +44,8 @@ class CuckooTableDBTest : public testing::Test { options.allow_mmap_reads = true; options.create_if_missing = true; options.allow_concurrent_memtable_write = false; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; return options; } diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index 7c9277c14..c83b503b4 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -1867,6 +1867,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.disable_auto_compactions = true; options.statistics = CreateDBStatistics(); + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; + // Enable prefix bloom for SST files BlockBasedTableOptions table_options; table_options.filter_policy.reset(new BFP(10, bfp_impl)); diff --git a/db/db_test.cc b/db/db_test.cc index dbe4a161b..df231ccbc 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -4001,6 +4001,8 @@ TEST_F(DBTest, DynamicMemtableOptions) { options.level0_file_num_compaction_trigger = 1024; options.level0_slowdown_writes_trigger = 1024; options.level0_stop_writes_trigger = 1024; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; DestroyAndReopen(options); auto gen_l0_kb = [this](int size) { diff --git a/db/db_test2.cc b/db/db_test2.cc index 33c13e69c..3a1bf71fd 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -156,6 +156,8 @@ class PartitionedIndexTestListener : public EventListener { TEST_F(DBTest2, PartitionedIndexUserToInternalKey) { BlockBasedTableOptions table_options; Options options = CurrentOptions(); + // Hold all data until manual flush. 
+ options.memtable_factory.reset(new SpecialSkipListFactory(5000)); table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; PartitionedIndexTestListener* listener = new PartitionedIndexTestListener(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 3a104346d..46d7c4478 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -344,6 +344,8 @@ Options DBTestBase::GetDefaultOptions() const { if (!env_->skip_fsync_) { options.track_and_verify_wals_in_manifest = true; } + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; return options; } diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 548b8ae0e..f669c1d4a 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -110,6 +110,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSingleSortedRun) { options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; options.target_file_size_base = 32 << 10; // 32KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; // trigger compaction if there are >= 4 files KeepFilterFactory* filter = new KeepFilterFactory(true); filter->expect_manual_compaction_.store(false); @@ -144,6 +146,8 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) { options.target_file_size_base = 32 << 10; // 32KB // trigger compaction if there are >= 4 files options.level0_file_num_compaction_trigger = 4; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; BlockBasedTableOptions bbto; bbto.cache_index_and_filter_blocks = true; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); @@ -213,6 +217,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTrigger) { options.target_file_size_base = 32 << 10; // 32KB // trigger compaction if there are >= 4 files 
options.level0_file_num_compaction_trigger = 4; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; KeepFilterFactory* filter = new KeepFilterFactory(true); filter->expect_manual_compaction_.store(false); options.compaction_filter_factory.reset(filter); @@ -317,6 +323,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSizeAmplification) { options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 3; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); @@ -359,6 +367,8 @@ TEST_P(DBTestUniversalCompaction, DynamicUniversalCompactionSizeAmplification) { options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 3; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; // Initial setup of compaction_options_universal will prevent universal // compaction from happening options.compaction_options_universal.size_ratio = 100; @@ -439,6 +449,8 @@ TEST_P(DBTestUniversalCompaction, DynamicUniversalCompactionReadAmplification) { options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 3; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; // Initial setup of compaction_options_universal will prevent universal // compaction from happening options.compaction_options_universal.max_size_amplification_percent = 2000; @@ -540,6 +552,8 @@ TEST_P(DBTestUniversalCompaction, CompactFilesOnUniversalCompaction) { options.create_if_missing = true; options.compaction_style = kCompactionStyleLevel; options.num_levels = 1; + options.memtable_whole_key_filtering = false; + 
options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = options.write_buffer_size; options.compression = kNoCompression; options = CurrentOptions(options); @@ -605,6 +619,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTargetLevel) { options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 100 << 10; // 100KB options.num_levels = 7; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.disable_auto_compactions = true; DestroyAndReopen(options); @@ -651,6 +667,8 @@ TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionMultiLevels) { options.write_buffer_size = 100 << 10; // 100KB options.level0_file_num_compaction_trigger = 8; options.max_background_compactions = 3; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = 32 * 1024; CreateAndReopenWithCF({"pikachu"}, options); @@ -696,6 +714,8 @@ TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionTrivialMove) { options.level0_file_num_compaction_trigger = 3; options.max_background_compactions = 2; options.target_file_size_base = 32 * 1024; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); @@ -741,6 +761,8 @@ TEST_P(DBTestUniversalCompactionParallel, UniversalCompactionParallel) { options.max_background_compactions = 3; options.max_background_flushes = 3; options.target_file_size_base = 1 * 1024; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.compaction_options_universal.max_size_amplification_percent = 110; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); @@ -800,6 +822,8 @@ TEST_P(DBTestUniversalCompactionParallel, PickByFileNumberBug) { options.level0_file_num_compaction_trigger = 7; options.max_background_compactions = 2; 
options.target_file_size_base = 1024 * 1024; // 1MB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; // Disable size amplifiction compaction options.compaction_options_universal.max_size_amplification_percent = @@ -916,6 +940,8 @@ INSTANTIATE_TEST_CASE_P(Parallel, DBTestUniversalCompactionParallel, TEST_P(DBTestUniversalCompaction, UniversalCompactionOptions) { Options options = CurrentOptions(); + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; // 4KB @@ -951,6 +977,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionStopStyleSimilarSize) { options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; // 4KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = 32 << 10; // 32KB // trigger compaction if there are >= 4 files options.level0_file_num_compaction_trigger = 4; @@ -1037,7 +1065,9 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio1) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; - options.write_buffer_size = 100 << 10; // 100KB + options.write_buffer_size = 100 << 10; // 100KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 2; options.num_levels = num_levels_; @@ -1105,6 +1135,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio2) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 100 << 10; // 100KB + options.memtable_whole_key_filtering = false; + 
options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 2; options.num_levels = num_levels_; @@ -1150,6 +1182,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTrivialMoveTest1) { options.compaction_options_universal.allow_trivial_move = true; options.num_levels = 2; options.write_buffer_size = 100 << 10; // 100KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 3; options.max_background_compactions = 1; options.target_file_size_base = 32 * 1024; @@ -1196,6 +1230,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTrivialMoveTest2) { options.compaction_options_universal.allow_trivial_move = true; options.num_levels = 15; options.write_buffer_size = 100 << 10; // 100KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 8; options.max_background_compactions = 2; options.target_file_size_base = 64 * 1024; @@ -1235,6 +1271,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionFourPaths) { options.compaction_options_universal.size_ratio = 5; options.write_buffer_size = 111 << 10; // 114KB options.arena_block_size = 4 << 10; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 2; options.num_levels = 1; @@ -1339,6 +1377,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionCFPathUse) { options.compaction_options_universal.size_ratio = 10; options.write_buffer_size = 111 << 10; // 114KB options.arena_block_size = 4 << 10; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 2; options.num_levels = 1; @@ -1497,6 +1537,8 @@ TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) { 
options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; options.write_buffer_size = 200 << 10; // 200KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 3; options.memtable_factory.reset(new SpecialSkipListFactory(KNumKeysPerFile)); options = CurrentOptions(options); @@ -1576,6 +1618,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSecondPathRatio) { options.compaction_options_universal.size_ratio = 5; options.write_buffer_size = 111 << 10; // 114KB options.arena_block_size = 4 << 10; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 2; options.num_levels = 1; options.memtable_factory.reset( @@ -1679,6 +1723,8 @@ TEST_P(DBTestUniversalCompaction, ConcurrentBottomPriLowPriCompactions) { options.compaction_style = kCompactionStyleUniversal; options.num_levels = num_levels_; options.write_buffer_size = 100 << 10; // 100KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = kNumFilesTrigger; // Trigger compaction if size amplification exceeds 110% @@ -1736,6 +1782,8 @@ TEST_P(DBTestUniversalCompaction, RecalculateScoreAfterPicking) { options.compaction_style = kCompactionStyleUniversal; options.level0_file_num_compaction_trigger = kNumFilesTrigger; options.num_levels = num_levels_; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; Reopen(options); std::atomic num_compactions_attempted(0); @@ -1836,6 +1884,8 @@ TEST_P(DBTestUniversalManualCompactionOutputPathId, options.num_levels = num_levels_; options.target_file_size_base = 1 << 30; // Big size options.level0_file_num_compaction_trigger = 10; + options.memtable_whole_key_filtering = false; + 
options.memtable_prefix_bloom_size_ratio = 0; Destroy(options); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); @@ -1903,6 +1953,8 @@ TEST_F(DBTestUniversalCompaction2, BasicL0toL1) { opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence @@ -1946,6 +1998,8 @@ TEST_F(DBTestUniversalCompaction2, SingleLevel) { opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence @@ -1984,6 +2038,8 @@ TEST_F(DBTestUniversalCompaction2, MultipleLevels) { opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence @@ -2056,6 +2112,8 @@ TEST_F(DBTestUniversalCompaction2, OverlappingL0) { opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence @@ -2098,6 +2156,8 @@ TEST_F(DBTestUniversalCompaction2, IngestBehind) { opts.compaction_options_universal.size_ratio = 10; 
opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence @@ -2160,6 +2220,8 @@ TEST_F(DBTestUniversalCompaction2, PeriodicCompaction) { opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; opts.periodic_compaction_seconds = 48 * 60 * 60; // 2 days + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; opts.num_levels = 5; env_->SetMockSleep(); Reopen(opts); diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc index 7c6d15bfd..af42f8dee 100644 --- a/db/memtable_list_test.cc +++ b/db/memtable_list_test.cc @@ -30,6 +30,8 @@ class MemTableListTest : public testing::Test { MemTableListTest() : db(nullptr), file_number(1) { dbname = test::PerThreadDBPath("memtable_list_test"); options.create_if_missing = true; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; EXPECT_OK(DestroyDB(dbname, options)); } diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index a7d9f542f..f6c012373 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -300,19 +300,19 @@ struct AdvancedColumnFamilyOptions { // write_buffer_size * memtable_prefix_bloom_size_ratio. // If it is larger than 0.25, it is sanitized to 0.25. // - // Default: 0 (disable) + // Default: 0.015 // // Dynamically changeable through SetOptions() API - double memtable_prefix_bloom_size_ratio = 0.0; + double memtable_prefix_bloom_size_ratio = 0.015; // Enable whole key bloom filter in memtable. Note this will only take effect // if memtable_prefix_bloom_size_ratio is not 0. 
Enabling whole key filtering // can potentially reduce CPU usage for point-look-ups. // - // Default: false (disable) + // Default: true (enable) // // Dynamically changeable through SetOptions() API - bool memtable_whole_key_filtering = false; + bool memtable_whole_key_filtering = true; // Page size for huge page for the arena used by the memtable. If <=0, it // won't allocate from huge page but from malloc. diff --git a/options/options.cc b/options/options.cc index d76a15441..eb0e6c0ed 100644 --- a/options/options.cc +++ b/options/options.cc @@ -484,6 +484,11 @@ DBOptions* DBOptions::OldDefaults(int rocksdb_major_version, ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults( int rocksdb_major_version, int rocksdb_minor_version) { + if (rocksdb_major_version < 6 || + (rocksdb_major_version == 6 && rocksdb_minor_version < 18)) { + memtable_prefix_bloom_size_ratio = 0; + memtable_whole_key_filtering = false; + } if (rocksdb_major_version < 5 || (rocksdb_major_version == 5 && rocksdb_minor_version <= 18)) { compaction_pri = CompactionPri::kByCompensatedSize; @@ -501,7 +506,6 @@ ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults( } else if (rocksdb_major_version == 5 && rocksdb_minor_version < 2) { level0_stop_writes_trigger = 30; } - return this; } diff --git a/options/options_test.cc b/options/options_test.cc index b15be0206..929695921 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -3052,6 +3052,15 @@ TEST_F(OptionsParserTest, DifferentDefault) { old_default_opts.OldDefaults(5, 18); ASSERT_TRUE(old_default_opts.compaction_pri == CompactionPri::kByCompensatedSize); + ASSERT_EQ(0, old_default_opts.memtable_prefix_bloom_size_ratio); + ASSERT_FALSE(old_default_opts.memtable_whole_key_filtering); + } + + { + Options old_default_opts; + old_default_opts.OldDefaults(6, 17); + ASSERT_EQ(0, old_default_opts.memtable_prefix_bloom_size_ratio); + ASSERT_FALSE(old_default_opts.memtable_whole_key_filtering); } Options small_opts; diff --git 
a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index ad27bd964..ac0b9165b 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -319,6 +319,8 @@ TEST_P(OptimisticTransactionTest, CheckKeySkipOldMemtable) { for (int attempt = kAttemptHistoryMemtable; attempt <= kAttemptImmMemTable; attempt++) { options.max_write_buffer_number_to_maintain = 3; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; Reopen(); WriteOptions write_options; diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 9c4ce5604..06e31a8cd 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -5672,6 +5672,10 @@ TEST_P(TransactionTest, DuplicateKeys) { ASSERT_OK(ReOpen()); std::unique_ptr comp_gc(new ThreeBytewiseComparator()); cf_options.comparator = comp_gc.get(); + // ThreeBytewiseComparator won't work with bloom filters. + cf_options.memtable_whole_key_filtering = false; + cf_options.memtable_prefix_bloom_size_ratio = 0; + ASSERT_OK(db->CreateColumnFamily(cf_options, cf_name, &cf_handle)); WriteOptions write_options; WriteBatch batch;