diff --git a/db/builder.cc b/db/builder.cc index 5d3273e78..4b8885f5b 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -30,9 +30,12 @@ TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions, const InternalKeyComparator& internal_comparator, WritableFile* file, const CompressionType compression_type, - const CompressionOptions& compression_opts) { - return ioptions.table_factory->NewTableBuilder( - ioptions, internal_comparator, file, compression_type, compression_opts); + const CompressionOptions& compression_opts, + const bool skip_filters) { + return ioptions.table_factory->NewTableBuilder(ioptions, internal_comparator, + file, compression_type, + compression_opts, + skip_filters); } Status BuildTable(const std::string& dbname, Env* env, diff --git a/db/builder.h b/db/builder.h index cf3ebd1ae..3da05d8b4 100644 --- a/db/builder.h +++ b/db/builder.h @@ -26,11 +26,12 @@ class VersionEdit; class TableBuilder; class WritableFile; -extern TableBuilder* NewTableBuilder( - const ImmutableCFOptions& options, - const InternalKeyComparator& internal_comparator, - WritableFile* file, const CompressionType compression_type, - const CompressionOptions& compression_opts); +TableBuilder* NewTableBuilder(const ImmutableCFOptions& options, + const InternalKeyComparator& internal_comparator, + WritableFile* file, + const CompressionType compression_type, + const CompressionOptions& compression_opts, + const bool skip_filters = false); // Build a Table file from the contents of *iter. The generated file // will be named according to number specified in meta. 
On success, the rest of diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 00c7b52f9..36cc46412 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -1062,10 +1062,20 @@ Status CompactionJob::OpenCompactionOutputFile() { compact_->compaction->OutputFilePreallocationSize(mutable_cf_options_))); ColumnFamilyData* cfd = compact_->compaction->column_family_data(); + bool skip_filters = false; + + // If the Column family flag is to only optimize filters for hits, + // we can skip creating filters if this is the bottommost_level where + // data is going to be found + // + if (cfd->ioptions()->optimize_filters_for_hits && bottommost_level_) { + skip_filters = true; + } + compact_->builder.reset(NewTableBuilder( *cfd->ioptions(), cfd->internal_comparator(), compact_->outfile.get(), compact_->compaction->OutputCompressionType(), - cfd->ioptions()->compression_opts)); + cfd->ioptions()->compression_opts, skip_filters)); LogFlush(db_options_.info_log); return s; } diff --git a/db/db_test.cc b/db/db_test.cc index aeb47aeef..884d91e3b 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -419,7 +419,8 @@ class DBTest { kInfiniteMaxOpenFiles = 23, kxxHashChecksum = 24, kFIFOCompaction = 25, - kEnd = 26 + kOptimizeFiltersForHits = 26, + kEnd = 27 }; int option_config_; @@ -682,6 +683,12 @@ class DBTest { options.prefix_extractor.reset(NewNoopTransform()); break; } + case kOptimizeFiltersForHits: { + options.optimize_filters_for_hits = true; + set_block_based_table_factory = true; + break; + } + default: break; } @@ -10797,6 +10804,58 @@ TEST(DBTest, DeleteMovedFileAfterCompaction) { } } +TEST(DBTest, OptimizeFiltersForHits) { + Options options = CurrentOptions(); + options.write_buffer_size = 32 * 1024; + options.target_file_size_base = 32 * 1024; + options.level0_file_num_compaction_trigger = 2; + options.level0_slowdown_writes_trigger = 2; + options.level0_stop_writes_trigger = 4; + options.max_bytes_for_level_base = 64 * 1024; + 
options.max_write_buffer_number = 2; + options.max_background_compactions = 8; + options.max_background_flushes = 8; + options.compaction_style = kCompactionStyleLevel; + BlockBasedTableOptions bbto; + bbto.filter_policy.reset(NewBloomFilterPolicy(10, true)); + bbto.whole_key_filtering = true; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + options.optimize_filters_for_hits = true; + options.statistics = rocksdb::CreateDBStatistics(); + CreateAndReopenWithCF({"mypikachu"}, options); + + int numkeys = 200000; + for (int i = 0; i < 20; i += 2) { + for (int j = i; j < numkeys; j += 20) { + ASSERT_OK(Put(1, Key(j), "val")); + } + } + + + ASSERT_OK(Flush(1)); + dbfull()->TEST_WaitForCompact(); + + for (int i = 1; i < numkeys; i += 2) { + ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND"); + } + + ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0)); + ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1)); + ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); + + // When optimize_filters_for_hits is ON, the last level which has + // most of the keys does not use bloom filters. We end up using + // bloom filters in a very small number of cases. Without the flag, 
+ // this number would be close to 150000 (all the keys at the last level) + + // some use in the upper levels + // + ASSERT_GT(90000, TestGetTickerCount(options, BLOOM_FILTER_USEFUL)); + + for (int i = 0; i < numkeys; i += 2) { + ASSERT_EQ(Get(1, Key(i)), "val"); + } +} + TEST(DBTest, L0L1L2AndUpHitCounter) { Options options = CurrentOptions(); options.write_buffer_size = 32 * 1024; diff --git a/include/rocksdb/immutable_options.h b/include/rocksdb/immutable_options.h index adf46d647..790685fbc 100644 --- a/include/rocksdb/immutable_options.h +++ b/include/rocksdb/immutable_options.h @@ -91,6 +91,8 @@ struct ImmutableCFOptions { int num_levels; + bool optimize_filters_for_hits; + #ifndef ROCKSDB_LITE // A vector of EventListeners which call-back functions will be called // when specific RocksDB event happens. diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index bfe88fe0c..cbc0093f5 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -610,6 +610,22 @@ struct ColumnFamilyOptions { // Default: 2 uint32_t min_partial_merge_operands; + // This flag specifies that the implementation should optimize the filters + // mainly for cases where keys are found rather than also optimize for keys + // missed. This would be used in cases where the application knows that + // there are very few misses or the performance in the case of misses is not + // important. + // + // For now, this flag allows us to not store filters for the last level i.e. + // the largest level which contains data of the LSM store. For keys which + // are hits, the filters in this level are not useful because we will search + // for the data anyway. NOTE: the filters in other levels are still useful + // even for key hits because they tell us whether to look in that level or go + // to the higher level. 
+ // + // Default: false + bool optimize_filters_for_hits; + #ifndef ROCKSDB_LITE // A vector of EventListeners which call-back functions will be called // when specific RocksDB event happens. diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 655b3926e..f912f682c 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -371,9 +371,10 @@ class TableFactory { // to use in this table. virtual TableBuilder* NewTableBuilder( const ImmutableCFOptions& ioptions, - const InternalKeyComparator& internal_comparator, - WritableFile* file, const CompressionType compression_type, - const CompressionOptions& compression_opts) const = 0; + const InternalKeyComparator& internal_comparator, WritableFile* file, + const CompressionType compression_type, + const CompressionOptions& compression_opts, + const bool skip_filters = false) const = 0; // Sanitizes the specified DB Options and ColumnFamilyOptions. // diff --git a/table/adaptive_table_factory.cc b/table/adaptive_table_factory.cc index c693064af..ddc691978 100644 --- a/table/adaptive_table_factory.cc +++ b/table/adaptive_table_factory.cc @@ -65,11 +65,13 @@ Status AdaptiveTableFactory::NewTableReader( TableBuilder* AdaptiveTableFactory::NewTableBuilder( const ImmutableCFOptions& ioptions, - const InternalKeyComparator& internal_comparator, - WritableFile* file, const CompressionType compression_type, - const CompressionOptions& compression_opts) const { + const InternalKeyComparator& internal_comparator, WritableFile* file, + const CompressionType compression_type, + const CompressionOptions& compression_opts, + const bool skip_filters) const { return table_factory_to_write_->NewTableBuilder( - ioptions, internal_comparator, file, compression_type, compression_opts); + ioptions, internal_comparator, file, compression_type, compression_opts, + skip_filters); } std::string AdaptiveTableFactory::GetPrintableTableOptions() const { diff --git a/table/adaptive_table_factory.h 
b/table/adaptive_table_factory.h index 3c6455f90..f7bda301f 100644 --- a/table/adaptive_table_factory.h +++ b/table/adaptive_table_factory.h @@ -39,12 +39,12 @@ class AdaptiveTableFactory : public TableFactory { unique_ptr&& file, uint64_t file_size, unique_ptr* table) const override; - TableBuilder* NewTableBuilder( - const ImmutableCFOptions& ioptions, - const InternalKeyComparator& icomparator, - WritableFile* file, - const CompressionType compression_type, - const CompressionOptions& compression_opts) const override; + TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions, + const InternalKeyComparator& icomparator, + WritableFile* file, + const CompressionType compression_type, + const CompressionOptions& compression_opts, + const bool skip_filters = false) const override; // Sanitizes the specified DB Options. Status SanitizeOptions(const DBOptions& db_opts, diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index be22f15bc..bea38d9b5 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -462,7 +462,7 @@ struct BlockBasedTableBuilder::Rep { const BlockBasedTableOptions& table_opt, const InternalKeyComparator& icomparator, WritableFile* f, const CompressionType _compression_type, - const CompressionOptions& _compression_opts) + const CompressionOptions& _compression_opts, const bool skip_filters) : ioptions(_ioptions), table_options(table_opt), internal_comparator(icomparator), @@ -474,7 +474,8 @@ struct BlockBasedTableBuilder::Rep { &this->internal_prefix_transform)), compression_type(_compression_type), compression_opts(_compression_opts), - filter_block(CreateFilterBlockBuilder(_ioptions, table_options)), + filter_block(skip_filters ? 
nullptr : CreateFilterBlockBuilder( + _ioptions, table_options)), flush_block_policy( table_options.flush_block_policy_factory->NewFlushBlockPolicy( table_options, data_block)) { @@ -495,7 +496,7 @@ BlockBasedTableBuilder::BlockBasedTableBuilder( const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_comparator, WritableFile* file, const CompressionType compression_type, - const CompressionOptions& compression_opts) { + const CompressionOptions& compression_opts, const bool skip_filters) { BlockBasedTableOptions sanitized_table_options(table_options); if (sanitized_table_options.format_version == 0 && sanitized_table_options.checksum != kCRC32c) { @@ -508,7 +509,8 @@ BlockBasedTableBuilder::BlockBasedTableBuilder( } rep_ = new Rep(ioptions, sanitized_table_options, internal_comparator, file, - compression_type, compression_opts); + compression_type, compression_opts, skip_filters); + if (rep_->filter_block != nullptr) { rep_->filter_block->StartBlock(0); } diff --git a/table/block_based_table_builder.h b/table/block_based_table_builder.h index 6fde32919..929c15f19 100644 --- a/table/block_based_table_builder.h +++ b/table/block_based_table_builder.h @@ -33,7 +33,8 @@ class BlockBasedTableBuilder : public TableBuilder { const InternalKeyComparator& internal_comparator, WritableFile* file, const CompressionType compression_type, - const CompressionOptions& compression_opts); + const CompressionOptions& compression_opts, + const bool skip_filters); // REQUIRES: Either Finish() or Abandon() has been called. 
~BlockBasedTableBuilder(); diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index f4b6214a1..80053ca53 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -53,13 +53,12 @@ Status BlockBasedTableFactory::NewTableReader( TableBuilder* BlockBasedTableFactory::NewTableBuilder( const ImmutableCFOptions& ioptions, - const InternalKeyComparator& internal_comparator, - WritableFile* file, const CompressionType compression_type, - const CompressionOptions& compression_opts) const { - + const InternalKeyComparator& internal_comparator, WritableFile* file, + const CompressionType compression_type, + const CompressionOptions& compression_opts, const bool skip_filters) const { auto table_builder = new BlockBasedTableBuilder( - ioptions, table_options_, internal_comparator, file, - compression_type, compression_opts); + ioptions, table_options_, internal_comparator, file, compression_type, + compression_opts, skip_filters); return table_builder; } diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h index 54eaa5a99..f5f398415 100644 --- a/table/block_based_table_factory.h +++ b/table/block_based_table_factory.h @@ -54,9 +54,10 @@ class BlockBasedTableFactory : public TableFactory { TableBuilder* NewTableBuilder( const ImmutableCFOptions& ioptions, - const InternalKeyComparator& internal_comparator, - WritableFile* file, const CompressionType compression_type, - const CompressionOptions& compression_opts) const override; + const InternalKeyComparator& internal_comparator, WritableFile* file, + const CompressionType compression_type, + const CompressionOptions& compression_opts, + const bool skip_filters = false) const override; // Sanitizes the specified DB Options. 
Status SanitizeOptions(const DBOptions& db_opts, diff --git a/table/cuckoo_table_factory.cc b/table/cuckoo_table_factory.cc index 4afc9fc2e..682329d5c 100644 --- a/table/cuckoo_table_factory.cc +++ b/table/cuckoo_table_factory.cc @@ -27,9 +27,12 @@ Status CuckooTableFactory::NewTableReader(const ImmutableCFOptions& ioptions, TableBuilder* CuckooTableFactory::NewTableBuilder( const ImmutableCFOptions& ioptions, - const InternalKeyComparator& internal_comparator, - WritableFile* file, const CompressionType, - const CompressionOptions&) const { + const InternalKeyComparator& internal_comparator, WritableFile* file, + const CompressionType, const CompressionOptions&, + const bool skip_filters) const { + // Ignore the skip_filters flag. Does not apply to this file format + // + // TODO: change builder to take the option struct return new CuckooTableBuilder(file, table_options_.hash_table_ratio, 64, table_options_.max_search_depth, internal_comparator.user_comparator(), diff --git a/table/cuckoo_table_factory.h b/table/cuckoo_table_factory.h index 625fd9995..2a004cc34 100644 --- a/table/cuckoo_table_factory.h +++ b/table/cuckoo_table_factory.h @@ -60,8 +60,10 @@ class CuckooTableFactory : public TableFactory { unique_ptr* table) const override; TableBuilder* NewTableBuilder(const ImmutableCFOptions& options, - const InternalKeyComparator& icomparator, WritableFile* file, - const CompressionType, const CompressionOptions&) const override; + const InternalKeyComparator& icomparator, + WritableFile* file, const CompressionType, + const CompressionOptions&, + const bool skip_filters = false) const override; // Sanitizes the specified DB Options. 
Status SanitizeOptions(const DBOptions& db_opts, diff --git a/table/mock_table.cc b/table/mock_table.cc index 70adf2da6..9f6c1ed9e 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -65,7 +65,7 @@ TableBuilder* MockTableFactory::NewTableBuilder( const ImmutableCFOptions& ioptions, const InternalKeyComparator& internal_key, WritableFile* file, const CompressionType compression_type, - const CompressionOptions& compression_opts) const { + const CompressionOptions& compression_opts, const bool skip_filters) const { uint32_t id = GetAndWriteNextID(file); return new MockTableBuilder(id, &file_system_); diff --git a/table/mock_table.h b/table/mock_table.h index 175ca79cb..cf6a4fd4a 100644 --- a/table/mock_table.h +++ b/table/mock_table.h @@ -136,16 +136,16 @@ class MockTableFactory : public TableFactory { MockTableFactory(); const char* Name() const override { return "MockTable"; } Status NewTableReader(const ImmutableCFOptions& ioptions, - const EnvOptions& env_options, - const InternalKeyComparator& internal_key, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader) const override; - - TableBuilder* NewTableBuilder( - const ImmutableCFOptions& ioptions, - const InternalKeyComparator& internal_key, WritableFile* file, - const CompressionType compression_type, - const CompressionOptions& compression_opts) const override; + const EnvOptions& env_options, + const InternalKeyComparator& internal_key, + unique_ptr&& file, uint64_t file_size, + unique_ptr* table_reader) const override; + TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions, + const InternalKeyComparator& internal_key, + WritableFile* file, + const CompressionType compression_type, + const CompressionOptions& compression_opts, + const bool skip_filters = false) const override; // This function will directly create mock table instead of going through // MockTableBuilder. 
MockFileContents has to have a format of && file, uint64_t file_size, unique_ptr* table) const override; - TableBuilder* NewTableBuilder( - const ImmutableCFOptions& options, - const InternalKeyComparator& icomparator, - WritableFile* file, - const CompressionType, - const CompressionOptions&) const override; + TableBuilder* NewTableBuilder(const ImmutableCFOptions& options, + const InternalKeyComparator& icomparator, + WritableFile* file, const CompressionType, + const CompressionOptions&, + const bool skip_filters = false) const override; std::string GetPrintableTableOptions() const override; diff --git a/util/options.cc b/util/options.cc index c16ba1928..cfebae61e 100644 --- a/util/options.cc +++ b/util/options.cc @@ -66,7 +66,8 @@ ImmutableCFOptions::ImmutableCFOptions(const Options& options) compression_per_level(options.compression_per_level), compression_opts(options.compression_opts), access_hint_on_compaction_start(options.access_hint_on_compaction_start), - num_levels(options.num_levels) + num_levels(options.num_levels), + optimize_filters_for_hits(options.optimize_filters_for_hits) #ifndef ROCKSDB_LITE , listeners(options.listeners) {} #else // ROCKSDB_LITE @@ -119,7 +120,8 @@ ColumnFamilyOptions::ColumnFamilyOptions() memtable_prefix_bloom_huge_page_tlb_size(0), bloom_locality(0), max_successive_merges(0), - min_partial_merge_operands(2) + min_partial_merge_operands(2), + optimize_filters_for_hits(false) #ifndef ROCKSDB_LITE , listeners() { #else // ROCKSDB_LITE @@ -184,7 +186,8 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options) options.memtable_prefix_bloom_huge_page_tlb_size), bloom_locality(options.bloom_locality), max_successive_merges(options.max_successive_merges), - min_partial_merge_operands(options.min_partial_merge_operands) + min_partial_merge_operands(options.min_partial_merge_operands), + optimize_filters_for_hits(options.optimize_filters_for_hits) #ifndef ROCKSDB_LITE , listeners(options.listeners) { #else // ROCKSDB_LITE 
@@ -240,7 +243,8 @@ DBOptions::DBOptions() access_hint_on_compaction_start(NORMAL), use_adaptive_mutex(false), bytes_per_sync(0), - enable_thread_tracking(false) {} + enable_thread_tracking(false) { +} DBOptions::DBOptions(const Options& options) : create_if_missing(options.create_if_missing), @@ -342,7 +346,7 @@ void DBOptions::Dump(Logger* log) const { stats_dump_period_sec); Log(log, " Options.advise_random_on_open: %d", advise_random_on_open); - Log(log, " Options.db_write_buffer_size: %zd", + Log(log, " Options.db_write_buffer_size: %zd", db_write_buffer_size); Log(log, " Options.access_hint_on_compaction_start: %s", access_hints[access_hint_on_compaction_start]); @@ -352,7 +356,7 @@ void DBOptions::Dump(Logger* log) const { rate_limiter.get()); Log(log, " Options.bytes_per_sync: %" PRIu64, bytes_per_sync); - Log(log, " enable_thread_tracking: %d", + Log(log, " Options.enable_thread_tracking: %d", enable_thread_tracking); } // DBOptions::Dump @@ -477,6 +481,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const { bloom_locality); Log(log, " Options.max_successive_merges: %zd", max_successive_merges); + Log(log, " Options.optimize_filters_for_hits: %d", + optimize_filters_for_hits); } // ColumnFamilyOptions::Dump void Options::Dump(Logger* log) const { diff --git a/util/options_helper.cc b/util/options_helper.cc index bed3a8f85..028f8f38b 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -447,6 +447,8 @@ bool ParseColumnFamilyOption(const std::string& name, const std::string& value, } else { return false; } + } else if (name == "optimize_filters_for_hits") { + new_options->optimize_filters_for_hits = ParseBoolean(name, value); } else { return false; } diff --git a/util/options_test.cc b/util/options_test.cc index 25efeadbe..001e6aa86 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -134,7 +134,8 @@ TEST(OptionsTest, GetOptionsFromMapTest) { {"bloom_locality", "29"}, {"max_successive_merges", "30"}, {"min_partial_merge_operands", 
"31"}, - {"prefix_extractor", "fixed:31"} + {"prefix_extractor", "fixed:31"}, + {"optimize_filters_for_hits", "true"}, }; std::unordered_map db_options_map = { @@ -226,6 +227,7 @@ TEST(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.max_successive_merges, 30U); ASSERT_EQ(new_cf_opt.min_partial_merge_operands, 31U); ASSERT_TRUE(new_cf_opt.prefix_extractor != nullptr); + ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true); ASSERT_EQ(std::string(new_cf_opt.prefix_extractor->Name()), "rocksdb.FixedPrefix.31"); @@ -395,6 +397,15 @@ TEST(OptionsTest, GetColumnFamilyOptionsFromStringTest) { "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={xx_block_size=4;}", &new_cf_opt)); + ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, + "optimize_filters_for_hits=true", + &new_cf_opt)); + ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, + "optimize_filters_for_hits=false", + &new_cf_opt)); + ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, + "optimize_filters_for_hits=junk", + &new_cf_opt)); } #endif // !ROCKSDB_LITE