From e089db40f9c8f2a8af466377ed0f6fd8a3c26456 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 23 Dec 2015 10:15:07 -0800 Subject: [PATCH] Skip bottom-level filter block caching when hit-optimized Summary: When Get() or NewIterator() trigger file loads, skip caching the filter block if (1) optimize_filters_for_hits is set and (2) the file is on the bottommost level. Also skip checking filters under the same conditions, which means that for a preloaded file or a file that was trivially-moved to the bottom level, its filter block will eventually expire from the cache. - added parameters/instance variables in various places in order to propagate the config ("skip_filters") from version_set to block_based_table_reader - in BlockBasedTable::Rep, this optimization prevents filter from being loaded when the file is opened simply by setting filter_policy = nullptr - in BlockBasedTable::Get/BlockBasedTable::NewIterator, this optimization prevents filter from being used (even if it was loaded already) by setting filter = nullptr Test Plan: updated unit test: $ ./db_test --gtest_filter=DBTest.OptimizeFiltersForHits will also run 'make check' Reviewers: sdong, igor, paultuckfield, anthony, rven, kradhakrishnan, IslamAbdelRahman, yhchiang Reviewed By: yhchiang Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D51633 --- db/db_test.cc | 108 ++++++++++++++++++++++++++++- db/table_cache.cc | 29 ++++---- db/table_cache.h | 14 ++-- db/version_set.cc | 28 ++++++-- db/version_set.h | 6 ++ table/block_based_table_factory.cc | 3 +- table/block_based_table_reader.cc | 41 ++++++----- table/block_based_table_reader.h | 19 +++-- table/cuckoo_table_reader.cc | 4 +- table/cuckoo_table_reader.h | 6 +- table/mock_table.cc | 6 +- table/mock_table.h | 7 +- table/plain_table_reader.cc | 5 +- table/plain_table_reader.h | 8 +-- table/table_builder.h | 10 ++- table/table_reader.h | 9 ++- tools/sst_dump_tool.cc | 3 +- 17 files changed, 234 insertions(+), 72 deletions(-) 
diff --git a/db/db_test.cc b/db/db_test.cc index 2dc5c82d3..1af0f936d 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -7986,6 +7986,7 @@ TEST_F(DBTest, OptimizeFiltersForHits) { options.compaction_style = kCompactionStyleLevel; options.level_compaction_dynamic_level_bytes = true; BlockBasedTableOptions bbto; + bbto.cache_index_and_filter_blocks = true; bbto.filter_policy.reset(NewBloomFilterPolicy(10, true)); bbto.whole_key_filtering = true; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); @@ -8034,13 +8035,118 @@ TEST_F(DBTest, OptimizeFiltersForHits) { ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); // Now we have three sorted run, L0, L5 and L6 with most files in L6 have - // no blooom filter. Most keys be checked bloom filters twice. + // no bloom filter. Most keys will be checked against bloom filters twice. ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2); ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2); for (int i = 0; i < numkeys; i += 2) { ASSERT_EQ(Get(1, Key(i)), "val"); } + + // Part 2 (read path): rewrite last level with blooms, then verify they get + // cached only if !optimize_filters_for_hits + options.disable_auto_compactions = true; + options.num_levels = 9; + options.optimize_filters_for_hits = false; + options.statistics = CreateDBStatistics(); + bbto.block_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + ReopenWithColumnFamilies({"default", "mypikachu"}, options); + MoveFilesToLevel(7 /* level */, 1 /* column family index */); + + std::string value = Get(1, Key(0)); + int prev_cache_filter_hits = + TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); + value = Get(1, Key(0)); + ASSERT_EQ(prev_cache_filter_hits + 1, + TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + + // Now that we know the filter blocks exist in the last level files, see if + // filter caching is skipped for this optimization + options.optimize_filters_for_hits = true; + 
options.statistics = CreateDBStatistics(); + bbto.block_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + ReopenWithColumnFamilies({"default", "mypikachu"}, options); + + value = Get(1, Key(0)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + ASSERT_EQ(2 /* index and data block */, + TestGetTickerCount(options, BLOCK_CACHE_ADD)); + + // Check filter block ignored for files preloaded during DB::Open() + options.max_open_files = -1; + options.statistics = CreateDBStatistics(); + bbto.block_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + ReopenWithColumnFamilies({"default", "mypikachu"}, options); + + int prev_cache_filter_misses = + TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); + prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); + Get(1, Key(0)); + ASSERT_EQ(prev_cache_filter_misses, + TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(prev_cache_filter_hits, + TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + + // Check filter block ignored for file trivially-moved to bottom level + bbto.block_cache.reset(); + options.max_open_files = 100; // setting > -1 makes it not preload all files + options.statistics = CreateDBStatistics(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + ReopenWithColumnFamilies({"default", "mypikachu"}, options); + + ASSERT_OK(Put(1, Key(numkeys + 1), "val")); + ASSERT_OK(Flush(1)); + + int32_t trivial_move = 0; + int32_t non_trivial_move = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:TrivialMove", + [&](void* arg) { trivial_move++; }); + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:NonTrivial", + [&](void* arg) { non_trivial_move++; }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + CompactRangeOptions compact_options; 
+ compact_options.bottommost_level_compaction = + BottommostLevelCompaction::kSkip; + compact_options.change_level = true; + compact_options.target_level = 7; + db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); + + ASSERT_EQ(trivial_move, 1); + ASSERT_EQ(non_trivial_move, 0); + + prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); + prev_cache_filter_misses = + TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); + value = Get(1, Key(numkeys + 1)); + ASSERT_EQ(prev_cache_filter_hits, + TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + ASSERT_EQ(prev_cache_filter_misses, + TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + + // Check filter block not cached for iterator + bbto.block_cache.reset(); + options.statistics = CreateDBStatistics(); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + + ReopenWithColumnFamilies({"default", "mypikachu"}, options); + + std::unique_ptr iter(db_->NewIterator(ReadOptions(), handles_[1])); + iter->SeekToFirst(); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); + ASSERT_EQ(2 /* index and data block */, + TestGetTickerCount(options, BLOCK_CACHE_ADD)); } #endif // ROCKSDB_LITE diff --git a/db/table_cache.cc b/db/table_cache.cc index 48b8e1b07..663315840 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -88,7 +88,7 @@ Status TableCache::GetTableReader( const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist, - unique_ptr* table_reader) { + unique_ptr* table_reader, bool skip_filters) { std::string fname = TableFileName(ioptions_.db_paths, fd.GetNumber(), fd.GetPathId()); unique_ptr file; @@ -108,7 +108,8 @@ Status TableCache::GetTableReader( ioptions_.statistics, record_read_stats, file_read_hist)); s = ioptions_.table_factory->NewTableReader( - 
TableReaderOptions(ioptions_, env_options, internal_comparator), + TableReaderOptions(ioptions_, env_options, internal_comparator, + skip_filters), std::move(file_reader), fd.GetFileSize(), table_reader); TEST_SYNC_POINT("TableCache::GetTableReader:0"); } @@ -119,7 +120,7 @@ Status TableCache::FindTable(const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, Cache::Handle** handle, const bool no_io, bool record_read_stats, - HistogramImpl* file_read_hist) { + HistogramImpl* file_read_hist, bool skip_filters) { PERF_TIMER_GUARD(find_table_nanos); Status s; uint64_t number = fd.GetNumber(); @@ -135,7 +136,7 @@ Status TableCache::FindTable(const EnvOptions& env_options, unique_ptr table_reader; s = GetTableReader(env_options, internal_comparator, fd, false /* sequential mode */, record_read_stats, - file_read_hist, &table_reader); + file_read_hist, &table_reader, skip_filters); if (!s.ok()) { assert(table_reader == nullptr); RecordTick(ioptions_.statistics, NO_FILE_ERRORS); @@ -153,7 +154,7 @@ InternalIterator* TableCache::NewIterator( const ReadOptions& options, const EnvOptions& env_options, const InternalKeyComparator& icomparator, const FileDescriptor& fd, TableReader** table_reader_ptr, HistogramImpl* file_read_hist, - bool for_compaction, Arena* arena) { + bool for_compaction, Arena* arena, bool skip_filters) { PERF_TIMER_GUARD(new_table_iterator_nanos); if (table_reader_ptr != nullptr) { @@ -176,10 +177,10 @@ InternalIterator* TableCache::NewIterator( } else { table_reader = fd.table_reader; if (table_reader == nullptr) { - Status s = - FindTable(env_options, icomparator, fd, &handle, - options.read_tier == kBlockCacheTier /* no_io */, - !for_compaction /* record read_stats */, file_read_hist); + Status s = FindTable(env_options, icomparator, fd, &handle, + options.read_tier == kBlockCacheTier /* no_io */, + !for_compaction /* record read_stats */, + file_read_hist, skip_filters); if (!s.ok()) { return 
NewErrorInternalIterator(s, arena); } @@ -187,7 +188,8 @@ InternalIterator* TableCache::NewIterator( } } - InternalIterator* result = table_reader->NewIterator(options, arena); + InternalIterator* result = + table_reader->NewIterator(options, arena, skip_filters); if (create_new_table_reader) { assert(handle == nullptr); @@ -209,7 +211,8 @@ InternalIterator* TableCache::NewIterator( Status TableCache::Get(const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, const Slice& k, - GetContext* get_context, HistogramImpl* file_read_hist) { + GetContext* get_context, HistogramImpl* file_read_hist, + bool skip_filters) { TableReader* t = fd.table_reader; Status s; Cache::Handle* handle = nullptr; @@ -258,14 +261,14 @@ Status TableCache::Get(const ReadOptions& options, if (!t) { s = FindTable(env_options_, internal_comparator, fd, &handle, options.read_tier == kBlockCacheTier /* no_io */, - true /* record_read_stats */, file_read_hist); + true /* record_read_stats */, file_read_hist, skip_filters); if (s.ok()) { t = GetTableReaderFromHandle(handle); } } if (s.ok()) { get_context->SetReplayLog(row_cache_entry); // nullptr if no cache. - s = t->Get(options, k, get_context); + s = t->Get(options, k, get_context, skip_filters); get_context->SetReplayLog(nullptr); if (handle != nullptr) { ReleaseHandle(handle); diff --git a/db/table_cache.h b/db/table_cache.h index 631946e5f..44246fbf4 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -44,30 +44,35 @@ class TableCache { // the returned iterator. The returned "*tableptr" object is owned by // the cache and should not be deleted, and is valid for as long as the // returned iterator is live. 
+ // @param skip_filters Disables loading/accessing the filter block InternalIterator* NewIterator( const ReadOptions& options, const EnvOptions& toptions, const InternalKeyComparator& internal_comparator, const FileDescriptor& file_fd, TableReader** table_reader_ptr = nullptr, HistogramImpl* file_read_hist = nullptr, bool for_compaction = false, - Arena* arena = nullptr); + Arena* arena = nullptr, bool skip_filters = false); // If a seek to internal key "k" in specified file finds an entry, // call (*handle_result)(arg, found_key, found_value) repeatedly until // it returns false. + // @param skip_filters Disables loading/accessing the filter block Status Get(const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileDescriptor& file_fd, const Slice& k, - GetContext* get_context, HistogramImpl* file_read_hist = nullptr); + GetContext* get_context, HistogramImpl* file_read_hist = nullptr, + bool skip_filters = false); // Evict any entry for the specified file number static void Evict(Cache* cache, uint64_t file_number); // Find table reader + // @param skip_filters Disables loading/accessing the filter block Status FindTable(const EnvOptions& toptions, const InternalKeyComparator& internal_comparator, const FileDescriptor& file_fd, Cache::Handle**, const bool no_io = false, bool record_read_stats = true, - HistogramImpl* file_read_hist = nullptr); + HistogramImpl* file_read_hist = nullptr, + bool skip_filters = false); // Get TableReader from a cache handle. 
TableReader* GetTableReaderFromHandle(Cache::Handle* handle); @@ -100,7 +105,8 @@ class TableCache { const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist, - unique_ptr* table_reader); + unique_ptr* table_reader, + bool skip_filters = false); const ImmutableCFOptions& ioptions_; const EnvOptions& env_options_; diff --git a/db/version_set.cc b/db/version_set.cc index 612ff30f3..81e51ed31 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -475,19 +475,21 @@ class LevelFileNumIterator : public InternalIterator { class LevelFileIteratorState : public TwoLevelIteratorState { public: + // @param skip_filters Disables loading/accessing the filter block LevelFileIteratorState(TableCache* table_cache, const ReadOptions& read_options, const EnvOptions& env_options, const InternalKeyComparator& icomparator, HistogramImpl* file_read_hist, bool for_compaction, - bool prefix_enabled) + bool prefix_enabled, bool skip_filters) : TwoLevelIteratorState(prefix_enabled), table_cache_(table_cache), read_options_(read_options), env_options_(env_options), icomparator_(icomparator), file_read_hist_(file_read_hist), - for_compaction_(for_compaction) {} + for_compaction_(for_compaction), + skip_filters_(skip_filters) {} InternalIterator* NewSecondaryIterator(const Slice& meta_handle) override { if (meta_handle.size() != sizeof(FileDescriptor)) { @@ -499,7 +501,7 @@ class LevelFileIteratorState : public TwoLevelIteratorState { return table_cache_->NewIterator( read_options_, env_options_, icomparator_, *fd, nullptr /* don't need reference to table*/, file_read_hist_, - for_compaction_); + for_compaction_, nullptr /* arena */, skip_filters_); } } @@ -514,6 +516,7 @@ class LevelFileIteratorState : public TwoLevelIteratorState { const InternalKeyComparator& icomparator_; HistogramImpl* file_read_hist_; bool for_compaction_; + bool skip_filters_; }; // A wrapper of version builder which 
references the current version in @@ -792,7 +795,8 @@ void Version::AddIterators(const ReadOptions& read_options, cfd_->internal_comparator(), cfd_->internal_stats()->GetFileReadHist(level), false /* for_compaction */, - cfd_->ioptions()->prefix_extractor != nullptr); + cfd_->ioptions()->prefix_extractor != nullptr, + IsFilterSkipped(level)); mem = arena->AllocateAligned(sizeof(LevelFileNumIterator)); auto* first_level_iter = new (mem) LevelFileNumIterator( cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level)); @@ -895,7 +899,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, while (f != nullptr) { *status = table_cache_->Get( read_options, *internal_comparator(), f->fd, ikey, &get_context, - cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel())); + cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()), + IsFilterSkipped(static_cast(fp.GetHitFileLevel()))); // TODO: examine the behavior for corrupted key if (!status->ok()) { return; @@ -952,6 +957,13 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, } } +bool Version::IsFilterSkipped(int level) { + // Reaching the bottom level implies misses at all upper levels, so we'll + // skip checking the filters when we predict a hit. + return cfd_->ioptions()->optimize_filters_for_hits && + level == storage_info_.num_non_empty_levels() - 1; +} + void VersionStorageInfo::GenerateLevelFilesBrief() { level_files_brief_.resize(num_non_empty_levels_); for (int level = 0; level < num_non_empty_levels_; level++) { @@ -2124,7 +2136,8 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, // unlimited table cache. Pre-load table handle now. // Need to do it out of the mutex. 
builder_guard->version_builder()->LoadTableHandlers( - column_family_data->internal_stats()); + column_family_data->internal_stats(), + column_family_data->ioptions()->optimize_filters_for_hits); } // This is fine because everything inside of this block is serialized -- @@ -3270,7 +3283,8 @@ InternalIterator* VersionSet::MakeInputIterator(Compaction* c) { cfd->table_cache(), read_options, env_options_, cfd->internal_comparator(), nullptr /* no per level latency histogram */, - true /* for_compaction */, false /* prefix enabled */), + true /* for_compaction */, false /* prefix enabled */, + false /* skip_filters */), new LevelFileNumIterator(cfd->internal_comparator(), c->input_levels(which))); } diff --git a/db/version_set.h b/db/version_set.h index 997cd2a80..edbb48404 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -522,6 +522,12 @@ class Version { InternalIterator* level_iter, const Slice& internal_prefix) const; + // Returns true if the filter blocks in the specified level will not be + // checked during read operations. In certain cases (trivial move or preload), + // the filter block may already be cached, but we still do not access it such + // that it eventually expires from the cache. + bool IsFilterSkipped(int level); + // The helper function of UpdateAccumulatedStats, which may fill the missing // fields of file_mata from its associated TableProperties. // Returns true if it does initialize FileMetaData. 
diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index 3ddb46bf9..dfcb9cde0 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -57,7 +57,8 @@ Status BlockBasedTableFactory::NewTableReader( return BlockBasedTable::Open( table_reader_options.ioptions, table_reader_options.env_options, table_options_, table_reader_options.internal_comparator, std::move(file), - file_size, table_reader, prefetch_enabled); + file_size, table_reader, prefetch_enabled, + table_reader_options.skip_filters); } TableBuilder* BlockBasedTableFactory::NewTableBuilder( diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index ad383726a..023e5c83e 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -338,11 +338,11 @@ class HashIndexReader : public IndexReader { struct BlockBasedTable::Rep { Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options, const BlockBasedTableOptions& _table_opt, - const InternalKeyComparator& _internal_comparator) + const InternalKeyComparator& _internal_comparator, bool skip_filters) : ioptions(_ioptions), env_options(_env_options), table_options(_table_opt), - filter_policy(_table_opt.filter_policy.get()), + filter_policy(skip_filters ? nullptr : _table_opt.filter_policy.get()), internal_comparator(_internal_comparator), filter_type(FilterType::kNoFilter), whole_key_filtering(_table_opt.whole_key_filtering), @@ -486,7 +486,8 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, unique_ptr&& file, uint64_t file_size, unique_ptr* table_reader, - const bool prefetch_index_and_filter) { + const bool prefetch_index_and_filter, + const bool skip_filters) { table_reader->reset(); Footer footer; @@ -503,8 +504,8 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, // We've successfully read the footer and the index block: we're // ready to serve requests. 
- Rep* rep = new BlockBasedTable::Rep( - ioptions, env_options, table_options, internal_comparator); + Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options, + internal_comparator, skip_filters); rep->file = std::move(file); rep->footer = footer; rep->index_type = table_options.index_type; @@ -1076,18 +1077,19 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator( class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState { public: BlockEntryIteratorState(BlockBasedTable* table, - const ReadOptions& read_options) - : TwoLevelIteratorState( - table->rep_->ioptions.prefix_extractor != nullptr), + const ReadOptions& read_options, bool skip_filters) + : TwoLevelIteratorState(table->rep_->ioptions.prefix_extractor != + nullptr), table_(table), - read_options_(read_options) {} + read_options_(read_options), + skip_filters_(skip_filters) {} InternalIterator* NewSecondaryIterator(const Slice& index_value) override { return NewDataBlockIterator(table_->rep_, read_options_, index_value); } bool PrefixMayMatch(const Slice& internal_key) override { - if (read_options_.total_order_seek) { + if (read_options_.total_order_seek || skip_filters_) { return true; } return table_->PrefixMayMatch(internal_key); @@ -1097,6 +1099,7 @@ class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState { // Don't own table_ BlockBasedTable* table_; const ReadOptions read_options_; + bool skip_filters_; }; // This will be broken if the user specifies an unusual implementation @@ -1187,9 +1190,11 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) { } InternalIterator* BlockBasedTable::NewIterator(const ReadOptions& read_options, - Arena* arena) { - return NewTwoLevelIterator(new BlockEntryIteratorState(this, read_options), - NewIndexIterator(read_options), arena); + Arena* arena, + bool skip_filters) { + return NewTwoLevelIterator( + new BlockEntryIteratorState(this, read_options, skip_filters), + 
NewIndexIterator(read_options), arena); } bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter, @@ -1209,11 +1214,13 @@ bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter, return true; } -Status BlockBasedTable::Get( - const ReadOptions& read_options, const Slice& key, - GetContext* get_context) { +Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, + GetContext* get_context, bool skip_filters) { Status s; - auto filter_entry = GetFilter(read_options.read_tier == kBlockCacheTier); + CachableEntry filter_entry; + if (!skip_filters) { + filter_entry = GetFilter(read_options.read_tier == kBlockCacheTier); + } FilterBlockReader* filter = filter_entry.value; // First check the full filter diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 4e095cb66..f8d0649e0 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -64,27 +64,32 @@ class BlockBasedTable : public TableReader { // If there was an error while initializing the table, sets "*table_reader" // to nullptr and returns a non-ok status. // - // *file must remain live while this Table is in use. - // *prefetch_blocks can be used to disable prefetching of index and filter - // blocks at statup + // @param file must remain live while this Table is in use. + // @param prefetch_index_and_filter can be used to disable prefetching of + // index and filter blocks at startup + // @param skip_filters Disables loading/accessing the filter block. Overrides + // prefetch_index_and_filter, so filter will be skipped if both are set. 
static Status Open(const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_key_comparator, unique_ptr&& file, uint64_t file_size, unique_ptr* table_reader, - bool prefetch_index_and_filter = true); + bool prefetch_index_and_filter = true, + bool skip_filters = false); bool PrefixMayMatch(const Slice& internal_key); // Returns a new iterator over the table contents. // The result of NewIterator() is initially invalid (caller must // call one of the Seek methods on the iterator before using it). - InternalIterator* NewIterator(const ReadOptions&, - Arena* arena = nullptr) override; + // @param skip_filters Disables loading/accessing the filter block + InternalIterator* NewIterator(const ReadOptions&, Arena* arena = nullptr, + bool skip_filters = false) override; + // @param skip_filters Disables loading/accessing the filter block Status Get(const ReadOptions& readOptions, const Slice& key, - GetContext* get_context) override; + GetContext* get_context, bool skip_filters = false) override; // Pre-fetch the disk blocks that correspond to the key range specified by // (kbegin, kend). The call will return return error status in the event of diff --git a/table/cuckoo_table_reader.cc b/table/cuckoo_table_reader.cc index d806e77ef..b1f910181 100644 --- a/table/cuckoo_table_reader.cc +++ b/table/cuckoo_table_reader.cc @@ -128,7 +128,7 @@ CuckooTableReader::CuckooTableReader( } Status CuckooTableReader::Get(const ReadOptions& readOptions, const Slice& key, - GetContext* get_context) { + GetContext* get_context, bool skip_filters) { assert(key.size() == key_length_ + (is_last_level_ ? 
8 : 0)); Slice user_key = ExtractUserKey(key); for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) { @@ -358,7 +358,7 @@ extern InternalIterator* NewErrorInternalIterator(const Status& status, Arena* arena); InternalIterator* CuckooTableReader::NewIterator( - const ReadOptions& read_options, Arena* arena) { + const ReadOptions& read_options, Arena* arena, bool skip_filters) { if (!status().ok()) { return NewErrorInternalIterator( Status::Corruption("CuckooTableReader status is not okay."), arena); diff --git a/table/cuckoo_table_reader.h b/table/cuckoo_table_reader.h index ee17dc44f..b936e70c8 100644 --- a/table/cuckoo_table_reader.h +++ b/table/cuckoo_table_reader.h @@ -42,10 +42,10 @@ class CuckooTableReader: public TableReader { Status status() const { return status_; } Status Get(const ReadOptions& read_options, const Slice& key, - GetContext* get_context) override; + GetContext* get_context, bool skip_filters = false) override; - InternalIterator* NewIterator(const ReadOptions&, - Arena* arena = nullptr) override; + InternalIterator* NewIterator(const ReadOptions&, Arena* arena = nullptr, + bool skip_filters = false) override; void Prepare(const Slice& target) override; // Report an approximation of how much memory has been used. 
diff --git a/table/mock_table.cc b/table/mock_table.cc index 027e6d134..4525994d3 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -28,13 +28,13 @@ stl_wrappers::KVMap MakeMockFile( return stl_wrappers::KVMap(l, stl_wrappers::LessOfComparator(&icmp_)); } -InternalIterator* MockTableReader::NewIterator(const ReadOptions&, - Arena* arena) { +InternalIterator* MockTableReader::NewIterator(const ReadOptions&, Arena* arena, + bool skip_filters) { return new MockTableIterator(table_); } Status MockTableReader::Get(const ReadOptions&, const Slice& key, - GetContext* get_context) { + GetContext* get_context, bool skip_filters) { std::unique_ptr iter(new MockTableIterator(table_)); for (iter->Seek(key); iter->Valid(); iter->Next()) { ParsedInternalKey parsed_key; diff --git a/table/mock_table.h b/table/mock_table.h index 15ecad872..1b822d783 100644 --- a/table/mock_table.h +++ b/table/mock_table.h @@ -40,10 +40,11 @@ class MockTableReader : public TableReader { public: explicit MockTableReader(const stl_wrappers::KVMap& table) : table_(table) {} - InternalIterator* NewIterator(const ReadOptions&, Arena* arena) override; + InternalIterator* NewIterator(const ReadOptions&, Arena* arena, + bool skip_filters = false) override; - Status Get(const ReadOptions&, const Slice& key, - GetContext* get_context) override; + Status Get(const ReadOptions&, const Slice& key, GetContext* get_context, + bool skip_filters = false) override; uint64_t ApproximateOffsetOf(const Slice& key) override { return 0; } diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index 603d60db4..a6db428e4 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -188,7 +188,8 @@ void PlainTableReader::SetupForCompaction() { } InternalIterator* PlainTableReader::NewIterator(const ReadOptions& options, - Arena* arena) { + Arena* arena, + bool skip_filters) { if (options.total_order_seek && !IsTotalOrderMode()) { return NewErrorInternalIterator( 
Status::InvalidArgument("total_order_seek not supported"), arena); @@ -531,7 +532,7 @@ void PlainTableReader::Prepare(const Slice& target) { } Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target, - GetContext* get_context) { + GetContext* get_context, bool skip_filters) { // Check bloom filter first. Slice prefix_slice; uint32_t prefix_hash; diff --git a/table/plain_table_reader.h b/table/plain_table_reader.h index e7d38e608..baa156d79 100644 --- a/table/plain_table_reader.h +++ b/table/plain_table_reader.h @@ -78,13 +78,13 @@ class PlainTableReader: public TableReader { size_t index_sparseness, size_t huge_page_tlb_size, bool full_scan_mode); - InternalIterator* NewIterator(const ReadOptions&, - Arena* arena = nullptr) override; + InternalIterator* NewIterator(const ReadOptions&, Arena* arena = nullptr, + bool skip_filters = false) override; void Prepare(const Slice& target) override; - Status Get(const ReadOptions&, const Slice& key, - GetContext* get_context) override; + Status Get(const ReadOptions&, const Slice& key, GetContext* get_context, + bool skip_filters = false) override; uint64_t ApproximateOffsetOf(const Slice& key) override; diff --git a/table/table_builder.h b/table/table_builder.h index 55a1077fa..930c99f10 100644 --- a/table/table_builder.h +++ b/table/table_builder.h @@ -25,16 +25,21 @@ class Slice; class Status; struct TableReaderOptions { + // @param skip_filters Disables loading/accessing the filter block TableReaderOptions(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options, - const InternalKeyComparator& _internal_comparator) + const InternalKeyComparator& _internal_comparator, + bool _skip_filters = false) : ioptions(_ioptions), env_options(_env_options), - internal_comparator(_internal_comparator) {} + internal_comparator(_internal_comparator), + skip_filters(_skip_filters) {} const ImmutableCFOptions& ioptions; const EnvOptions& env_options; const InternalKeyComparator& internal_comparator; + // This 
is only used for BlockBasedTable (reader) + bool skip_filters; }; struct TableBuilderOptions { @@ -57,6 +62,7 @@ struct TableBuilderOptions { int_tbl_prop_collector_factories; CompressionType compression_type; const CompressionOptions& compression_opts; + // This is only used for BlockBasedTableBuilder bool skip_filters = false; }; diff --git a/table/table_reader.h b/table/table_reader.h index 60a593b42..2fef5df30 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -35,8 +35,11 @@ class TableReader { // When destroying the iterator, the caller will not call "delete" // but Iterator::~Iterator() directly. The destructor needs to destroy // all the states but those allocated in arena. + // skip_filters: disables checking the bloom filters even if they exist. This + // option is effective only for block-based table format. virtual InternalIterator* NewIterator(const ReadOptions&, - Arena* arena = nullptr) = 0; + Arena* arena = nullptr, + bool skip_filters = false) = 0; // Given a key, return an approximate byte offset in the file where // the data for that key begins (or would begin if the key were @@ -67,8 +70,10 @@ class TableReader { // // readOptions is the options for the read // key is the key to search for + // skip_filters: disables checking the bloom filters even if they exist. This + // option is effective only for block-based table format. 
virtual Status Get(const ReadOptions& readOptions, const Slice& key, - GetContext* get_context) = 0; + GetContext* get_context, bool skip_filters = false) = 0; // Prefetch data corresponding to a give range of keys // Typically this functionality is required for table implementations that diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index ae22da36d..23a33fc1a 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -91,7 +91,8 @@ Status SstFileReader::NewTableReader( if (block_table_factory) { return block_table_factory->NewTableReader( - TableReaderOptions(ioptions_, soptions_, internal_comparator_), + TableReaderOptions(ioptions_, soptions_, internal_comparator_, + /*skip_filters=*/false), std::move(file_), file_size, &table_reader_, /*enable_prefetch=*/false); }