Skip bottom-level filter block caching when hit-optimized

Summary:
When Get() or NewIterator() triggers a file load, skip caching the filter block if
(1) optimize_filters_for_hits is set and (2) the file is on the bottommost
level. Also skip checking filters under the same conditions, which means that
the filter block of a preloaded file, or of a file that was trivially moved to
the bottom level, is no longer accessed and will eventually expire from the cache.

- Added parameters/instance variables in various places to propagate the config ("skip_filters") from version_set to block_based_table_reader
- In BlockBasedTable::Rep, this optimization prevents the filter from being loaded when the file is opened, simply by setting filter_policy = nullptr
- In BlockBasedTable::Get/BlockBasedTable::NewIterator, this optimization prevents the filter from being used (even if it was already loaded) by setting filter = nullptr

Test Plan:
updated unit test:

  $ ./db_test --gtest_filter=DBTest.OptimizeFiltersForHits

will also run 'make check'

Reviewers: sdong, igor, paultuckfield, anthony, rven, kradhakrishnan, IslamAbdelRahman, yhchiang

Reviewed By: yhchiang

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D51633
main
Andrew Kryczka 9 years ago
parent 06c05495e8
commit e089db40f9
  1. 108
      db/db_test.cc
  2. 29
      db/table_cache.cc
  3. 14
      db/table_cache.h
  4. 28
      db/version_set.cc
  5. 6
      db/version_set.h
  6. 3
      table/block_based_table_factory.cc
  7. 41
      table/block_based_table_reader.cc
  8. 19
      table/block_based_table_reader.h
  9. 4
      table/cuckoo_table_reader.cc
  10. 6
      table/cuckoo_table_reader.h
  11. 6
      table/mock_table.cc
  12. 7
      table/mock_table.h
  13. 5
      table/plain_table_reader.cc
  14. 8
      table/plain_table_reader.h
  15. 10
      table/table_builder.h
  16. 9
      table/table_reader.h
  17. 3
      tools/sst_dump_tool.cc

@ -7986,6 +7986,7 @@ TEST_F(DBTest, OptimizeFiltersForHits) {
options.compaction_style = kCompactionStyleLevel; options.compaction_style = kCompactionStyleLevel;
options.level_compaction_dynamic_level_bytes = true; options.level_compaction_dynamic_level_bytes = true;
BlockBasedTableOptions bbto; BlockBasedTableOptions bbto;
bbto.cache_index_and_filter_blocks = true;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, true)); bbto.filter_policy.reset(NewBloomFilterPolicy(10, true));
bbto.whole_key_filtering = true; bbto.whole_key_filtering = true;
options.table_factory.reset(NewBlockBasedTableFactory(bbto)); options.table_factory.reset(NewBlockBasedTableFactory(bbto));
@ -8034,13 +8035,118 @@ TEST_F(DBTest, OptimizeFiltersForHits) {
ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
// Now we have three sorted run, L0, L5 and L6 with most files in L6 have // Now we have three sorted run, L0, L5 and L6 with most files in L6 have
// no blooom filter. Most keys be checked bloom filters twice. // no bloom filter. Most keys be checked bloom filters twice.
ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2); ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2);
ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2); ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2);
for (int i = 0; i < numkeys; i += 2) { for (int i = 0; i < numkeys; i += 2) {
ASSERT_EQ(Get(1, Key(i)), "val"); ASSERT_EQ(Get(1, Key(i)), "val");
} }
// Part 2 (read path): rewrite last level with blooms, then verify they get
// cached only if !optimize_filters_for_hits
options.disable_auto_compactions = true;
options.num_levels = 9;
options.optimize_filters_for_hits = false;
options.statistics = CreateDBStatistics();
bbto.block_cache.reset();
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
ReopenWithColumnFamilies({"default", "mypikachu"}, options);
MoveFilesToLevel(7 /* level */, 1 /* column family index */);
std::string value = Get(1, Key(0));
int prev_cache_filter_hits =
TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
value = Get(1, Key(0));
ASSERT_EQ(prev_cache_filter_hits + 1,
TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
// Now that we know the filter blocks exist in the last level files, see if
// filter caching is skipped for this optimization
options.optimize_filters_for_hits = true;
options.statistics = CreateDBStatistics();
bbto.block_cache.reset();
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
ReopenWithColumnFamilies({"default", "mypikachu"}, options);
value = Get(1, Key(0));
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
ASSERT_EQ(2 /* index and data block */,
TestGetTickerCount(options, BLOCK_CACHE_ADD));
// Check filter block ignored for files preloaded during DB::Open()
options.max_open_files = -1;
options.statistics = CreateDBStatistics();
bbto.block_cache.reset();
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
ReopenWithColumnFamilies({"default", "mypikachu"}, options);
int prev_cache_filter_misses =
TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
Get(1, Key(0));
ASSERT_EQ(prev_cache_filter_misses,
TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
ASSERT_EQ(prev_cache_filter_hits,
TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
// Check filter block ignored for file trivially-moved to bottom level
bbto.block_cache.reset();
options.max_open_files = 100; // setting > -1 makes it not preload all files
options.statistics = CreateDBStatistics();
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
ReopenWithColumnFamilies({"default", "mypikachu"}, options);
ASSERT_OK(Put(1, Key(numkeys + 1), "val"));
ASSERT_OK(Flush(1));
int32_t trivial_move = 0;
int32_t non_trivial_move = 0;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:TrivialMove",
[&](void* arg) { trivial_move++; });
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:NonTrivial",
[&](void* arg) { non_trivial_move++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
CompactRangeOptions compact_options;
compact_options.bottommost_level_compaction =
BottommostLevelCompaction::kSkip;
compact_options.change_level = true;
compact_options.target_level = 7;
db_->CompactRange(compact_options, handles_[1], nullptr, nullptr);
ASSERT_EQ(trivial_move, 1);
ASSERT_EQ(non_trivial_move, 0);
prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
prev_cache_filter_misses =
TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
value = Get(1, Key(numkeys + 1));
ASSERT_EQ(prev_cache_filter_hits,
TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
ASSERT_EQ(prev_cache_filter_misses,
TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
// Check filter block not cached for iterator
bbto.block_cache.reset();
options.statistics = CreateDBStatistics();
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
ReopenWithColumnFamilies({"default", "mypikachu"}, options);
std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions(), handles_[1]));
iter->SeekToFirst();
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
ASSERT_EQ(2 /* index and data block */,
TestGetTickerCount(options, BLOCK_CACHE_ADD));
} }
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE

@ -88,7 +88,7 @@ Status TableCache::GetTableReader(
const EnvOptions& env_options, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist, bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
unique_ptr<TableReader>* table_reader) { unique_ptr<TableReader>* table_reader, bool skip_filters) {
std::string fname = std::string fname =
TableFileName(ioptions_.db_paths, fd.GetNumber(), fd.GetPathId()); TableFileName(ioptions_.db_paths, fd.GetNumber(), fd.GetPathId());
unique_ptr<RandomAccessFile> file; unique_ptr<RandomAccessFile> file;
@ -108,7 +108,8 @@ Status TableCache::GetTableReader(
ioptions_.statistics, record_read_stats, ioptions_.statistics, record_read_stats,
file_read_hist)); file_read_hist));
s = ioptions_.table_factory->NewTableReader( s = ioptions_.table_factory->NewTableReader(
TableReaderOptions(ioptions_, env_options, internal_comparator), TableReaderOptions(ioptions_, env_options, internal_comparator,
skip_filters),
std::move(file_reader), fd.GetFileSize(), table_reader); std::move(file_reader), fd.GetFileSize(), table_reader);
TEST_SYNC_POINT("TableCache::GetTableReader:0"); TEST_SYNC_POINT("TableCache::GetTableReader:0");
} }
@ -119,7 +120,7 @@ Status TableCache::FindTable(const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd, Cache::Handle** handle, const FileDescriptor& fd, Cache::Handle** handle,
const bool no_io, bool record_read_stats, const bool no_io, bool record_read_stats,
HistogramImpl* file_read_hist) { HistogramImpl* file_read_hist, bool skip_filters) {
PERF_TIMER_GUARD(find_table_nanos); PERF_TIMER_GUARD(find_table_nanos);
Status s; Status s;
uint64_t number = fd.GetNumber(); uint64_t number = fd.GetNumber();
@ -135,7 +136,7 @@ Status TableCache::FindTable(const EnvOptions& env_options,
unique_ptr<TableReader> table_reader; unique_ptr<TableReader> table_reader;
s = GetTableReader(env_options, internal_comparator, fd, s = GetTableReader(env_options, internal_comparator, fd,
false /* sequential mode */, record_read_stats, false /* sequential mode */, record_read_stats,
file_read_hist, &table_reader); file_read_hist, &table_reader, skip_filters);
if (!s.ok()) { if (!s.ok()) {
assert(table_reader == nullptr); assert(table_reader == nullptr);
RecordTick(ioptions_.statistics, NO_FILE_ERRORS); RecordTick(ioptions_.statistics, NO_FILE_ERRORS);
@ -153,7 +154,7 @@ InternalIterator* TableCache::NewIterator(
const ReadOptions& options, const EnvOptions& env_options, const ReadOptions& options, const EnvOptions& env_options,
const InternalKeyComparator& icomparator, const FileDescriptor& fd, const InternalKeyComparator& icomparator, const FileDescriptor& fd,
TableReader** table_reader_ptr, HistogramImpl* file_read_hist, TableReader** table_reader_ptr, HistogramImpl* file_read_hist,
bool for_compaction, Arena* arena) { bool for_compaction, Arena* arena, bool skip_filters) {
PERF_TIMER_GUARD(new_table_iterator_nanos); PERF_TIMER_GUARD(new_table_iterator_nanos);
if (table_reader_ptr != nullptr) { if (table_reader_ptr != nullptr) {
@ -176,10 +177,10 @@ InternalIterator* TableCache::NewIterator(
} else { } else {
table_reader = fd.table_reader; table_reader = fd.table_reader;
if (table_reader == nullptr) { if (table_reader == nullptr) {
Status s = Status s = FindTable(env_options, icomparator, fd, &handle,
FindTable(env_options, icomparator, fd, &handle, options.read_tier == kBlockCacheTier /* no_io */,
options.read_tier == kBlockCacheTier /* no_io */, !for_compaction /* record read_stats */,
!for_compaction /* record read_stats */, file_read_hist); file_read_hist, skip_filters);
if (!s.ok()) { if (!s.ok()) {
return NewErrorInternalIterator(s, arena); return NewErrorInternalIterator(s, arena);
} }
@ -187,7 +188,8 @@ InternalIterator* TableCache::NewIterator(
} }
} }
InternalIterator* result = table_reader->NewIterator(options, arena); InternalIterator* result =
table_reader->NewIterator(options, arena, skip_filters);
if (create_new_table_reader) { if (create_new_table_reader) {
assert(handle == nullptr); assert(handle == nullptr);
@ -209,7 +211,8 @@ InternalIterator* TableCache::NewIterator(
Status TableCache::Get(const ReadOptions& options, Status TableCache::Get(const ReadOptions& options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd, const Slice& k, const FileDescriptor& fd, const Slice& k,
GetContext* get_context, HistogramImpl* file_read_hist) { GetContext* get_context, HistogramImpl* file_read_hist,
bool skip_filters) {
TableReader* t = fd.table_reader; TableReader* t = fd.table_reader;
Status s; Status s;
Cache::Handle* handle = nullptr; Cache::Handle* handle = nullptr;
@ -258,14 +261,14 @@ Status TableCache::Get(const ReadOptions& options,
if (!t) { if (!t) {
s = FindTable(env_options_, internal_comparator, fd, &handle, s = FindTable(env_options_, internal_comparator, fd, &handle,
options.read_tier == kBlockCacheTier /* no_io */, options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist); true /* record_read_stats */, file_read_hist, skip_filters);
if (s.ok()) { if (s.ok()) {
t = GetTableReaderFromHandle(handle); t = GetTableReaderFromHandle(handle);
} }
} }
if (s.ok()) { if (s.ok()) {
get_context->SetReplayLog(row_cache_entry); // nullptr if no cache. get_context->SetReplayLog(row_cache_entry); // nullptr if no cache.
s = t->Get(options, k, get_context); s = t->Get(options, k, get_context, skip_filters);
get_context->SetReplayLog(nullptr); get_context->SetReplayLog(nullptr);
if (handle != nullptr) { if (handle != nullptr) {
ReleaseHandle(handle); ReleaseHandle(handle);

@ -44,30 +44,35 @@ class TableCache {
// the returned iterator. The returned "*tableptr" object is owned by // the returned iterator. The returned "*tableptr" object is owned by
// the cache and should not be deleted, and is valid for as long as the // the cache and should not be deleted, and is valid for as long as the
// returned iterator is live. // returned iterator is live.
// @param skip_filters Disables loading/accessing the filter block
InternalIterator* NewIterator( InternalIterator* NewIterator(
const ReadOptions& options, const EnvOptions& toptions, const ReadOptions& options, const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileDescriptor& file_fd, TableReader** table_reader_ptr = nullptr, const FileDescriptor& file_fd, TableReader** table_reader_ptr = nullptr,
HistogramImpl* file_read_hist = nullptr, bool for_compaction = false, HistogramImpl* file_read_hist = nullptr, bool for_compaction = false,
Arena* arena = nullptr); Arena* arena = nullptr, bool skip_filters = false);
// If a seek to internal key "k" in specified file finds an entry, // If a seek to internal key "k" in specified file finds an entry,
// call (*handle_result)(arg, found_key, found_value) repeatedly until // call (*handle_result)(arg, found_key, found_value) repeatedly until
// it returns false. // it returns false.
// @param skip_filters Disables loading/accessing the filter block
Status Get(const ReadOptions& options, Status Get(const ReadOptions& options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileDescriptor& file_fd, const Slice& k, const FileDescriptor& file_fd, const Slice& k,
GetContext* get_context, HistogramImpl* file_read_hist = nullptr); GetContext* get_context, HistogramImpl* file_read_hist = nullptr,
bool skip_filters = false);
// Evict any entry for the specified file number // Evict any entry for the specified file number
static void Evict(Cache* cache, uint64_t file_number); static void Evict(Cache* cache, uint64_t file_number);
// Find table reader // Find table reader
// @param skip_filters Disables loading/accessing the filter block
Status FindTable(const EnvOptions& toptions, Status FindTable(const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileDescriptor& file_fd, Cache::Handle**, const FileDescriptor& file_fd, Cache::Handle**,
const bool no_io = false, bool record_read_stats = true, const bool no_io = false, bool record_read_stats = true,
HistogramImpl* file_read_hist = nullptr); HistogramImpl* file_read_hist = nullptr,
bool skip_filters = false);
// Get TableReader from a cache handle. // Get TableReader from a cache handle.
TableReader* GetTableReaderFromHandle(Cache::Handle* handle); TableReader* GetTableReaderFromHandle(Cache::Handle* handle);
@ -100,7 +105,8 @@ class TableCache {
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd, bool sequential_mode, const FileDescriptor& fd, bool sequential_mode,
bool record_read_stats, HistogramImpl* file_read_hist, bool record_read_stats, HistogramImpl* file_read_hist,
unique_ptr<TableReader>* table_reader); unique_ptr<TableReader>* table_reader,
bool skip_filters = false);
const ImmutableCFOptions& ioptions_; const ImmutableCFOptions& ioptions_;
const EnvOptions& env_options_; const EnvOptions& env_options_;

@ -475,19 +475,21 @@ class LevelFileNumIterator : public InternalIterator {
class LevelFileIteratorState : public TwoLevelIteratorState { class LevelFileIteratorState : public TwoLevelIteratorState {
public: public:
// @param skip_filters Disables loading/accessing the filter block
LevelFileIteratorState(TableCache* table_cache, LevelFileIteratorState(TableCache* table_cache,
const ReadOptions& read_options, const ReadOptions& read_options,
const EnvOptions& env_options, const EnvOptions& env_options,
const InternalKeyComparator& icomparator, const InternalKeyComparator& icomparator,
HistogramImpl* file_read_hist, bool for_compaction, HistogramImpl* file_read_hist, bool for_compaction,
bool prefix_enabled) bool prefix_enabled, bool skip_filters)
: TwoLevelIteratorState(prefix_enabled), : TwoLevelIteratorState(prefix_enabled),
table_cache_(table_cache), table_cache_(table_cache),
read_options_(read_options), read_options_(read_options),
env_options_(env_options), env_options_(env_options),
icomparator_(icomparator), icomparator_(icomparator),
file_read_hist_(file_read_hist), file_read_hist_(file_read_hist),
for_compaction_(for_compaction) {} for_compaction_(for_compaction),
skip_filters_(skip_filters) {}
InternalIterator* NewSecondaryIterator(const Slice& meta_handle) override { InternalIterator* NewSecondaryIterator(const Slice& meta_handle) override {
if (meta_handle.size() != sizeof(FileDescriptor)) { if (meta_handle.size() != sizeof(FileDescriptor)) {
@ -499,7 +501,7 @@ class LevelFileIteratorState : public TwoLevelIteratorState {
return table_cache_->NewIterator( return table_cache_->NewIterator(
read_options_, env_options_, icomparator_, *fd, read_options_, env_options_, icomparator_, *fd,
nullptr /* don't need reference to table*/, file_read_hist_, nullptr /* don't need reference to table*/, file_read_hist_,
for_compaction_); for_compaction_, nullptr /* arena */, skip_filters_);
} }
} }
@ -514,6 +516,7 @@ class LevelFileIteratorState : public TwoLevelIteratorState {
const InternalKeyComparator& icomparator_; const InternalKeyComparator& icomparator_;
HistogramImpl* file_read_hist_; HistogramImpl* file_read_hist_;
bool for_compaction_; bool for_compaction_;
bool skip_filters_;
}; };
// A wrapper of version builder which references the current version in // A wrapper of version builder which references the current version in
@ -792,7 +795,8 @@ void Version::AddIterators(const ReadOptions& read_options,
cfd_->internal_comparator(), cfd_->internal_comparator(),
cfd_->internal_stats()->GetFileReadHist(level), cfd_->internal_stats()->GetFileReadHist(level),
false /* for_compaction */, false /* for_compaction */,
cfd_->ioptions()->prefix_extractor != nullptr); cfd_->ioptions()->prefix_extractor != nullptr,
IsFilterSkipped(level));
mem = arena->AllocateAligned(sizeof(LevelFileNumIterator)); mem = arena->AllocateAligned(sizeof(LevelFileNumIterator));
auto* first_level_iter = new (mem) LevelFileNumIterator( auto* first_level_iter = new (mem) LevelFileNumIterator(
cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level)); cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level));
@ -895,7 +899,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
while (f != nullptr) { while (f != nullptr) {
*status = table_cache_->Get( *status = table_cache_->Get(
read_options, *internal_comparator(), f->fd, ikey, &get_context, read_options, *internal_comparator(), f->fd, ikey, &get_context,
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel())); cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel())));
// TODO: examine the behavior for corrupted key // TODO: examine the behavior for corrupted key
if (!status->ok()) { if (!status->ok()) {
return; return;
@ -952,6 +957,13 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
} }
} }
// Reports whether filter blocks should be bypassed for reads that touch files
// on `level`.
// @param level Level index being read.
// @return true only when optimize_filters_for_hits is enabled for this column
//         family AND `level` equals num_non_empty_levels() - 1; false
//         otherwise.
bool Version::IsFilterSkipped(int level) {
// Reaching the bottom level implies misses at all upper levels, so we'll
// skip checking the filters when we predict a hit.
return cfd_->ioptions()->optimize_filters_for_hits &&
level == storage_info_.num_non_empty_levels() - 1;
}
void VersionStorageInfo::GenerateLevelFilesBrief() { void VersionStorageInfo::GenerateLevelFilesBrief() {
level_files_brief_.resize(num_non_empty_levels_); level_files_brief_.resize(num_non_empty_levels_);
for (int level = 0; level < num_non_empty_levels_; level++) { for (int level = 0; level < num_non_empty_levels_; level++) {
@ -2124,7 +2136,8 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
// unlimited table cache. Pre-load table handle now. // unlimited table cache. Pre-load table handle now.
// Need to do it out of the mutex. // Need to do it out of the mutex.
builder_guard->version_builder()->LoadTableHandlers( builder_guard->version_builder()->LoadTableHandlers(
column_family_data->internal_stats()); column_family_data->internal_stats(),
column_family_data->ioptions()->optimize_filters_for_hits);
} }
// This is fine because everything inside of this block is serialized -- // This is fine because everything inside of this block is serialized --
@ -3270,7 +3283,8 @@ InternalIterator* VersionSet::MakeInputIterator(Compaction* c) {
cfd->table_cache(), read_options, env_options_, cfd->table_cache(), read_options, env_options_,
cfd->internal_comparator(), cfd->internal_comparator(),
nullptr /* no per level latency histogram */, nullptr /* no per level latency histogram */,
true /* for_compaction */, false /* prefix enabled */), true /* for_compaction */, false /* prefix enabled */,
false /* skip_filters */),
new LevelFileNumIterator(cfd->internal_comparator(), new LevelFileNumIterator(cfd->internal_comparator(),
c->input_levels(which))); c->input_levels(which)));
} }

@ -522,6 +522,12 @@ class Version {
InternalIterator* level_iter, InternalIterator* level_iter,
const Slice& internal_prefix) const; const Slice& internal_prefix) const;
// Returns true if the filter blocks in the specified level will not be
// checked during read operations. In certain cases (trivial move or preload),
// the filter block may already be cached, but we still do not access it such
// that it eventually expires from the cache.
bool IsFilterSkipped(int level);
// The helper function of UpdateAccumulatedStats, which may fill the missing // The helper function of UpdateAccumulatedStats, which may fill the missing
// fields of file_mata from its associated TableProperties. // fields of file_mata from its associated TableProperties.
// Returns true if it does initialize FileMetaData. // Returns true if it does initialize FileMetaData.

@ -57,7 +57,8 @@ Status BlockBasedTableFactory::NewTableReader(
return BlockBasedTable::Open( return BlockBasedTable::Open(
table_reader_options.ioptions, table_reader_options.env_options, table_reader_options.ioptions, table_reader_options.env_options,
table_options_, table_reader_options.internal_comparator, std::move(file), table_options_, table_reader_options.internal_comparator, std::move(file),
file_size, table_reader, prefetch_enabled); file_size, table_reader, prefetch_enabled,
table_reader_options.skip_filters);
} }
TableBuilder* BlockBasedTableFactory::NewTableBuilder( TableBuilder* BlockBasedTableFactory::NewTableBuilder(

@ -338,11 +338,11 @@ class HashIndexReader : public IndexReader {
struct BlockBasedTable::Rep { struct BlockBasedTable::Rep {
Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options, Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options,
const BlockBasedTableOptions& _table_opt, const BlockBasedTableOptions& _table_opt,
const InternalKeyComparator& _internal_comparator) const InternalKeyComparator& _internal_comparator, bool skip_filters)
: ioptions(_ioptions), : ioptions(_ioptions),
env_options(_env_options), env_options(_env_options),
table_options(_table_opt), table_options(_table_opt),
filter_policy(_table_opt.filter_policy.get()), filter_policy(skip_filters ? nullptr : _table_opt.filter_policy.get()),
internal_comparator(_internal_comparator), internal_comparator(_internal_comparator),
filter_type(FilterType::kNoFilter), filter_type(FilterType::kNoFilter),
whole_key_filtering(_table_opt.whole_key_filtering), whole_key_filtering(_table_opt.whole_key_filtering),
@ -486,7 +486,8 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
unique_ptr<RandomAccessFileReader>&& file, unique_ptr<RandomAccessFileReader>&& file,
uint64_t file_size, uint64_t file_size,
unique_ptr<TableReader>* table_reader, unique_ptr<TableReader>* table_reader,
const bool prefetch_index_and_filter) { const bool prefetch_index_and_filter,
const bool skip_filters) {
table_reader->reset(); table_reader->reset();
Footer footer; Footer footer;
@ -503,8 +504,8 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
// We've successfully read the footer and the index block: we're // We've successfully read the footer and the index block: we're
// ready to serve requests. // ready to serve requests.
Rep* rep = new BlockBasedTable::Rep( Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options,
ioptions, env_options, table_options, internal_comparator); internal_comparator, skip_filters);
rep->file = std::move(file); rep->file = std::move(file);
rep->footer = footer; rep->footer = footer;
rep->index_type = table_options.index_type; rep->index_type = table_options.index_type;
@ -1076,18 +1077,19 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator(
class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState { class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState {
public: public:
BlockEntryIteratorState(BlockBasedTable* table, BlockEntryIteratorState(BlockBasedTable* table,
const ReadOptions& read_options) const ReadOptions& read_options, bool skip_filters)
: TwoLevelIteratorState( : TwoLevelIteratorState(table->rep_->ioptions.prefix_extractor !=
table->rep_->ioptions.prefix_extractor != nullptr), nullptr),
table_(table), table_(table),
read_options_(read_options) {} read_options_(read_options),
skip_filters_(skip_filters) {}
InternalIterator* NewSecondaryIterator(const Slice& index_value) override { InternalIterator* NewSecondaryIterator(const Slice& index_value) override {
return NewDataBlockIterator(table_->rep_, read_options_, index_value); return NewDataBlockIterator(table_->rep_, read_options_, index_value);
} }
bool PrefixMayMatch(const Slice& internal_key) override { bool PrefixMayMatch(const Slice& internal_key) override {
if (read_options_.total_order_seek) { if (read_options_.total_order_seek || skip_filters_) {
return true; return true;
} }
return table_->PrefixMayMatch(internal_key); return table_->PrefixMayMatch(internal_key);
@ -1097,6 +1099,7 @@ class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState {
// Don't own table_ // Don't own table_
BlockBasedTable* table_; BlockBasedTable* table_;
const ReadOptions read_options_; const ReadOptions read_options_;
bool skip_filters_;
}; };
// This will be broken if the user specifies an unusual implementation // This will be broken if the user specifies an unusual implementation
@ -1187,9 +1190,11 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
} }
InternalIterator* BlockBasedTable::NewIterator(const ReadOptions& read_options, InternalIterator* BlockBasedTable::NewIterator(const ReadOptions& read_options,
Arena* arena) { Arena* arena,
return NewTwoLevelIterator(new BlockEntryIteratorState(this, read_options), bool skip_filters) {
NewIndexIterator(read_options), arena); return NewTwoLevelIterator(
new BlockEntryIteratorState(this, read_options, skip_filters),
NewIndexIterator(read_options), arena);
} }
bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter, bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter,
@ -1209,11 +1214,13 @@ bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter,
return true; return true;
} }
Status BlockBasedTable::Get( Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
const ReadOptions& read_options, const Slice& key, GetContext* get_context, bool skip_filters) {
GetContext* get_context) {
Status s; Status s;
auto filter_entry = GetFilter(read_options.read_tier == kBlockCacheTier); CachableEntry<FilterBlockReader> filter_entry;
if (!skip_filters) {
filter_entry = GetFilter(read_options.read_tier == kBlockCacheTier);
}
FilterBlockReader* filter = filter_entry.value; FilterBlockReader* filter = filter_entry.value;
// First check the full filter // First check the full filter

@ -64,27 +64,32 @@ class BlockBasedTable : public TableReader {
// If there was an error while initializing the table, sets "*table_reader" // If there was an error while initializing the table, sets "*table_reader"
// to nullptr and returns a non-ok status. // to nullptr and returns a non-ok status.
// //
// *file must remain live while this Table is in use. // @param file must remain live while this Table is in use.
// *prefetch_blocks can be used to disable prefetching of index and filter // @param prefetch_index_and_filter can be used to disable prefetching of
// blocks at statup // index and filter blocks at startup
// @param skip_filters Disables loading/accessing the filter block. Overrides
// prefetch_index_and_filter, so filter will be skipped if both are set.
static Status Open(const ImmutableCFOptions& ioptions, static Status Open(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options, const EnvOptions& env_options,
const BlockBasedTableOptions& table_options, const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_key_comparator, const InternalKeyComparator& internal_key_comparator,
unique_ptr<RandomAccessFileReader>&& file, unique_ptr<RandomAccessFileReader>&& file,
uint64_t file_size, unique_ptr<TableReader>* table_reader, uint64_t file_size, unique_ptr<TableReader>* table_reader,
bool prefetch_index_and_filter = true); bool prefetch_index_and_filter = true,
bool skip_filters = false);
bool PrefixMayMatch(const Slice& internal_key); bool PrefixMayMatch(const Slice& internal_key);
// Returns a new iterator over the table contents. // Returns a new iterator over the table contents.
// The result of NewIterator() is initially invalid (caller must // The result of NewIterator() is initially invalid (caller must
// call one of the Seek methods on the iterator before using it). // call one of the Seek methods on the iterator before using it).
InternalIterator* NewIterator(const ReadOptions&, // @param skip_filters Disables loading/accessing the filter block
Arena* arena = nullptr) override; InternalIterator* NewIterator(const ReadOptions&, Arena* arena = nullptr,
bool skip_filters = false) override;
// @param skip_filters Disables loading/accessing the filter block
Status Get(const ReadOptions& readOptions, const Slice& key, Status Get(const ReadOptions& readOptions, const Slice& key,
GetContext* get_context) override; GetContext* get_context, bool skip_filters = false) override;
// Pre-fetch the disk blocks that correspond to the key range specified by // Pre-fetch the disk blocks that correspond to the key range specified by
// (kbegin, kend). The call will return error status in the event of // (kbegin, kend). The call will return error status in the event of

@ -128,7 +128,7 @@ CuckooTableReader::CuckooTableReader(
} }
Status CuckooTableReader::Get(const ReadOptions& readOptions, const Slice& key, Status CuckooTableReader::Get(const ReadOptions& readOptions, const Slice& key,
GetContext* get_context) { GetContext* get_context, bool skip_filters) {
assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0)); assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0));
Slice user_key = ExtractUserKey(key); Slice user_key = ExtractUserKey(key);
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) { for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
@ -358,7 +358,7 @@ extern InternalIterator* NewErrorInternalIterator(const Status& status,
Arena* arena); Arena* arena);
InternalIterator* CuckooTableReader::NewIterator( InternalIterator* CuckooTableReader::NewIterator(
const ReadOptions& read_options, Arena* arena) { const ReadOptions& read_options, Arena* arena, bool skip_filters) {
if (!status().ok()) { if (!status().ok()) {
return NewErrorInternalIterator( return NewErrorInternalIterator(
Status::Corruption("CuckooTableReader status is not okay."), arena); Status::Corruption("CuckooTableReader status is not okay."), arena);

@ -42,10 +42,10 @@ class CuckooTableReader: public TableReader {
Status status() const { return status_; } Status status() const { return status_; }
Status Get(const ReadOptions& read_options, const Slice& key, Status Get(const ReadOptions& read_options, const Slice& key,
GetContext* get_context) override; GetContext* get_context, bool skip_filters = false) override;
InternalIterator* NewIterator(const ReadOptions&, InternalIterator* NewIterator(const ReadOptions&, Arena* arena = nullptr,
Arena* arena = nullptr) override; bool skip_filters = false) override;
void Prepare(const Slice& target) override; void Prepare(const Slice& target) override;
// Report an approximation of how much memory has been used. // Report an approximation of how much memory has been used.

@ -28,13 +28,13 @@ stl_wrappers::KVMap MakeMockFile(
return stl_wrappers::KVMap(l, stl_wrappers::LessOfComparator(&icmp_)); return stl_wrappers::KVMap(l, stl_wrappers::LessOfComparator(&icmp_));
} }
InternalIterator* MockTableReader::NewIterator(const ReadOptions&, InternalIterator* MockTableReader::NewIterator(const ReadOptions&, Arena* arena,
Arena* arena) { bool skip_filters) {
return new MockTableIterator(table_); return new MockTableIterator(table_);
} }
Status MockTableReader::Get(const ReadOptions&, const Slice& key, Status MockTableReader::Get(const ReadOptions&, const Slice& key,
GetContext* get_context) { GetContext* get_context, bool skip_filters) {
std::unique_ptr<MockTableIterator> iter(new MockTableIterator(table_)); std::unique_ptr<MockTableIterator> iter(new MockTableIterator(table_));
for (iter->Seek(key); iter->Valid(); iter->Next()) { for (iter->Seek(key); iter->Valid(); iter->Next()) {
ParsedInternalKey parsed_key; ParsedInternalKey parsed_key;

@ -40,10 +40,11 @@ class MockTableReader : public TableReader {
public: public:
explicit MockTableReader(const stl_wrappers::KVMap& table) : table_(table) {} explicit MockTableReader(const stl_wrappers::KVMap& table) : table_(table) {}
InternalIterator* NewIterator(const ReadOptions&, Arena* arena) override; InternalIterator* NewIterator(const ReadOptions&, Arena* arena,
bool skip_filters = false) override;
Status Get(const ReadOptions&, const Slice& key, Status Get(const ReadOptions&, const Slice& key, GetContext* get_context,
GetContext* get_context) override; bool skip_filters = false) override;
uint64_t ApproximateOffsetOf(const Slice& key) override { return 0; } uint64_t ApproximateOffsetOf(const Slice& key) override { return 0; }

@ -188,7 +188,8 @@ void PlainTableReader::SetupForCompaction() {
} }
InternalIterator* PlainTableReader::NewIterator(const ReadOptions& options, InternalIterator* PlainTableReader::NewIterator(const ReadOptions& options,
Arena* arena) { Arena* arena,
bool skip_filters) {
if (options.total_order_seek && !IsTotalOrderMode()) { if (options.total_order_seek && !IsTotalOrderMode()) {
return NewErrorInternalIterator( return NewErrorInternalIterator(
Status::InvalidArgument("total_order_seek not supported"), arena); Status::InvalidArgument("total_order_seek not supported"), arena);
@ -531,7 +532,7 @@ void PlainTableReader::Prepare(const Slice& target) {
} }
Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target, Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target,
GetContext* get_context) { GetContext* get_context, bool skip_filters) {
// Check bloom filter first. // Check bloom filter first.
Slice prefix_slice; Slice prefix_slice;
uint32_t prefix_hash; uint32_t prefix_hash;

@ -78,13 +78,13 @@ class PlainTableReader: public TableReader {
size_t index_sparseness, size_t huge_page_tlb_size, size_t index_sparseness, size_t huge_page_tlb_size,
bool full_scan_mode); bool full_scan_mode);
InternalIterator* NewIterator(const ReadOptions&, InternalIterator* NewIterator(const ReadOptions&, Arena* arena = nullptr,
Arena* arena = nullptr) override; bool skip_filters = false) override;
void Prepare(const Slice& target) override; void Prepare(const Slice& target) override;
Status Get(const ReadOptions&, const Slice& key, Status Get(const ReadOptions&, const Slice& key, GetContext* get_context,
GetContext* get_context) override; bool skip_filters = false) override;
uint64_t ApproximateOffsetOf(const Slice& key) override; uint64_t ApproximateOffsetOf(const Slice& key) override;

@ -25,16 +25,21 @@ class Slice;
class Status; class Status;
struct TableReaderOptions { struct TableReaderOptions {
// @param skip_filters Disables loading/accessing the filter block
TableReaderOptions(const ImmutableCFOptions& _ioptions, TableReaderOptions(const ImmutableCFOptions& _ioptions,
const EnvOptions& _env_options, const EnvOptions& _env_options,
const InternalKeyComparator& _internal_comparator) const InternalKeyComparator& _internal_comparator,
bool _skip_filters = false)
: ioptions(_ioptions), : ioptions(_ioptions),
env_options(_env_options), env_options(_env_options),
internal_comparator(_internal_comparator) {} internal_comparator(_internal_comparator),
skip_filters(_skip_filters) {}
const ImmutableCFOptions& ioptions; const ImmutableCFOptions& ioptions;
const EnvOptions& env_options; const EnvOptions& env_options;
const InternalKeyComparator& internal_comparator; const InternalKeyComparator& internal_comparator;
// This is only used for BlockBasedTable (reader)
bool skip_filters;
}; };
struct TableBuilderOptions { struct TableBuilderOptions {
@ -57,6 +62,7 @@ struct TableBuilderOptions {
int_tbl_prop_collector_factories; int_tbl_prop_collector_factories;
CompressionType compression_type; CompressionType compression_type;
const CompressionOptions& compression_opts; const CompressionOptions& compression_opts;
// This is only used for BlockBasedTableBuilder
bool skip_filters = false; bool skip_filters = false;
}; };

@ -35,8 +35,11 @@ class TableReader {
// When destroying the iterator, the caller will not call "delete" // When destroying the iterator, the caller will not call "delete"
// but Iterator::~Iterator() directly. The destructor needs to destroy // but Iterator::~Iterator() directly. The destructor needs to destroy
// all the states but those allocated in arena. // all the states but those allocated in arena.
// skip_filters: disables checking the bloom filters even if they exist. This
// option is effective only for block-based table format.
virtual InternalIterator* NewIterator(const ReadOptions&, virtual InternalIterator* NewIterator(const ReadOptions&,
Arena* arena = nullptr) = 0; Arena* arena = nullptr,
bool skip_filters = false) = 0;
// Given a key, return an approximate byte offset in the file where // Given a key, return an approximate byte offset in the file where
// the data for that key begins (or would begin if the key were // the data for that key begins (or would begin if the key were
@ -67,8 +70,10 @@ class TableReader {
// //
// readOptions is the options for the read // readOptions is the options for the read
// key is the key to search for // key is the key to search for
// skip_filters: disables checking the bloom filters even if they exist. This
// option is effective only for block-based table format.
virtual Status Get(const ReadOptions& readOptions, const Slice& key, virtual Status Get(const ReadOptions& readOptions, const Slice& key,
GetContext* get_context) = 0; GetContext* get_context, bool skip_filters = false) = 0;
// Prefetch data corresponding to a given range of keys // Prefetch data corresponding to a given range of keys
// Typically this functionality is required for table implementations that // Typically this functionality is required for table implementations that

@ -91,7 +91,8 @@ Status SstFileReader::NewTableReader(
if (block_table_factory) { if (block_table_factory) {
return block_table_factory->NewTableReader( return block_table_factory->NewTableReader(
TableReaderOptions(ioptions_, soptions_, internal_comparator_), TableReaderOptions(ioptions_, soptions_, internal_comparator_,
/*skip_filters=*/false),
std::move(file_), file_size, &table_reader_, /*enable_prefetch=*/false); std::move(file_), file_size, &table_reader_, /*enable_prefetch=*/false);
} }

Loading…
Cancel
Save