Create a BlockCacheLookupContext to enable fine-grained block cache tracing. (#5421)

Summary:
BlockCacheLookupContext only contains the caller for now.
We will trace block accesses at five places:
1. BlockBasedTable::GetFilter.
2. BlockBasedTable::GetUncompressedDict.
3. BlockBasedTable::MaybeReadBlockAndLoadToCache. (To trace accesses to data, index, and range deletion blocks.)
4. BlockBasedTable::Get. (To trace the referenced key and whether the referenced key exists in a fetched data block.)
5. BlockBasedTable::MultiGet. (To trace the referenced key and whether the referenced key exists in a fetched data block.)

We create the context at:
1. BlockBasedTable::Get. (kUserGet)
2. BlockBasedTable::MultiGet. (kUserMGet)
3. BlockBasedTable::NewIterator. (The caller is either kUserIterator or kCompaction; external SST file ingestion also calls this function.)
4. BlockBasedTable::Open. (kPrefetch)
5. Index/Filter::CacheDependencies. (kPrefetch)
6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or kUserApproximateSize).

I loaded 1 million key-value pairs into the database and ran the readrandom benchmark with a single thread. I gave the block cache 10 GB to make sure all reads hit the block cache after warmup. The throughput is comparable: this PR is about 3% slower than the master branch.
Throughput of this PR: 231334 ops/s.
Throughput of the master branch: 238428 ops/s.

Experiment setup:
RocksDB:    version 6.2
Date:       Mon Jun 10 10:42:51 2019
CPU:        24 * Intel Core Processor (Skylake)
CPUCache:   16384 KB
Keys:       20 bytes each
Values:     100 bytes each (100 bytes after compression)
Entries:    1000000
Prefix:    20 bytes
Keys per prefix:    0
RawSize:    114.4 MB (estimated)
FileSize:   114.4 MB (estimated)
Write rate: 0 bytes/second
Read rate: 0 ops/second
Compression: NoCompression
Compression sampling rate: 0
Memtablerep: skip_list
Perf Level: 1

Load command: ./db_bench --benchmarks="fillseq" --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --statistics --cache_index_and_filter_blocks --cache_size=10737418240 --disable_auto_compactions=1 --disable_wal=1 --compression_type=none --min_level_to_compress=-1 --compression_ratio=1 --num=1000000

Run command: ./db_bench --benchmarks="readrandom,stats" --use_existing_db --threads=1 --duration=120 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --statistics --cache_index_and_filter_blocks --cache_size=10737418240 --disable_auto_compactions=1 --disable_wal=1 --compression_type=none --min_level_to_compress=-1 --compression_ratio=1 --num=1000000 --duration=120

TODOs:
1. Create a caller for external SST file ingestion and differentiate the callers for iterator.
2. Integrate tracer to trace block cache accesses.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5421

Differential Revision: D15704258

Pulled By: HaoyuHuang

fbshipit-source-id: 4aa8a55f8cb1576ffb367bfa3186a91d8f06d93a
main
haoyuhuang 6 years ago committed by Facebook Github Bot
parent 63ace8ef0e
commit 5efa0d6b0d
  1. 3
      db/compaction/compaction_job.cc
  2. 4
      db/db_impl/db_impl.cc
  3. 21
      db/version_set.cc
  4. 7
      db/version_set.h
  5. 6
      table/block_based/block_based_filter_block.cc
  6. 23
      table/block_based/block_based_filter_block.h
  7. 200
      table/block_based/block_based_filter_block_test.cc
  8. 300
      table/block_based/block_based_table_reader.cc
  9. 89
      table/block_based/block_based_table_reader.h
  10. 34
      table/block_based/filter_block.h
  11. 23
      table/block_based/full_filter_block.cc
  12. 59
      table/block_based/full_filter_block.h
  13. 64
      table/block_based/full_filter_block_test.cc
  14. 37
      table/block_based/partitioned_filter_block.cc
  15. 30
      table/block_based/partitioned_filter_block.h
  16. 18
      table/block_based/partitioned_filter_block_test.cc
  17. 5
      table/cuckoo/cuckoo_table_reader.h
  18. 7
      table/mock_table.h
  19. 3
      table/plain/plain_table_reader.cc
  20. 3
      table/plain/plain_table_reader.h
  21. 3
      table/table_reader.h
  22. 30
      trace_replay/block_cache_tracer.h

@ -520,7 +520,8 @@ void CompactionJob::GenSubcompactionBoundaries() {
// to the index block and may incur I/O cost in the process. Unlock db
// mutex to reduce contention
db_mutex_->Unlock();
uint64_t size = versions_->ApproximateSize(v, a, b, start_lvl, out_lvl + 1);
uint64_t size = versions_->ApproximateSize(v, a, b, start_lvl, out_lvl + 1,
/*for_compaction*/ true);
db_mutex_->Lock();
ranges.emplace_back(a, b, size);
sum += size;

@ -2717,7 +2717,9 @@ void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family,
InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
sizes[i] = 0;
if (include_flags & DB::SizeApproximationFlags::INCLUDE_FILES) {
sizes[i] += versions_->ApproximateSize(v, k1.Encode(), k2.Encode());
sizes[i] += versions_->ApproximateSize(
v, k1.Encode(), k2.Encode(), /*start_level=*/0, /*end_level=*/-1,
/*for_compaction=*/false);
}
if (include_flags & DB::SizeApproximationFlags::INCLUDE_MEMTABLES) {
sizes[i] += sv->mem->ApproximateStats(k1.Encode(), k2.Encode()).size;

@ -4827,7 +4827,7 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
// maintain state of where they first appear in the files.
uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start,
const Slice& end, int start_level,
int end_level) {
int end_level, bool for_compaction) {
// pre-condition
assert(v->cfd_->internal_comparator().Compare(start, end) <= 0);
@ -4848,7 +4848,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start,
if (!level) {
// level 0 data is sorted order, handle the use case explicitly
size += ApproximateSizeLevel0(v, files_brief, start, end);
size += ApproximateSizeLevel0(v, files_brief, start, end, for_compaction);
continue;
}
@ -4865,7 +4865,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start,
// inferred from the sorted order
for (uint64_t i = idx_start; i < files_brief.num_files; i++) {
uint64_t val;
val = ApproximateSize(v, files_brief.files[i], end);
val = ApproximateSize(v, files_brief.files[i], end, for_compaction);
if (!val) {
// the files after this will not have the range
break;
@ -4876,7 +4876,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start,
if (i == idx_start) {
// subtract the bytes needed to be scanned to get to the starting
// key
val = ApproximateSize(v, files_brief.files[i], start);
val = ApproximateSize(v, files_brief.files[i], start, for_compaction);
assert(size >= val);
size -= val;
}
@ -4889,13 +4889,16 @@ uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start,
uint64_t VersionSet::ApproximateSizeLevel0(Version* v,
const LevelFilesBrief& files_brief,
const Slice& key_start,
const Slice& key_end) {
const Slice& key_end,
bool for_compaction) {
// level 0 files are not in sorted order, we need to iterate through
// the list to compute the total bytes that require scanning
uint64_t size = 0;
for (size_t i = 0; i < files_brief.num_files; i++) {
const uint64_t start = ApproximateSize(v, files_brief.files[i], key_start);
const uint64_t end = ApproximateSize(v, files_brief.files[i], key_end);
const uint64_t start =
ApproximateSize(v, files_brief.files[i], key_start, for_compaction);
const uint64_t end =
ApproximateSize(v, files_brief.files[i], key_end, for_compaction);
assert(end >= start);
size += end - start;
}
@ -4903,7 +4906,7 @@ uint64_t VersionSet::ApproximateSizeLevel0(Version* v,
}
uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
const Slice& key) {
const Slice& key, bool for_compaction) {
// pre-condition
assert(v);
@ -4923,7 +4926,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
*f.file_metadata, nullptr /* range_del_agg */,
v->GetMutableCFOptions().prefix_extractor.get(), &table_reader_ptr);
if (table_reader_ptr != nullptr) {
result = table_reader_ptr->ApproximateOffsetOf(key);
result = table_reader_ptr->ApproximateOffsetOf(key, for_compaction);
}
delete iter;
}

@ -982,7 +982,7 @@ class VersionSet {
// in levels [start_level, end_level). If end_level == 0 it will search
// through all non-empty levels
uint64_t ApproximateSize(Version* v, const Slice& start, const Slice& end,
int start_level = 0, int end_level = -1);
int start_level, int end_level, bool for_compaction);
// Return the size of the current manifest file
uint64_t manifest_file_size() const { return manifest_file_size_; }
@ -1032,10 +1032,11 @@ class VersionSet {
// ApproximateSize helper
uint64_t ApproximateSizeLevel0(Version* v, const LevelFilesBrief& files_brief,
const Slice& start, const Slice& end);
const Slice& start, const Slice& end,
bool for_compaction);
uint64_t ApproximateSize(Version* v, const FdWithKeyRange& f,
const Slice& key);
const Slice& key, bool for_compaction);
// Save current contents to *log
Status WriteSnapshot(log::Writer* log);

@ -187,7 +187,8 @@ BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
bool BlockBasedFilterBlockReader::KeyMayMatch(
const Slice& key, const SliceTransform* /* prefix_extractor */,
uint64_t block_offset, const bool /*no_io*/,
const Slice* const /*const_ikey_ptr*/) {
const Slice* const /*const_ikey_ptr*/,
BlockCacheLookupContext* /*context*/) {
assert(block_offset != kNotValid);
if (!whole_key_filtering_) {
return true;
@ -198,7 +199,8 @@ bool BlockBasedFilterBlockReader::KeyMayMatch(
bool BlockBasedFilterBlockReader::PrefixMayMatch(
const Slice& prefix, const SliceTransform* /* prefix_extractor */,
uint64_t block_offset, const bool /*no_io*/,
const Slice* const /*const_ikey_ptr*/) {
const Slice* const /*const_ikey_ptr*/,
BlockCacheLookupContext* /*context*/) {
assert(block_offset != kNotValid);
return MayMatch(prefix, block_offset);
}

@ -82,17 +82,18 @@ class BlockBasedFilterBlockReader : public FilterBlockReader {
const BlockBasedTableOptions& table_opt,
bool whole_key_filtering,
BlockContents&& contents, Statistics* statistics);
virtual bool IsBlockBased() override { return true; }
virtual bool KeyMayMatch(
const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid, const bool no_io = false,
const Slice* const const_ikey_ptr = nullptr) override;
virtual bool PrefixMayMatch(
const Slice& prefix, const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid, const bool no_io = false,
const Slice* const const_ikey_ptr = nullptr) override;
virtual size_t ApproximateMemoryUsage() const override;
bool IsBlockBased() override { return true; }
bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr,
BlockCacheLookupContext* context) override;
bool PrefixMayMatch(const Slice& prefix,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr,
BlockCacheLookupContext* context) override;
size_t ApproximateMemoryUsage() const override;
// convert this object to a human readable form
std::string ToString() const override;

@ -57,8 +57,12 @@ TEST_F(FilterBlockTest, EmptyBuilder) {
ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block.data));
BlockBasedFilterBlockReader reader(nullptr, table_options_, true,
std::move(block), nullptr);
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, uint64_t{0}));
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, 100000));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
}
TEST_F(FilterBlockTest, SingleChunk) {
@ -76,13 +80,27 @@ TEST_F(FilterBlockTest, SingleChunk) {
BlockContents block(builder.Finish());
BlockBasedFilterBlockReader reader(nullptr, table_options_, true,
std::move(block), nullptr);
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, 100));
ASSERT_TRUE(reader.KeyMayMatch("bar", nullptr, 100));
ASSERT_TRUE(reader.KeyMayMatch("box", nullptr, 100));
ASSERT_TRUE(reader.KeyMayMatch("hello", nullptr, 100));
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, 100));
ASSERT_TRUE(!reader.KeyMayMatch("missing", nullptr, 100));
ASSERT_TRUE(!reader.KeyMayMatch("other", nullptr, 100));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"missing", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"other", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
}
TEST_F(FilterBlockTest, MultiChunk) {
@ -110,28 +128,60 @@ TEST_F(FilterBlockTest, MultiChunk) {
std::move(block), nullptr);
// Check first filter
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, uint64_t{0}));
ASSERT_TRUE(reader.KeyMayMatch("bar", nullptr, 2000));
ASSERT_TRUE(!reader.KeyMayMatch("box", nullptr, uint64_t{0}));
ASSERT_TRUE(!reader.KeyMayMatch("hello", nullptr, uint64_t{0}));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/2000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
// Check second filter
ASSERT_TRUE(reader.KeyMayMatch("box", nullptr, 3100));
ASSERT_TRUE(!reader.KeyMayMatch("foo", nullptr, 3100));
ASSERT_TRUE(!reader.KeyMayMatch("bar", nullptr, 3100));
ASSERT_TRUE(!reader.KeyMayMatch("hello", nullptr, 3100));
ASSERT_TRUE(reader.KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
// Check third filter (empty)
ASSERT_TRUE(!reader.KeyMayMatch("foo", nullptr, 4100));
ASSERT_TRUE(!reader.KeyMayMatch("bar", nullptr, 4100));
ASSERT_TRUE(!reader.KeyMayMatch("box", nullptr, 4100));
ASSERT_TRUE(!reader.KeyMayMatch("hello", nullptr, 4100));
ASSERT_TRUE(!reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
// Check last filter
ASSERT_TRUE(reader.KeyMayMatch("box", nullptr, 9000));
ASSERT_TRUE(reader.KeyMayMatch("hello", nullptr, 9000));
ASSERT_TRUE(!reader.KeyMayMatch("foo", nullptr, 9000));
ASSERT_TRUE(!reader.KeyMayMatch("bar", nullptr, 9000));
ASSERT_TRUE(reader.KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
}
// Test for block based filter block
@ -154,8 +204,12 @@ TEST_F(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) {
ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block.data));
FilterBlockReader* reader = new BlockBasedFilterBlockReader(
nullptr, table_options_, true, std::move(block), nullptr);
ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, uint64_t{0}));
ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, 100000));
ASSERT_TRUE(reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/10000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
delete builder;
delete reader;
@ -175,13 +229,27 @@ TEST_F(BlockBasedFilterBlockTest, BlockBasedSingleChunk) {
BlockContents block(builder->Finish());
FilterBlockReader* reader = new BlockBasedFilterBlockReader(
nullptr, table_options_, true, std::move(block), nullptr);
ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, 100));
ASSERT_TRUE(reader->KeyMayMatch("bar", nullptr, 100));
ASSERT_TRUE(reader->KeyMayMatch("box", nullptr, 100));
ASSERT_TRUE(reader->KeyMayMatch("hello", nullptr, 100));
ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, 100));
ASSERT_TRUE(!reader->KeyMayMatch("missing", nullptr, 100));
ASSERT_TRUE(!reader->KeyMayMatch("other", nullptr, 100));
ASSERT_TRUE(reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"missing", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"other", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
delete builder;
delete reader;
@ -213,28 +281,60 @@ TEST_F(BlockBasedFilterBlockTest, BlockBasedMultiChunk) {
nullptr, table_options_, true, std::move(block), nullptr);
// Check first filter
ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, uint64_t{0}));
ASSERT_TRUE(reader->KeyMayMatch("bar", nullptr, 2000));
ASSERT_TRUE(!reader->KeyMayMatch("box", nullptr, uint64_t{0}));
ASSERT_TRUE(!reader->KeyMayMatch("hello", nullptr, uint64_t{0}));
ASSERT_TRUE(reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/2000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
// Check second filter
ASSERT_TRUE(reader->KeyMayMatch("box", nullptr, 3100));
ASSERT_TRUE(!reader->KeyMayMatch("foo", nullptr, 3100));
ASSERT_TRUE(!reader->KeyMayMatch("bar", nullptr, 3100));
ASSERT_TRUE(!reader->KeyMayMatch("hello", nullptr, 3100));
ASSERT_TRUE(reader->KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
// Check third filter (empty)
ASSERT_TRUE(!reader->KeyMayMatch("foo", nullptr, 4100));
ASSERT_TRUE(!reader->KeyMayMatch("bar", nullptr, 4100));
ASSERT_TRUE(!reader->KeyMayMatch("box", nullptr, 4100));
ASSERT_TRUE(!reader->KeyMayMatch("hello", nullptr, 4100));
ASSERT_TRUE(!reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
// Check last filter
ASSERT_TRUE(reader->KeyMayMatch("box", nullptr, 9000));
ASSERT_TRUE(reader->KeyMayMatch("hello", nullptr, 9000));
ASSERT_TRUE(!reader->KeyMayMatch("foo", nullptr, 9000));
ASSERT_TRUE(!reader->KeyMayMatch("bar", nullptr, 9000));
ASSERT_TRUE(reader->KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
delete builder;
delete reader;

@ -178,6 +178,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader {
FilePrefetchBuffer* prefetch_buffer,
const ReadOptions& read_options,
GetContext* get_context,
BlockCacheLookupContext* lookup_context,
CachableEntry<Block>* index_block);
const BlockBasedTable* table() const { return table_; }
@ -211,6 +212,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader {
Status GetOrReadIndexBlock(const ReadOptions& read_options,
GetContext* get_context,
BlockCacheLookupContext* lookup_context,
CachableEntry<Block>* index_block) const;
size_t ApproximateIndexBlockMemoryUsage() const {
@ -228,6 +230,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader {
Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock(
const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
const ReadOptions& read_options, GetContext* get_context,
BlockCacheLookupContext* lookup_context,
CachableEntry<Block>* index_block) {
PERF_TIMER_GUARD(read_index_block_nanos);
@ -241,13 +244,14 @@ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock(
const Status s = table->RetrieveBlock(
prefetch_buffer, read_options, rep->footer.index_handle(),
UncompressionDict::GetEmptyDict(), index_block, BlockType::kIndex,
get_context);
get_context, lookup_context);
return s;
}
Status BlockBasedTable::IndexReaderCommon::GetOrReadIndexBlock(
const ReadOptions& read_options, GetContext* get_context,
BlockCacheLookupContext* lookup_context,
CachableEntry<Block>* index_block) const {
assert(index_block != nullptr);
@ -256,8 +260,8 @@ Status BlockBasedTable::IndexReaderCommon::GetOrReadIndexBlock(
return Status::OK();
}
return ReadIndexBlock(table_, nullptr /* prefetch_buffer */, read_options,
get_context, index_block);
return ReadIndexBlock(table_, /*prefetch_buffer=*/nullptr, read_options,
get_context, lookup_context, index_block);
}
// Index that allows binary search lookup in a two-level index structure.
@ -269,7 +273,8 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
// unmodified.
static Status Create(const BlockBasedTable* table,
FilePrefetchBuffer* prefetch_buffer, bool use_cache,
bool prefetch, bool pin, IndexReader** index_reader) {
bool prefetch, bool pin, IndexReader** index_reader,
BlockCacheLookupContext* lookup_context) {
assert(table != nullptr);
assert(table->get_rep());
assert(!pin || prefetch);
@ -277,8 +282,9 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
CachableEntry<Block> index_block;
if (prefetch || !use_cache) {
const Status s = ReadIndexBlock(table, prefetch_buffer, ReadOptions(),
nullptr /* get_context */, &index_block);
const Status s =
ReadIndexBlock(table, prefetch_buffer, ReadOptions(),
/*get_context=*/nullptr, lookup_context, &index_block);
if (!s.ok()) {
return s;
}
@ -296,10 +302,11 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
// return a two-level iterator: first level is on the partition index
InternalIteratorBase<BlockHandle>* NewIterator(
const ReadOptions& read_options, bool /* disable_prefix_seek */,
IndexBlockIter* iter, GetContext* get_context) override {
IndexBlockIter* iter, GetContext* get_context,
BlockCacheLookupContext* lookup_context) override {
CachableEntry<Block> index_block;
const Status s =
GetOrReadIndexBlock(read_options, get_context, &index_block);
const Status s = GetOrReadIndexBlock(read_options, get_context,
lookup_context, &index_block);
if (!s.ok()) {
if (iter != nullptr) {
iter->Invalidate(s);
@ -352,6 +359,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
void CacheDependencies(bool pin) override {
// Before read partitions, prefetch them to avoid lots of IOs
BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kPrefetch};
auto rep = table()->rep_;
IndexBlockIter biter;
BlockHandle handle;
@ -359,7 +367,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
CachableEntry<Block> index_block;
Status s = GetOrReadIndexBlock(ReadOptions(), nullptr /* get_context */,
&index_block);
&lookup_context, &index_block);
if (!s.ok()) {
ROCKS_LOG_WARN(rep->ioptions.info_log,
"Error retrieving top-level index block while trying to "
@ -408,7 +416,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
// filter blocks
s = table()->MaybeReadBlockAndLoadToCache(
prefetch_buffer.get(), ro, handle, UncompressionDict::GetEmptyDict(),
&block, BlockType::kIndex, nullptr /* get_context */);
&block, BlockType::kIndex, /*get_context=*/nullptr, &lookup_context);
assert(s.ok() || block.GetValue() == nullptr);
if (s.ok() && block.GetValue() != nullptr) {
@ -451,7 +459,8 @@ class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon {
// unmodified.
static Status Create(const BlockBasedTable* table,
FilePrefetchBuffer* prefetch_buffer, bool use_cache,
bool prefetch, bool pin, IndexReader** index_reader) {
bool prefetch, bool pin, IndexReader** index_reader,
BlockCacheLookupContext* lookup_context) {
assert(table != nullptr);
assert(table->get_rep());
assert(!pin || prefetch);
@ -459,8 +468,9 @@ class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon {
CachableEntry<Block> index_block;
if (prefetch || !use_cache) {
const Status s = ReadIndexBlock(table, prefetch_buffer, ReadOptions(),
nullptr /* get_context */, &index_block);
const Status s =
ReadIndexBlock(table, prefetch_buffer, ReadOptions(),
/*get_context=*/nullptr, lookup_context, &index_block);
if (!s.ok()) {
return s;
}
@ -477,10 +487,11 @@ class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon {
InternalIteratorBase<BlockHandle>* NewIterator(
const ReadOptions& read_options, bool /* disable_prefix_seek */,
IndexBlockIter* iter, GetContext* get_context) override {
IndexBlockIter* iter, GetContext* get_context,
BlockCacheLookupContext* lookup_context) override {
CachableEntry<Block> index_block;
const Status s =
GetOrReadIndexBlock(read_options, get_context, &index_block);
const Status s = GetOrReadIndexBlock(read_options, get_context,
lookup_context, &index_block);
if (!s.ok()) {
if (iter != nullptr) {
iter->Invalidate(s);
@ -526,7 +537,8 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon {
static Status Create(const BlockBasedTable* table,
FilePrefetchBuffer* prefetch_buffer,
InternalIterator* meta_index_iter, bool use_cache,
bool prefetch, bool pin, IndexReader** index_reader) {
bool prefetch, bool pin, IndexReader** index_reader,
BlockCacheLookupContext* lookup_context) {
assert(table != nullptr);
assert(index_reader != nullptr);
assert(!pin || prefetch);
@ -536,8 +548,9 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon {
CachableEntry<Block> index_block;
if (prefetch || !use_cache) {
const Status s = ReadIndexBlock(table, prefetch_buffer, ReadOptions(),
nullptr /* get_context */, &index_block);
const Status s =
ReadIndexBlock(table, prefetch_buffer, ReadOptions(),
/*get_context=*/nullptr, lookup_context, &index_block);
if (!s.ok()) {
return s;
}
@ -616,10 +629,11 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon {
InternalIteratorBase<BlockHandle>* NewIterator(
const ReadOptions& read_options, bool disable_prefix_seek,
IndexBlockIter* iter, GetContext* get_context) override {
IndexBlockIter* iter, GetContext* get_context,
BlockCacheLookupContext* lookup_context) override {
CachableEntry<Block> index_block;
const Status s =
GetOrReadIndexBlock(read_options, get_context, &index_block);
const Status s = GetOrReadIndexBlock(read_options, get_context,
lookup_context, &index_block);
if (!s.ok()) {
if (iter != nullptr) {
iter->Invalidate(s);
@ -1055,6 +1069,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
// Better not mutate rep_ after the creation. eg. internal_prefix_transform
// raw pointer will be used to create HashIndexReader, whose reset may
// access a dangling pointer.
BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kPrefetch};
Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options,
internal_comparator, skip_filters, level,
immortal_table);
@ -1095,13 +1110,13 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
return s;
}
s = new_table->ReadRangeDelBlock(prefetch_buffer.get(), meta_iter.get(),
internal_comparator);
internal_comparator, &lookup_context);
if (!s.ok()) {
return s;
}
s = new_table->PrefetchIndexAndFilterBlocks(
prefetch_buffer.get(), meta_iter.get(), new_table.get(), prefetch_all,
table_options, level);
table_options, level, &lookup_context);
if (s.ok()) {
// Update tail prefetch stats
@ -1304,7 +1319,8 @@ Status BlockBasedTable::ReadPropertiesBlock(
Status BlockBasedTable::ReadRangeDelBlock(
FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter,
const InternalKeyComparator& internal_comparator) {
const InternalKeyComparator& internal_comparator,
BlockCacheLookupContext* lookup_context) {
Status s;
bool found_range_del_block;
BlockHandle range_del_handle;
@ -1317,10 +1333,10 @@ Status BlockBasedTable::ReadRangeDelBlock(
} else if (found_range_del_block && !range_del_handle.IsNull()) {
ReadOptions read_options;
std::unique_ptr<InternalIterator> iter(NewDataBlockIterator<DataBlockIter>(
read_options, range_del_handle, nullptr /* input_iter */,
BlockType::kRangeDeletion, true /* key_includes_seq */,
true /* index_key_is_full */, nullptr /* get_context */, Status(),
prefetch_buffer));
read_options, range_del_handle,
/*input_iter=*/nullptr, BlockType::kRangeDeletion,
/*key_includes_seq=*/true, /*index_key_is_full=*/true,
/*get_context=*/nullptr, lookup_context, Status(), prefetch_buffer));
assert(iter != nullptr);
s = iter->status();
if (!s.ok()) {
@ -1370,7 +1386,8 @@ Status BlockBasedTable::ReadCompressionDictBlock(
Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter,
BlockBasedTable* new_table, bool prefetch_all,
const BlockBasedTableOptions& table_options, const int level) {
const BlockBasedTableOptions& table_options, const int level,
BlockCacheLookupContext* lookup_context) {
Status s;
// Find filter handle and filter type
@ -1440,7 +1457,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
IndexReader* index_reader = nullptr;
if (s.ok()) {
s = new_table->CreateIndexReader(prefetch_buffer, meta_iter, use_cache,
prefetch_index, pin_index, &index_reader);
prefetch_index, pin_index, &index_reader,
lookup_context);
if (s.ok()) {
assert(index_reader != nullptr);
rep_->index_reader.reset(index_reader);
@ -1467,7 +1485,9 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
if (s.ok() && prefetch_filter) {
// Hack: Call GetFilter() to implicitly add filter to the block_cache
auto filter_entry =
new_table->GetFilter(rep_->table_prefix_extractor.get());
new_table->GetFilter(rep_->table_prefix_extractor.get(),
/*prefetch_buffer=*/nullptr, /*no_io=*/false,
/*get_context=*/nullptr, lookup_context);
if (filter_entry.GetValue() != nullptr && prefetch_all) {
filter_entry.GetValue()->CacheDependencies(
pin_all, rep_->table_prefix_extractor.get());
@ -1653,8 +1673,7 @@ Status BlockBasedTable::GetDataBlockFromCache(
size_t charge = block_holder->ApproximateMemoryUsage();
Cache::Handle* cache_handle = nullptr;
s = block_cache->Insert(block_cache_key, block_holder.get(), charge,
&DeleteCachedEntry<Block>,
&cache_handle);
&DeleteCachedEntry<Block>, &cache_handle);
#ifndef NDEBUG
block_cache->TEST_mark_as_data_block(block_cache_key, charge);
#endif // NDEBUG
@ -1758,8 +1777,7 @@ Status BlockBasedTable::PutDataBlockToCache(
size_t charge = block_holder->ApproximateMemoryUsage();
Cache::Handle* cache_handle = nullptr;
s = block_cache->Insert(block_cache_key, block_holder.get(), charge,
&DeleteCachedEntry<Block>,
&cache_handle, priority);
&DeleteCachedEntry<Block>, &cache_handle, priority);
#ifndef NDEBUG
block_cache->TEST_mark_as_data_block(block_cache_key, charge);
#endif // NDEBUG
@ -1849,25 +1867,28 @@ FilterBlockReader* BlockBasedTable::ReadFilter(
CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
const SliceTransform* prefix_extractor, FilePrefetchBuffer* prefetch_buffer,
bool no_io, GetContext* get_context) const {
bool no_io, GetContext* get_context,
BlockCacheLookupContext* lookup_context) const {
const BlockHandle& filter_blk_handle = rep_->filter_handle;
const bool is_a_filter_partition = true;
return GetFilter(prefetch_buffer, filter_blk_handle, !is_a_filter_partition,
no_io, get_context, prefix_extractor);
no_io, get_context, lookup_context, prefix_extractor);
}
CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_blk_handle,
const bool is_a_filter_partition, bool no_io, GetContext* get_context,
BlockCacheLookupContext* /*lookup_context*/,
const SliceTransform* prefix_extractor) const {
// TODO(haoyu): Trace filter block access here.
// If cache_index_and_filter_blocks is false, filter should be pre-populated.
// We will return rep_->filter anyway. rep_->filter can be nullptr if filter
// read fails at Open() time. We don't want to reload again since it will
// most probably fail again.
if (!is_a_filter_partition &&
!rep_->table_options.cache_index_and_filter_blocks) {
return {rep_->filter.get(), nullptr /* cache */,
nullptr /* cache_handle */, false /* own_value */};
return {rep_->filter.get(), /*cache=*/nullptr, /*cache_handle=*/nullptr,
/*own_value=*/false};
}
Cache* block_cache = rep_->table_options.block_cache.get();
@ -1877,8 +1898,8 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
}
if (!is_a_filter_partition && rep_->filter_entry.IsCached()) {
return {rep_->filter_entry.GetValue(), nullptr /* cache */,
nullptr /* cache_handle */, false /* own_value */};
return {rep_->filter_entry.GetValue(), /*cache=*/nullptr,
/*cache_handle=*/nullptr, /*own_value=*/false};
}
PERF_TIMER_GUARD(read_filter_block_nanos);
@ -1920,12 +1941,13 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
}
return {filter, cache_handle ? block_cache : nullptr, cache_handle,
false /* own_value */};
/*own_value=*/false};
}
CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
FilePrefetchBuffer* prefetch_buffer, bool no_io,
GetContext* get_context) const {
FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context,
BlockCacheLookupContext* /*lookup_context*/) const {
// TODO(haoyu): Trace the access on the uncompression dictionary here.
if (!rep_->table_options.cache_index_and_filter_blocks) {
// block cache is either disabled or not used for meta-blocks. In either
// case, BlockBasedTableReader is the owner of the uncompression dictionary.
@ -1987,14 +2009,16 @@ CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
// differs from the one in mutable_cf_options and index type is HashBasedIndex
InternalIteratorBase<BlockHandle>* BlockBasedTable::NewIndexIterator(
const ReadOptions& read_options, bool disable_prefix_seek,
IndexBlockIter* input_iter, GetContext* get_context) const {
IndexBlockIter* input_iter, GetContext* get_context,
BlockCacheLookupContext* lookup_context) const {
assert(rep_ != nullptr);
assert(rep_->index_reader != nullptr);
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
return rep_->index_reader->NewIterator(read_options, disable_prefix_seek,
input_iter, get_context);
input_iter, get_context,
lookup_context);
}
// Convert an index iterator value (i.e., an encoded BlockHandle)
@ -2005,7 +2029,7 @@ template <typename TBlockIter>
TBlockIter* BlockBasedTable::NewDataBlockIterator(
const ReadOptions& ro, const BlockHandle& handle, TBlockIter* input_iter,
BlockType block_type, bool key_includes_seq, bool index_key_is_full,
GetContext* get_context, Status s,
GetContext* get_context, BlockCacheLookupContext* lookup_context, Status s,
FilePrefetchBuffer* prefetch_buffer) const {
PERF_TIMER_GUARD(new_table_block_iter_nanos);
@ -2017,7 +2041,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
const bool no_io = (ro.read_tier == kBlockCacheTier);
auto uncompression_dict_storage =
GetUncompressionDict(prefetch_buffer, no_io, get_context);
GetUncompressionDict(prefetch_buffer, no_io, get_context, lookup_context);
const UncompressionDict& uncompression_dict =
uncompression_dict_storage.GetValue() == nullptr
? UncompressionDict::GetEmptyDict()
@ -2025,7 +2049,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
CachableEntry<Block> block;
s = RetrieveBlock(prefetch_buffer, ro, handle, uncompression_dict, &block,
block_type, get_context);
block_type, get_context, lookup_context);
if (!s.ok()) {
assert(block.IsEmpty());
@ -2093,7 +2117,9 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<Block>* block_entry, BlockType block_type,
GetContext* get_context) const {
GetContext* get_context,
BlockCacheLookupContext* /*lookup_context*/) const {
// TODO(haoyu): Trace data/index/range deletion block access here.
assert(block_entry != nullptr);
const bool no_io = (ro.read_tier == kBlockCacheTier);
Cache* block_cache = rep_->table_options.block_cache.get();
@ -2169,7 +2195,7 @@ Status BlockBasedTable::RetrieveBlock(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<Block>* block_entry, BlockType block_type,
GetContext* get_context) const {
GetContext* get_context, BlockCacheLookupContext* lookup_context) const {
assert(block_entry);
assert(block_entry->IsEmpty());
@ -2180,7 +2206,7 @@ Status BlockBasedTable::RetrieveBlock(
block_type != BlockType::kIndex)) {
s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle,
uncompression_dict, block_entry,
block_type, get_context);
block_type, get_context, lookup_context);
if (!s.ok()) {
return s;
@ -2271,7 +2297,8 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator(
bool BlockBasedTable::PrefixMayMatch(
const Slice& internal_key, const ReadOptions& read_options,
const SliceTransform* options_prefix_extractor,
const bool need_upper_bound_check) const {
const bool need_upper_bound_check,
BlockCacheLookupContext* lookup_context) const {
if (!rep_->filter_policy) {
return true;
}
@ -2295,7 +2322,9 @@ bool BlockBasedTable::PrefixMayMatch(
Status s;
// First, try check with full filter
auto filter_entry = GetFilter(prefix_extractor);
auto filter_entry =
GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr, /*no_io=*/false,
/*get_context=*/nullptr, lookup_context);
FilterBlockReader* filter = filter_entry.GetValue();
bool filter_checked = true;
if (filter != nullptr) {
@ -2304,7 +2333,7 @@ bool BlockBasedTable::PrefixMayMatch(
may_match = filter->RangeMayExist(
read_options.iterate_upper_bound, user_key, prefix_extractor,
rep_->internal_comparator.user_comparator(), const_ikey_ptr,
&filter_checked, need_upper_bound_check);
&filter_checked, need_upper_bound_check, lookup_context);
} else {
// if prefix_extractor changed for block based filter, skip filter
if (need_upper_bound_check) {
@ -2323,9 +2352,10 @@ bool BlockBasedTable::PrefixMayMatch(
// Then, try find it within each block
// we already know prefix_extractor and prefix_extractor_name must match
// because `CheckPrefixMayMatch` first checks `check_filter_ == true`
std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter(
NewIndexIterator(no_io_read_options,
/* need_upper_bound_check */ false));
std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter(NewIndexIterator(
no_io_read_options,
/*need_upper_bound_check=*/false, /*input_iter=*/nullptr,
/*get_context=*/nullptr, lookup_context));
iiter->Seek(internal_prefix);
if (!iiter->Valid()) {
@ -2357,8 +2387,9 @@ bool BlockBasedTable::PrefixMayMatch(
// possibly contain the key. Thus, the corresponding data block
// is the only on could potentially contain the prefix.
BlockHandle handle = iiter->value();
may_match =
filter->PrefixMayMatch(prefix, prefix_extractor, handle.offset());
may_match = filter->PrefixMayMatch(
prefix, prefix_extractor, handle.offset(), /*no_io=*/false,
/*const_key_ptr=*/nullptr, lookup_context);
}
}
}
@ -2588,7 +2619,7 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
table_->NewDataBlockIterator<TBlockIter>(
read_options_, data_block_handle, &block_iter_, block_type_,
key_includes_seq_, index_key_is_full_,
/* get_context */ nullptr, s, prefetch_buffer_.get());
/*get_context=*/nullptr, &lookup_context_, s, prefetch_buffer_.get());
block_iter_points_to_real_block_ = true;
if (read_options_.iterate_upper_bound != nullptr) {
data_block_within_upper_bound_ =
@ -2682,6 +2713,9 @@ void BlockBasedTableIterator<TBlockIter, TValue>::CheckOutOfBound() {
InternalIterator* BlockBasedTable::NewIterator(
const ReadOptions& read_options, const SliceTransform* prefix_extractor,
Arena* arena, bool skip_filters, bool for_compaction) {
BlockCacheLookupContext lookup_context{
for_compaction ? BlockCacheLookupCaller::kCompaction
: BlockCacheLookupCaller::kUserIterator};
bool need_upper_bound_check =
PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor);
if (arena == nullptr) {
@ -2690,7 +2724,8 @@ InternalIterator* BlockBasedTable::NewIterator(
NewIndexIterator(
read_options,
need_upper_bound_check &&
rep_->index_type == BlockBasedTableOptions::kHashSearch),
rep_->index_type == BlockBasedTableOptions::kHashSearch,
/*input_iter=*/nullptr, /*get_context=*/nullptr, &lookup_context),
!skip_filters && !read_options.total_order_seek &&
prefix_extractor != nullptr,
need_upper_bound_check, prefix_extractor, BlockType::kData,
@ -2700,7 +2735,9 @@ InternalIterator* BlockBasedTable::NewIterator(
arena->AllocateAligned(sizeof(BlockBasedTableIterator<DataBlockIter>));
return new (mem) BlockBasedTableIterator<DataBlockIter>(
this, read_options, rep_->internal_comparator,
NewIndexIterator(read_options, need_upper_bound_check),
NewIndexIterator(read_options, need_upper_bound_check,
/*input_iter=*/nullptr, /*get_context=*/nullptr,
&lookup_context),
!skip_filters && !read_options.total_order_seek &&
prefix_extractor != nullptr,
need_upper_bound_check, prefix_extractor, BlockType::kData,
@ -2724,7 +2761,8 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
bool BlockBasedTable::FullFilterKeyMayMatch(
const ReadOptions& read_options, FilterBlockReader* filter,
const Slice& internal_key, const bool no_io,
const SliceTransform* prefix_extractor) const {
const SliceTransform* prefix_extractor,
BlockCacheLookupContext* lookup_context) const {
if (filter == nullptr || filter->IsBlockBased()) {
return true;
}
@ -2735,15 +2773,16 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
size_t ts_sz =
rep_->internal_comparator.user_comparator()->timestamp_size();
Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz);
may_match = filter->KeyMayMatch(user_key_without_ts, prefix_extractor,
kNotValid, no_io, const_ikey_ptr);
may_match =
filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid,
no_io, const_ikey_ptr, lookup_context);
} else if (!read_options.total_order_seek && prefix_extractor &&
rep_->table_properties->prefix_extractor_name.compare(
prefix_extractor->Name()) == 0 &&
prefix_extractor->InDomain(user_key) &&
!filter->PrefixMayMatch(prefix_extractor->Transform(user_key),
prefix_extractor, kNotValid, false,
const_ikey_ptr)) {
const_ikey_ptr, lookup_context)) {
may_match = false;
}
if (may_match) {
@ -2756,12 +2795,14 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
void BlockBasedTable::FullFilterKeysMayMatch(
const ReadOptions& read_options, FilterBlockReader* filter,
MultiGetRange* range, const bool no_io,
const SliceTransform* prefix_extractor) const {
const SliceTransform* prefix_extractor,
BlockCacheLookupContext* lookup_context) const {
if (filter == nullptr || filter->IsBlockBased()) {
return;
}
if (filter->whole_key_filtering()) {
filter->KeysMayMatch(range, prefix_extractor, kNotValid, no_io);
filter->KeysMayMatch(range, prefix_extractor, kNotValid, no_io,
lookup_context);
} else if (!read_options.total_order_seek && prefix_extractor &&
rep_->table_properties->prefix_extractor_name.compare(
prefix_extractor->Name()) == 0) {
@ -2772,7 +2813,8 @@ void BlockBasedTable::FullFilterKeysMayMatch(
range->SkipKey(iter);
}
}
filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false);
filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false,
lookup_context);
}
}
@ -2786,18 +2828,19 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
CachableEntry<FilterBlockReader> filter_entry;
bool may_match;
FilterBlockReader* filter = nullptr;
BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kUserGet};
{
if (!skip_filters) {
filter_entry =
GetFilter(prefix_extractor, /*prefetch_buffer*/ nullptr,
read_options.read_tier == kBlockCacheTier, get_context);
filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr,
read_options.read_tier == kBlockCacheTier,
get_context, &lookup_context);
}
filter = filter_entry.GetValue();
// First check the full filter
// If full filter not useful, Then go into each block
may_match = FullFilterKeyMayMatch(read_options, filter, key, no_io,
prefix_extractor);
prefix_extractor, &lookup_context);
}
if (!may_match) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
@ -2811,8 +2854,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
need_upper_bound_check = PrefixExtractorChanged(
rep_->table_properties.get(), prefix_extractor);
}
auto iiter = NewIndexIterator(read_options, need_upper_bound_check,
&iiter_on_stack, get_context);
auto iiter =
NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
get_context, &lookup_context);
std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter_unique_ptr;
if (iiter != &iiter_on_stack) {
iiter_unique_ptr.reset(iiter);
@ -2828,7 +2872,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
bool not_exist_in_filter =
filter != nullptr && filter->IsBlockBased() == true &&
!filter->KeyMayMatch(ExtractUserKeyAndStripTimestamp(key, ts_sz),
prefix_extractor, handle.offset(), no_io);
prefix_extractor, handle.offset(), no_io,
/*const_ikey_ptr=*/nullptr, &lookup_context);
if (not_exist_in_filter) {
// Not found
@ -2841,8 +2886,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
DataBlockIter biter;
NewDataBlockIterator<DataBlockIter>(
read_options, iiter->value(), &biter, BlockType::kData,
true /* key_includes_seq */, true /* index_key_is_full */,
get_context);
/*key_includes_seq=*/true,
/*index_key_is_full=*/true, get_context, &lookup_context,
/*s=*/Status(), /*prefetch_buffer=*/nullptr);
if (read_options.read_tier == kBlockCacheTier &&
biter.status().IsIncomplete()) {
@ -2907,6 +2953,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
const MultiGetRange* mget_range,
const SliceTransform* prefix_extractor,
bool skip_filters) {
BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kUserMGet};
const bool no_io = read_options.read_tier == kBlockCacheTier;
CachableEntry<FilterBlockReader> filter_entry;
FilterBlockReader* filter = nullptr;
@ -2915,16 +2962,16 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
{
if (!skip_filters) {
// TODO: Figure out where the stats should go
filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer*/ nullptr,
filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr,
read_options.read_tier == kBlockCacheTier,
nullptr /*get_context*/);
/*get_context=*/nullptr, &lookup_context);
}
filter = filter_entry.GetValue();
// First check the full filter
// If full filter not useful, Then go into each block
FullFilterKeysMayMatch(read_options, filter, &sst_file_range, no_io,
prefix_extractor);
prefix_extractor, &lookup_context);
}
if (skip_filters || !sst_file_range.empty()) {
IndexBlockIter iiter_on_stack;
@ -2937,7 +2984,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
}
auto iiter =
NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
sst_file_range.begin()->get_context);
sst_file_range.begin()->get_context, &lookup_context);
std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter_unique_ptr;
if (iiter != &iiter_on_stack) {
iiter_unique_ptr.reset(iiter);
@ -2958,11 +3005,12 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
offset = iiter->value().offset();
biter.Invalidate(Status::OK());
NewDataBlockIterator<DataBlockIter>(
read_options, iiter->value(), &biter, BlockType::kData, false,
true /* key_includes_seq */, get_context);
read_options, iiter->value(), &biter, BlockType::kData,
/*key_includes_seq=*/false,
/*index_key_is_full=*/true, get_context, &lookup_context,
Status(), nullptr);
reusing_block = false;
}
if (read_options.read_tier == kBlockCacheTier &&
biter.status().IsIncomplete()) {
// couldn't get block from block_cache
@ -3040,9 +3088,11 @@ Status BlockBasedTable::Prefetch(const Slice* const begin,
if (begin && end && comparator.Compare(*begin, *end) > 0) {
return Status::InvalidArgument(*begin, *end);
}
BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kPrefetch};
IndexBlockIter iiter_on_stack;
auto iiter = NewIndexIterator(ReadOptions(), false, &iiter_on_stack);
auto iiter = NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
&iiter_on_stack, /*get_context=*/nullptr,
&lookup_context);
std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter_unique_ptr;
if (iiter != &iiter_on_stack) {
iiter_unique_ptr =
@ -3077,7 +3127,12 @@ Status BlockBasedTable::Prefetch(const Slice* const begin,
// Load the block specified by the block_handle into the block cache
DataBlockIter biter;
NewDataBlockIterator<DataBlockIter>(ReadOptions(), block_handle, &biter);
NewDataBlockIterator<DataBlockIter>(
ReadOptions(), block_handle, &biter, /*block_type=*/BlockType::kData,
/*key_includes_seq=*/true, /*index_key_is_full=*/true,
/*get_context=*/nullptr, &lookup_context, Status(),
/*prefetch_buffer=*/nullptr);
if (!biter.status().ok()) {
// there was an unexpected error while pre-fetching
@ -3089,6 +3144,8 @@ Status BlockBasedTable::Prefetch(const Slice* const begin,
}
Status BlockBasedTable::VerifyChecksum() {
// TODO(haoyu): This function is called by external sst ingestion and the
// verify checksum public API. We don't log its block cache accesses for now.
Status s;
// Check Meta blocks
std::unique_ptr<Block> meta;
@ -3104,8 +3161,9 @@ Status BlockBasedTable::VerifyChecksum() {
}
// Check Data blocks
IndexBlockIter iiter_on_stack;
InternalIteratorBase<BlockHandle>* iiter =
NewIndexIterator(ReadOptions(), false, &iiter_on_stack);
InternalIteratorBase<BlockHandle>* iiter = NewIndexIterator(
ReadOptions(), /*need_upper_bound_check=*/false, &iiter_on_stack,
/*get_context=*/nullptr, /*lookup_context=*/nullptr);
std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter_unique_ptr;
if (iiter != &iiter_on_stack) {
iiter_unique_ptr =
@ -3199,8 +3257,9 @@ bool BlockBasedTable::TEST_BlockInCache(const BlockHandle& handle) const {
bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
const Slice& key) {
std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter(
NewIndexIterator(options));
std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter(NewIndexIterator(
options, /*need_upper_bound_check=*/false, /*input_iter=*/nullptr,
/*get_context=*/nullptr, /*lookup_context=*/nullptr));
iiter->Seek(key);
assert(iiter->Valid());
@ -3234,7 +3293,8 @@ BlockBasedTableOptions::IndexType BlockBasedTable::UpdateIndexType() {
Status BlockBasedTable::CreateIndexReader(
FilePrefetchBuffer* prefetch_buffer,
InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch,
bool pin, IndexReader** index_reader) {
bool pin, IndexReader** index_reader,
BlockCacheLookupContext* lookup_context) {
auto index_type_on_file = rep_->index_type;
// kHashSearch requires non-empty prefix_extractor but bypass checking
@ -3246,11 +3306,13 @@ Status BlockBasedTable::CreateIndexReader(
switch (index_type_on_file) {
case BlockBasedTableOptions::kTwoLevelIndexSearch: {
return PartitionIndexReader::Create(this, prefetch_buffer, use_cache,
prefetch, pin, index_reader);
prefetch, pin, index_reader,
lookup_context);
}
case BlockBasedTableOptions::kBinarySearch: {
return BinarySearchIndexReader::Create(this, prefetch_buffer, use_cache,
prefetch, pin, index_reader);
prefetch, pin, index_reader,
lookup_context);
}
case BlockBasedTableOptions::kHashSearch: {
std::unique_ptr<Block> meta_guard;
@ -3264,14 +3326,16 @@ Status BlockBasedTable::CreateIndexReader(
ROCKS_LOG_WARN(rep_->ioptions.info_log,
"Unable to read the metaindex block."
" Fall back to binary search index.");
return BinarySearchIndexReader::Create(
this, prefetch_buffer, use_cache, prefetch, pin, index_reader);
return BinarySearchIndexReader::Create(this, prefetch_buffer,
use_cache, prefetch, pin,
index_reader, lookup_context);
}
meta_index_iter = meta_iter_guard.get();
}
return HashIndexReader::Create(this, prefetch_buffer, meta_index_iter,
use_cache, prefetch, pin, index_reader);
use_cache, prefetch, pin, index_reader,
lookup_context);
}
default: {
std::string error_message =
@ -3281,9 +3345,15 @@ Status BlockBasedTable::CreateIndexReader(
}
}
uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) {
uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key,
bool for_compaction) {
BlockCacheLookupContext context(
for_compaction ? BlockCacheLookupCaller::kCompaction
: BlockCacheLookupCaller::kUserApproximateSize);
std::unique_ptr<InternalIteratorBase<BlockHandle>> index_iter(
NewIndexIterator(ReadOptions()));
NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
/*input_iter=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/&context));
index_iter->Seek(key);
uint64_t result;
@ -3319,7 +3389,9 @@ bool BlockBasedTable::TEST_IndexBlockInCache() const {
Status BlockBasedTable::GetKVPairsFromDataBlocks(
std::vector<KVPairBlock>* kv_pair_blocks) {
std::unique_ptr<InternalIteratorBase<BlockHandle>> blockhandles_iter(
NewIndexIterator(ReadOptions()));
NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
/*input_iter=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
Status s = blockhandles_iter->status();
if (!s.ok()) {
@ -3337,7 +3409,11 @@ Status BlockBasedTable::GetKVPairsFromDataBlocks(
std::unique_ptr<InternalIterator> datablock_iter;
datablock_iter.reset(NewDataBlockIterator<DataBlockIter>(
ReadOptions(), blockhandles_iter->value()));
ReadOptions(), blockhandles_iter->value(), /*input_iter=*/nullptr,
/*block_type=*/BlockType::kData,
/*key_includes_seq=*/true, /*index_key_is_full=*/true,
/*get_context=*/nullptr, /*lookup_context=*/nullptr, Status(),
/*prefetch_buffer=*/nullptr));
s = datablock_iter->status();
if (!s.ok()) {
@ -3545,7 +3621,9 @@ Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) {
"Index Details:\n"
"--------------------------------------\n");
std::unique_ptr<InternalIteratorBase<BlockHandle>> blockhandles_iter(
NewIndexIterator(ReadOptions()));
NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
/*input_iter=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
Status s = blockhandles_iter->status();
if (!s.ok()) {
out_file->Append("Can not read Index Block \n\n");
@ -3594,7 +3672,9 @@ Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) {
Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) {
std::unique_ptr<InternalIteratorBase<BlockHandle>> blockhandles_iter(
NewIndexIterator(ReadOptions()));
NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
/*input_iter=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
Status s = blockhandles_iter->status();
if (!s.ok()) {
out_file->Append("Can not read Index Block \n\n");
@ -3628,7 +3708,11 @@ Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) {
std::unique_ptr<InternalIterator> datablock_iter;
datablock_iter.reset(NewDataBlockIterator<DataBlockIter>(
ReadOptions(), blockhandles_iter->value()));
ReadOptions(), blockhandles_iter->value(), /*input_iter=*/nullptr,
/*block_type=*/BlockType::kData,
/*key_includes_seq=*/true, /*index_key_is_full=*/true,
/*get_context=*/nullptr, /*lookup_context=*/nullptr, Status(),
/*prefetch_buffer=*/nullptr));
s = datablock_iter->status();
if (!s.ok()) {

@ -113,17 +113,22 @@ class BlockBasedTable : public TableReader {
bool PrefixMayMatch(const Slice& internal_key,
const ReadOptions& read_options,
const SliceTransform* options_prefix_extractor,
const bool need_upper_bound_check) const;
const bool need_upper_bound_check,
BlockCacheLookupContext* lookup_context) const;
// Returns a new iterator over the table contents.
// The result of NewIterator() is initially invalid (caller must
// call one of the Seek methods on the iterator before using it).
// @param skip_filters Disables loading/accessing the filter block
InternalIterator* NewIterator(const ReadOptions&,
const SliceTransform* prefix_extractor,
Arena* arena = nullptr,
bool skip_filters = false,
bool for_compaction = false) override;
InternalIterator* NewIterator(
const ReadOptions&, const SliceTransform* prefix_extractor,
Arena* arena = nullptr, bool skip_filters = false,
// TODO(haoyu): 1. External SST ingestion sets for_compaction to false. 2.
// Compaction also sets it to false when paranoid_file_checks is true,
// i.e., it will populate the block cache with blocks in the new SST
// files. For now we treat both cases as a user-created iterator; we
// should differentiate the callers.
bool for_compaction = false) override;
FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
const ReadOptions& read_options) override;
@ -149,7 +154,7 @@ class BlockBasedTable : public TableReader {
// bytes, and so includes effects like compression of the underlying data.
// E.g., the approximate offset of the last key in the table will
// be close to the file length.
uint64_t ApproximateOffsetOf(const Slice& key) override;
uint64_t ApproximateOffsetOf(const Slice& key, bool for_compaction) override;
bool TEST_BlockInCache(const BlockHandle& handle) const;
@ -193,7 +198,8 @@ class BlockBasedTable : public TableReader {
// returned object.
virtual InternalIteratorBase<BlockHandle>* NewIterator(
const ReadOptions& read_options, bool disable_prefix_seek,
IndexBlockIter* iter, GetContext* get_context) = 0;
IndexBlockIter* iter, GetContext* get_context,
BlockCacheLookupContext* lookup_context) = 0;
// Report an approximation of how much memory has been used other than
// memory that was allocated in block cache.
@ -222,10 +228,10 @@ class BlockBasedTable : public TableReader {
template <typename TBlockIter>
TBlockIter* NewDataBlockIterator(
const ReadOptions& ro, const BlockHandle& block_handle,
TBlockIter* input_iter = nullptr, BlockType block_type = BlockType::kData,
bool key_includes_seq = true, bool index_key_is_full = true,
GetContext* get_context = nullptr, Status s = Status(),
FilePrefetchBuffer* prefetch_buffer = nullptr) const;
TBlockIter* input_iter, BlockType block_type, bool key_includes_seq,
bool index_key_is_full, GetContext* get_context,
BlockCacheLookupContext* lookup_context, Status s,
FilePrefetchBuffer* prefetch_buffer) const;
class PartitionedIndexIteratorState;
@ -262,7 +268,7 @@ class BlockBasedTable : public TableReader {
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<Block>* block_entry, BlockType block_type,
GetContext* get_context = nullptr) const;
GetContext* get_context, BlockCacheLookupContext* lookup_context) const;
// Similar to the above, with one crucial difference: it will retrieve the
// block from the file even if there are no caches configured (assuming the
@ -271,23 +277,25 @@ class BlockBasedTable : public TableReader {
const ReadOptions& ro, const BlockHandle& handle,
const UncompressionDict& uncompression_dict,
CachableEntry<Block>* block_entry, BlockType block_type,
GetContext* get_context) const;
GetContext* get_context,
BlockCacheLookupContext* lookup_context) const;
// For the following two functions:
// if `no_io == true`, we will not try to read filter/index from sst file
// were they not present in cache yet.
CachableEntry<FilterBlockReader> GetFilter(
const SliceTransform* prefix_extractor = nullptr,
FilePrefetchBuffer* prefetch_buffer = nullptr, bool no_io = false,
GetContext* get_context = nullptr) const;
const SliceTransform* prefix_extractor,
FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context,
BlockCacheLookupContext* lookup_context) const;
virtual CachableEntry<FilterBlockReader> GetFilter(
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_blk_handle,
const bool is_a_filter_partition, bool no_io, GetContext* get_context,
const SliceTransform* prefix_extractor = nullptr) const;
BlockCacheLookupContext* lookup_context,
const SliceTransform* prefix_extractor) const;
CachableEntry<UncompressionDict> GetUncompressionDict(
FilePrefetchBuffer* prefetch_buffer, bool no_io,
GetContext* get_context) const;
FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context,
BlockCacheLookupContext* lookup_context) const;
// Get the iterator from the index reader.
// If input_iter is not set, return new Iterator
@ -300,9 +308,9 @@ class BlockBasedTable : public TableReader {
// 3. We disallowed any io to be performed, that is, read_options ==
// kBlockCacheTier
InternalIteratorBase<BlockHandle>* NewIndexIterator(
const ReadOptions& read_options, bool need_upper_bound_check = false,
IndexBlockIter* input_iter = nullptr,
GetContext* get_context = nullptr) const;
const ReadOptions& read_options, bool need_upper_bound_check,
IndexBlockIter* input_iter, GetContext* get_context,
BlockCacheLookupContext* lookup_context) const;
// Read block cache from block caches (if set): block_cache and
// block_cache_compressed.
@ -352,17 +360,20 @@ class BlockBasedTable : public TableReader {
Status CreateIndexReader(FilePrefetchBuffer* prefetch_buffer,
InternalIterator* preloaded_meta_index_iter,
bool use_cache, bool prefetch, bool pin,
IndexReader** index_reader);
IndexReader** index_reader,
BlockCacheLookupContext* lookup_context);
bool FullFilterKeyMayMatch(
const ReadOptions& read_options, FilterBlockReader* filter,
const Slice& user_key, const bool no_io,
const SliceTransform* prefix_extractor = nullptr) const;
bool FullFilterKeyMayMatch(const ReadOptions& read_options,
FilterBlockReader* filter, const Slice& user_key,
const bool no_io,
const SliceTransform* prefix_extractor,
BlockCacheLookupContext* lookup_context) const;
void FullFilterKeysMayMatch(
const ReadOptions& read_options, FilterBlockReader* filter,
MultiGetRange* range, const bool no_io,
const SliceTransform* prefix_extractor = nullptr) const;
void FullFilterKeysMayMatch(const ReadOptions& read_options,
FilterBlockReader* filter, MultiGetRange* range,
const bool no_io,
const SliceTransform* prefix_extractor,
BlockCacheLookupContext* lookup_context) const;
static Status PrefetchTail(
RandomAccessFileReader* file, uint64_t file_size,
@ -380,14 +391,16 @@ class BlockBasedTable : public TableReader {
const SequenceNumber largest_seqno);
Status ReadRangeDelBlock(FilePrefetchBuffer* prefetch_buffer,
InternalIterator* meta_iter,
const InternalKeyComparator& internal_comparator);
const InternalKeyComparator& internal_comparator,
BlockCacheLookupContext* lookup_context);
Status ReadCompressionDictBlock(
FilePrefetchBuffer* prefetch_buffer,
std::unique_ptr<const BlockContents>* compression_dict_block) const;
Status PrefetchIndexAndFilterBlocks(
FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter,
BlockBasedTable* new_table, bool prefetch_all,
const BlockBasedTableOptions& table_options, const int level);
const BlockBasedTableOptions& table_options, const int level,
BlockCacheLookupContext* lookup_context);
Status VerifyChecksumInMetaBlocks(InternalIteratorBase<Slice>* index_iter);
Status VerifyChecksumInBlocks(InternalIteratorBase<BlockHandle>* index_iter);
@ -583,7 +596,10 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
block_type_(block_type),
key_includes_seq_(key_includes_seq),
index_key_is_full_(index_key_is_full),
for_compaction_(for_compaction) {}
for_compaction_(for_compaction),
lookup_context_(for_compaction
? BlockCacheLookupCaller::kCompaction
: BlockCacheLookupCaller::kUserIterator) {}
~BlockBasedTableIterator() { delete index_iter_; }
@ -644,7 +660,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
bool CheckPrefixMayMatch(const Slice& ikey) {
if (check_filter_ &&
!table_->PrefixMayMatch(ikey, read_options_, prefix_extractor_,
need_upper_bound_check_)) {
need_upper_bound_check_, &lookup_context_)) {
// TODO remember the iterator is invalidated because of prefix
// match. This can avoid the upper level file iterator to falsely
// believe the position is the end of the SST file and move to
@ -702,6 +718,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
// If this iterator is created for compaction
bool for_compaction_;
BlockHandle prev_index_value_;
BlockCacheLookupContext lookup_context_;
// All the below fields control iterator readahead
static const size_t kInitAutoReadaheadSize = 8 * 1024;

@ -30,6 +30,7 @@
#include "rocksdb/table.h"
#include "table/format.h"
#include "table/multiget_context.h"
#include "trace_replay/block_cache_tracer.h"
#include "util/hash.h"
namespace rocksdb {
@ -99,18 +100,19 @@ class FilterBlockReader {
*/
virtual bool KeyMayMatch(const Slice& key,
const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid,
const bool no_io = false,
const Slice* const const_ikey_ptr = nullptr) = 0;
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr,
BlockCacheLookupContext* context) = 0;
virtual void KeysMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid,
const bool no_io = false) {
uint64_t block_offset, const bool no_io,
BlockCacheLookupContext* context) {
for (auto iter = range->begin(); iter != range->end(); ++iter) {
const Slice ukey = iter->ukey;
const Slice ikey = iter->ikey;
if (!KeyMayMatch(ukey, prefix_extractor, block_offset, no_io, &ikey)) {
if (!KeyMayMatch(ukey, prefix_extractor, block_offset, no_io, &ikey,
context)) {
range->SkipKey(iter);
}
}
@ -121,19 +123,19 @@ class FilterBlockReader {
*/
virtual bool PrefixMayMatch(const Slice& prefix,
const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid,
const bool no_io = false,
const Slice* const const_ikey_ptr = nullptr) = 0;
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr,
BlockCacheLookupContext* context) = 0;
virtual void PrefixesMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid,
const bool no_io = false) {
uint64_t block_offset, const bool no_io,
BlockCacheLookupContext* context) {
for (auto iter = range->begin(); iter != range->end(); ++iter) {
const Slice ukey = iter->ukey;
const Slice ikey = iter->ikey;
if (!KeyMayMatch(prefix_extractor->Transform(ukey), prefix_extractor,
block_offset, no_io, &ikey)) {
block_offset, no_io, &ikey, context)) {
range->SkipKey(iter);
}
}
@ -156,13 +158,13 @@ class FilterBlockReader {
virtual bool RangeMayExist(
const Slice* /*iterate_upper_bound*/, const Slice& user_key,
const SliceTransform* prefix_extractor,
const Comparator* /*comparator*/, const Slice* const const_ikey_ptr,
bool* filter_checked, bool /*need_upper_bound_check*/) {
const SliceTransform* prefix_extractor, const Comparator* /*comparator*/,
const Slice* const const_ikey_ptr, bool* filter_checked,
bool /*need_upper_bound_check*/, BlockCacheLookupContext* context) {
*filter_checked = true;
Slice prefix = prefix_extractor->Transform(user_key);
return PrefixMayMatch(prefix, prefix_extractor, kNotValid, false,
const_ikey_ptr);
const_ikey_ptr, context);
}
protected:

@ -124,7 +124,8 @@ FullFilterBlockReader::FullFilterBlockReader(
bool FullFilterBlockReader::KeyMayMatch(
const Slice& key, const SliceTransform* /*prefix_extractor*/,
uint64_t block_offset, const bool /*no_io*/,
const Slice* const /*const_ikey_ptr*/) {
const Slice* const /*const_ikey_ptr*/,
BlockCacheLookupContext* /*context*/) {
#ifdef NDEBUG
(void)block_offset;
#endif
@ -138,7 +139,8 @@ bool FullFilterBlockReader::KeyMayMatch(
bool FullFilterBlockReader::PrefixMayMatch(
const Slice& prefix, const SliceTransform* /* prefix_extractor */,
uint64_t block_offset, const bool /*no_io*/,
const Slice* const /*const_ikey_ptr*/) {
const Slice* const /*const_ikey_ptr*/,
BlockCacheLookupContext* /*context*/) {
#ifdef NDEBUG
(void)block_offset;
#endif
@ -161,7 +163,8 @@ bool FullFilterBlockReader::MayMatch(const Slice& entry) {
void FullFilterBlockReader::KeysMayMatch(
MultiGetRange* range, const SliceTransform* /*prefix_extractor*/,
uint64_t block_offset, const bool /*no_io*/) {
uint64_t block_offset, const bool /*no_io*/,
BlockCacheLookupContext* /*context*/) {
#ifdef NDEBUG
(void)range;
(void)block_offset;
@ -177,7 +180,8 @@ void FullFilterBlockReader::KeysMayMatch(
void FullFilterBlockReader::PrefixesMayMatch(
MultiGetRange* range, const SliceTransform* /* prefix_extractor */,
uint64_t block_offset, const bool /*no_io*/) {
uint64_t block_offset, const bool /*no_io*/,
BlockCacheLookupContext* /*context*/) {
#ifdef NDEBUG
(void)range;
(void)block_offset;
@ -224,10 +228,11 @@ size_t FullFilterBlockReader::ApproximateMemoryUsage() const {
return usage;
}
bool FullFilterBlockReader::RangeMayExist(const Slice* iterate_upper_bound,
const Slice& user_key, const SliceTransform* prefix_extractor,
const Comparator* comparator, const Slice* const const_ikey_ptr,
bool* filter_checked, bool need_upper_bound_check) {
bool FullFilterBlockReader::RangeMayExist(
const Slice* iterate_upper_bound, const Slice& user_key,
const SliceTransform* prefix_extractor, const Comparator* comparator,
const Slice* const const_ikey_ptr, bool* filter_checked,
bool need_upper_bound_check, BlockCacheLookupContext* context) {
if (!prefix_extractor || !prefix_extractor->InDomain(user_key)) {
*filter_checked = false;
return true;
@ -240,7 +245,7 @@ bool FullFilterBlockReader::RangeMayExist(const Slice* iterate_upper_bound,
} else {
*filter_checked = true;
return PrefixMayMatch(prefix, prefix_extractor, kNotValid, false,
const_ikey_ptr);
const_ikey_ptr, context);
}
}

@ -95,35 +95,38 @@ class FullFilterBlockReader : public FilterBlockReader {
// bits_reader is created in filter_policy; it should be passed in here
// directly and be deleted here.
~FullFilterBlockReader() {}
~FullFilterBlockReader() override {}
bool IsBlockBased() override { return false; }
bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr,
BlockCacheLookupContext* context) override;
bool PrefixMayMatch(const Slice& prefix,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr,
BlockCacheLookupContext* context) override;
void KeysMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
BlockCacheLookupContext* context) override;
void PrefixesMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
BlockCacheLookupContext* context) override;
size_t ApproximateMemoryUsage() const override;
bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key,
const SliceTransform* prefix_extractor,
const Comparator* comparator,
const Slice* const const_ikey_ptr, bool* filter_checked,
bool need_upper_bound_check,
BlockCacheLookupContext* context) override;
virtual bool IsBlockBased() override { return false; }
virtual bool KeyMayMatch(
const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid, const bool no_io = false,
const Slice* const const_ikey_ptr = nullptr) override;
virtual bool PrefixMayMatch(
const Slice& prefix, const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid, const bool no_io = false,
const Slice* const const_ikey_ptr = nullptr) override;
virtual void KeysMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid,
const bool no_io = false) override;
virtual void PrefixesMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid,
const bool no_io = false) override;
virtual size_t ApproximateMemoryUsage() const override;
virtual bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key,
const SliceTransform* prefix_extractor,
const Comparator* comparator,
const Slice* const const_ikey_ptr, bool* filter_checked,
bool need_upper_bound_check) override;
private:
const SliceTransform* prefix_extractor_;
Slice contents_;

@ -112,7 +112,9 @@ TEST_F(PluginFullFilterBlockTest, PluginEmptyBuilder) {
nullptr, true, block,
table_options_.filter_policy->GetFilterBitsReader(block), nullptr);
// Keep the same semantics as the block-based filter
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
}
TEST_F(PluginFullFilterBlockTest, PluginSingleChunk) {
@ -127,13 +129,27 @@ TEST_F(PluginFullFilterBlockTest, PluginSingleChunk) {
FullFilterBlockReader reader(
nullptr, true, block,
table_options_.filter_policy->GetFilterBitsReader(block), nullptr);
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr));
ASSERT_TRUE(reader.KeyMayMatch("bar", nullptr));
ASSERT_TRUE(reader.KeyMayMatch("box", nullptr));
ASSERT_TRUE(reader.KeyMayMatch("hello", nullptr));
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr));
ASSERT_TRUE(!reader.KeyMayMatch("missing", nullptr));
ASSERT_TRUE(!reader.KeyMayMatch("other", nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
}
class FullFilterBlockTest : public testing::Test {
@ -157,7 +173,9 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) {
nullptr, true, block,
table_options_.filter_policy->GetFilterBitsReader(block), nullptr);
// Keep the same semantics as the block-based filter
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
}
TEST_F(FullFilterBlockTest, DuplicateEntries) {
@ -207,13 +225,27 @@ TEST_F(FullFilterBlockTest, SingleChunk) {
FullFilterBlockReader reader(
nullptr, true, block,
table_options_.filter_policy->GetFilterBitsReader(block), nullptr);
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr));
ASSERT_TRUE(reader.KeyMayMatch("bar", nullptr));
ASSERT_TRUE(reader.KeyMayMatch("box", nullptr));
ASSERT_TRUE(reader.KeyMayMatch("hello", nullptr));
ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr));
ASSERT_TRUE(!reader.KeyMayMatch("missing", nullptr));
ASSERT_TRUE(!reader.KeyMayMatch("other", nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr));
}
} // namespace rocksdb

@ -162,8 +162,8 @@ PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
bool PartitionedFilterBlockReader::KeyMayMatch(
const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr) {
uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr,
BlockCacheLookupContext* context) {
assert(const_ikey_ptr != nullptr);
assert(block_offset == kNotValid);
if (!whole_key_filtering_) {
@ -177,19 +177,20 @@ bool PartitionedFilterBlockReader::KeyMayMatch(
return false;
}
auto filter_partition =
GetFilterPartition(nullptr /* prefetch_buffer */, filter_handle, no_io,
prefix_extractor);
GetFilterPartition(/*prefetch_buffer=*/nullptr, filter_handle, no_io,
prefix_extractor, context);
if (UNLIKELY(!filter_partition.GetValue())) {
return true;
}
return filter_partition.GetValue()->KeyMayMatch(key, prefix_extractor,
block_offset, no_io);
return filter_partition.GetValue()->KeyMayMatch(
key, prefix_extractor, block_offset, no_io, /*const_ikey_ptr=*/nullptr,
context);
}
bool PartitionedFilterBlockReader::PrefixMayMatch(
const Slice& prefix, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr) {
uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr,
BlockCacheLookupContext* context) {
#ifdef NDEBUG
(void)block_offset;
#endif
@ -206,13 +207,14 @@ bool PartitionedFilterBlockReader::PrefixMayMatch(
return false;
}
auto filter_partition =
GetFilterPartition(nullptr /* prefetch_buffer */, filter_handle, no_io,
prefix_extractor);
GetFilterPartition(/*prefetch_buffer=*/nullptr, filter_handle, no_io,
prefix_extractor, context);
if (UNLIKELY(!filter_partition.GetValue())) {
return true;
}
return filter_partition.GetValue()->PrefixMayMatch(prefix, prefix_extractor,
kNotValid, no_io);
return filter_partition.GetValue()->PrefixMayMatch(
prefix, prefix_extractor, kNotValid, no_io, /*const_ikey_ptr=*/nullptr,
context);
}
BlockHandle PartitionedFilterBlockReader::GetFilterPartitionHandle(
@ -234,7 +236,8 @@ BlockHandle PartitionedFilterBlockReader::GetFilterPartitionHandle(
CachableEntry<FilterBlockReader>
PartitionedFilterBlockReader::GetFilterPartition(
FilePrefetchBuffer* prefetch_buffer, BlockHandle& fltr_blk_handle,
const bool no_io, const SliceTransform* prefix_extractor) {
const bool no_io, const SliceTransform* prefix_extractor,
BlockCacheLookupContext* context) {
const bool is_a_filter_partition = true;
auto block_cache = table_->rep_->table_options.block_cache.get();
if (LIKELY(block_cache != nullptr)) {
@ -247,9 +250,10 @@ PartitionedFilterBlockReader::GetFilterPartition(
nullptr /* cache_handle */, false /* own_value */};
}
}
return table_->GetFilter(/*prefetch_buffer*/ nullptr, fltr_blk_handle,
return table_->GetFilter(/*prefetch_buffer=*/nullptr, fltr_blk_handle,
is_a_filter_partition, no_io,
/* get_context */ nullptr, prefix_extractor);
/*get_context=*/nullptr, context,
prefix_extractor);
} else {
auto filter = table_->ReadFilter(prefetch_buffer, fltr_blk_handle,
is_a_filter_partition, prefix_extractor);
@ -273,6 +277,7 @@ size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const {
void PartitionedFilterBlockReader::CacheDependencies(
bool pin, const SliceTransform* prefix_extractor) {
// Before read partitions, prefetch them to avoid lots of IOs
BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kPrefetch};
IndexBlockIter biter;
Statistics* kNullStats = nullptr;
idx_on_fltr_blk_->NewIterator<IndexBlockIter>(
@ -304,7 +309,7 @@ void PartitionedFilterBlockReader::CacheDependencies(
const bool is_a_filter_partition = true;
auto filter = table_->GetFilter(
prefetch_buffer.get(), handle, is_a_filter_partition, !no_io,
/* get_context */ nullptr, prefix_extractor);
/*get_context=*/nullptr, &lookup_context, prefix_extractor);
if (LIKELY(filter.IsCached())) {
if (pin) {
filter_map_[handle.offset()] = std::move(filter);

@ -77,26 +77,28 @@ class PartitionedFilterBlockReader : public FilterBlockReader {
Statistics* stats, const InternalKeyComparator comparator,
const BlockBasedTable* table, const bool index_key_includes_seq,
const bool index_value_is_full);
virtual ~PartitionedFilterBlockReader();
~PartitionedFilterBlockReader() override;
virtual bool IsBlockBased() override { return false; }
virtual bool KeyMayMatch(
const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid, const bool no_io = false,
const Slice* const const_ikey_ptr = nullptr) override;
virtual bool PrefixMayMatch(
const Slice& prefix, const SliceTransform* prefix_extractor,
uint64_t block_offset = kNotValid, const bool no_io = false,
const Slice* const const_ikey_ptr = nullptr) override;
virtual size_t ApproximateMemoryUsage() const override;
bool IsBlockBased() override { return false; }
bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr,
BlockCacheLookupContext* context) override;
bool PrefixMayMatch(const Slice& prefix,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr,
BlockCacheLookupContext* context) override;
size_t ApproximateMemoryUsage() const override;
private:
BlockHandle GetFilterPartitionHandle(const Slice& entry);
CachableEntry<FilterBlockReader> GetFilterPartition(
FilePrefetchBuffer* prefetch_buffer, BlockHandle& handle,
const bool no_io, const SliceTransform* prefix_extractor = nullptr);
virtual void CacheDependencies(
bool bin, const SliceTransform* prefix_extractor) override;
const bool no_io, const SliceTransform* prefix_extractor,
BlockCacheLookupContext* context);
// Fetches the filter partitions via the table's GetFilter(); when `pin` is
// true, partitions that ended up cached are kept in filter_map_.
// `prefix_extractor` is forwarded to GetFilter() for each partition.
void CacheDependencies(bool pin,
                       const SliceTransform* prefix_extractor) override;
const SliceTransform* prefix_extractor_;
std::unique_ptr<Block> idx_on_fltr_blk_;

@ -31,6 +31,7 @@ class MockedBlockBasedTable : public BlockBasedTable {
CachableEntry<FilterBlockReader> GetFilter(
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
const bool /* unused */, bool /* unused */, GetContext* /* unused */,
BlockCacheLookupContext* /*context*/,
const SliceTransform* prefix_extractor) const override {
Slice slice = slices[filter_blk_handle.offset()];
auto obj = new FullFilterBlockReader(
@ -168,14 +169,15 @@ class PartitionedFilterBlockTest
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io,
&ikey_slice));
&ikey_slice, /*context=*/nullptr));
}
{
// querying a key twice
auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->KeyMayMatch(keys[0], prefix_extractor, kNotValid,
!no_io, &ikey_slice));
!no_io, &ikey_slice,
/*context=*/nullptr));
}
// querying missing keys
for (auto key : missing_keys) {
@ -183,11 +185,13 @@ class PartitionedFilterBlockTest
const Slice ikey_slice = Slice(*ikey.rep());
if (empty) {
ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid,
!no_io, &ikey_slice));
!no_io, &ikey_slice,
/*context=*/nullptr));
} else {
// assuming a good hash function
ASSERT_FALSE(reader->KeyMayMatch(key, prefix_extractor, kNotValid,
!no_io, &ikey_slice));
!no_io, &ikey_slice,
/*context=*/nullptr));
}
}
}
@ -335,9 +339,9 @@ TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
for (auto key : pkeys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key),
prefix_extractor.get(), kNotValid,
false /*no_io*/, &ikey_slice));
ASSERT_TRUE(reader->PrefixMayMatch(
prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid,
/*no_io=*/false, &ikey_slice, /*context=*/nullptr));
}
}

@ -56,7 +56,10 @@ class CuckooTableReader: public TableReader {
size_t ApproximateMemoryUsage() const override;
// Following methods are not implemented for Cuckoo Table Reader
uint64_t ApproximateOffsetOf(const Slice& /*key*/) override { return 0; }
uint64_t ApproximateOffsetOf(const Slice& /*key*/,
bool /*for_compaction*/ = false) override {
return 0;
}
void SetupForCompaction() override {}
// End of methods not implemented.

@ -50,9 +50,12 @@ class MockTableReader : public TableReader {
GetContext* get_context, const SliceTransform* prefix_extractor,
bool skip_filters = false) override;
uint64_t ApproximateOffsetOf(const Slice& /*key*/) override { return 0; }
uint64_t ApproximateOffsetOf(const Slice& /*key*/,
bool /*for_compaction*/ = false) override {
return 0;
}
virtual size_t ApproximateMemoryUsage() const override { return 0; }
size_t ApproximateMemoryUsage() const override { return 0; }
void SetupForCompaction() override {}

@ -613,7 +613,8 @@ Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target,
return Status::OK();
}
uint64_t PlainTableReader::ApproximateOffsetOf(const Slice& /*key*/) {
uint64_t PlainTableReader::ApproximateOffsetOf(const Slice& /*key*/,
bool /*for_compaction*/) {
return 0;
}

@ -89,7 +89,8 @@ class PlainTableReader: public TableReader {
GetContext* get_context, const SliceTransform* prefix_extractor,
bool skip_filters = false) override;
uint64_t ApproximateOffsetOf(const Slice& key) override;
uint64_t ApproximateOffsetOf(const Slice& key,
bool for_compaction = false) override;
uint32_t GetIndexSize() const { return index_.GetIndexSize(); }
void SetupForCompaction() override;

@ -61,7 +61,8 @@ class TableReader {
// bytes, and so includes effects like compression of the underlying data.
// E.g., the approximate offset of the last key in the table will
// be close to the file length.
virtual uint64_t ApproximateOffsetOf(const Slice& key) = 0;
virtual uint64_t ApproximateOffsetOf(const Slice& key,
bool for_compaction = false) = 0;
// Set up the table for Compaction. Might change some parameters with
// posix_fadvise

@ -17,12 +17,38 @@ enum BlockCacheLookupCaller : char {
kUserGet = 1,
kUserMGet = 2,
kUserIterator = 3,
kPrefetch = 4,
kCompaction = 5,
kUserApproximateSize = 4,
kPrefetch = 5,
kCompaction = 6,
// All callers should be added before kMaxBlockCacheLookupCaller.
kMaxBlockCacheLookupCaller
};
// Lookup context for tracing block cache accesses.
// We trace block accesses at five places:
// 1. BlockBasedTable::GetFilter
// 2. BlockBasedTable::GetUncompressionDict.
// 3. BlockBasedTable::MaybeReadAndLoadToCache. (To trace access on data, index,
// and range deletion block.)
// 4. BlockBasedTable::Get. (To trace the referenced key and whether the
// referenced key exists in a fetched data block.)
// 5. BlockBasedTable::MultiGet. (To trace the referenced key and whether the
// referenced key exists in a fetched data block.)
// The context is created at:
// 1. BlockBasedTable::Get. (kUserGet)
// 2. BlockBasedTable::MultiGet. (kUserMGet)
// 3. BlockBasedTable::NewIterator. (either kUserIterator, kCompaction, or
// external SST ingestion calls this function.)
// 4. BlockBasedTable::Open. (kPrefetch)
// 5. Index/Filter::CacheDependencies. (kPrefetch)
// 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or
// kUserApproximateSize).
struct BlockCacheLookupContext {
  // Builds a context tagged with the caller that issues the lookup.
  BlockCacheLookupContext(const BlockCacheLookupCaller& _caller)
      : caller{_caller} {}

  // Which caller the lookup is attributed to; set once at construction.
  const BlockCacheLookupCaller caller;
};
enum Boolean : char { kTrue = 1, kFalse = 0 };
struct BlockCacheTraceRecord {

Loading…
Cancel
Save