From 5efa0d6b0df1f3aea2ea8720c48c2b918b47ead1 Mon Sep 17 00:00:00 2001
From: haoyuhuang
Date: Mon, 10 Jun 2019 15:30:05 -0700
Subject: [PATCH] Create a BlockCacheLookupContext to enable fine-grained block cache tracing. (#5421)

Summary:
BlockCacheLookupContext only contains the caller for now. We will trace block accesses at five places:

1. BlockBasedTable::GetFilter.
2. BlockBasedTable::GetUncompressionDict.
3. BlockBasedTable::MaybeReadBlockAndLoadToCache. (To trace access on data, index, and range deletion blocks.)
4. BlockBasedTable::Get. (To trace the referenced key and whether the referenced key exists in a fetched data block.)
5. BlockBasedTable::MultiGet. (To trace the referenced key and whether the referenced key exists in a fetched data block.)

We create the context at:

1. BlockBasedTable::Get. (kUserGet)
2. BlockBasedTable::MultiGet. (kUserMGet)
3. BlockBasedTable::NewIterator. (kUserIterator or kCompaction; external SST ingestion also calls this function.)
4. BlockBasedTable::Open. (kPrefetch)
5. Index/Filter::CacheDependencies. (kPrefetch)
6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or kUserApproximateSize)

I loaded 1 million key-value pairs into the database and ran the readrandom benchmark with a single thread. I gave the block cache 10 GB to make sure all reads hit the block cache after warmup. The throughput is comparable: 231334 ops/s with this PR vs. 238428 ops/s on the master branch.

Experiment setup:
RocksDB: version 6.2
Date: Mon Jun 10 10:42:51 2019
CPU: 24 * Intel Core Processor (Skylake)
CPUCache: 16384 KB
Keys: 20 bytes each
Values: 100 bytes each (100 bytes after compression)
Entries: 1000000
Prefix: 20 bytes
Keys per prefix: 0
RawSize: 114.4 MB (estimated)
FileSize: 114.4 MB (estimated)
Write rate: 0 bytes/second
Read rate: 0 ops/second
Compression: NoCompression
Compression sampling rate: 0
Memtablerep: skip_list
Perf Level: 1

Load command: ./db_bench --benchmarks="fillseq" --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --statistics --cache_index_and_filter_blocks --cache_size=10737418240 --disable_auto_compactions=1 --disable_wal=1 --compression_type=none --min_level_to_compress=-1 --compression_ratio=1 --num=1000000

Run command: ./db_bench --benchmarks="readrandom,stats" --use_existing_db --threads=1 --duration=120 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --statistics --cache_index_and_filter_blocks --cache_size=10737418240 --disable_auto_compactions=1 --disable_wal=1 --compression_type=none --min_level_to_compress=-1 --compression_ratio=1 --num=1000000 --duration=120

TODOs:
1. Create a caller for external SST file ingestion and differentiate the callers for iterator.
2. Integrate tracer to trace block cache accesses.
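For reference, a minimal sketch of the two types this patch threads through the read path. The actual definitions live in trace_replay/block_cache_tracer.h, whose hunk is not shown in this excerpt; the enumerators are inferred from the call sites below, and the exact member layout and numeric values are assumptions:

enum class BlockCacheLookupCaller : char {
  // Inferred from the call sites in this patch; values are illustrative.
  kUserGet = 1,
  kUserMGet = 2,
  kUserIterator = 3,
  kPrefetch = 4,
  kCompaction = 5,
  kUserApproximateSize = 6,
};

struct BlockCacheLookupContext {
  explicit BlockCacheLookupContext(const BlockCacheLookupCaller& _caller)
      : caller(_caller) {}
  const BlockCacheLookupCaller caller;
  // Only the caller for now; more fields should follow once the tracer is
  // integrated (see TODO 2 above).
};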
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5421 Differential Revision: D15704258 Pulled By: HaoyuHuang fbshipit-source-id: 4aa8a55f8cb1576ffb367bfa3186a91d8f06d93a --- db/compaction/compaction_job.cc | 3 +- db/db_impl/db_impl.cc | 4 +- db/version_set.cc | 21 +- db/version_set.h | 7 +- table/block_based/block_based_filter_block.cc | 6 +- table/block_based/block_based_filter_block.h | 23 +- .../block_based_filter_block_test.cc | 200 +++++++++--- table/block_based/block_based_table_reader.cc | 300 +++++++++++------- table/block_based/block_based_table_reader.h | 89 +++--- table/block_based/filter_block.h | 34 +- table/block_based/full_filter_block.cc | 23 +- table/block_based/full_filter_block.h | 59 ++-- table/block_based/full_filter_block_test.cc | 64 +++- table/block_based/partitioned_filter_block.cc | 37 ++- table/block_based/partitioned_filter_block.h | 30 +- .../partitioned_filter_block_test.cc | 18 +- table/cuckoo/cuckoo_table_reader.h | 5 +- table/mock_table.h | 7 +- table/plain/plain_table_reader.cc | 3 +- table/plain/plain_table_reader.h | 3 +- table/table_reader.h | 3 +- trace_replay/block_cache_tracer.h | 30 +- 22 files changed, 634 insertions(+), 335 deletions(-) diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index ca8575a0d..65efedad5 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -520,7 +520,8 @@ void CompactionJob::GenSubcompactionBoundaries() { // to the index block and may incur I/O cost in the process. Unlock db // mutex to reduce contention db_mutex_->Unlock(); - uint64_t size = versions_->ApproximateSize(v, a, b, start_lvl, out_lvl + 1); + uint64_t size = versions_->ApproximateSize(v, a, b, start_lvl, out_lvl + 1, + /*for_compaction*/ true); db_mutex_->Lock(); ranges.emplace_back(a, b, size); sum += size; diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index bb6ec7db4..b1a828f9f 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2717,7 +2717,9 @@ void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family, InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek); sizes[i] = 0; if (include_flags & DB::SizeApproximationFlags::INCLUDE_FILES) { - sizes[i] += versions_->ApproximateSize(v, k1.Encode(), k2.Encode()); + sizes[i] += versions_->ApproximateSize( + v, k1.Encode(), k2.Encode(), /*start_level=*/0, /*end_level=*/-1, + /*for_compaction=*/false); } if (include_flags & DB::SizeApproximationFlags::INCLUDE_MEMTABLES) { sizes[i] += sv->mem->ApproximateStats(k1.Encode(), k2.Encode()).size; diff --git a/db/version_set.cc b/db/version_set.cc index 96bf22e57..8895879bf 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -4827,7 +4827,7 @@ Status VersionSet::WriteSnapshot(log::Writer* log) { // maintain state of where they first appear in the files. 
uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start, const Slice& end, int start_level, - int end_level) { + int end_level, bool for_compaction) { // pre-condition assert(v->cfd_->internal_comparator().Compare(start, end) <= 0); @@ -4848,7 +4848,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start, if (!level) { // level 0 data is sorted order, handle the use case explicitly - size += ApproximateSizeLevel0(v, files_brief, start, end); + size += ApproximateSizeLevel0(v, files_brief, start, end, for_compaction); continue; } @@ -4865,7 +4865,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start, // inferred from the sorted order for (uint64_t i = idx_start; i < files_brief.num_files; i++) { uint64_t val; - val = ApproximateSize(v, files_brief.files[i], end); + val = ApproximateSize(v, files_brief.files[i], end, for_compaction); if (!val) { // the files after this will not have the range break; @@ -4876,7 +4876,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start, if (i == idx_start) { // subtract the bytes needed to be scanned to get to the starting // key - val = ApproximateSize(v, files_brief.files[i], start); + val = ApproximateSize(v, files_brief.files[i], start, for_compaction); assert(size >= val); size -= val; } @@ -4889,13 +4889,16 @@ uint64_t VersionSet::ApproximateSize(Version* v, const Slice& start, uint64_t VersionSet::ApproximateSizeLevel0(Version* v, const LevelFilesBrief& files_brief, const Slice& key_start, - const Slice& key_end) { + const Slice& key_end, + bool for_compaction) { // level 0 files are not in sorted order, we need to iterate through // the list to compute the total bytes that require scanning uint64_t size = 0; for (size_t i = 0; i < files_brief.num_files; i++) { - const uint64_t start = ApproximateSize(v, files_brief.files[i], key_start); - const uint64_t end = ApproximateSize(v, files_brief.files[i], key_end); + const uint64_t start = + ApproximateSize(v, files_brief.files[i], key_start, for_compaction); + const uint64_t end = + ApproximateSize(v, files_brief.files[i], key_end, for_compaction); assert(end >= start); size += end - start; } @@ -4903,7 +4906,7 @@ uint64_t VersionSet::ApproximateSizeLevel0(Version* v, } uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f, - const Slice& key) { + const Slice& key, bool for_compaction) { // pre-condition assert(v); @@ -4923,7 +4926,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f, *f.file_metadata, nullptr /* range_del_agg */, v->GetMutableCFOptions().prefix_extractor.get(), &table_reader_ptr); if (table_reader_ptr != nullptr) { - result = table_reader_ptr->ApproximateOffsetOf(key); + result = table_reader_ptr->ApproximateOffsetOf(key, for_compaction); } delete iter; } diff --git a/db/version_set.h b/db/version_set.h index dc9e75965..8a43b9823 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -982,7 +982,7 @@ class VersionSet { // in levels [start_level, end_level). 
If end_level == -1 it will search // through all non-empty levels uint64_t ApproximateSize(Version* v, const Slice& start, const Slice& end, - int start_level = 0, int end_level = -1); + int start_level, int end_level, bool for_compaction); // Return the size of the current manifest file uint64_t manifest_file_size() const { return manifest_file_size_; } @@ -1032,10 +1032,11 @@ class VersionSet { // ApproximateSize helper uint64_t ApproximateSizeLevel0(Version* v, const LevelFilesBrief& files_brief, - const Slice& start, const Slice& end); + const Slice& start, const Slice& end, + bool for_compaction); uint64_t ApproximateSize(Version* v, const FdWithKeyRange& f, - const Slice& key); + const Slice& key, bool for_compaction); // Save current contents to *log Status WriteSnapshot(log::Writer* log); diff --git a/table/block_based/block_based_filter_block.cc b/table/block_based/block_based_filter_block.cc index fb366b5d3..e5a32e463 100644 --- a/table/block_based/block_based_filter_block.cc +++ b/table/block_based/block_based_filter_block.cc @@ -187,7 +187,8 @@ BlockBasedFilterBlockReader::BlockBasedFilterBlockReader( bool BlockBasedFilterBlockReader::KeyMayMatch( const Slice& key, const SliceTransform* /* prefix_extractor */, uint64_t block_offset, const bool /*no_io*/, - const Slice* const /*const_ikey_ptr*/) { + const Slice* const /*const_ikey_ptr*/, + BlockCacheLookupContext* /*context*/) { assert(block_offset != kNotValid); if (!whole_key_filtering_) { return true; } @@ -198,7 +199,8 @@ bool BlockBasedFilterBlockReader::KeyMayMatch( bool BlockBasedFilterBlockReader::PrefixMayMatch( const Slice& prefix, const SliceTransform* /* prefix_extractor */, uint64_t block_offset, const bool /*no_io*/, - const Slice* const /*const_ikey_ptr*/) { + const Slice* const /*const_ikey_ptr*/, + BlockCacheLookupContext* /*context*/) { assert(block_offset != kNotValid); return MayMatch(prefix, block_offset); } diff --git a/table/block_based/block_based_filter_block.h b/table/block_based/block_based_filter_block.h index 74a2285e1..cd86ff5c8 100644 --- a/table/block_based/block_based_filter_block.h +++ b/table/block_based/block_based_filter_block.h @@ -82,17 +82,18 @@ class BlockBasedFilterBlockReader : public FilterBlockReader { const BlockBasedTableOptions& table_opt, bool whole_key_filtering, BlockContents&& contents, Statistics* statistics); - virtual bool IsBlockBased() override { return true; } - - virtual bool KeyMayMatch( - const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, const bool no_io = false, - const Slice* const const_ikey_ptr = nullptr) override; - virtual bool PrefixMayMatch( - const Slice& prefix, const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, const bool no_io = false, - const Slice* const const_ikey_ptr = nullptr) override; - virtual size_t ApproximateMemoryUsage() const override; + bool IsBlockBased() override { return true; } + + bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, + uint64_t block_offset, const bool no_io, + const Slice* const const_ikey_ptr, + BlockCacheLookupContext* context) override; + bool PrefixMayMatch(const Slice& prefix, + const SliceTransform* prefix_extractor, + uint64_t block_offset, const bool no_io, + const Slice* const const_ikey_ptr, + BlockCacheLookupContext* context) override; + size_t ApproximateMemoryUsage() const override; // convert this object to a human readable form std::string ToString() const override; diff --git 
a/table/block_based/block_based_filter_block_test.cc b/table/block_based/block_based_filter_block_test.cc index e0ca57f1c..220888dd2 100644 --- a/table/block_based/block_based_filter_block_test.cc +++ b/table/block_based/block_based_filter_block_test.cc @@ -57,8 +57,12 @@ TEST_F(FilterBlockTest, EmptyBuilder) { ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block.data)); BlockBasedFilterBlockReader reader(nullptr, table_options_, true, std::move(block), nullptr); - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, uint64_t{0})); - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, 100000)); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); } TEST_F(FilterBlockTest, SingleChunk) { @@ -76,13 +80,27 @@ TEST_F(FilterBlockTest, SingleChunk) { BlockContents block(builder.Finish()); BlockBasedFilterBlockReader reader(nullptr, table_options_, true, std::move(block), nullptr); - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, 100)); - ASSERT_TRUE(reader.KeyMayMatch("bar", nullptr, 100)); - ASSERT_TRUE(reader.KeyMayMatch("box", nullptr, 100)); - ASSERT_TRUE(reader.KeyMayMatch("hello", nullptr, 100)); - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, 100)); - ASSERT_TRUE(!reader.KeyMayMatch("missing", nullptr, 100)); - ASSERT_TRUE(!reader.KeyMayMatch("other", nullptr, 100)); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "other", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); } TEST_F(FilterBlockTest, MultiChunk) { @@ -110,28 +128,60 @@ TEST_F(FilterBlockTest, MultiChunk) { std::move(block), nullptr); // Check first filter - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, uint64_t{0})); - ASSERT_TRUE(reader.KeyMayMatch("bar", nullptr, 2000)); - ASSERT_TRUE(!reader.KeyMayMatch("box", nullptr, uint64_t{0})); - ASSERT_TRUE(!reader.KeyMayMatch("hello", nullptr, uint64_t{0})); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/2000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, 
/*block_offset=*/uint64_t{0}, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); // Check second filter - ASSERT_TRUE(reader.KeyMayMatch("box", nullptr, 3100)); - ASSERT_TRUE(!reader.KeyMayMatch("foo", nullptr, 3100)); - ASSERT_TRUE(!reader.KeyMayMatch("bar", nullptr, 3100)); - ASSERT_TRUE(!reader.KeyMayMatch("hello", nullptr, 3100)); + ASSERT_TRUE(reader.KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); // Check third filter (empty) - ASSERT_TRUE(!reader.KeyMayMatch("foo", nullptr, 4100)); - ASSERT_TRUE(!reader.KeyMayMatch("bar", nullptr, 4100)); - ASSERT_TRUE(!reader.KeyMayMatch("box", nullptr, 4100)); - ASSERT_TRUE(!reader.KeyMayMatch("hello", nullptr, 4100)); + ASSERT_TRUE(!reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); // Check last filter - ASSERT_TRUE(reader.KeyMayMatch("box", nullptr, 9000)); - ASSERT_TRUE(reader.KeyMayMatch("hello", nullptr, 9000)); - ASSERT_TRUE(!reader.KeyMayMatch("foo", nullptr, 9000)); - ASSERT_TRUE(!reader.KeyMayMatch("bar", nullptr, 9000)); + ASSERT_TRUE(reader.KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); } // Test for block based filter block @@ -154,8 +204,12 @@ TEST_F(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) { ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block.data)); FilterBlockReader* reader = new BlockBasedFilterBlockReader( nullptr, table_options_, true, std::move(block), nullptr); - ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, uint64_t{0})); - ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, 100000)); + ASSERT_TRUE(reader->KeyMayMatch( + "foo", 
/*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); delete builder; delete reader; @@ -175,13 +229,27 @@ TEST_F(BlockBasedFilterBlockTest, BlockBasedSingleChunk) { BlockContents block(builder->Finish()); FilterBlockReader* reader = new BlockBasedFilterBlockReader( nullptr, table_options_, true, std::move(block), nullptr); - ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, 100)); - ASSERT_TRUE(reader->KeyMayMatch("bar", nullptr, 100)); - ASSERT_TRUE(reader->KeyMayMatch("box", nullptr, 100)); - ASSERT_TRUE(reader->KeyMayMatch("hello", nullptr, 100)); - ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, 100)); - ASSERT_TRUE(!reader->KeyMayMatch("missing", nullptr, 100)); - ASSERT_TRUE(!reader->KeyMayMatch("other", nullptr, 100)); + ASSERT_TRUE(reader->KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "other", /*prefix_extractor=*/nullptr, /*block_offset=*/100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); delete builder; delete reader; @@ -213,28 +281,60 @@ TEST_F(BlockBasedFilterBlockTest, BlockBasedMultiChunk) { nullptr, table_options_, true, std::move(block), nullptr); // Check first filter - ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, uint64_t{0})); - ASSERT_TRUE(reader->KeyMayMatch("bar", nullptr, 2000)); - ASSERT_TRUE(!reader->KeyMayMatch("box", nullptr, uint64_t{0})); - ASSERT_TRUE(!reader->KeyMayMatch("hello", nullptr, uint64_t{0})); + ASSERT_TRUE(reader->KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/2000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); // Check second filter - ASSERT_TRUE(reader->KeyMayMatch("box", nullptr, 3100)); - ASSERT_TRUE(!reader->KeyMayMatch("foo", nullptr, 3100)); - ASSERT_TRUE(!reader->KeyMayMatch("bar", nullptr, 3100)); - ASSERT_TRUE(!reader->KeyMayMatch("hello", nullptr, 3100)); + 
ASSERT_TRUE(reader->KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); // Check third filter (empty) - ASSERT_TRUE(!reader->KeyMayMatch("foo", nullptr, 4100)); - ASSERT_TRUE(!reader->KeyMayMatch("bar", nullptr, 4100)); - ASSERT_TRUE(!reader->KeyMayMatch("box", nullptr, 4100)); - ASSERT_TRUE(!reader->KeyMayMatch("hello", nullptr, 4100)); + ASSERT_TRUE(!reader->KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); // Check last filter - ASSERT_TRUE(reader->KeyMayMatch("box", nullptr, 9000)); - ASSERT_TRUE(reader->KeyMayMatch("hello", nullptr, 9000)); - ASSERT_TRUE(!reader->KeyMayMatch("foo", nullptr, 9000)); - ASSERT_TRUE(!reader->KeyMayMatch("bar", nullptr, 9000)); + ASSERT_TRUE(reader->KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader->KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); delete builder; delete reader; diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index a8e4e1d40..d1beafed6 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -178,6 +178,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader { FilePrefetchBuffer* prefetch_buffer, const ReadOptions& read_options, GetContext* get_context, + BlockCacheLookupContext* lookup_context, CachableEntry<Block>* index_block); const BlockBasedTable* table() const { return table_; } @@ -211,6 +212,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader { Status GetOrReadIndexBlock(const ReadOptions& read_options, GetContext* get_context, + BlockCacheLookupContext* lookup_context, CachableEntry<Block>* index_block) const; size_t ApproximateIndexBlockMemoryUsage() const { @@ -228,6 +230,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader 
{ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock( const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& read_options, GetContext* get_context, + BlockCacheLookupContext* lookup_context, CachableEntry<Block>* index_block) { PERF_TIMER_GUARD(read_index_block_nanos); @@ -241,13 +244,14 @@ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock( const Status s = table->RetrieveBlock( prefetch_buffer, read_options, rep->footer.index_handle(), UncompressionDict::GetEmptyDict(), index_block, BlockType::kIndex, - get_context); + get_context, lookup_context); return s; } Status BlockBasedTable::IndexReaderCommon::GetOrReadIndexBlock( const ReadOptions& read_options, GetContext* get_context, + BlockCacheLookupContext* lookup_context, CachableEntry<Block>* index_block) const { assert(index_block != nullptr); @@ -256,8 +260,8 @@ Status BlockBasedTable::IndexReaderCommon::GetOrReadIndexBlock( return Status::OK(); } - return ReadIndexBlock(table_, nullptr /* prefetch_buffer */, read_options, - get_context, index_block); + return ReadIndexBlock(table_, /*prefetch_buffer=*/nullptr, read_options, + get_context, lookup_context, index_block); } // Index that allows binary search lookup in a two-level index structure. @@ -269,7 +273,8 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { // unmodified. static Status Create(const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, bool use_cache, - bool prefetch, bool pin, IndexReader** index_reader) { + bool prefetch, bool pin, IndexReader** index_reader, + BlockCacheLookupContext* lookup_context) { assert(table != nullptr); assert(table->get_rep()); assert(!pin || prefetch); @@ -277,8 +282,9 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { CachableEntry<Block> index_block; if (prefetch || !use_cache) { - const Status s = ReadIndexBlock(table, prefetch_buffer, ReadOptions(), - nullptr /* get_context */, &index_block); + const Status s = + ReadIndexBlock(table, prefetch_buffer, ReadOptions(), + /*get_context=*/nullptr, lookup_context, &index_block); if (!s.ok()) { return s; } @@ -296,10 +302,11 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { // return a two-level iterator: first level is on the partition index InternalIteratorBase<BlockHandle>* NewIterator( const ReadOptions& read_options, bool /* disable_prefix_seek */, - IndexBlockIter* iter, GetContext* get_context) override { + IndexBlockIter* iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) override { CachableEntry<Block> index_block; - const Status s = - GetOrReadIndexBlock(read_options, get_context, &index_block); + const Status s = GetOrReadIndexBlock(read_options, get_context, + lookup_context, &index_block); if (!s.ok()) { if (iter != nullptr) { iter->Invalidate(s); @@ -352,6 +359,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { void CacheDependencies(bool pin) override { // Before read partitions, prefetch them to avoid lots of IOs + BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kPrefetch}; auto rep = table()->rep_; IndexBlockIter biter; BlockHandle handle; @@ ... CachableEntry<Block> index_block; Status s = GetOrReadIndexBlock(ReadOptions(), nullptr /* get_context */, - &index_block); + &lookup_context, &index_block); if (!s.ok()) { ROCKS_LOG_WARN(rep->ioptions.info_log, "Error retrieving top-level index block while trying to " @@ -408,7 +416,7 @@ class 
PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { // filter blocks s = table()->MaybeReadBlockAndLoadToCache( prefetch_buffer.get(), ro, handle, UncompressionDict::GetEmptyDict(), - &block, BlockType::kIndex, nullptr /* get_context */); + &block, BlockType::kIndex, /*get_context=*/nullptr, &lookup_context); assert(s.ok() || block.GetValue() == nullptr); if (s.ok() && block.GetValue() != nullptr) { @@ -451,7 +459,8 @@ class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon { // unmodified. static Status Create(const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, bool use_cache, - bool prefetch, bool pin, IndexReader** index_reader) { + bool prefetch, bool pin, IndexReader** index_reader, + BlockCacheLookupContext* lookup_context) { assert(table != nullptr); assert(table->get_rep()); assert(!pin || prefetch); @@ -459,8 +468,9 @@ class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon { CachableEntry<Block> index_block; if (prefetch || !use_cache) { - const Status s = ReadIndexBlock(table, prefetch_buffer, ReadOptions(), - nullptr /* get_context */, &index_block); + const Status s = + ReadIndexBlock(table, prefetch_buffer, ReadOptions(), + /*get_context=*/nullptr, lookup_context, &index_block); if (!s.ok()) { return s; } @@ -477,10 +487,11 @@ class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon { InternalIteratorBase<BlockHandle>* NewIterator( const ReadOptions& read_options, bool /* disable_prefix_seek */, - IndexBlockIter* iter, GetContext* get_context) override { + IndexBlockIter* iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) override { CachableEntry<Block> index_block; - const Status s = - GetOrReadIndexBlock(read_options, get_context, &index_block); + const Status s = GetOrReadIndexBlock(read_options, get_context, + lookup_context, &index_block); if (!s.ok()) { if (iter != nullptr) { iter->Invalidate(s); @@ -526,7 +537,8 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon { static Status Create(const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_index_iter, bool use_cache, - bool prefetch, bool pin, IndexReader** index_reader) { + bool prefetch, bool pin, IndexReader** index_reader, + BlockCacheLookupContext* lookup_context) { assert(table != nullptr); assert(index_reader != nullptr); assert(!pin || prefetch); @@ -536,8 +548,9 @@ ... CachableEntry<Block> index_block; if (prefetch || !use_cache) { - const Status s = ReadIndexBlock(table, prefetch_buffer, ReadOptions(), - nullptr /* get_context */, &index_block); + const Status s = + ReadIndexBlock(table, prefetch_buffer, ReadOptions(), + /*get_context=*/nullptr, lookup_context, &index_block); if (!s.ok()) { return s; } @@ -616,10 +629,11 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon { InternalIteratorBase<BlockHandle>* NewIterator( const ReadOptions& read_options, bool disable_prefix_seek, - IndexBlockIter* iter, GetContext* get_context) override { + IndexBlockIter* iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) override { CachableEntry<Block> index_block; - const Status s = - GetOrReadIndexBlock(read_options, get_context, &index_block); + const Status s = GetOrReadIndexBlock(read_options, get_context, + lookup_context, &index_block); if (!s.ok()) { if (iter != nullptr) { iter->Invalidate(s); @@ -1055,6 +1069,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, // Better not mutate rep_ after 
the creation. eg. internal_prefix_transform // raw pointer will be used to create HashIndexReader, whose reset may // access a dangling pointer. + BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kPrefetch}; Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options, internal_comparator, skip_filters, level, immortal_table); @@ -1095,13 +1110,13 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, return s; } s = new_table->ReadRangeDelBlock(prefetch_buffer.get(), meta_iter.get(), - internal_comparator); + internal_comparator, &lookup_context); if (!s.ok()) { return s; } s = new_table->PrefetchIndexAndFilterBlocks( prefetch_buffer.get(), meta_iter.get(), new_table.get(), prefetch_all, - table_options, level); + table_options, level, &lookup_context); if (s.ok()) { // Update tail prefetch stats @@ -1304,7 +1319,8 @@ Status BlockBasedTable::ReadPropertiesBlock( Status BlockBasedTable::ReadRangeDelBlock( FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, - const InternalKeyComparator& internal_comparator) { + const InternalKeyComparator& internal_comparator, + BlockCacheLookupContext* lookup_context) { Status s; bool found_range_del_block; BlockHandle range_del_handle; @@ -1317,10 +1333,10 @@ } else if (found_range_del_block && !range_del_handle.IsNull()) { ReadOptions read_options; std::unique_ptr<InternalIterator> iter(NewDataBlockIterator<DataBlockIter>( - read_options, range_del_handle, nullptr /* input_iter */, - BlockType::kRangeDeletion, true /* key_includes_seq */, - true /* index_key_is_full */, nullptr /* get_context */, Status(), - prefetch_buffer)); + read_options, range_del_handle, + /*input_iter=*/nullptr, BlockType::kRangeDeletion, + /*key_includes_seq=*/true, /*index_key_is_full=*/true, + /*get_context=*/nullptr, lookup_context, Status(), prefetch_buffer)); assert(iter != nullptr); s = iter->status(); if (!s.ok()) { @@ -1370,7 +1386,8 @@ Status BlockBasedTable::ReadCompressionDictBlock( Status BlockBasedTable::PrefetchIndexAndFilterBlocks( FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, BlockBasedTable* new_table, bool prefetch_all, - const BlockBasedTableOptions& table_options, const int level) { + const BlockBasedTableOptions& table_options, const int level, + BlockCacheLookupContext* lookup_context) { Status s; // Find filter handle and filter type @@ -1440,7 +1457,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( IndexReader* index_reader = nullptr; if (s.ok()) { s = new_table->CreateIndexReader(prefetch_buffer, meta_iter, use_cache, - prefetch_index, pin_index, &index_reader); + prefetch_index, pin_index, &index_reader, + lookup_context); if (s.ok()) { assert(index_reader != nullptr); rep_->index_reader.reset(index_reader); @@ -1467,7 +1485,9 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( if (s.ok() && prefetch_filter) { // Hack: Call GetFilter() to implicitly add filter to the block_cache auto filter_entry = - new_table->GetFilter(rep_->table_prefix_extractor.get()); + new_table->GetFilter(rep_->table_prefix_extractor.get(), + /*prefetch_buffer=*/nullptr, /*no_io=*/false, + /*get_context=*/nullptr, lookup_context); if (filter_entry.GetValue() != nullptr && prefetch_all) { filter_entry.GetValue()->CacheDependencies( pin_all, rep_->table_prefix_extractor.get()); @@ -1653,8 +1673,7 @@ Status BlockBasedTable::GetDataBlockFromCache( size_t charge = block_holder->ApproximateMemoryUsage(); Cache::Handle* cache_handle = nullptr; s = block_cache->Insert(block_cache_key, 
block_holder.get(), charge, - &DeleteCachedEntry<Block>, - &cache_handle); + &DeleteCachedEntry<Block>, &cache_handle); #ifndef NDEBUG block_cache->TEST_mark_as_data_block(block_cache_key, charge); #endif // NDEBUG @@ -1758,8 +1777,7 @@ Status BlockBasedTable::PutDataBlockToCache( size_t charge = block_holder->ApproximateMemoryUsage(); Cache::Handle* cache_handle = nullptr; s = block_cache->Insert(block_cache_key, block_holder.get(), charge, - &DeleteCachedEntry<Block>, - &cache_handle, priority); + &DeleteCachedEntry<Block>, &cache_handle, priority); #ifndef NDEBUG block_cache->TEST_mark_as_data_block(block_cache_key, charge); #endif // NDEBUG @@ -1849,25 +1867,28 @@ FilterBlockReader* BlockBasedTable::ReadFilter( CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter( const SliceTransform* prefix_extractor, FilePrefetchBuffer* prefetch_buffer, - bool no_io, GetContext* get_context) const { + bool no_io, GetContext* get_context, + BlockCacheLookupContext* lookup_context) const { const BlockHandle& filter_blk_handle = rep_->filter_handle; const bool is_a_filter_partition = true; return GetFilter(prefetch_buffer, filter_blk_handle, !is_a_filter_partition, - no_io, get_context, prefix_extractor); + no_io, get_context, lookup_context, prefix_extractor); } CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter( FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_blk_handle, const bool is_a_filter_partition, bool no_io, GetContext* get_context, + BlockCacheLookupContext* /*lookup_context*/, const SliceTransform* prefix_extractor) const { + // TODO(haoyu): Trace filter block access here. // If cache_index_and_filter_blocks is false, filter should be pre-populated. // We will return rep_->filter anyway. rep_->filter can be nullptr if filter // read fails at Open() time. We don't want to reload again since it will // most probably fail again. if (!is_a_filter_partition && !rep_->table_options.cache_index_and_filter_blocks) { - return {rep_->filter.get(), nullptr /* cache */, - nullptr /* cache_handle */, false /* own_value */}; + return {rep_->filter.get(), /*cache=*/nullptr, /*cache_handle=*/nullptr, + /*own_value=*/false}; } Cache* block_cache = rep_->table_options.block_cache.get(); @@ -1877,8 +1898,8 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter( } if (!is_a_filter_partition && rep_->filter_entry.IsCached()) { - return {rep_->filter_entry.GetValue(), nullptr /* cache */, - nullptr /* cache_handle */, false /* own_value */}; + return {rep_->filter_entry.GetValue(), /*cache=*/nullptr, + /*cache_handle=*/nullptr, /*own_value=*/false}; } PERF_TIMER_GUARD(read_filter_block_nanos); @@ -1920,12 +1941,13 @@ } return {filter, cache_handle ? block_cache : nullptr, cache_handle, - false /* own_value */}; + /*own_value=*/false}; } CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict( - FilePrefetchBuffer* prefetch_buffer, bool no_io, - GetContext* get_context) const { + FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context, + BlockCacheLookupContext* /*lookup_context*/) const { + // TODO(haoyu): Trace the access on the uncompression dictionary here. if (!rep_->table_options.cache_index_and_filter_blocks) { // block cache is either disabled or not used for meta-blocks. In either // case, BlockBasedTableReader is the owner of the uncompression dictionary. 
@@ -1987,14 +2009,16 @@ CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict( // differs from the one in mutable_cf_options and index type is HashBasedIndex InternalIteratorBase<BlockHandle>* BlockBasedTable::NewIndexIterator( const ReadOptions& read_options, bool disable_prefix_seek, - IndexBlockIter* input_iter, GetContext* get_context) const { + IndexBlockIter* input_iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) const { assert(rep_ != nullptr); assert(rep_->index_reader != nullptr); // We don't return pinned data from index blocks, so no need // to set `block_contents_pinned`. return rep_->index_reader->NewIterator(read_options, disable_prefix_seek, - input_iter, get_context); + input_iter, get_context, + lookup_context); } // Convert an index iterator value (i.e., an encoded BlockHandle) @@ -2005,7 +2029,7 @@ template <typename TBlockIter> TBlockIter* BlockBasedTable::NewDataBlockIterator( const ReadOptions& ro, const BlockHandle& handle, TBlockIter* input_iter, BlockType block_type, bool key_includes_seq, bool index_key_is_full, - GetContext* get_context, Status s, + GetContext* get_context, BlockCacheLookupContext* lookup_context, Status s, FilePrefetchBuffer* prefetch_buffer) const { PERF_TIMER_GUARD(new_table_block_iter_nanos); @@ -2017,7 +2041,7 @@ const bool no_io = (ro.read_tier == kBlockCacheTier); auto uncompression_dict_storage = - GetUncompressionDict(prefetch_buffer, no_io, get_context); + GetUncompressionDict(prefetch_buffer, no_io, get_context, lookup_context); const UncompressionDict& uncompression_dict = uncompression_dict_storage.GetValue() == nullptr ? UncompressionDict::GetEmptyDict() : @@ -2025,7 +2049,7 @@ CachableEntry<Block> block; s = RetrieveBlock(prefetch_buffer, ro, handle, uncompression_dict, &block, - block_type, get_context); + block_type, get_context, lookup_context); if (!s.ok()) { assert(block.IsEmpty()); @@ -2093,7 +2117,9 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry<Block>* block_entry, BlockType block_type, - GetContext* get_context) const { + GetContext* get_context, + BlockCacheLookupContext* /*lookup_context*/) const { + // TODO(haoyu): Trace data/index/range deletion block access here. 
assert(block_entry != nullptr); const bool no_io = (ro.read_tier == kBlockCacheTier); Cache* block_cache = rep_->table_options.block_cache.get(); @@ -2169,7 +2195,7 @@ Status BlockBasedTable::RetrieveBlock( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry<Block>* block_entry, BlockType block_type, - GetContext* get_context) const { + GetContext* get_context, BlockCacheLookupContext* lookup_context) const { assert(block_entry); assert(block_entry->IsEmpty()); @@ -2180,7 +2206,7 @@ block_type != BlockType::kIndex)) { s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle, uncompression_dict, block_entry, - block_type, get_context); + block_type, get_context, lookup_context); if (!s.ok()) { return s; @@ -2271,7 +2297,8 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator( bool BlockBasedTable::PrefixMayMatch( const Slice& internal_key, const ReadOptions& read_options, const SliceTransform* options_prefix_extractor, - const bool need_upper_bound_check) const { + const bool need_upper_bound_check, + BlockCacheLookupContext* lookup_context) const { if (!rep_->filter_policy) { return true; } @@ -2295,7 +2322,9 @@ Status s; // First, try check with full filter - auto filter_entry = GetFilter(prefix_extractor); + auto filter_entry = + GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr, /*no_io=*/false, + /*get_context=*/nullptr, lookup_context); FilterBlockReader* filter = filter_entry.GetValue(); bool filter_checked = true; if (filter != nullptr) { @@ -2304,7 +2333,7 @@ may_match = filter->RangeMayExist( read_options.iterate_upper_bound, user_key, prefix_extractor, rep_->internal_comparator.user_comparator(), const_ikey_ptr, - &filter_checked, need_upper_bound_check); + &filter_checked, need_upper_bound_check, lookup_context); } else { // if prefix_extractor changed for block based filter, skip filter if (need_upper_bound_check) { @@ -2323,9 +2352,10 @@ // Then, try find it within each block // we already know prefix_extractor and prefix_extractor_name must match // because `CheckPrefixMayMatch` first checks `check_filter_ == true` - std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter( - NewIndexIterator(no_io_read_options, - /* need_upper_bound_check */ false)); + std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter(NewIndexIterator( + no_io_read_options, + /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, + /*get_context=*/nullptr, lookup_context)); iiter->Seek(internal_prefix); if (!iiter->Valid()) { @@ -2357,8 +2387,9 @@ // possibly contain the key. Thus, the corresponding data block // is the only on could potentially contain the prefix. 
BlockHandle handle = iiter->value(); - may_match = - filter->PrefixMayMatch(prefix, prefix_extractor, handle.offset()); + may_match = filter->PrefixMayMatch( + prefix, prefix_extractor, handle.offset(), /*no_io=*/false, + /*const_ikey_ptr=*/nullptr, lookup_context); } } } @@ -2588,7 +2619,7 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() { table_->NewDataBlockIterator<TBlockIter>( read_options_, data_block_handle, &block_iter_, block_type_, key_includes_seq_, index_key_is_full_, - /* get_context */ nullptr, s, prefetch_buffer_.get()); + /*get_context=*/nullptr, &lookup_context_, s, prefetch_buffer_.get()); block_iter_points_to_real_block_ = true; if (read_options_.iterate_upper_bound != nullptr) { data_block_within_upper_bound_ = @@ -2682,6 +2713,9 @@ void BlockBasedTableIterator<TBlockIter, TValue>::CheckOutOfBound() { InternalIterator* BlockBasedTable::NewIterator( const ReadOptions& read_options, const SliceTransform* prefix_extractor, Arena* arena, bool skip_filters, bool for_compaction) { + BlockCacheLookupContext lookup_context{ + for_compaction ? BlockCacheLookupCaller::kCompaction + : BlockCacheLookupCaller::kUserIterator}; bool need_upper_bound_check = PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor); if (arena == nullptr) { @@ -2690,7 +2724,8 @@ InternalIterator* BlockBasedTable::NewIterator( NewIndexIterator( read_options, need_upper_bound_check && - rep_->index_type == BlockBasedTableOptions::kHashSearch), + rep_->index_type == BlockBasedTableOptions::kHashSearch, + /*input_iter=*/nullptr, /*get_context=*/nullptr, &lookup_context), !skip_filters && !read_options.total_order_seek && prefix_extractor != nullptr, need_upper_bound_check, prefix_extractor, BlockType::kData, @@ -2700,7 +2735,9 @@ arena->AllocateAligned(sizeof(BlockBasedTableIterator<DataBlockIter>)); return new (mem) BlockBasedTableIterator<DataBlockIter>( this, read_options, rep_->internal_comparator, - NewIndexIterator(read_options, need_upper_bound_check), + NewIndexIterator(read_options, need_upper_bound_check, + /*input_iter=*/nullptr, /*get_context=*/nullptr, + &lookup_context), !skip_filters && !read_options.total_order_seek && prefix_extractor != nullptr, need_upper_bound_check, prefix_extractor, BlockType::kData, @@ -2724,7 +2761,8 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator( bool BlockBasedTable::FullFilterKeyMayMatch( const ReadOptions& read_options, FilterBlockReader* filter, const Slice& internal_key, const bool no_io, - const SliceTransform* prefix_extractor) const { + const SliceTransform* prefix_extractor, + BlockCacheLookupContext* lookup_context) const { if (filter == nullptr || filter->IsBlockBased()) { return true; } @@ -2735,15 +2773,16 @@ size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size(); Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz); - may_match = filter->KeyMayMatch(user_key_without_ts, prefix_extractor, - kNotValid, no_io, const_ikey_ptr); + may_match = + filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid, + no_io, const_ikey_ptr, lookup_context); } else if (!read_options.total_order_seek && prefix_extractor && rep_->table_properties->prefix_extractor_name.compare( prefix_extractor->Name()) == 0 && prefix_extractor->InDomain(user_key) && !filter->PrefixMayMatch(prefix_extractor->Transform(user_key), prefix_extractor, kNotValid, false, - const_ikey_ptr)) { + const_ikey_ptr, lookup_context)) { may_match = false; } if (may_match) { @@ 
-2756,12 +2795,14 @@ bool BlockBasedTable::FullFilterKeyMayMatch( void BlockBasedTable::FullFilterKeysMayMatch( const ReadOptions& read_options, FilterBlockReader* filter, MultiGetRange* range, const bool no_io, - const SliceTransform* prefix_extractor) const { + const SliceTransform* prefix_extractor, + BlockCacheLookupContext* lookup_context) const { if (filter == nullptr || filter->IsBlockBased()) { return; } if (filter->whole_key_filtering()) { - filter->KeysMayMatch(range, prefix_extractor, kNotValid, no_io); + filter->KeysMayMatch(range, prefix_extractor, kNotValid, no_io, + lookup_context); } else if (!read_options.total_order_seek && prefix_extractor && rep_->table_properties->prefix_extractor_name.compare( prefix_extractor->Name()) == 0) { @@ -2772,7 +2813,8 @@ void BlockBasedTable::FullFilterKeysMayMatch( range->SkipKey(iter); } } - filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false); + filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false, + lookup_context); } } @@ -2786,18 +2828,19 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, CachableEntry<FilterBlockReader> filter_entry; bool may_match; FilterBlockReader* filter = nullptr; + BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kUserGet}; { if (!skip_filters) { - filter_entry = - GetFilter(prefix_extractor, /*prefetch_buffer*/ nullptr, - read_options.read_tier == kBlockCacheTier, get_context); + filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr, + read_options.read_tier == kBlockCacheTier, + get_context, &lookup_context); } filter = filter_entry.GetValue(); // First check the full filter // If full filter not useful, Then go into each block may_match = FullFilterKeyMayMatch(read_options, filter, key, no_io, - prefix_extractor); + prefix_extractor, &lookup_context); } if (!may_match) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); @@ -2811,8 +2854,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, need_upper_bound_check = PrefixExtractorChanged( rep_->table_properties.get(), prefix_extractor); } - auto iiter = NewIndexIterator(read_options, need_upper_bound_check, - &iiter_on_stack, get_context); + auto iiter = + NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack, + get_context, &lookup_context); std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter_unique_ptr; if (iiter != &iiter_on_stack) { iiter_unique_ptr.reset(iiter); @@ -2828,7 +2872,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, bool not_exist_in_filter = filter != nullptr && filter->IsBlockBased() == true && !filter->KeyMayMatch(ExtractUserKeyAndStripTimestamp(key, ts_sz), - prefix_extractor, handle.offset(), no_io); + prefix_extractor, handle.offset(), no_io, + /*const_ikey_ptr=*/nullptr, &lookup_context); if (not_exist_in_filter) { // Not found @@ -2841,8 +2886,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, DataBlockIter biter; NewDataBlockIterator<DataBlockIter>( read_options, iiter->value(), &biter, BlockType::kData, - true /* key_includes_seq */, true /* index_key_is_full */, - get_context); + /*key_includes_seq=*/true, + /*index_key_is_full=*/true, get_context, &lookup_context, + /*s=*/Status(), /*prefetch_buffer=*/nullptr); if (read_options.read_tier == kBlockCacheTier && biter.status().IsIncomplete()) { @@ -2907,6 +2953,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options, const MultiGetRange* mget_range, const SliceTransform* prefix_extractor, bool skip_filters) { + 
BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kUserMGet}; const bool no_io = read_options.read_tier == kBlockCacheTier; CachableEntry<FilterBlockReader> filter_entry; FilterBlockReader* filter = nullptr; @@ -2915,16 +2962,16 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options, { if (!skip_filters) { // TODO: Figure out where the stats should go - filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer*/ nullptr, + filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr, read_options.read_tier == kBlockCacheTier, - nullptr /*get_context*/); + /*get_context=*/nullptr, &lookup_context); } filter = filter_entry.GetValue(); // First check the full filter // If full filter not useful, Then go into each block FullFilterKeysMayMatch(read_options, filter, &sst_file_range, no_io, - prefix_extractor); + prefix_extractor, &lookup_context); } if (skip_filters || !sst_file_range.empty()) { IndexBlockIter iiter_on_stack; @@ -2937,7 +2984,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options, } auto iiter = NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack, - sst_file_range.begin()->get_context); + sst_file_range.begin()->get_context, &lookup_context); std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter_unique_ptr; if (iiter != &iiter_on_stack) { iiter_unique_ptr.reset(iiter); @@ -2958,11 +3005,12 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options, offset = iiter->value().offset(); biter.Invalidate(Status::OK()); NewDataBlockIterator<DataBlockIter>( - read_options, iiter->value(), &biter, BlockType::kData, false, - true /* key_includes_seq */, get_context); + read_options, iiter->value(), &biter, BlockType::kData, + /*key_includes_seq=*/false, + /*index_key_is_full=*/true, get_context, &lookup_context, + Status(), nullptr); reusing_block = false; } - if (read_options.read_tier == kBlockCacheTier && biter.status().IsIncomplete()) { // couldn't get block from block_cache @@ -3040,9 +3088,11 @@ Status BlockBasedTable::Prefetch(const Slice* const begin, if (begin && end && comparator.Compare(*begin, *end) > 0) { return Status::InvalidArgument(*begin, *end); } - + BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kPrefetch}; IndexBlockIter iiter_on_stack; - auto iiter = NewIndexIterator(ReadOptions(), false, &iiter_on_stack); + auto iiter = NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, + &iiter_on_stack, /*get_context=*/nullptr, + &lookup_context); std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter_unique_ptr; if (iiter != &iiter_on_stack) { iiter_unique_ptr = @@ -3077,7 +3127,12 @@ Status BlockBasedTable::Prefetch(const Slice* const begin, // Load the block specified by the block_handle into the block cache DataBlockIter biter; - NewDataBlockIterator<DataBlockIter>(ReadOptions(), block_handle, &biter); + + NewDataBlockIterator<DataBlockIter>( + ReadOptions(), block_handle, &biter, /*type=*/BlockType::kData, + /*key_includes_seq=*/true, /*index_key_is_full=*/true, + /*get_context=*/nullptr, &lookup_context, Status(), + /*prefetch_buffer=*/nullptr); if (!biter.status().ok()) { // there was an unexpected error while pre-fetching @@ -3089,6 +3144,8 @@ } Status BlockBasedTable::VerifyChecksum() { + // TODO(haoyu): This function is called by external sst ingestion and the + // verify checksum public API. We don't log its block cache accesses for now. 
Status s; // Check Meta blocks std::unique_ptr<Block> meta; @@ -3104,8 +3161,9 @@ Status BlockBasedTable::VerifyChecksum() { } // Check Data blocks IndexBlockIter iiter_on_stack; - InternalIteratorBase<BlockHandle>* iiter = - NewIndexIterator(ReadOptions(), false, &iiter_on_stack); + InternalIteratorBase<BlockHandle>* iiter = NewIndexIterator( + ReadOptions(), /*need_upper_bound_check=*/false, &iiter_on_stack, + /*get_context=*/nullptr, /*lookup_context=*/nullptr); std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter_unique_ptr; if (iiter != &iiter_on_stack) { iiter_unique_ptr = @@ -3199,8 +3257,9 @@ bool BlockBasedTable::TEST_BlockInCache(const BlockHandle& handle) const { bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, const Slice& key) { - std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter( - NewIndexIterator(options)); + std::unique_ptr<InternalIteratorBase<BlockHandle>> iiter(NewIndexIterator( + options, /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, + /*get_context=*/nullptr, /*lookup_context=*/nullptr)); iiter->Seek(key); assert(iiter->Valid()); @@ -3234,7 +3293,8 @@ BlockBasedTableOptions::IndexType BlockBasedTable::UpdateIndexType() { Status BlockBasedTable::CreateIndexReader( FilePrefetchBuffer* prefetch_buffer, InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch, - bool pin, IndexReader** index_reader) { + bool pin, IndexReader** index_reader, + BlockCacheLookupContext* lookup_context) { auto index_type_on_file = rep_->index_type; // kHashSearch requires non-empty prefix_extractor but bypass checking @@ -3246,11 +3306,13 @@ Status BlockBasedTable::CreateIndexReader( switch (index_type_on_file) { case BlockBasedTableOptions::kTwoLevelIndexSearch: { return PartitionIndexReader::Create(this, prefetch_buffer, use_cache, - prefetch, pin, index_reader); + prefetch, pin, index_reader, + lookup_context); } case BlockBasedTableOptions::kBinarySearch: { return BinarySearchIndexReader::Create(this, prefetch_buffer, use_cache, - prefetch, pin, index_reader); + prefetch, pin, index_reader, + lookup_context); } case BlockBasedTableOptions::kHashSearch: { std::unique_ptr<Block> meta_guard; @@ -3264,14 +3326,16 @@ Status BlockBasedTable::CreateIndexReader( ROCKS_LOG_WARN(rep_->ioptions.info_log, "Unable to read the metaindex block." " Fall back to binary search index."); - return BinarySearchIndexReader::Create( - this, prefetch_buffer, use_cache, prefetch, pin, index_reader); + return BinarySearchIndexReader::Create(this, prefetch_buffer, + use_cache, prefetch, pin, + index_reader, lookup_context); } meta_index_iter = meta_iter_guard.get(); } return HashIndexReader::Create(this, prefetch_buffer, meta_index_iter, - use_cache, prefetch, pin, index_reader); + use_cache, prefetch, pin, index_reader, + lookup_context); } default: { std::string error_message = @@ -3281,9 +3345,15 @@ } } -uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) { +uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key, + bool for_compaction) { + BlockCacheLookupContext context( + for_compaction ? 
BlockCacheLookupCaller::kCompaction + : BlockCacheLookupCaller::kUserApproximateSize); std::unique_ptr> index_iter( - NewIndexIterator(ReadOptions())); + NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, + /*input_iter=*/nullptr, /*get_context=*/nullptr, + /*lookup_context=*/&context)); index_iter->Seek(key); uint64_t result; @@ -3319,7 +3389,9 @@ bool BlockBasedTable::TEST_IndexBlockInCache() const { Status BlockBasedTable::GetKVPairsFromDataBlocks( std::vector* kv_pair_blocks) { std::unique_ptr> blockhandles_iter( - NewIndexIterator(ReadOptions())); + NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, + /*input_iter=*/nullptr, /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); Status s = blockhandles_iter->status(); if (!s.ok()) { @@ -3337,7 +3409,11 @@ Status BlockBasedTable::GetKVPairsFromDataBlocks( std::unique_ptr datablock_iter; datablock_iter.reset(NewDataBlockIterator( - ReadOptions(), blockhandles_iter->value())); + ReadOptions(), blockhandles_iter->value(), /*input_iter=*/nullptr, + /*type=*/BlockType::kData, + /*key_includes_seq=*/true, /*index_key_is_full=*/true, + /*get_context=*/nullptr, /*lookup_context=*/nullptr, Status(), + /*prefetch_buffer=*/nullptr)); s = datablock_iter->status(); if (!s.ok()) { @@ -3545,7 +3621,9 @@ Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) { "Index Details:\n" "--------------------------------------\n"); std::unique_ptr> blockhandles_iter( - NewIndexIterator(ReadOptions())); + NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, + /*input_iter=*/nullptr, /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); Status s = blockhandles_iter->status(); if (!s.ok()) { out_file->Append("Can not read Index Block \n\n"); @@ -3594,7 +3672,9 @@ Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) { Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) { std::unique_ptr> blockhandles_iter( - NewIndexIterator(ReadOptions())); + NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, + /*input_iter=*/nullptr, /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); Status s = blockhandles_iter->status(); if (!s.ok()) { out_file->Append("Can not read Index Block \n\n"); @@ -3628,7 +3708,11 @@ Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) { std::unique_ptr datablock_iter; datablock_iter.reset(NewDataBlockIterator( - ReadOptions(), blockhandles_iter->value())); + ReadOptions(), blockhandles_iter->value(), /*input_iter=*/nullptr, + /*type=*/BlockType::kData, + /*key_includes_seq=*/true, /*index_key_is_full=*/true, + /*get_context=*/nullptr, /*lookup_context=*/nullptr, Status(), + /*prefetch_buffer=*/nullptr)); s = datablock_iter->status(); if (!s.ok()) { diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index d8319a3e7..a92289f9b 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -113,17 +113,22 @@ class BlockBasedTable : public TableReader { bool PrefixMayMatch(const Slice& internal_key, const ReadOptions& read_options, const SliceTransform* options_prefix_extractor, - const bool need_upper_bound_check) const; + const bool need_upper_bound_check, + BlockCacheLookupContext* lookup_context) const; // Returns a new iterator over the table contents. // The result of NewIterator() is initially invalid (caller must // call one of the Seek methods on the iterator before using it).
// @param skip_filters Disables loading/accessing the filter block - InternalIterator* NewIterator(const ReadOptions&, - const SliceTransform* prefix_extractor, - Arena* arena = nullptr, - bool skip_filters = false, - bool for_compaction = false) override; + InternalIterator* NewIterator( + const ReadOptions&, const SliceTransform* prefix_extractor, + Arena* arena = nullptr, bool skip_filters = false, + // TODO(haoyu) 1. External SST ingestion sets for_compaction to false. 2. + // Compaction also sets it to false when paranoid_file_checks is true, + // i.e., it will populate the block cache with blocks in the new SST + // files. For now, we treat both cases as if a user were calling the + // iterator. We should differentiate the callers. + bool for_compaction = false) override; FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( const ReadOptions& read_options) override; @@ -149,7 +154,7 @@ class BlockBasedTable : public TableReader { // bytes, and so includes effects like compression of the underlying data. // E.g., the approximate offset of the last key in the table will // be close to the file length. - uint64_t ApproximateOffsetOf(const Slice& key) override; + uint64_t ApproximateOffsetOf(const Slice& key, bool for_compaction) override; bool TEST_BlockInCache(const BlockHandle& handle) const; @@ -193,7 +198,8 @@ class BlockBasedTable : public TableReader { // returned object. virtual InternalIteratorBase* NewIterator( const ReadOptions& read_options, bool disable_prefix_seek, - IndexBlockIter* iter, GetContext* get_context) = 0; + IndexBlockIter* iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) = 0; // Report an approximation of how much memory has been used other than // memory that was allocated in block cache. @@ -222,10 +228,10 @@ class BlockBasedTable : public TableReader { template TBlockIter* NewDataBlockIterator( const ReadOptions& ro, const BlockHandle& block_handle, - TBlockIter* input_iter = nullptr, BlockType block_type = BlockType::kData, - bool key_includes_seq = true, bool index_key_is_full = true, - GetContext* get_context = nullptr, Status s = Status(), - FilePrefetchBuffer* prefetch_buffer = nullptr) const; + TBlockIter* input_iter, BlockType block_type, bool key_includes_seq, + bool index_key_is_full, GetContext* get_context, + BlockCacheLookupContext* lookup_context, Status s, + FilePrefetchBuffer* prefetch_buffer) const; class PartitionedIndexIteratorState; @@ -262,7 +268,7 @@ class BlockBasedTable : public TableReader { FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, - GetContext* get_context = nullptr) const; + GetContext* get_context, BlockCacheLookupContext* lookup_context) const; // Similar to the above, with one crucial difference: it will retrieve the // block from the file even if there are no caches configured (assuming the @@ -271,23 +277,25 @@ class BlockBasedTable : public TableReader { const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, - GetContext* get_context) const; + GetContext* get_context, + BlockCacheLookupContext* lookup_context) const; // For the following two functions: // if `no_io == true`, we will not try to read filter/index from sst file // were they not present in cache yet.
CachableEntry GetFilter( - const SliceTransform* prefix_extractor = nullptr, - FilePrefetchBuffer* prefetch_buffer = nullptr, bool no_io = false, - GetContext* get_context = nullptr) const; + const SliceTransform* prefix_extractor, + FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context, + BlockCacheLookupContext* lookup_context) const; virtual CachableEntry GetFilter( FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_blk_handle, const bool is_a_filter_partition, bool no_io, GetContext* get_context, - const SliceTransform* prefix_extractor = nullptr) const; + BlockCacheLookupContext* lookup_context, + const SliceTransform* prefix_extractor) const; CachableEntry GetUncompressionDict( - FilePrefetchBuffer* prefetch_buffer, bool no_io, - GetContext* get_context) const; + FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context, + BlockCacheLookupContext* lookup_context) const; // Get the iterator from the index reader. // If input_iter is not set, return new Iterator @@ -300,9 +308,9 @@ class BlockBasedTable : public TableReader { // 3. We disallowed any io to be performed, that is, read_options == // kBlockCacheTier InternalIteratorBase* NewIndexIterator( - const ReadOptions& read_options, bool need_upper_bound_check = false, - IndexBlockIter* input_iter = nullptr, - GetContext* get_context = nullptr) const; + const ReadOptions& read_options, bool need_upper_bound_check, + IndexBlockIter* input_iter, GetContext* get_context, + BlockCacheLookupContext* lookup_context) const; // Read block cache from block caches (if set): block_cache and // block_cache_compressed. @@ -352,17 +360,20 @@ class BlockBasedTable : public TableReader { Status CreateIndexReader(FilePrefetchBuffer* prefetch_buffer, InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch, bool pin, - IndexReader** index_reader); + IndexReader** index_reader, + BlockCacheLookupContext* lookup_context); - bool FullFilterKeyMayMatch( - const ReadOptions& read_options, FilterBlockReader* filter, - const Slice& user_key, const bool no_io, - const SliceTransform* prefix_extractor = nullptr) const; + bool FullFilterKeyMayMatch(const ReadOptions& read_options, + FilterBlockReader* filter, const Slice& user_key, + const bool no_io, + const SliceTransform* prefix_extractor, + BlockCacheLookupContext* lookup_context) const; - void FullFilterKeysMayMatch( - const ReadOptions& read_options, FilterBlockReader* filter, - MultiGetRange* range, const bool no_io, - const SliceTransform* prefix_extractor = nullptr) const; + void FullFilterKeysMayMatch(const ReadOptions& read_options, + FilterBlockReader* filter, MultiGetRange* range, + const bool no_io, + const SliceTransform* prefix_extractor, + BlockCacheLookupContext* lookup_context) const; static Status PrefetchTail( RandomAccessFileReader* file, uint64_t file_size, @@ -380,14 +391,16 @@ class BlockBasedTable : public TableReader { const SequenceNumber largest_seqno); Status ReadRangeDelBlock(FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, - const InternalKeyComparator& internal_comparator); + const InternalKeyComparator& internal_comparator, + BlockCacheLookupContext* lookup_context); Status ReadCompressionDictBlock( FilePrefetchBuffer* prefetch_buffer, std::unique_ptr* compression_dict_block) const; Status PrefetchIndexAndFilterBlocks( FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, BlockBasedTable* new_table, bool prefetch_all, - const BlockBasedTableOptions& table_options, const int level); + 
const BlockBasedTableOptions& table_options, const int level, + BlockCacheLookupContext* lookup_context); Status VerifyChecksumInMetaBlocks(InternalIteratorBase* index_iter); Status VerifyChecksumInBlocks(InternalIteratorBase* index_iter); @@ -583,7 +596,10 @@ class BlockBasedTableIterator : public InternalIteratorBase { block_type_(block_type), key_includes_seq_(key_includes_seq), index_key_is_full_(index_key_is_full), - for_compaction_(for_compaction) {} + for_compaction_(for_compaction), + lookup_context_(for_compaction + ? BlockCacheLookupCaller::kCompaction + : BlockCacheLookupCaller::kUserIterator) {} ~BlockBasedTableIterator() { delete index_iter_; } @@ -644,7 +660,7 @@ class BlockBasedTableIterator : public InternalIteratorBase { bool CheckPrefixMayMatch(const Slice& ikey) { if (check_filter_ && !table_->PrefixMayMatch(ikey, read_options_, prefix_extractor_, - need_upper_bound_check_)) { + need_upper_bound_check_, &lookup_context_)) { // TODO remember the iterator is invalidated because of prefix // match. This can avoid the upper level file iterator to falsely // believe the position is the end of the SST file and move to @@ -702,6 +718,7 @@ class BlockBasedTableIterator : public InternalIteratorBase { // If this iterator is created for compaction bool for_compaction_; BlockHandle prev_index_value_; + BlockCacheLookupContext lookup_context_; // All the below fields control iterator readahead static const size_t kInitAutoReadaheadSize = 8 * 1024; diff --git a/table/block_based/filter_block.h b/table/block_based/filter_block.h index 378cdacff..d54de5ae1 100644 --- a/table/block_based/filter_block.h +++ b/table/block_based/filter_block.h @@ -30,6 +30,7 @@ #include "rocksdb/table.h" #include "table/format.h" #include "table/multiget_context.h" +#include "trace_replay/block_cache_tracer.h" #include "util/hash.h" namespace rocksdb { @@ -99,18 +100,19 @@ class FilterBlockReader { */ virtual bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, - const bool no_io = false, - const Slice* const const_ikey_ptr = nullptr) = 0; + uint64_t block_offset, const bool no_io, + const Slice* const const_ikey_ptr, + BlockCacheLookupContext* context) = 0; virtual void KeysMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, - const bool no_io = false) { + uint64_t block_offset, const bool no_io, + BlockCacheLookupContext* context) { for (auto iter = range->begin(); iter != range->end(); ++iter) { const Slice ukey = iter->ukey; const Slice ikey = iter->ikey; - if (!KeyMayMatch(ukey, prefix_extractor, block_offset, no_io, &ikey)) { + if (!KeyMayMatch(ukey, prefix_extractor, block_offset, no_io, &ikey, + context)) { range->SkipKey(iter); } } @@ -121,19 +123,19 @@ class FilterBlockReader { */ virtual bool PrefixMayMatch(const Slice& prefix, const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, - const bool no_io = false, - const Slice* const const_ikey_ptr = nullptr) = 0; + uint64_t block_offset, const bool no_io, + const Slice* const const_ikey_ptr, + BlockCacheLookupContext* context) = 0; virtual void PrefixesMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, - const bool no_io = false) { + uint64_t block_offset, const bool no_io, + BlockCacheLookupContext* context) { for (auto iter = range->begin(); iter != range->end(); ++iter) { const Slice ukey = iter->ukey; const Slice ikey = iter->ikey; if 
(!KeyMayMatch(prefix_extractor->Transform(ukey), prefix_extractor, - block_offset, no_io, &ikey)) { + block_offset, no_io, &ikey, context)) { range->SkipKey(iter); } } @@ -156,13 +158,13 @@ class FilterBlockReader { virtual bool RangeMayExist( const Slice* /*iterate_upper_bound*/, const Slice& user_key, - const SliceTransform* prefix_extractor, - const Comparator* /*comparator*/, const Slice* const const_ikey_ptr, - bool* filter_checked, bool /*need_upper_bound_check*/) { + const SliceTransform* prefix_extractor, const Comparator* /*comparator*/, + const Slice* const const_ikey_ptr, bool* filter_checked, + bool /*need_upper_bound_check*/, BlockCacheLookupContext* context) { *filter_checked = true; Slice prefix = prefix_extractor->Transform(user_key); return PrefixMayMatch(prefix, prefix_extractor, kNotValid, false, - const_ikey_ptr); + const_ikey_ptr, context); } protected: diff --git a/table/block_based/full_filter_block.cc b/table/block_based/full_filter_block.cc index 56dc74c67..6d2b9d70a 100644 --- a/table/block_based/full_filter_block.cc +++ b/table/block_based/full_filter_block.cc @@ -124,7 +124,8 @@ FullFilterBlockReader::FullFilterBlockReader( bool FullFilterBlockReader::KeyMayMatch( const Slice& key, const SliceTransform* /*prefix_extractor*/, uint64_t block_offset, const bool /*no_io*/, - const Slice* const /*const_ikey_ptr*/) { + const Slice* const /*const_ikey_ptr*/, + BlockCacheLookupContext* /*context*/) { #ifdef NDEBUG (void)block_offset; #endif @@ -138,7 +139,8 @@ bool FullFilterBlockReader::KeyMayMatch( bool FullFilterBlockReader::PrefixMayMatch( const Slice& prefix, const SliceTransform* /* prefix_extractor */, uint64_t block_offset, const bool /*no_io*/, - const Slice* const /*const_ikey_ptr*/) { + const Slice* const /*const_ikey_ptr*/, + BlockCacheLookupContext* /*context*/) { #ifdef NDEBUG (void)block_offset; #endif @@ -161,7 +163,8 @@ bool FullFilterBlockReader::MayMatch(const Slice& entry) { void FullFilterBlockReader::KeysMayMatch( MultiGetRange* range, const SliceTransform* /*prefix_extractor*/, - uint64_t block_offset, const bool /*no_io*/) { + uint64_t block_offset, const bool /*no_io*/, + BlockCacheLookupContext* /*context*/) { #ifdef NDEBUG (void)range; (void)block_offset; @@ -177,7 +180,8 @@ void FullFilterBlockReader::KeysMayMatch( void FullFilterBlockReader::PrefixesMayMatch( MultiGetRange* range, const SliceTransform* /* prefix_extractor */, - uint64_t block_offset, const bool /*no_io*/) { + uint64_t block_offset, const bool /*no_io*/, + BlockCacheLookupContext* /*context*/) { #ifdef NDEBUG (void)range; (void)block_offset; @@ -224,10 +228,11 @@ size_t FullFilterBlockReader::ApproximateMemoryUsage() const { return usage; } -bool FullFilterBlockReader::RangeMayExist(const Slice* iterate_upper_bound, - const Slice& user_key, const SliceTransform* prefix_extractor, - const Comparator* comparator, const Slice* const const_ikey_ptr, - bool* filter_checked, bool need_upper_bound_check) { +bool FullFilterBlockReader::RangeMayExist( + const Slice* iterate_upper_bound, const Slice& user_key, + const SliceTransform* prefix_extractor, const Comparator* comparator, + const Slice* const const_ikey_ptr, bool* filter_checked, + bool need_upper_bound_check, BlockCacheLookupContext* context) { if (!prefix_extractor || !prefix_extractor->InDomain(user_key)) { *filter_checked = false; return true; @@ -240,7 +245,7 @@ bool FullFilterBlockReader::RangeMayExist(const Slice* iterate_upper_bound, } else { *filter_checked = true; return PrefixMayMatch(prefix, prefix_extractor, 
kNotValid, false, - const_ikey_ptr); + const_ikey_ptr, context); } } diff --git a/table/block_based/full_filter_block.h b/table/block_based/full_filter_block.h index 61df028c9..99e5299b3 100644 --- a/table/block_based/full_filter_block.h +++ b/table/block_based/full_filter_block.h @@ -95,35 +95,38 @@ class FullFilterBlockReader : public FilterBlockReader { // bits_reader is created in filter_policy, it should be passed in here // directly. and be deleted here - ~FullFilterBlockReader() {} + ~FullFilterBlockReader() override {} + + bool IsBlockBased() override { return false; } + + bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, + uint64_t block_offset, const bool no_io, + const Slice* const const_ikey_ptr, + BlockCacheLookupContext* context) override; + + bool PrefixMayMatch(const Slice& prefix, + const SliceTransform* prefix_extractor, + uint64_t block_offset, const bool no_io, + const Slice* const const_ikey_ptr, + BlockCacheLookupContext* context) override; + + void KeysMayMatch(MultiGetRange* range, + const SliceTransform* prefix_extractor, + uint64_t block_offset, const bool no_io, + BlockCacheLookupContext* context) override; + + void PrefixesMayMatch(MultiGetRange* range, + const SliceTransform* prefix_extractor, + uint64_t block_offset, const bool no_io, + BlockCacheLookupContext* context) override; + size_t ApproximateMemoryUsage() const override; + bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key, + const SliceTransform* prefix_extractor, + const Comparator* comparator, + const Slice* const const_ikey_ptr, bool* filter_checked, + bool need_upper_bound_check, + BlockCacheLookupContext* context) override; - virtual bool IsBlockBased() override { return false; } - - virtual bool KeyMayMatch( - const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, const bool no_io = false, - const Slice* const const_ikey_ptr = nullptr) override; - - virtual bool PrefixMayMatch( - const Slice& prefix, const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, const bool no_io = false, - const Slice* const const_ikey_ptr = nullptr) override; - - virtual void KeysMayMatch(MultiGetRange* range, - const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, - const bool no_io = false) override; - - virtual void PrefixesMayMatch(MultiGetRange* range, - const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, - const bool no_io = false) override; - virtual size_t ApproximateMemoryUsage() const override; - virtual bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key, - const SliceTransform* prefix_extractor, - const Comparator* comparator, - const Slice* const const_ikey_ptr, bool* filter_checked, - bool need_upper_bound_check) override; private: const SliceTransform* prefix_extractor_; Slice contents_; diff --git a/table/block_based/full_filter_block_test.cc b/table/block_based/full_filter_block_test.cc index 82c43b34e..57ff158c5 100644 --- a/table/block_based/full_filter_block_test.cc +++ b/table/block_based/full_filter_block_test.cc @@ -112,7 +112,9 @@ TEST_F(PluginFullFilterBlockTest, PluginEmptyBuilder) { nullptr, true, block, table_options_.filter_policy->GetFilterBitsReader(block), nullptr); // Remain same symantic with blockbased filter - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, 
/*context=*/nullptr)); } TEST_F(PluginFullFilterBlockTest, PluginSingleChunk) { @@ -127,13 +129,27 @@ TEST_F(PluginFullFilterBlockTest, PluginSingleChunk) { FullFilterBlockReader reader( nullptr, true, block, table_options_.filter_policy->GetFilterBitsReader(block), nullptr); - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("bar", nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("box", nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("hello", nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch("missing", nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch("other", nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); } class FullFilterBlockTest : public testing::Test { @@ -157,7 +173,9 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) { nullptr, true, block, table_options_.filter_policy->GetFilterBitsReader(block), nullptr); // Remain same symantic with blockbased filter - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); } TEST_F(FullFilterBlockTest, DuplicateEntries) { @@ -207,13 +225,27 @@ TEST_F(FullFilterBlockTest, SingleChunk) { FullFilterBlockReader reader( nullptr, true, block, table_options_.filter_policy->GetFilterBitsReader(block), nullptr); - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("bar", nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("box", nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("hello", nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch("missing", nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch("other", nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "box", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + 
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(reader.KeyMayMatch( + "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch( + "other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*context=*/nullptr)); } } // namespace rocksdb diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 7874ce187..e80085dfb 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -162,8 +162,8 @@ PartitionedFilterBlockReader::~PartitionedFilterBlockReader() { bool PartitionedFilterBlockReader::KeyMayMatch( const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - const Slice* const const_ikey_ptr) { + uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, + BlockCacheLookupContext* context) { assert(const_ikey_ptr != nullptr); assert(block_offset == kNotValid); if (!whole_key_filtering_) { @@ -177,19 +177,20 @@ bool PartitionedFilterBlockReader::KeyMayMatch( return false; } auto filter_partition = - GetFilterPartition(nullptr /* prefetch_buffer */, filter_handle, no_io, - prefix_extractor); + GetFilterPartition(/*prefetch_buffer=*/nullptr, filter_handle, no_io, + prefix_extractor, context); if (UNLIKELY(!filter_partition.GetValue())) { return true; } - return filter_partition.GetValue()->KeyMayMatch(key, prefix_extractor, - block_offset, no_io); + return filter_partition.GetValue()->KeyMayMatch( + key, prefix_extractor, block_offset, no_io, /*const_ikey_ptr=*/nullptr, + context); } bool PartitionedFilterBlockReader::PrefixMayMatch( const Slice& prefix, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - const Slice* const const_ikey_ptr) { + uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, + BlockCacheLookupContext* context) { #ifdef NDEBUG (void)block_offset; #endif @@ -206,13 +207,14 @@ bool PartitionedFilterBlockReader::PrefixMayMatch( return false; } auto filter_partition = - GetFilterPartition(nullptr /* prefetch_buffer */, filter_handle, no_io, - prefix_extractor); + GetFilterPartition(/*prefetch_buffer=*/nullptr, filter_handle, no_io, + prefix_extractor, context); if (UNLIKELY(!filter_partition.GetValue())) { return true; } - return filter_partition.GetValue()->PrefixMayMatch(prefix, prefix_extractor, - kNotValid, no_io); + return filter_partition.GetValue()->PrefixMayMatch( + prefix, prefix_extractor, kNotValid, no_io, /*const_ikey_ptr=*/nullptr, + context); } BlockHandle PartitionedFilterBlockReader::GetFilterPartitionHandle( @@ -234,7 +236,8 @@ BlockHandle PartitionedFilterBlockReader::GetFilterPartitionHandle( CachableEntry PartitionedFilterBlockReader::GetFilterPartition( FilePrefetchBuffer* prefetch_buffer, BlockHandle& fltr_blk_handle, - const bool no_io, const SliceTransform* prefix_extractor) { + const bool no_io, const SliceTransform* prefix_extractor, + BlockCacheLookupContext* context) { const bool is_a_filter_partition = true; auto block_cache = table_->rep_->table_options.block_cache.get(); if (LIKELY(block_cache != nullptr)) { @@ -247,9 +250,10 @@ 
PartitionedFilterBlockReader::GetFilterPartition( nullptr /* cache_handle */, false /* own_value */}; } } - return table_->GetFilter(/*prefetch_buffer*/ nullptr, fltr_blk_handle, + return table_->GetFilter(/*prefetch_buffer=*/nullptr, fltr_blk_handle, is_a_filter_partition, no_io, - /* get_context */ nullptr, prefix_extractor); + /*get_context=*/nullptr, context, + prefix_extractor); } else { auto filter = table_->ReadFilter(prefetch_buffer, fltr_blk_handle, is_a_filter_partition, prefix_extractor); @@ -273,6 +277,7 @@ size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const { void PartitionedFilterBlockReader::CacheDependencies( bool pin, const SliceTransform* prefix_extractor) { // Before read partitions, prefetch them to avoid lots of IOs + BlockCacheLookupContext lookup_context{BlockCacheLookupCaller::kPrefetch}; IndexBlockIter biter; Statistics* kNullStats = nullptr; idx_on_fltr_blk_->NewIterator( @@ -304,7 +309,7 @@ void PartitionedFilterBlockReader::CacheDependencies( const bool is_a_filter_partition = true; auto filter = table_->GetFilter( prefetch_buffer.get(), handle, is_a_filter_partition, !no_io, - /* get_context */ nullptr, prefix_extractor); + /*get_context=*/nullptr, &lookup_context, prefix_extractor); if (LIKELY(filter.IsCached())) { if (pin) { filter_map_[handle.offset()] = std::move(filter); diff --git a/table/block_based/partitioned_filter_block.h b/table/block_based/partitioned_filter_block.h index 6860bf82f..4b0fb523d 100644 --- a/table/block_based/partitioned_filter_block.h +++ b/table/block_based/partitioned_filter_block.h @@ -77,26 +77,28 @@ class PartitionedFilterBlockReader : public FilterBlockReader { Statistics* stats, const InternalKeyComparator comparator, const BlockBasedTable* table, const bool index_key_includes_seq, const bool index_value_is_full); - virtual ~PartitionedFilterBlockReader(); + ~PartitionedFilterBlockReader() override; - virtual bool IsBlockBased() override { return false; } - virtual bool KeyMayMatch( - const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, const bool no_io = false, - const Slice* const const_ikey_ptr = nullptr) override; - virtual bool PrefixMayMatch( - const Slice& prefix, const SliceTransform* prefix_extractor, - uint64_t block_offset = kNotValid, const bool no_io = false, - const Slice* const const_ikey_ptr = nullptr) override; - virtual size_t ApproximateMemoryUsage() const override; + bool IsBlockBased() override { return false; } + bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, + uint64_t block_offset, const bool no_io, + const Slice* const const_ikey_ptr, + BlockCacheLookupContext* context) override; + bool PrefixMayMatch(const Slice& prefix, + const SliceTransform* prefix_extractor, + uint64_t block_offset, const bool no_io, + const Slice* const const_ikey_ptr, + BlockCacheLookupContext* context) override; + size_t ApproximateMemoryUsage() const override; private: BlockHandle GetFilterPartitionHandle(const Slice& entry); CachableEntry GetFilterPartition( FilePrefetchBuffer* prefetch_buffer, BlockHandle& handle, - const bool no_io, const SliceTransform* prefix_extractor = nullptr); - virtual void CacheDependencies( - bool bin, const SliceTransform* prefix_extractor) override; + const bool no_io, const SliceTransform* prefix_extractor, + BlockCacheLookupContext* context); + void CacheDependencies(bool pin, + const SliceTransform* prefix_extractor) override; const SliceTransform* prefix_extractor_; std::unique_ptr idx_on_fltr_blk_; diff
--git a/table/block_based/partitioned_filter_block_test.cc b/table/block_based/partitioned_filter_block_test.cc index 70e5bbd3b..5af703496 100644 --- a/table/block_based/partitioned_filter_block_test.cc +++ b/table/block_based/partitioned_filter_block_test.cc @@ -31,6 +31,7 @@ class MockedBlockBasedTable : public BlockBasedTable { CachableEntry GetFilter( FilePrefetchBuffer*, const BlockHandle& filter_blk_handle, const bool /* unused */, bool /* unused */, GetContext* /* unused */, + BlockCacheLookupContext* /*context*/, const SliceTransform* prefix_extractor) const override { Slice slice = slices[filter_blk_handle.offset()]; auto obj = new FullFilterBlockReader( @@ -168,14 +169,15 @@ class PartitionedFilterBlockTest auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io, - &ikey_slice)); + &ikey_slice, /*context=*/nullptr)); } { // querying a key twice auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); ASSERT_TRUE(reader->KeyMayMatch(keys[0], prefix_extractor, kNotValid, - !no_io, &ikey_slice)); + !no_io, &ikey_slice, + /*context=*/nullptr)); } // querying missing keys for (auto key : missing_keys) { @@ -183,11 +185,13 @@ class PartitionedFilterBlockTest const Slice ikey_slice = Slice(*ikey.rep()); if (empty) { ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, - !no_io, &ikey_slice)); + !no_io, &ikey_slice, + /*context=*/nullptr)); } else { // assuming a good hash function ASSERT_FALSE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, - !no_io, &ikey_slice)); + !no_io, &ikey_slice, + /*context=*/nullptr)); } } } @@ -335,9 +339,9 @@ TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { for (auto key : pkeys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key), - prefix_extractor.get(), kNotValid, - false /*no_io*/, &ikey_slice)); + ASSERT_TRUE(reader->PrefixMayMatch( + prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid, + /*no_io=*/false, &ikey_slice, /*context=*/nullptr)); } } diff --git a/table/cuckoo/cuckoo_table_reader.h b/table/cuckoo/cuckoo_table_reader.h index b37d46373..0080a76e1 100644 --- a/table/cuckoo/cuckoo_table_reader.h +++ b/table/cuckoo/cuckoo_table_reader.h @@ -56,7 +56,10 @@ class CuckooTableReader: public TableReader { size_t ApproximateMemoryUsage() const override; // Following methods are not implemented for Cuckoo Table Reader - uint64_t ApproximateOffsetOf(const Slice& /*key*/) override { return 0; } + uint64_t ApproximateOffsetOf(const Slice& /*key*/, + bool /*for_compaction*/ = false) override { + return 0; + } void SetupForCompaction() override {} // End of methods not implemented. 
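An aside on the ApproximateOffsetOf churn in the hunk above and in the mock_table, plain_table, and table_reader hunks that follow: the base interface and the readers that ignore the flag default for_compaction to false, so pre-patch call sites keep compiling unchanged, while BlockBasedTable's override takes the flag explicitly and maps it to a caller tag before any block is touched. Below is a minimal, self-contained sketch of that pattern under stated assumptions: Slice is simplified to std::string, and SketchReader/SketchBlockBasedTable are hypothetical stand-ins, not RocksDB classes.

#include <cstdint>
#include <string>

// Simplified stand-ins mirroring the types this patch adds/uses.
using Slice = std::string;  // hypothetical; real code uses rocksdb::Slice

enum BlockCacheLookupCaller : char {
  kUserApproximateSize = 4,
  kCompaction = 6,
};

struct BlockCacheLookupContext {
  explicit BlockCacheLookupContext(BlockCacheLookupCaller _caller)
      : caller(_caller) {}
  const BlockCacheLookupCaller caller;
};

class SketchReader {  // hypothetical stand-in for TableReader
 public:
  virtual ~SketchReader() = default;
  // The defaulted flag keeps pre-patch call sites such as
  // reader->ApproximateOffsetOf(key) compiling unchanged.
  virtual uint64_t ApproximateOffsetOf(const Slice& key,
                                       bool for_compaction = false) = 0;
};

class SketchBlockBasedTable : public SketchReader {
 public:
  uint64_t ApproximateOffsetOf(const Slice& key,
                               bool for_compaction) override {
    // Map the flag to a caller tag so every cache access made while
    // estimating the offset can be attributed to the right caller.
    BlockCacheLookupContext context(for_compaction ? kCompaction
                                                   : kUserApproximateSize);
    (void)key;
    (void)context;  // a real reader would pass &context down the stack
    return 0;       // sketch only; no index to consult here
  }
};

Defaulting the flag only on the interface confines the churn to the one reader that actually consumes it, which is why the cuckoo, mock, and plain readers above need nothing beyond a signature touch-up.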
diff --git a/table/mock_table.h b/table/mock_table.h index 42e28266d..005de1c3d 100644 --- a/table/mock_table.h +++ b/table/mock_table.h @@ -50,9 +50,12 @@ class MockTableReader : public TableReader { GetContext* get_context, const SliceTransform* prefix_extractor, bool skip_filters = false) override; - uint64_t ApproximateOffsetOf(const Slice& /*key*/) override { return 0; } + uint64_t ApproximateOffsetOf(const Slice& /*key*/, + bool /*for_compaction*/ = false) override { + return 0; + } - virtual size_t ApproximateMemoryUsage() const override { return 0; } + size_t ApproximateMemoryUsage() const override { return 0; } void SetupForCompaction() override {} diff --git a/table/plain/plain_table_reader.cc b/table/plain/plain_table_reader.cc index 38852059b..15f7be1c2 100644 --- a/table/plain/plain_table_reader.cc +++ b/table/plain/plain_table_reader.cc @@ -613,7 +613,8 @@ Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target, return Status::OK(); } -uint64_t PlainTableReader::ApproximateOffsetOf(const Slice& /*key*/) { +uint64_t PlainTableReader::ApproximateOffsetOf(const Slice& /*key*/, + bool /*for_compaction*/) { return 0; } diff --git a/table/plain/plain_table_reader.h b/table/plain/plain_table_reader.h index 6c1c12ab8..774e2eb36 100644 --- a/table/plain/plain_table_reader.h +++ b/table/plain/plain_table_reader.h @@ -89,7 +89,8 @@ class PlainTableReader: public TableReader { GetContext* get_context, const SliceTransform* prefix_extractor, bool skip_filters = false) override; - uint64_t ApproximateOffsetOf(const Slice& key) override; + uint64_t ApproximateOffsetOf(const Slice& key, + bool for_compaction = false) override; uint32_t GetIndexSize() const { return index_.GetIndexSize(); } void SetupForCompaction() override; diff --git a/table/table_reader.h b/table/table_reader.h index 037dbc338..bf3289818 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -61,7 +61,8 @@ class TableReader { // bytes, and so includes effects like compression of the underlying data. // E.g., the approximate offset of the last key in the table will // be close to the file length. - virtual uint64_t ApproximateOffsetOf(const Slice& key) = 0; + virtual uint64_t ApproximateOffsetOf(const Slice& key, + bool for_compaction = false) = 0; // Set up the table for Compaction. Might change some parameters with // posix_fadvise diff --git a/trace_replay/block_cache_tracer.h b/trace_replay/block_cache_tracer.h index 7b3c82e2b..5fd14cbf1 100644 --- a/trace_replay/block_cache_tracer.h +++ b/trace_replay/block_cache_tracer.h @@ -17,12 +17,38 @@ enum BlockCacheLookupCaller : char { kUserGet = 1, kUserMGet = 2, kUserIterator = 3, - kPrefetch = 4, - kCompaction = 5, + kUserApproximateSize = 4, + kPrefetch = 5, + kCompaction = 6, // All callers should be added before kMaxBlockCacheLookupCaller. kMaxBlockCacheLookupCaller }; +// Lookup context for tracing block cache accesses. +// We trace block accesses at five places: +// 1. BlockBasedTable::GetFilter +// 2. BlockBasedTable::GetUncompressedDict. +// 3. BlockBasedTable::MaybeReadAndLoadToCache. (To trace access on data, index, +// and range deletion block.) +// 4. BlockBasedTable::Get. (To trace the referenced key and whether the +// referenced key exists in a fetched data block.) +// 5. BlockBasedTable::MultiGet. (To trace the referenced key and whether the +// referenced key exists in a fetched data block.) +// The context is created at: +// 1. BlockBasedTable::Get. (kUserGet) +// 2. BlockBasedTable::MultiGet. (kUserMGet) +// 3. 
BlockBasedTable::NewIterator. (either kUserIterator, kCompaction, or + // external SST ingestion calls this function.) + // 4. BlockBasedTable::Open. (kPrefetch) + // 5. Index/Filter::CacheDependencies. (kPrefetch) + // 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or + // kUserApproximateSize). +struct BlockCacheLookupContext { + explicit BlockCacheLookupContext(BlockCacheLookupCaller _caller) + : caller(_caller) {} + const BlockCacheLookupCaller caller; +}; + enum Boolean : char { kTrue = 1, kFalse = 0 }; struct BlockCacheTraceRecord {
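To tie the pieces together, here is a hedged, end-to-end sketch of the threading convention the diff establishes: one stack-allocated context per top-level operation, passed down by pointer, with nullptr meaning the access is deliberately untraced (as in VerifyChecksum above). Only the enum and struct mirror the additions above; MaybeTraceAccess and this one-function Get are invented for illustration and are not RocksDB APIs.

#include <iostream>

enum BlockCacheLookupCaller : char {
  kUserGet = 1,
  kUserMGet = 2,
  kUserIterator = 3,
  kUserApproximateSize = 4,
  kPrefetch = 5,
  kCompaction = 6,
};

struct BlockCacheLookupContext {
  explicit BlockCacheLookupContext(BlockCacheLookupCaller _caller)
      : caller(_caller) {}
  const BlockCacheLookupCaller caller;
};

// Every block access receives the context by pointer; nullptr means the
// call site opted out of tracing.
void MaybeTraceAccess(const char* block_kind,
                      BlockCacheLookupContext* context) {
  if (context == nullptr) {
    return;  // untraced access
  }
  std::cout << block_kind << " block accessed by caller "
            << static_cast<int>(context->caller) << "\n";
}

// Entry-point pattern: one context per top-level operation, created
// once and threaded through the filter, index, and data block reads.
void Get(const char* /*key*/) {
  BlockCacheLookupContext lookup_context{kUserGet};
  MaybeTraceAccess("filter", &lookup_context);
  MaybeTraceAccess("index", &lookup_context);
  MaybeTraceAccess("data", &lookup_context);
}

int main() {
  Get("foo");
  return 0;
}

Running the sketch prints one line per simulated block access, each tagged with caller 1 (kUserGet), which is the shape of the attribution the real tracer will record once it is integrated (TODO 2 in the summary).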