From 6aef1a05d65d10731fada543ecab838c51d01156 Mon Sep 17 00:00:00 2001
From: Andrew Kryczka
Date: Fri, 17 Feb 2023 09:03:37 -0800
Subject: [PATCH] Use CacheDependencies() at start of ApproximateKeyAnchors() (#11230)

Summary:
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11230

Test Plan:
- setup command: `$ ./db_bench -benchmarks=fillrandom,compact -compression_type=none -num=1000000 -write_buffer_size=4194304 -target_file_size_base=4194304 -use_direct_io_for_flush_and_compaction=true -partition_index_and_filters=true -bloom_bits=10 -metadata_block_size=1024`
- measure small read count bucketed by size: `$ strace -fye pread64 ./db_bench.ctrl -use_existing_db=true -benchmarks=compact -compaction_readahead_size=4194304 -compression_type=none -num=1000000 -write_buffer_size=4194304 -target_file_size_base=4194304 -use_direct_io_for_flush_and_compaction=true -partition_index_and_filters=true -bloom_bits=10 -metadata_block_size=1024 -subcompactions=4 -cache_size=1048576000 2>&1 >/dev/null | awk '/= [0-9]+$/{print "[", int($NF / 1024), "KB,", int(1 + $NF / 1024), "KB)"}' | sort -n -k 2 | uniq -c | head -3`
- before:
```
1119 [ 0 KB, 1 KB)
1 [ 6 KB, 7 KB)
2 [ 7 KB, 8 KB)
```
- after:
```
242 [ 0 KB, 1 KB)
1 [ 6 KB, 7 KB)
2 [ 7 KB, 8 KB)
```

Reviewed By: pdillinger

Differential Revision: D43388507

Pulled By: ajkr

fbshipit-source-id: a02413c9f615b00784700646825a9870ee10f3a7
---
 table/block_based/block_based_table_reader.cc | 10 ++++++++++
 table/block_based/partitioned_index_reader.cc | 5 +++++
 2 files changed, 15 insertions(+)

diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index f79636b0e..eff4df56d 100644
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc
@@ -1929,6 +1929,16 @@ Status BlockBasedTable::ApproximateKeyAnchors(const ReadOptions& read_options,
   // likely not to be a problem. We are compacting the whole file, so all
   // keys will be read out anyway. An extra read to index block might be
   // a small share of the overhead. We can try to optimize if needed.
+  //
+  // `CacheDependencies()` brings all the blocks into cache using one I/O. That
+  // way the full index scan usually finds the index data it is looking for in
+  // cache rather than doing an I/O for each "dependency" (partition).
+  Status s =
+      rep_->index_reader->CacheDependencies(read_options, false /* pin */);
+  if (!s.ok()) {
+    return s;
+  }
+
   IndexBlockIter iiter_on_stack;
   auto iiter = NewIndexIterator(
       read_options, /*disable_prefix_seek=*/false, &iiter_on_stack,
diff --git a/table/block_based/partitioned_index_reader.cc b/table/block_based/partitioned_index_reader.cc
index 705223c90..dbe8b2bd4 100644
--- a/table/block_based/partitioned_index_reader.cc
+++ b/table/block_based/partitioned_index_reader.cc
@@ -113,6 +113,11 @@ InternalIteratorBase* PartitionIndexReader::NewIterator(
 }
 Status PartitionIndexReader::CacheDependencies(const ReadOptions& ro,
                                                bool pin) {
+  if (!partition_map_.empty()) {
+    // The dependencies are already cached since `partition_map_` is filled in
+    // an all-or-nothing manner.
+    return Status::OK();
+  }
   // Before read partitions, prefetch them to avoid lots of IOs
   BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch};
   const BlockBasedTable::Rep* rep = table()->rep_;