From d92a59b6f2c0c3cb3420504d26610edd5c7d71b4 Mon Sep 17 00:00:00 2001
From: Levi Tamasi
Date: Wed, 14 Aug 2019 18:13:14 -0700
Subject: [PATCH] Fix regression affecting partitioned indexes/filters when
 cache_index_and_filter_blocks is false (#5705)

Summary:
PR https://github.com/facebook/rocksdb/issues/5298 (and subsequent related patches) unintentionally changed the semantics of cache_index_and_filter_blocks: historically, this option only affected the main index/filter block; with the changes, it affects index/filter partitions as well. This can cause performance issues when cache_index_and_filter_blocks is false, since in that case partitions are neither cached nor preloaded (i.e. they are loaded on demand upon each access). The patch reverts to the earlier behavior, that is, partitions are cached similarly to data blocks regardless of the value of the above option.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/5705
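
For illustration only (not part of the patch), the sketch below condenses the gating change in BlockBasedTable::RetrieveBlock: whether a block goes through the block cache is now decided by an explicit use_cache argument supplied by the caller, rather than derived inside RetrieveBlock from cache_index_and_filter_blocks and the block type. Partition readers pass true (so partitions are cached like data blocks), while the readers for the top-level index/filter/dictionary blocks consult the option. BlockType, UseCacheBefore and UseCacheAfter are simplified stand-ins for the real RocksDB types and code paths.

  #include <cassert>

  // Simplified stand-in for rocksdb::BlockType.
  enum class BlockType { kData, kIndex, kFilter, kCompressionDictionary };

  // Old behavior: RetrieveBlock itself decided whether to use the block cache,
  // so index/filter partitions bypassed the cache whenever
  // cache_index_and_filter_blocks was false.
  bool UseCacheBefore(bool cache_index_and_filter_blocks, BlockType block_type) {
    return cache_index_and_filter_blocks ||
           (block_type != BlockType::kFilter &&
            block_type != BlockType::kCompressionDictionary &&
            block_type != BlockType::kIndex);
  }

  // New behavior: the caller decides. Partition readers pass use_cache == true;
  // only the top-level index/filter/dictionary readers pass the value of
  // cache_index_and_filter_blocks (via cache_index_blocks() and friends).
  bool UseCacheAfter(bool use_cache) { return use_cache; }

  int main() {
    const bool cache_index_and_filter_blocks = false;
    // With the option off, the old logic skipped the cache for a filter
    // partition; the new logic caches it because the partition reader
    // requests use_cache == true.
    assert(!UseCacheBefore(cache_index_and_filter_blocks, BlockType::kFilter));
    assert(UseCacheAfter(/* use_cache */ true));
    return 0;
  }
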
Test Plan:
make check
./db_bench -benchmarks=fillrandom --statistics --stats_interval_seconds=1 --duration=30 --num=500000000 --bloom_bits=20 --partition_index_and_filters=true --cache_index_and_filter_blocks=false
./db_bench -benchmarks=readrandom --use_existing_db --statistics --stats_interval_seconds=1 --duration=10 --num=500000000 --bloom_bits=20 --partition_index_and_filters=true --cache_index_and_filter_blocks=false --cache_size=8000000000

Relevant statistics from the readrandom benchmark with the old code:

rocksdb.block.cache.index.miss COUNT : 0
rocksdb.block.cache.index.hit COUNT : 0
rocksdb.block.cache.index.add COUNT : 0
rocksdb.block.cache.index.bytes.insert COUNT : 0
rocksdb.block.cache.index.bytes.evict COUNT : 0
rocksdb.block.cache.filter.miss COUNT : 0
rocksdb.block.cache.filter.hit COUNT : 0
rocksdb.block.cache.filter.add COUNT : 0
rocksdb.block.cache.filter.bytes.insert COUNT : 0
rocksdb.block.cache.filter.bytes.evict COUNT : 0

With the new code:

rocksdb.block.cache.index.miss COUNT : 2500
rocksdb.block.cache.index.hit COUNT : 42696
rocksdb.block.cache.index.add COUNT : 2500
rocksdb.block.cache.index.bytes.insert COUNT : 4050048
rocksdb.block.cache.index.bytes.evict COUNT : 0
rocksdb.block.cache.filter.miss COUNT : 2500
rocksdb.block.cache.filter.hit COUNT : 4550493
rocksdb.block.cache.filter.add COUNT : 2500
rocksdb.block.cache.filter.bytes.insert COUNT : 10331040
rocksdb.block.cache.filter.bytes.evict COUNT : 0

Differential Revision: D16817382

Pulled By: ltamasi

fbshipit-source-id: 28a516b0da1f041a03313e0b70b28cf5cf205d00
---
 table/block_based/block_based_filter_block.cc |  4 +-
 table/block_based/block_based_table_reader.cc | 47 +++++++++++--------
 table/block_based/block_based_table_reader.h  |  2 +-
 .../block_based/filter_block_reader_common.cc | 16 +++++--
 .../block_based/filter_block_reader_common.h  |  3 +-
 table/block_based/full_filter_block.cc        |  4 +-
 table/block_based/partitioned_filter_block.cc |  7 +--
 .../block_based/uncompression_dict_reader.cc  | 22 ++++++---
 table/block_based/uncompression_dict_reader.h |  4 +-
 9 files changed, 70 insertions(+), 39 deletions(-)

diff --git a/table/block_based/block_based_filter_block.cc b/table/block_based/block_based_filter_block.cc
index 5585b8441..319c5bf6d 100644
--- a/table/block_based/block_based_filter_block.cc
+++ b/table/block_based/block_based_filter_block.cc
@@ -181,8 +181,8 @@ std::unique_ptr BlockBasedFilterBlockReader::Create(
   CachableEntry filter_block;
   if (prefetch || !use_cache) {
     const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
-                                     nullptr /* get_context */, lookup_context,
-                                     &filter_block);
+                                     use_cache, nullptr /* get_context */,
+                                     lookup_context, &filter_block);
     if (!s.ok()) {
       return std::unique_ptr();
     }
diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index 314763ec3..bb12188d3 100644
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc
@@ -208,7 +208,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader {
  protected:
   static Status ReadIndexBlock(const BlockBasedTable* table,
                                FilePrefetchBuffer* prefetch_buffer,
-                               const ReadOptions& read_options,
+                               const ReadOptions& read_options, bool use_cache,
                                GetContext* get_context,
                                BlockCacheLookupContext* lookup_context,
                                CachableEntry* index_block);
@@ -240,6 +240,12 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader {
     return table_->get_rep()->index_value_is_full;
   }

+  bool cache_index_blocks() const {
+    assert(table_ != nullptr);
+    assert(table_->get_rep() != nullptr);
+    return table_->get_rep()->table_options.cache_index_and_filter_blocks;
+  }
+
   Status GetOrReadIndexBlock(bool no_io, GetContext* get_context,
                              BlockCacheLookupContext* lookup_context,
                              CachableEntry* index_block) const;
@@ -258,7 +264,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader {

 Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock(
     const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-    const ReadOptions& read_options, GetContext* get_context,
+    const ReadOptions& read_options, bool use_cache, GetContext* get_context,
     BlockCacheLookupContext* lookup_context,
     CachableEntry* index_block) {
   PERF_TIMER_GUARD(read_index_block_nanos);
@@ -273,7 +279,7 @@ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock(
   const Status s = table->RetrieveBlock(
       prefetch_buffer, read_options, rep->footer.index_handle(),
       UncompressionDict::GetEmptyDict(), index_block, BlockType::kIndex,
-      get_context, lookup_context);
+      get_context, lookup_context, /* for_compaction */ false, use_cache);

   return s;
 }
@@ -295,7 +301,8 @@ Status BlockBasedTable::IndexReaderCommon::GetOrReadIndexBlock(
   }

   return ReadIndexBlock(table_, /*prefetch_buffer=*/nullptr, read_options,
-                        get_context, lookup_context, index_block);
+                        cache_index_blocks(), get_context, lookup_context,
+                        index_block);
 }

 // Index that allows binary search lookup in a two-level index structure.
@@ -318,7 +325,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
     CachableEntry index_block;
     if (prefetch || !use_cache) {
       const Status s =
-          ReadIndexBlock(table, prefetch_buffer, ReadOptions(),
+          ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
                          /*get_context=*/nullptr, lookup_context, &index_block);
       if (!s.ok()) {
         return s;
@@ -509,7 +516,7 @@ class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon {
     CachableEntry index_block;
     if (prefetch || !use_cache) {
       const Status s =
-          ReadIndexBlock(table, prefetch_buffer, ReadOptions(),
+          ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
                          /*get_context=*/nullptr, lookup_context, &index_block);
       if (!s.ok()) {
         return s;
@@ -593,7 +600,7 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon {
     CachableEntry index_block;
     if (prefetch || !use_cache) {
       const Status s =
-          ReadIndexBlock(table, prefetch_buffer, ReadOptions(),
+          ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
                          /*get_context=*/nullptr, lookup_context, &index_block);
       if (!s.ok()) {
         return s;
@@ -1915,7 +1922,8 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(

   CachableEntry block;
   s = RetrieveBlock(prefetch_buffer, ro, handle, uncompression_dict, &block,
-                    block_type, get_context, lookup_context, for_compaction);
+                    block_type, get_context, lookup_context, for_compaction,
+                    /* use_cache */ true);

   if (!s.ok()) {
     assert(block.IsEmpty());
@@ -2078,8 +2086,10 @@ Status BlockBasedTable::GetDataBlockFromCache(
     GetContext* get_context) const {
   BlockCacheLookupContext lookup_data_block_context(
       TableReaderCaller::kUserMultiGet);
+  assert(block_type == BlockType::kData);
   Status s = RetrieveBlock(nullptr, ro, handle, uncompression_dict, block,
-                           block_type, get_context, &lookup_data_block_context);
+                           block_type, get_context, &lookup_data_block_context,
+                           /* for_compaction */ false, /* use_cache */ true);
   if (s.IsIncomplete()) {
     s = Status::OK();
   }
@@ -2289,9 +2299,11 @@ void BlockBasedTable::MaybeLoadBlocksToCache(
         continue;
       }

-      (*statuses)[idx_in_batch] = RetrieveBlock(nullptr, options, handle,
-          uncompression_dict, &(*results)[idx_in_batch], BlockType::kData,
-          mget_iter->get_context, &lookup_data_block_context);
+      (*statuses)[idx_in_batch] =
+          RetrieveBlock(nullptr, options, handle, uncompression_dict,
+                        &(*results)[idx_in_batch], BlockType::kData,
+                        mget_iter->get_context, &lookup_data_block_context,
+                        /* for_compaction */ false, /* use_cache */ true);
     }
     return;
   }
@@ -2418,15 +2430,12 @@ Status BlockBasedTable::RetrieveBlock(
     const BlockHandle& handle, const UncompressionDict& uncompression_dict,
     CachableEntry* block_entry, BlockType block_type,
     GetContext* get_context, BlockCacheLookupContext* lookup_context,
-    bool for_compaction) const {
+    bool for_compaction, bool use_cache) const {
   assert(block_entry);
   assert(block_entry->IsEmpty());

   Status s;
-  if (rep_->table_options.cache_index_and_filter_blocks ||
-      (block_type != BlockType::kFilter &&
-       block_type != BlockType::kCompressionDictionary &&
-       block_type != BlockType::kIndex)) {
+  if (use_cache) {
     s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle,
                                      uncompression_dict, block_entry,
                                      block_type, get_context, lookup_context,
@@ -2487,14 +2496,14 @@ template Status BlockBasedTable::RetrieveBlock(
     const BlockHandle& handle, const UncompressionDict& uncompression_dict,
     CachableEntry* block_entry, BlockType block_type,
     GetContext* get_context, BlockCacheLookupContext* lookup_context,
-    bool for_compaction) const;
+    bool for_compaction, bool use_cache) const;

 template Status BlockBasedTable::RetrieveBlock(
     FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
     const BlockHandle& handle, const UncompressionDict& uncompression_dict,
     CachableEntry* block_entry, BlockType block_type,
     GetContext* get_context, BlockCacheLookupContext* lookup_context,
-    bool for_compaction) const;
+    bool for_compaction, bool use_cache) const;

 BlockBasedTable::PartitionedIndexIteratorState::PartitionedIndexIteratorState(
     const BlockBasedTable* table,
diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h
index 85346d75c..017199d80 100644
--- a/table/block_based/block_based_table_reader.h
+++ b/table/block_based/block_based_table_reader.h
@@ -299,7 +299,7 @@ class BlockBasedTable : public TableReader {
                        CachableEntry* block_entry, BlockType block_type,
                        GetContext* get_context,
                        BlockCacheLookupContext* lookup_context,
-                       bool for_compaction = false) const;
+                       bool for_compaction, bool use_cache) const;

   Status GetDataBlockFromCache(
       const ReadOptions& ro, const BlockHandle& handle,
diff --git a/table/block_based/filter_block_reader_common.cc b/table/block_based/filter_block_reader_common.cc
index 717a4ad0d..b6a334986 100644
--- a/table/block_based/filter_block_reader_common.cc
+++ b/table/block_based/filter_block_reader_common.cc
@@ -13,7 +13,7 @@ namespace rocksdb {
 template
 Status FilterBlockReaderCommon::ReadFilterBlock(
     const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-    const ReadOptions& read_options, GetContext* get_context,
+    const ReadOptions& read_options, bool use_cache, GetContext* get_context,
     BlockCacheLookupContext* lookup_context,
     CachableEntry* filter_block) {
   PERF_TIMER_GUARD(read_filter_block_nanos);
@@ -28,7 +28,8 @@ Status FilterBlockReaderCommon::ReadFilterBlock(
   const Status s =
       table->RetrieveBlock(prefetch_buffer, read_options, rep->filter_handle,
                            UncompressionDict::GetEmptyDict(), filter_block,
-                           BlockType::kFilter, get_context, lookup_context);
+                           BlockType::kFilter, get_context, lookup_context,
+                           /* for_compaction */ false, use_cache);

   return s;
 }
@@ -52,6 +53,14 @@ bool FilterBlockReaderCommon::whole_key_filtering() const {
   return table_->get_rep()->whole_key_filtering;
 }

+template
+bool FilterBlockReaderCommon::cache_filter_blocks() const {
+  assert(table_);
+  assert(table_->get_rep());
+
+  return table_->get_rep()->table_options.cache_index_and_filter_blocks;
+}
+
 template
 Status FilterBlockReaderCommon::GetOrReadFilterBlock(
     bool no_io, GetContext* get_context,
@@ -70,7 +79,8 @@ Status FilterBlockReaderCommon::GetOrReadFilterBlock(
   }

   return ReadFilterBlock(table_, nullptr /* prefetch_buffer */, read_options,
-                         get_context, lookup_context, filter_block);
+                         cache_filter_blocks(), get_context, lookup_context,
+                         filter_block);
 }

 template
diff --git a/table/block_based/filter_block_reader_common.h b/table/block_based/filter_block_reader_common.h
index 3698d3f1e..4e691e0d9 100644
--- a/table/block_based/filter_block_reader_common.h
+++ b/table/block_based/filter_block_reader_common.h
@@ -31,7 +31,7 @@ class FilterBlockReaderCommon : public FilterBlockReader {
  protected:
   static Status ReadFilterBlock(const BlockBasedTable* table,
                                 FilePrefetchBuffer* prefetch_buffer,
-                                const ReadOptions& read_options,
+                                const ReadOptions& read_options, bool use_cache,
                                 GetContext* get_context,
                                 BlockCacheLookupContext* lookup_context,
                                 CachableEntry* filter_block);
@@ -39,6 +39,7 @@ class FilterBlockReaderCommon : public FilterBlockReader {
   const BlockBasedTable* table() const { return table_; }
   const SliceTransform* table_prefix_extractor() const;
   bool whole_key_filtering() const;
+  bool cache_filter_blocks() const;

   Status GetOrReadFilterBlock(bool no_io, GetContext* get_context,
                               BlockCacheLookupContext* lookup_context,
diff --git a/table/block_based/full_filter_block.cc b/table/block_based/full_filter_block.cc
index 553bd37d9..29decc35b 100644
--- a/table/block_based/full_filter_block.cc
+++ b/table/block_based/full_filter_block.cc
@@ -134,8 +134,8 @@ std::unique_ptr FullFilterBlockReader::Create(
   CachableEntry filter_block;
   if (prefetch || !use_cache) {
     const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
-                                     nullptr /* get_context */, lookup_context,
-                                     &filter_block);
+                                     use_cache, nullptr /* get_context */,
+                                     lookup_context, &filter_block);
     if (!s.ok()) {
       return std::unique_ptr();
     }
diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc
index 158ed84ab..1ba6b3c07 100644
--- a/table/block_based/partitioned_filter_block.cc
+++ b/table/block_based/partitioned_filter_block.cc
@@ -133,8 +133,8 @@ std::unique_ptr PartitionedFilterBlockReader::Create(
   CachableEntry filter_block;
   if (prefetch || !use_cache) {
     const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
-                                     nullptr /* get_context */, lookup_context,
-                                     &filter_block);
+                                     use_cache, nullptr /* get_context */,
+                                     lookup_context, &filter_block);
     if (!s.ok()) {
       return std::unique_ptr();
     }
@@ -226,7 +226,8 @@ Status PartitionedFilterBlockReader::GetFilterPartitionBlock(
   const Status s =
       table()->RetrieveBlock(prefetch_buffer, read_options, fltr_blk_handle,
                              UncompressionDict::GetEmptyDict(), filter_block,
-                             BlockType::kFilter, get_context, lookup_context);
+                             BlockType::kFilter, get_context, lookup_context,
+                             /* for_compaction */ false, /* use_cache */ true);

   return s;
 }
diff --git a/table/block_based/uncompression_dict_reader.cc b/table/block_based/uncompression_dict_reader.cc
index d74dbf6c4..92db24bb2 100644
--- a/table/block_based/uncompression_dict_reader.cc
+++ b/table/block_based/uncompression_dict_reader.cc
@@ -24,8 +24,8 @@ Status UncompressionDictReader::Create(
   CachableEntry uncompression_dict_block;
   if (prefetch || !use_cache) {
     const Status s = ReadUncompressionDictionaryBlock(
-        table, prefetch_buffer, ReadOptions(), nullptr /* get_context */,
-        lookup_context, &uncompression_dict_block);
+        table, prefetch_buffer, ReadOptions(), use_cache,
+        nullptr /* get_context */, lookup_context, &uncompression_dict_block);
     if (!s.ok()) {
       return s;
     }
@@ -43,7 +43,7 @@ Status UncompressionDictReader::Create(

 Status UncompressionDictReader::ReadUncompressionDictionaryBlock(
     const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-    const ReadOptions& read_options, GetContext* get_context,
+    const ReadOptions& read_options, bool use_cache, GetContext* get_context,
     BlockCacheLookupContext* lookup_context,
     CachableEntry* uncompression_dict_block) {
   // TODO: add perf counter for compression dictionary read time
@@ -59,7 +59,8 @@ Status UncompressionDictReader::ReadUncompressionDictionaryBlock(
   const Status s = table->RetrieveBlock(
       prefetch_buffer, read_options, rep->compression_dict_handle,
       UncompressionDict::GetEmptyDict(), uncompression_dict_block,
-      BlockType::kCompressionDictionary, get_context, lookup_context);
+      BlockType::kCompressionDictionary, get_context, lookup_context,
+      /* for_compaction */ false, use_cache);

   if (!s.ok()) {
     ROCKS_LOG_WARN(
@@ -89,9 +90,9 @@ Status UncompressionDictReader::GetOrReadUncompressionDictionaryBlock(
     read_options.read_tier = kBlockCacheTier;
   }

-  return ReadUncompressionDictionaryBlock(table_, prefetch_buffer, read_options,
-                                          get_context, lookup_context,
-                                          uncompression_dict_block);
+  return ReadUncompressionDictionaryBlock(
+      table_, prefetch_buffer, read_options, cache_dictionary_blocks(),
+      get_context, lookup_context, uncompression_dict_block);
 }

 Status UncompressionDictReader::GetOrReadUncompressionDictionary(
@@ -135,4 +136,11 @@ size_t UncompressionDictReader::ApproximateMemoryUsage() const {
   return usage;
 }

+bool UncompressionDictReader::cache_dictionary_blocks() const {
+  assert(table_);
+  assert(table_->get_rep());
+
+  return table_->get_rep()->table_options.cache_index_and_filter_blocks;
+}
+
 }  // namespace rocksdb
diff --git a/table/block_based/uncompression_dict_reader.h b/table/block_based/uncompression_dict_reader.h
index 808149e96..09fdf54b0 100644
--- a/table/block_based/uncompression_dict_reader.h
+++ b/table/block_based/uncompression_dict_reader.h
@@ -46,9 +46,11 @@ class UncompressionDictReader {
     assert(table_);
   }

+  bool cache_dictionary_blocks() const;
+
   static Status ReadUncompressionDictionaryBlock(
       const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
-      const ReadOptions& read_options, GetContext* get_context,
+      const ReadOptions& read_options, bool use_cache, GetContext* get_context,
       BlockCacheLookupContext* lookup_context,
       CachableEntry* uncompression_dict_block);