Fix regression affecting partitioned indexes/filters when cache_index_and_filter_blocks is false (#5705)

Summary:
PR https://github.com/facebook/rocksdb/issues/5298 (and subsequent related patches) unintentionally changed the
semantics of cache_index_and_filter_blocks: historically, this option
only affected the main index/filter block; with the changes, it affects
index/filter partitions as well. This can cause performance issues when
cache_index_and_filter_blocks is false, since in that case partitions are
neither cached nor preloaded (i.e. they are loaded on demand upon every
access). The patch reverts to the earlier behavior: partitions are cached
like data blocks, regardless of the value of the above
option.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5705

Test Plan:
make check
./db_bench -benchmarks=fillrandom --statistics --stats_interval_seconds=1 --duration=30 --num=500000000 --bloom_bits=20 --partition_index_and_filters=true --cache_index_and_filter_blocks=false
./db_bench -benchmarks=readrandom --use_existing_db --statistics --stats_interval_seconds=1 --duration=10 --num=500000000 --bloom_bits=20 --partition_index_and_filters=true --cache_index_and_filter_blocks=false --cache_size=8000000000

Relevant statistics from the readrandom benchmark with the old code (i.e. before this patch):

rocksdb.block.cache.index.miss COUNT : 0
rocksdb.block.cache.index.hit COUNT : 0
rocksdb.block.cache.index.add COUNT : 0
rocksdb.block.cache.index.bytes.insert COUNT : 0
rocksdb.block.cache.index.bytes.evict COUNT : 0
rocksdb.block.cache.filter.miss COUNT : 0
rocksdb.block.cache.filter.hit COUNT : 0
rocksdb.block.cache.filter.add COUNT : 0
rocksdb.block.cache.filter.bytes.insert COUNT : 0
rocksdb.block.cache.filter.bytes.evict COUNT : 0

With the new code (i.e. with this patch applied):

rocksdb.block.cache.index.miss COUNT : 2500
rocksdb.block.cache.index.hit COUNT : 42696
rocksdb.block.cache.index.add COUNT : 2500
rocksdb.block.cache.index.bytes.insert COUNT : 4050048
rocksdb.block.cache.index.bytes.evict COUNT : 0
rocksdb.block.cache.filter.miss COUNT : 2500
rocksdb.block.cache.filter.hit COUNT : 4550493
rocksdb.block.cache.filter.add COUNT : 2500
rocksdb.block.cache.filter.bytes.insert COUNT : 10331040
rocksdb.block.cache.filter.bytes.evict COUNT : 0

Differential Revision: D16817382

Pulled By: ltamasi

fbshipit-source-id: 28a516b0da1f041a03313e0b70b28cf5cf205d00
main
Levi Tamasi 5 years ago committed by Facebook Github Bot
parent 77273d4137
commit d92a59b6f2
  1. 4
      table/block_based/block_based_filter_block.cc
  2. 47
      table/block_based/block_based_table_reader.cc
  3. 2
      table/block_based/block_based_table_reader.h
  4. 16
      table/block_based/filter_block_reader_common.cc
  5. 3
      table/block_based/filter_block_reader_common.h
  6. 4
      table/block_based/full_filter_block.cc
  7. 7
      table/block_based/partitioned_filter_block.cc
  8. 22
      table/block_based/uncompression_dict_reader.cc
  9. 4
      table/block_based/uncompression_dict_reader.h

@ -181,8 +181,8 @@ std::unique_ptr<FilterBlockReader> BlockBasedFilterBlockReader::Create(
CachableEntry<BlockContents> filter_block; CachableEntry<BlockContents> filter_block;
if (prefetch || !use_cache) { if (prefetch || !use_cache) {
const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(), const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
nullptr /* get_context */, lookup_context, use_cache, nullptr /* get_context */,
&filter_block); lookup_context, &filter_block);
if (!s.ok()) { if (!s.ok()) {
return std::unique_ptr<FilterBlockReader>(); return std::unique_ptr<FilterBlockReader>();
} }

@ -208,7 +208,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader {
protected: protected:
static Status ReadIndexBlock(const BlockBasedTable* table, static Status ReadIndexBlock(const BlockBasedTable* table,
FilePrefetchBuffer* prefetch_buffer, FilePrefetchBuffer* prefetch_buffer,
const ReadOptions& read_options, const ReadOptions& read_options, bool use_cache,
GetContext* get_context, GetContext* get_context,
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
CachableEntry<Block>* index_block); CachableEntry<Block>* index_block);
@ -240,6 +240,12 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader {
return table_->get_rep()->index_value_is_full; return table_->get_rep()->index_value_is_full;
} }
bool cache_index_blocks() const {
assert(table_ != nullptr);
assert(table_->get_rep() != nullptr);
return table_->get_rep()->table_options.cache_index_and_filter_blocks;
}
Status GetOrReadIndexBlock(bool no_io, GetContext* get_context, Status GetOrReadIndexBlock(bool no_io, GetContext* get_context,
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
CachableEntry<Block>* index_block) const; CachableEntry<Block>* index_block) const;
@ -258,7 +264,7 @@ class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader {
Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock( Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock(
const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
const ReadOptions& read_options, GetContext* get_context, const ReadOptions& read_options, bool use_cache, GetContext* get_context,
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
CachableEntry<Block>* index_block) { CachableEntry<Block>* index_block) {
PERF_TIMER_GUARD(read_index_block_nanos); PERF_TIMER_GUARD(read_index_block_nanos);
@ -273,7 +279,7 @@ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock(
const Status s = table->RetrieveBlock( const Status s = table->RetrieveBlock(
prefetch_buffer, read_options, rep->footer.index_handle(), prefetch_buffer, read_options, rep->footer.index_handle(),
UncompressionDict::GetEmptyDict(), index_block, BlockType::kIndex, UncompressionDict::GetEmptyDict(), index_block, BlockType::kIndex,
get_context, lookup_context); get_context, lookup_context, /* for_compaction */ false, use_cache);
return s; return s;
} }
@ -295,7 +301,8 @@ Status BlockBasedTable::IndexReaderCommon::GetOrReadIndexBlock(
} }
return ReadIndexBlock(table_, /*prefetch_buffer=*/nullptr, read_options, return ReadIndexBlock(table_, /*prefetch_buffer=*/nullptr, read_options,
get_context, lookup_context, index_block); cache_index_blocks(), get_context, lookup_context,
index_block);
} }
// Index that allows binary search lookup in a two-level index structure. // Index that allows binary search lookup in a two-level index structure.
@ -318,7 +325,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
CachableEntry<Block> index_block; CachableEntry<Block> index_block;
if (prefetch || !use_cache) { if (prefetch || !use_cache) {
const Status s = const Status s =
ReadIndexBlock(table, prefetch_buffer, ReadOptions(), ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
/*get_context=*/nullptr, lookup_context, &index_block); /*get_context=*/nullptr, lookup_context, &index_block);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -509,7 +516,7 @@ class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon {
CachableEntry<Block> index_block; CachableEntry<Block> index_block;
if (prefetch || !use_cache) { if (prefetch || !use_cache) {
const Status s = const Status s =
ReadIndexBlock(table, prefetch_buffer, ReadOptions(), ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
/*get_context=*/nullptr, lookup_context, &index_block); /*get_context=*/nullptr, lookup_context, &index_block);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -593,7 +600,7 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon {
CachableEntry<Block> index_block; CachableEntry<Block> index_block;
if (prefetch || !use_cache) { if (prefetch || !use_cache) {
const Status s = const Status s =
ReadIndexBlock(table, prefetch_buffer, ReadOptions(), ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
/*get_context=*/nullptr, lookup_context, &index_block); /*get_context=*/nullptr, lookup_context, &index_block);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -1915,7 +1922,8 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
CachableEntry<Block> block; CachableEntry<Block> block;
s = RetrieveBlock(prefetch_buffer, ro, handle, uncompression_dict, &block, s = RetrieveBlock(prefetch_buffer, ro, handle, uncompression_dict, &block,
block_type, get_context, lookup_context, for_compaction); block_type, get_context, lookup_context, for_compaction,
/* use_cache */ true);
if (!s.ok()) { if (!s.ok()) {
assert(block.IsEmpty()); assert(block.IsEmpty());
@ -2078,8 +2086,10 @@ Status BlockBasedTable::GetDataBlockFromCache(
GetContext* get_context) const { GetContext* get_context) const {
BlockCacheLookupContext lookup_data_block_context( BlockCacheLookupContext lookup_data_block_context(
TableReaderCaller::kUserMultiGet); TableReaderCaller::kUserMultiGet);
assert(block_type == BlockType::kData);
Status s = RetrieveBlock(nullptr, ro, handle, uncompression_dict, block, Status s = RetrieveBlock(nullptr, ro, handle, uncompression_dict, block,
block_type, get_context, &lookup_data_block_context); block_type, get_context, &lookup_data_block_context,
/* for_compaction */ false, /* use_cache */ true);
if (s.IsIncomplete()) { if (s.IsIncomplete()) {
s = Status::OK(); s = Status::OK();
} }
@ -2289,9 +2299,11 @@ void BlockBasedTable::MaybeLoadBlocksToCache(
continue; continue;
} }
(*statuses)[idx_in_batch] = RetrieveBlock(nullptr, options, handle, (*statuses)[idx_in_batch] =
uncompression_dict, &(*results)[idx_in_batch], BlockType::kData, RetrieveBlock(nullptr, options, handle, uncompression_dict,
mget_iter->get_context, &lookup_data_block_context); &(*results)[idx_in_batch], BlockType::kData,
mget_iter->get_context, &lookup_data_block_context,
/* for_compaction */ false, /* use_cache */ true);
} }
return; return;
} }
@ -2418,15 +2430,12 @@ Status BlockBasedTable::RetrieveBlock(
const BlockHandle& handle, const UncompressionDict& uncompression_dict, const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<TBlocklike>* block_entry, BlockType block_type, CachableEntry<TBlocklike>* block_entry, BlockType block_type,
GetContext* get_context, BlockCacheLookupContext* lookup_context, GetContext* get_context, BlockCacheLookupContext* lookup_context,
bool for_compaction) const { bool for_compaction, bool use_cache) const {
assert(block_entry); assert(block_entry);
assert(block_entry->IsEmpty()); assert(block_entry->IsEmpty());
Status s; Status s;
if (rep_->table_options.cache_index_and_filter_blocks || if (use_cache) {
(block_type != BlockType::kFilter &&
block_type != BlockType::kCompressionDictionary &&
block_type != BlockType::kIndex)) {
s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle, s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle,
uncompression_dict, block_entry, uncompression_dict, block_entry,
block_type, get_context, lookup_context, block_type, get_context, lookup_context,
@ -2487,14 +2496,14 @@ template Status BlockBasedTable::RetrieveBlock<BlockContents>(
const BlockHandle& handle, const UncompressionDict& uncompression_dict, const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<BlockContents>* block_entry, BlockType block_type, CachableEntry<BlockContents>* block_entry, BlockType block_type,
GetContext* get_context, BlockCacheLookupContext* lookup_context, GetContext* get_context, BlockCacheLookupContext* lookup_context,
bool for_compaction) const; bool for_compaction, bool use_cache) const;
template Status BlockBasedTable::RetrieveBlock<Block>( template Status BlockBasedTable::RetrieveBlock<Block>(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict, const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<Block>* block_entry, BlockType block_type, CachableEntry<Block>* block_entry, BlockType block_type,
GetContext* get_context, BlockCacheLookupContext* lookup_context, GetContext* get_context, BlockCacheLookupContext* lookup_context,
bool for_compaction) const; bool for_compaction, bool use_cache) const;
BlockBasedTable::PartitionedIndexIteratorState::PartitionedIndexIteratorState( BlockBasedTable::PartitionedIndexIteratorState::PartitionedIndexIteratorState(
const BlockBasedTable* table, const BlockBasedTable* table,

@ -299,7 +299,7 @@ class BlockBasedTable : public TableReader {
CachableEntry<TBlocklike>* block_entry, CachableEntry<TBlocklike>* block_entry,
BlockType block_type, GetContext* get_context, BlockType block_type, GetContext* get_context,
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
bool for_compaction = false) const; bool for_compaction, bool use_cache) const;
Status GetDataBlockFromCache( Status GetDataBlockFromCache(
const ReadOptions& ro, const BlockHandle& handle, const ReadOptions& ro, const BlockHandle& handle,

@ -13,7 +13,7 @@ namespace rocksdb {
template <typename TBlocklike> template <typename TBlocklike>
Status FilterBlockReaderCommon<TBlocklike>::ReadFilterBlock( Status FilterBlockReaderCommon<TBlocklike>::ReadFilterBlock(
const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
const ReadOptions& read_options, GetContext* get_context, const ReadOptions& read_options, bool use_cache, GetContext* get_context,
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
CachableEntry<TBlocklike>* filter_block) { CachableEntry<TBlocklike>* filter_block) {
PERF_TIMER_GUARD(read_filter_block_nanos); PERF_TIMER_GUARD(read_filter_block_nanos);
@ -28,7 +28,8 @@ Status FilterBlockReaderCommon<TBlocklike>::ReadFilterBlock(
const Status s = const Status s =
table->RetrieveBlock(prefetch_buffer, read_options, rep->filter_handle, table->RetrieveBlock(prefetch_buffer, read_options, rep->filter_handle,
UncompressionDict::GetEmptyDict(), filter_block, UncompressionDict::GetEmptyDict(), filter_block,
BlockType::kFilter, get_context, lookup_context); BlockType::kFilter, get_context, lookup_context,
/* for_compaction */ false, use_cache);
return s; return s;
} }
@ -52,6 +53,14 @@ bool FilterBlockReaderCommon<TBlocklike>::whole_key_filtering() const {
return table_->get_rep()->whole_key_filtering; return table_->get_rep()->whole_key_filtering;
} }
template <typename TBlocklike>
bool FilterBlockReaderCommon<TBlocklike>::cache_filter_blocks() const {
assert(table_);
assert(table_->get_rep());
return table_->get_rep()->table_options.cache_index_and_filter_blocks;
}
template <typename TBlocklike> template <typename TBlocklike>
Status FilterBlockReaderCommon<TBlocklike>::GetOrReadFilterBlock( Status FilterBlockReaderCommon<TBlocklike>::GetOrReadFilterBlock(
bool no_io, GetContext* get_context, bool no_io, GetContext* get_context,
@ -70,7 +79,8 @@ Status FilterBlockReaderCommon<TBlocklike>::GetOrReadFilterBlock(
} }
return ReadFilterBlock(table_, nullptr /* prefetch_buffer */, read_options, return ReadFilterBlock(table_, nullptr /* prefetch_buffer */, read_options,
get_context, lookup_context, filter_block); cache_filter_blocks(), get_context, lookup_context,
filter_block);
} }
template <typename TBlocklike> template <typename TBlocklike>

@ -31,7 +31,7 @@ class FilterBlockReaderCommon : public FilterBlockReader {
protected: protected:
static Status ReadFilterBlock(const BlockBasedTable* table, static Status ReadFilterBlock(const BlockBasedTable* table,
FilePrefetchBuffer* prefetch_buffer, FilePrefetchBuffer* prefetch_buffer,
const ReadOptions& read_options, const ReadOptions& read_options, bool use_cache,
GetContext* get_context, GetContext* get_context,
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
CachableEntry<TBlocklike>* filter_block); CachableEntry<TBlocklike>* filter_block);
@ -39,6 +39,7 @@ class FilterBlockReaderCommon : public FilterBlockReader {
const BlockBasedTable* table() const { return table_; } const BlockBasedTable* table() const { return table_; }
const SliceTransform* table_prefix_extractor() const; const SliceTransform* table_prefix_extractor() const;
bool whole_key_filtering() const; bool whole_key_filtering() const;
bool cache_filter_blocks() const;
Status GetOrReadFilterBlock(bool no_io, GetContext* get_context, Status GetOrReadFilterBlock(bool no_io, GetContext* get_context,
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,

@ -134,8 +134,8 @@ std::unique_ptr<FilterBlockReader> FullFilterBlockReader::Create(
CachableEntry<BlockContents> filter_block; CachableEntry<BlockContents> filter_block;
if (prefetch || !use_cache) { if (prefetch || !use_cache) {
const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(), const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
nullptr /* get_context */, lookup_context, use_cache, nullptr /* get_context */,
&filter_block); lookup_context, &filter_block);
if (!s.ok()) { if (!s.ok()) {
return std::unique_ptr<FilterBlockReader>(); return std::unique_ptr<FilterBlockReader>();
} }

@ -133,8 +133,8 @@ std::unique_ptr<FilterBlockReader> PartitionedFilterBlockReader::Create(
CachableEntry<Block> filter_block; CachableEntry<Block> filter_block;
if (prefetch || !use_cache) { if (prefetch || !use_cache) {
const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(), const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
nullptr /* get_context */, lookup_context, use_cache, nullptr /* get_context */,
&filter_block); lookup_context, &filter_block);
if (!s.ok()) { if (!s.ok()) {
return std::unique_ptr<FilterBlockReader>(); return std::unique_ptr<FilterBlockReader>();
} }
@ -226,7 +226,8 @@ Status PartitionedFilterBlockReader::GetFilterPartitionBlock(
const Status s = const Status s =
table()->RetrieveBlock(prefetch_buffer, read_options, fltr_blk_handle, table()->RetrieveBlock(prefetch_buffer, read_options, fltr_blk_handle,
UncompressionDict::GetEmptyDict(), filter_block, UncompressionDict::GetEmptyDict(), filter_block,
BlockType::kFilter, get_context, lookup_context); BlockType::kFilter, get_context, lookup_context,
/* for_compaction */ false, /* use_cache */ true);
return s; return s;
} }

@ -24,8 +24,8 @@ Status UncompressionDictReader::Create(
CachableEntry<BlockContents> uncompression_dict_block; CachableEntry<BlockContents> uncompression_dict_block;
if (prefetch || !use_cache) { if (prefetch || !use_cache) {
const Status s = ReadUncompressionDictionaryBlock( const Status s = ReadUncompressionDictionaryBlock(
table, prefetch_buffer, ReadOptions(), nullptr /* get_context */, table, prefetch_buffer, ReadOptions(), use_cache,
lookup_context, &uncompression_dict_block); nullptr /* get_context */, lookup_context, &uncompression_dict_block);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -43,7 +43,7 @@ Status UncompressionDictReader::Create(
Status UncompressionDictReader::ReadUncompressionDictionaryBlock( Status UncompressionDictReader::ReadUncompressionDictionaryBlock(
const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
const ReadOptions& read_options, GetContext* get_context, const ReadOptions& read_options, bool use_cache, GetContext* get_context,
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
CachableEntry<BlockContents>* uncompression_dict_block) { CachableEntry<BlockContents>* uncompression_dict_block) {
// TODO: add perf counter for compression dictionary read time // TODO: add perf counter for compression dictionary read time
@ -59,7 +59,8 @@ Status UncompressionDictReader::ReadUncompressionDictionaryBlock(
const Status s = table->RetrieveBlock( const Status s = table->RetrieveBlock(
prefetch_buffer, read_options, rep->compression_dict_handle, prefetch_buffer, read_options, rep->compression_dict_handle,
UncompressionDict::GetEmptyDict(), uncompression_dict_block, UncompressionDict::GetEmptyDict(), uncompression_dict_block,
BlockType::kCompressionDictionary, get_context, lookup_context); BlockType::kCompressionDictionary, get_context, lookup_context,
/* for_compaction */ false, use_cache);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_WARN( ROCKS_LOG_WARN(
@ -89,9 +90,9 @@ Status UncompressionDictReader::GetOrReadUncompressionDictionaryBlock(
read_options.read_tier = kBlockCacheTier; read_options.read_tier = kBlockCacheTier;
} }
return ReadUncompressionDictionaryBlock(table_, prefetch_buffer, read_options, return ReadUncompressionDictionaryBlock(
get_context, lookup_context, table_, prefetch_buffer, read_options, cache_dictionary_blocks(),
uncompression_dict_block); get_context, lookup_context, uncompression_dict_block);
} }
Status UncompressionDictReader::GetOrReadUncompressionDictionary( Status UncompressionDictReader::GetOrReadUncompressionDictionary(
@ -135,4 +136,11 @@ size_t UncompressionDictReader::ApproximateMemoryUsage() const {
return usage; return usage;
} }
bool UncompressionDictReader::cache_dictionary_blocks() const {
assert(table_);
assert(table_->get_rep());
return table_->get_rep()->table_options.cache_index_and_filter_blocks;
}
} // namespace rocksdb } // namespace rocksdb

@ -46,9 +46,11 @@ class UncompressionDictReader {
assert(table_); assert(table_);
} }
bool cache_dictionary_blocks() const;
static Status ReadUncompressionDictionaryBlock( static Status ReadUncompressionDictionaryBlock(
const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
const ReadOptions& read_options, GetContext* get_context, const ReadOptions& read_options, bool use_cache, GetContext* get_context,
BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
CachableEntry<BlockContents>* uncompression_dict_block); CachableEntry<BlockContents>* uncompression_dict_block);

Loading…
Cancel
Save