From 17e04039dda178b19d4b5d6cdce41ebd85fbf50e Mon Sep 17 00:00:00 2001 From: Maysam Yabandeh Date: Fri, 20 Apr 2018 15:08:00 -0700 Subject: [PATCH] Propagate fill_cache config to partitioned index iterator Summary: Currently the partitioned index iterator creates a new ReadOptions which ignores the fill_cache config set to ReadOptions passed by the user. The patch propagates fill_cache from the user's ReadOptions to that of partition index iterator. Also it clarifies the contract of fill_cache that i) it does not apply to filters, ii) it still charges block cache for the size of the data block, it still pin the block if it is already in the block cache. Closes https://github.com/facebook/rocksdb/pull/3739 Differential Revision: D7678308 Pulled By: maysamyabandeh fbshipit-source-id: 53ed96424ae922e499e2d4e3580ddc3f0db893da --- include/rocksdb/options.h | 7 ++++--- table/block_based_table_reader.cc | 17 +++++++++++------ table/block_based_table_reader.h | 3 ++- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 5b7cc0cc9..e09e161c9 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1033,10 +1033,11 @@ struct ReadOptions { // Default: true bool verify_checksums; - // Should the "data block"/"index block"/"filter block" read for this - // iteration be cached in memory? + // Should the "data block"/"index block"" read for this iteration be placed in + // block cache? // Callers may wish to set this field to false for bulk scans. - // Default: true + // This would help not to the change eviction order of existing items in the + // block cache. Default: true bool fill_cache; // Specify to create a tailing iterator -- a special iterator that has a diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 5b04cd416..7579a4234 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -208,7 +208,8 @@ class PartitionIndexReader : public IndexReader, public Cleanable { // return a two-level iterator: first level is on the partition index virtual InternalIterator* NewIterator(BlockIter* /*iter*/ = nullptr, - bool /*dont_care*/ = true) override { + bool /*dont_care*/ = true, + bool fill_cache = true) override { // Filters are already checked before seeking the index if (!partition_map_.empty()) { return NewTwoLevelIterator( @@ -216,8 +217,10 @@ class PartitionIndexReader : public IndexReader, public Cleanable { table_, partition_map_.size() ? &partition_map_ : nullptr), index_block_->NewIterator(icomparator_, nullptr, true)); } else { + auto ro = ReadOptions(); + ro.fill_cache = fill_cache; return new BlockBasedTableIterator( - table_, ReadOptions(), *icomparator_, + table_, ro, *icomparator_, index_block_->NewIterator(icomparator_, nullptr, true), false); } // TODO(myabandeh): Update TwoLevelIterator to be able to make use of @@ -364,6 +367,7 @@ class BinarySearchIndexReader : public IndexReader { } virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, + bool /*dont_care*/ = true, bool /*dont_care*/ = true) override { return index_block_->NewIterator(icomparator_, iter, true); } @@ -474,7 +478,8 @@ class HashIndexReader : public IndexReader { } virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, - bool total_order_seek = true) override { + bool total_order_seek = true, + bool /*dont_care*/ = true) override { return index_block_->NewIterator(icomparator_, iter, total_order_seek); } @@ -1364,12 +1369,12 @@ InternalIterator* BlockBasedTable::NewIndexIterator( // index reader has already been pre-populated. if (rep_->index_reader) { return rep_->index_reader->NewIterator( - input_iter, read_options.total_order_seek); + input_iter, read_options.total_order_seek, read_options.fill_cache); } // we have a pinned index block if (rep_->index_entry.IsSet()) { - return rep_->index_entry.value->NewIterator(input_iter, - read_options.total_order_seek); + return rep_->index_entry.value->NewIterator( + input_iter, read_options.total_order_seek, read_options.fill_cache); } PERF_TIMER_GUARD(read_index_block_nanos); diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index f2c5082af..bb1f79774 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -167,7 +167,8 @@ class BlockBasedTable : public TableReader { // a different object then iter and the callee has the ownership of the // returned object. virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, - bool total_order_seek = true) = 0; + bool total_order_seek = true, + bool fill_cache = true) = 0; // The size of the index. virtual size_t size() const = 0;