diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 5b7cc0cc9..e09e161c9 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1033,10 +1033,11 @@ struct ReadOptions { // Default: true bool verify_checksums; - // Should the "data block"/"index block"/"filter block" read for this - // iteration be cached in memory? + // Should the "data block"/"index block"" read for this iteration be placed in + // block cache? // Callers may wish to set this field to false for bulk scans. - // Default: true + // This would help not to the change eviction order of existing items in the + // block cache. Default: true bool fill_cache; // Specify to create a tailing iterator -- a special iterator that has a diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 5b04cd416..7579a4234 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -208,7 +208,8 @@ class PartitionIndexReader : public IndexReader, public Cleanable { // return a two-level iterator: first level is on the partition index virtual InternalIterator* NewIterator(BlockIter* /*iter*/ = nullptr, - bool /*dont_care*/ = true) override { + bool /*dont_care*/ = true, + bool fill_cache = true) override { // Filters are already checked before seeking the index if (!partition_map_.empty()) { return NewTwoLevelIterator( @@ -216,8 +217,10 @@ class PartitionIndexReader : public IndexReader, public Cleanable { table_, partition_map_.size() ? &partition_map_ : nullptr), index_block_->NewIterator(icomparator_, nullptr, true)); } else { + auto ro = ReadOptions(); + ro.fill_cache = fill_cache; return new BlockBasedTableIterator( - table_, ReadOptions(), *icomparator_, + table_, ro, *icomparator_, index_block_->NewIterator(icomparator_, nullptr, true), false); } // TODO(myabandeh): Update TwoLevelIterator to be able to make use of @@ -364,6 +367,7 @@ class BinarySearchIndexReader : public IndexReader { } virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, + bool /*dont_care*/ = true, bool /*dont_care*/ = true) override { return index_block_->NewIterator(icomparator_, iter, true); } @@ -474,7 +478,8 @@ class HashIndexReader : public IndexReader { } virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, - bool total_order_seek = true) override { + bool total_order_seek = true, + bool /*dont_care*/ = true) override { return index_block_->NewIterator(icomparator_, iter, total_order_seek); } @@ -1364,12 +1369,12 @@ InternalIterator* BlockBasedTable::NewIndexIterator( // index reader has already been pre-populated. if (rep_->index_reader) { return rep_->index_reader->NewIterator( - input_iter, read_options.total_order_seek); + input_iter, read_options.total_order_seek, read_options.fill_cache); } // we have a pinned index block if (rep_->index_entry.IsSet()) { - return rep_->index_entry.value->NewIterator(input_iter, - read_options.total_order_seek); + return rep_->index_entry.value->NewIterator( + input_iter, read_options.total_order_seek, read_options.fill_cache); } PERF_TIMER_GUARD(read_index_block_nanos); diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index f2c5082af..bb1f79774 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -167,7 +167,8 @@ class BlockBasedTable : public TableReader { // a different object then iter and the callee has the ownership of the // returned object. virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, - bool total_order_seek = true) = 0; + bool total_order_seek = true, + bool fill_cache = true) = 0; // The size of the index. virtual size_t size() const = 0;