Combine the read-ahead logic for user reads and compaction reads (#5431)

Summary:
Currently the read-ahead logic for user reads and compaction reads go through different code paths where compaction reads create new table readers and use `ReadaheadRandomAccessFile`. This change is to unify read-ahead logic to use read-ahead in BlockBasedTableReader::InitDataBlock(). As a result of the change  `ReadAheadRandomAccessFile` class and `new_table_reader_for_compaction_inputs` option will no longer be used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5431

Test Plan:
make check

Here is the benchmarking - https://gist.github.com/vjnadimpalli/083cf423f7b6aa12dcdb14c858bc18a5

Differential Revision: D15772533

Pulled By: vjnadimpalli

fbshipit-source-id: b71dca710590471ede6fb37553388654e2e479b9
main
Vijay Nadimpalli 6 years ago committed by Facebook Github Bot
parent fe90ed7a70
commit 24b118ad98
  1. 21
      db/db_compaction_test.cc
  2. 62
      db/table_cache.cc
  3. 3
      db/table_cache.h
  4. 2
      include/rocksdb/options.h
  5. 37
      table/block_based/block_based_table_reader.cc
  6. 19
      table/block_based/block_based_table_reader.h
  7. 5
      table/block_fetcher.cc
  8. 8
      table/block_fetcher.h
  9. 3
      table/cuckoo/cuckoo_table_reader.cc
  10. 6
      table/cuckoo/cuckoo_table_reader.h
  11. 11
      table/meta_blocks.cc
  12. 3
      table/mock_table.cc
  13. 3
      table/mock_table.h
  14. 3
      table/plain/plain_table_reader.cc
  15. 6
      table/plain/plain_table_reader.h
  16. 11
      table/table_reader.h
  17. 16
      util/file_reader_writer.cc
  18. 11
      util/file_reader_writer.h

@ -497,14 +497,14 @@ TEST_F(DBCompactionTest, TestTableReaderForCompaction) {
// Create new iterator for:
// (1) 1 for verifying flush results
// (2) 3 for compaction input files
// (3) 1 for verifying compaction results.
ASSERT_EQ(num_new_table_reader, 5);
// (2) 1 for verifying compaction results.
// (3) New TableReaders will not be created for compaction inputs
ASSERT_EQ(num_new_table_reader, 2);
num_table_cache_lookup = 0;
num_new_table_reader = 0;
ASSERT_EQ(Key(1), Get(Key(1)));
ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 3);
ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 5);
ASSERT_EQ(num_new_table_reader, 0);
num_table_cache_lookup = 0;
@ -519,13 +519,14 @@ TEST_F(DBCompactionTest, TestTableReaderForCompaction) {
// May preload table cache too.
ASSERT_GE(num_table_cache_lookup, 1);
old_num_table_cache_lookup2 = num_table_cache_lookup;
// One for compaction input, one for verifying compaction results.
ASSERT_EQ(num_new_table_reader, 2);
// One for verifying compaction results.
// No new iterator created for compaction.
ASSERT_EQ(num_new_table_reader, 1);
num_table_cache_lookup = 0;
num_new_table_reader = 0;
ASSERT_EQ(Key(1), Get(Key(1)));
ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 2);
ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 3);
ASSERT_EQ(num_new_table_reader, 0);
rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
@ -4339,12 +4340,6 @@ TEST_P(DBCompactionDirectIOTest, DirectIO) {
options.env = new MockEnv(Env::Default());
Reopen(options);
bool readahead = false;
SyncPoint::GetInstance()->SetCallBack(
"TableCache::NewIterator:for_compaction", [&](void* arg) {
bool* use_direct_reads = static_cast<bool*>(arg);
ASSERT_EQ(*use_direct_reads,
options.use_direct_reads);
});
SyncPoint::GetInstance()->SetCallBack(
"CompactionJob::OpenCompactionOutputFile", [&](void* arg) {
bool* use_direct_writes = static_cast<bool*>(arg);

@ -16,6 +16,7 @@
#include "monitoring/perf_context_imp.h"
#include "rocksdb/statistics.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/get_context.h"
#include "table/internal_iterator.h"
#include "table/iterator_wrapper.h"
@ -43,13 +44,6 @@ static void UnrefEntry(void* arg1, void* arg2) {
cache->Release(h);
}
static void DeleteTableReader(void* arg1, void* arg2) {
TableReader* table_reader = reinterpret_cast<TableReader*>(arg1);
Statistics* stats = reinterpret_cast<Statistics*>(arg2);
RecordTick(stats, NO_FILE_CLOSES);
delete table_reader;
}
static Slice GetSliceForFileNumber(const uint64_t* file_number) {
return Slice(reinterpret_cast<const char*>(file_number),
sizeof(*file_number));
@ -96,8 +90,8 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) {
Status TableCache::GetTableReader(
const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
bool sequential_mode, size_t readahead, bool record_read_stats,
HistogramImpl* file_read_hist, std::unique_ptr<TableReader>* table_reader,
bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
std::unique_ptr<TableReader>* table_reader,
const SliceTransform* prefix_extractor, bool skip_filters, int level,
bool prefetch_index_and_filter_in_cache, bool for_compaction) {
std::string fname =
@ -107,13 +101,6 @@ Status TableCache::GetTableReader(
RecordTick(ioptions_.statistics, NO_FILE_OPENS);
if (s.ok()) {
if (readahead > 0 && !env_options.use_mmap_reads) {
// Not compatible with mmap files since ReadaheadRandomAccessFile requires
// its wrapped file's Read() to copy data into the provided scratch
// buffer, which mmap files don't use.
// TODO(ajkr): try madvise for mmap files in place of buffered readahead.
file = NewReadaheadRandomAccessFile(std::move(file), readahead);
}
if (!sequential_mode && ioptions_.advise_random_on_open) {
file->Hint(RandomAccessFile::RANDOM);
}
@ -164,10 +151,9 @@ Status TableCache::FindTable(const EnvOptions& env_options,
}
std::unique_ptr<TableReader> table_reader;
s = GetTableReader(env_options, internal_comparator, fd,
false /* sequential mode */, 0 /* readahead */,
record_read_stats, file_read_hist, &table_reader,
prefix_extractor, skip_filters, level,
prefetch_index_and_filter_in_cache);
false /* sequential mode */, record_read_stats,
file_read_hist, &table_reader, prefix_extractor,
skip_filters, level, prefetch_index_and_filter_in_cache);
if (!s.ok()) {
assert(table_reader == nullptr);
RecordTick(ioptions_.statistics, NO_FILE_ERRORS);
@ -196,39 +182,13 @@ InternalIterator* TableCache::NewIterator(
PERF_TIMER_GUARD(new_table_iterator_nanos);
Status s;
bool create_new_table_reader = false;
TableReader* table_reader = nullptr;
Cache::Handle* handle = nullptr;
if (table_reader_ptr != nullptr) {
*table_reader_ptr = nullptr;
}
size_t readahead = 0;
if (for_compaction) {
#ifndef NDEBUG
bool use_direct_reads_for_compaction = env_options.use_direct_reads;
TEST_SYNC_POINT_CALLBACK("TableCache::NewIterator:for_compaction",
&use_direct_reads_for_compaction);
#endif // !NDEBUG
if (ioptions_.new_table_reader_for_compaction_inputs) {
// get compaction_readahead_size from env_options allows us to set the
// value dynamically
readahead = env_options.compaction_readahead_size;
create_new_table_reader = true;
}
}
auto& fd = file_meta.fd;
if (create_new_table_reader) {
std::unique_ptr<TableReader> table_reader_unique_ptr;
s = GetTableReader(
env_options, icomparator, fd, true /* sequential_mode */, readahead,
!for_compaction /* record stats */, nullptr, &table_reader_unique_ptr,
prefix_extractor, false /* skip_filters */, level,
true /* prefetch_index_and_filter_in_cache */, for_compaction);
if (s.ok()) {
table_reader = table_reader_unique_ptr.release();
}
} else {
table_reader = fd.table_reader;
if (table_reader == nullptr) {
s = FindTable(env_options, icomparator, fd, &handle, prefix_extractor,
@ -239,7 +199,6 @@ InternalIterator* TableCache::NewIterator(
table_reader = GetTableReaderFromHandle(handle);
}
}
}
InternalIterator* result = nullptr;
if (s.ok()) {
if (options.table_filter &&
@ -247,13 +206,10 @@ InternalIterator* TableCache::NewIterator(
result = NewEmptyInternalIterator<Slice>(arena);
} else {
result = table_reader->NewIterator(options, prefix_extractor, arena,
skip_filters, for_compaction);
skip_filters, for_compaction,
env_options.compaction_readahead_size);
}
if (create_new_table_reader) {
assert(handle == nullptr);
result->RegisterCleanup(&DeleteTableReader, table_reader,
ioptions_.statistics);
} else if (handle != nullptr) {
if (handle != nullptr) {
result->RegisterCleanup(&UnrefEntry, cache_, handle);
handle = nullptr; // prevent from releasing below
}

@ -177,8 +177,7 @@ class TableCache {
Status GetTableReader(const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd, bool sequential_mode,
size_t readahead, bool record_read_stats,
HistogramImpl* file_read_hist,
bool record_read_stats, HistogramImpl* file_read_hist,
std::unique_ptr<TableReader>* table_reader,
const SliceTransform* prefix_extractor = nullptr,
bool skip_filters = false, int level = -1,

@ -760,6 +760,8 @@ struct DBOptions {
// for this mode if using block-based table.
//
// Default: false
// This flag has no affect on the behavior of compaction and plan to delete
// in the future.
bool new_table_reader_for_compaction_inputs = false;
// If non-zero, we perform bigger reads when doing compaction. If you're

@ -83,12 +83,13 @@ Status ReadBlockFromFile(
bool do_uncompress, bool maybe_compressed, BlockType block_type,
const UncompressionDict& uncompression_dict,
const PersistentCacheOptions& cache_options, SequenceNumber global_seqno,
size_t read_amp_bytes_per_bit, MemoryAllocator* memory_allocator) {
size_t read_amp_bytes_per_bit, MemoryAllocator* memory_allocator,
bool for_compaction = false) {
BlockContents contents;
BlockFetcher block_fetcher(file, prefetch_buffer, footer, options, handle,
&contents, ioptions, do_uncompress,
maybe_compressed, block_type, uncompression_dict,
cache_options, memory_allocator);
BlockFetcher block_fetcher(
file, prefetch_buffer, footer, options, handle, &contents, ioptions,
do_uncompress, maybe_compressed, block_type, uncompression_dict,
cache_options, memory_allocator, nullptr, for_compaction);
Status s = block_fetcher.ReadBlockContents();
if (s.ok()) {
result->reset(new Block(std::move(contents), global_seqno,
@ -2075,7 +2076,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
const ReadOptions& ro, const BlockHandle& handle, TBlockIter* input_iter,
BlockType block_type, bool key_includes_seq, bool index_key_is_full,
GetContext* get_context, BlockCacheLookupContext* lookup_context, Status s,
FilePrefetchBuffer* prefetch_buffer) const {
FilePrefetchBuffer* prefetch_buffer, bool for_compaction) const {
PERF_TIMER_GUARD(new_table_block_iter_nanos);
TBlockIter* iter = input_iter != nullptr ? input_iter : new TBlockIter;
@ -2094,7 +2095,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
CachableEntry<Block> block;
s = RetrieveBlock(prefetch_buffer, ro, handle, uncompression_dict, &block,
block_type, get_context, lookup_context);
block_type, get_context, lookup_context, for_compaction);
if (!s.ok()) {
assert(block.IsEmpty());
@ -2144,6 +2145,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
s = block_cache->Insert(unique_key, nullptr,
block.GetValue()->ApproximateMemoryUsage(),
nullptr, &cache_handle);
if (s.ok()) {
assert(cache_handle != nullptr);
iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache,
@ -2297,7 +2299,8 @@ Status BlockBasedTable::RetrieveBlock(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<Block>* block_entry, BlockType block_type,
GetContext* get_context, BlockCacheLookupContext* lookup_context) const {
GetContext* get_context, BlockCacheLookupContext* lookup_context,
bool for_compaction) const {
assert(block_entry);
assert(block_entry->IsEmpty());
@ -2340,7 +2343,7 @@ Status BlockBasedTable::RetrieveBlock(
block_type == BlockType::kData
? rep_->table_options.read_amp_bytes_per_bit
: 0,
GetMemoryAllocator(rep_->table_options));
GetMemoryAllocator(rep_->table_options), for_compaction);
}
if (!s.ok()) {
@ -2714,13 +2717,18 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
rep->file.get(), read_options_.readahead_size,
read_options_.readahead_size));
}
} else if (!prefetch_buffer_) {
prefetch_buffer_.reset(
new FilePrefetchBuffer(rep->file.get(), compaction_readahead_size_,
compaction_readahead_size_));
}
Status s;
table_->NewDataBlockIterator<TBlockIter>(
read_options_, data_block_handle, &block_iter_, block_type_,
key_includes_seq_, index_key_is_full_,
/*get_context=*/nullptr, &lookup_context_, s, prefetch_buffer_.get());
/*get_context=*/nullptr, &lookup_context_, s, prefetch_buffer_.get(),
for_compaction_);
block_iter_points_to_real_block_ = true;
}
}
@ -2806,7 +2814,8 @@ void BlockBasedTableIterator<TBlockIter, TValue>::CheckOutOfBound() {
InternalIterator* BlockBasedTable::NewIterator(
const ReadOptions& read_options, const SliceTransform* prefix_extractor,
Arena* arena, bool skip_filters, bool for_compaction) {
Arena* arena, bool skip_filters, bool for_compaction,
size_t compaction_readahead_size) {
BlockCacheLookupContext lookup_context{
for_compaction ? BlockCacheLookupCaller::kCompaction
: BlockCacheLookupCaller::kUserIterator};
@ -2823,7 +2832,8 @@ InternalIterator* BlockBasedTable::NewIterator(
!skip_filters && !read_options.total_order_seek &&
prefix_extractor != nullptr,
need_upper_bound_check, prefix_extractor, BlockType::kData,
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction);
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction,
compaction_readahead_size);
} else {
auto* mem =
arena->AllocateAligned(sizeof(BlockBasedTableIterator<DataBlockIter>));
@ -2835,7 +2845,8 @@ InternalIterator* BlockBasedTable::NewIterator(
!skip_filters && !read_options.total_order_seek &&
prefix_extractor != nullptr,
need_upper_bound_check, prefix_extractor, BlockType::kData,
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction);
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction,
compaction_readahead_size);
}
}

@ -123,6 +123,8 @@ class BlockBasedTable : public TableReader {
// The result of NewIterator() is initially invalid (caller must
// call one of the Seek methods on the iterator before using it).
// @param skip_filters Disables loading/accessing the filter block
// compaction_readahead_size: its value will only be used if for_compaction =
// true
InternalIterator* NewIterator(
const ReadOptions&, const SliceTransform* prefix_extractor,
Arena* arena = nullptr, bool skip_filters = false,
@ -131,7 +133,8 @@ class BlockBasedTable : public TableReader {
// i.e., it will populate the block cache with blocks in the new SST
// files. We treat those as a user is calling iterator for now. We should
// differentiate the callers.
bool for_compaction = false) override;
bool for_compaction = false,
size_t compaction_readahead_size = 0) override;
FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
const ReadOptions& read_options) override;
@ -234,7 +237,7 @@ class BlockBasedTable : public TableReader {
TBlockIter* input_iter, BlockType block_type, bool key_includes_seq,
bool index_key_is_full, GetContext* get_context,
BlockCacheLookupContext* lookup_context, Status s,
FilePrefetchBuffer* prefetch_buffer) const;
FilePrefetchBuffer* prefetch_buffer, bool for_compaction = false) const;
class PartitionedIndexIteratorState;
@ -283,7 +286,8 @@ class BlockBasedTable : public TableReader {
const UncompressionDict& uncompression_dict,
CachableEntry<Block>* block_entry, BlockType block_type,
GetContext* get_context,
BlockCacheLookupContext* lookup_context) const;
BlockCacheLookupContext* lookup_context,
bool for_compaction = false) const;
// For the following two functions:
// if `no_io == true`, we will not try to read filter/index from sst file
@ -596,6 +600,8 @@ struct BlockBasedTable::Rep {
// Iterates over the contents of BlockBasedTable.
template <class TBlockIter, typename TValue = Slice>
class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
// compaction_readahead_size: its value will only be used if for_compaction =
// true
public:
BlockBasedTableIterator(const BlockBasedTable* table,
const ReadOptions& read_options,
@ -605,7 +611,8 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
const SliceTransform* prefix_extractor,
BlockType block_type, bool key_includes_seq = true,
bool index_key_is_full = true,
bool for_compaction = false)
bool for_compaction = false,
size_t compaction_readahead_size = 0)
: InternalIteratorBase<TValue>(false),
table_(table),
read_options_(read_options),
@ -621,6 +628,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
key_includes_seq_(key_includes_seq),
index_key_is_full_(index_key_is_full),
for_compaction_(for_compaction),
compaction_readahead_size_(compaction_readahead_size),
lookup_context_(for_compaction
? BlockCacheLookupCaller::kCompaction
: BlockCacheLookupCaller::kUserIterator) {}
@ -734,6 +742,9 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
bool index_key_is_full_;
// If this iterator is created for compaction
bool for_compaction_;
// Readahead size used in compaction, its value is used only if
// for_compaction_ = true
size_t compaction_readahead_size_;
BlockHandle prev_index_value_;
BlockCacheLookupContext lookup_context_;

@ -93,7 +93,8 @@ inline bool BlockFetcher::TryGetFromPrefetchBuffer() {
if (prefetch_buffer_ != nullptr &&
prefetch_buffer_->TryReadFromCache(
handle_.offset(),
static_cast<size_t>(handle_.size()) + kBlockTrailerSize, &slice_)) {
static_cast<size_t>(handle_.size()) + kBlockTrailerSize, &slice_,
for_compaction_)) {
block_size_ = static_cast<size_t>(handle_.size());
CheckBlockChecksum();
if (!status_.ok()) {
@ -217,7 +218,7 @@ Status BlockFetcher::ReadBlockContents() {
PERF_TIMER_GUARD(block_read_time);
// Actual file read
status_ = file_->Read(handle_.offset(), block_size_ + kBlockTrailerSize,
&slice_, used_buf_);
&slice_, used_buf_, for_compaction_);
}
PERF_COUNTER_ADD(block_read_count, 1);

@ -44,7 +44,8 @@ class BlockFetcher {
const UncompressionDict& uncompression_dict,
const PersistentCacheOptions& cache_options,
MemoryAllocator* memory_allocator = nullptr,
MemoryAllocator* memory_allocator_compressed = nullptr)
MemoryAllocator* memory_allocator_compressed = nullptr,
bool for_compaction = false)
: file_(file),
prefetch_buffer_(prefetch_buffer),
footer_(footer),
@ -58,7 +59,9 @@ class BlockFetcher {
uncompression_dict_(uncompression_dict),
cache_options_(cache_options),
memory_allocator_(memory_allocator),
memory_allocator_compressed_(memory_allocator_compressed) {}
memory_allocator_compressed_(memory_allocator_compressed),
for_compaction_(for_compaction) {}
Status ReadBlockContents();
CompressionType get_compression_type() const { return compression_type_; }
@ -88,6 +91,7 @@ class BlockFetcher {
char stack_buf_[kDefaultStackBufferSize];
bool got_from_prefetch_buffer_ = false;
rocksdb::CompressionType compression_type_;
bool for_compaction_ = false;
// return true if found
bool TryGetUncompressBlockFromPersistentCache();

@ -377,7 +377,8 @@ Slice CuckooTableIterator::value() const {
InternalIterator* CuckooTableReader::NewIterator(
const ReadOptions& /*read_options*/,
const SliceTransform* /* prefix_extractor */, Arena* arena,
bool /*skip_filters*/, bool /*for_compaction*/) {
bool /*skip_filters*/, bool /*for_compaction*/,
size_t /*compaction_readahead_size*/) {
if (!status().ok()) {
return NewErrorInternalIterator<Slice>(
Status::Corruption("CuckooTableReader status is not okay."), arena);

@ -45,11 +45,15 @@ class CuckooTableReader: public TableReader {
GetContext* get_context, const SliceTransform* prefix_extractor,
bool skip_filters = false) override;
// Returns a new iterator over table contents
// compaction_readahead_size: its value will only be used if for_compaction =
// true
InternalIterator* NewIterator(const ReadOptions&,
const SliceTransform* prefix_extractor,
Arena* arena = nullptr,
bool skip_filters = false,
bool for_compaction = false) override;
bool for_compaction = false,
size_t compaction_readahead_size = 0) override;
void Prepare(const Slice& target) override;
// Report an approximation of how much memory has been used.

@ -487,12 +487,11 @@ Status ReadMetaBlock(RandomAccessFileReader* file,
read_options.verify_checksums = false;
PersistentCacheOptions cache_options;
BlockFetcher block_fetcher(file, prefetch_buffer, footer, read_options,
metaindex_handle, &metaindex_contents, ioptions,
false /* decompress */, false /*maybe_compressed*/,
BlockType::kMetaIndex,
UncompressionDict::GetEmptyDict(), cache_options,
memory_allocator);
BlockFetcher block_fetcher(
file, prefetch_buffer, footer, read_options, metaindex_handle,
&metaindex_contents, ioptions, false /* decompress */,
false /*maybe_compressed*/, BlockType::kMetaIndex,
UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
status = block_fetcher.ReadBlockContents();
if (!status.ok()) {
return status;

@ -34,7 +34,8 @@ stl_wrappers::KVMap MakeMockFile(
InternalIterator* MockTableReader::NewIterator(
const ReadOptions&, const SliceTransform* /* prefix_extractor */,
Arena* /*arena*/, bool /*skip_filters*/, bool /*for_compaction*/) {
Arena* /*arena*/, bool /*skip_filters*/, bool /*for_compaction*/,
size_t /*compaction_readahead_size*/) {
return new MockTableIterator(table_);
}

@ -44,7 +44,8 @@ class MockTableReader : public TableReader {
const SliceTransform* prefix_extractor,
Arena* arena = nullptr,
bool skip_filters = false,
bool for_compaction = false) override;
bool for_compaction = false,
size_t compaction_readahead_size = 0) override;
Status Get(const ReadOptions& readOptions, const Slice& key,
GetContext* get_context, const SliceTransform* prefix_extractor,

@ -196,7 +196,8 @@ void PlainTableReader::SetupForCompaction() {
InternalIterator* PlainTableReader::NewIterator(
const ReadOptions& options, const SliceTransform* /* prefix_extractor */,
Arena* arena, bool /*skip_filters*/, bool /*for_compaction*/) {
Arena* arena, bool /*skip_filters*/, bool /*for_compaction*/,
size_t /*compaction_readahead_size*/) {
bool use_prefix_seek = !IsTotalOrderMode() && !options.total_order_seek;
if (arena == nullptr) {
return new PlainTableIterator(this, use_prefix_seek);

@ -77,11 +77,15 @@ class PlainTableReader: public TableReader {
bool full_scan_mode, const bool immortal_table = false,
const SliceTransform* prefix_extractor = nullptr);
// Returns new iterator over table contents
// compaction_readahead_size: its value will only be used if for_compaction =
// true
InternalIterator* NewIterator(const ReadOptions&,
const SliceTransform* prefix_extractor,
Arena* arena = nullptr,
bool skip_filters = false,
bool for_compaction = false) override;
bool for_compaction = false,
size_t compaction_readahead_size = 0) override;
void Prepare(const Slice& target) override;

@ -44,11 +44,12 @@ class TableReader {
// all the states but those allocated in arena.
// skip_filters: disables checking the bloom filters even if they exist. This
// option is effective only for block-based table format.
virtual InternalIterator* NewIterator(const ReadOptions&,
const SliceTransform* prefix_extractor,
Arena* arena = nullptr,
bool skip_filters = false,
bool for_compaction = false) = 0;
// compaction_readahead_size: its value will only be used if for_compaction =
// true
virtual InternalIterator* NewIterator(
const ReadOptions&, const SliceTransform* prefix_extractor,
Arena* arena = nullptr, bool skip_filters = false,
bool for_compaction = false, size_t compaction_readahead_size = 0) = 0;
virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
const ReadOptions& /*read_options*/) {

@ -70,7 +70,7 @@ Status SequentialFileReader::Skip(uint64_t n) {
}
Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
char* scratch, bool for_compaction) const {
Status s;
uint64_t elapsed = 0;
{
@ -90,7 +90,7 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
buf.AllocateNewBuffer(read_size);
while (buf.CurrentSize() < read_size) {
size_t allowed;
if (for_compaction_ && rate_limiter_ != nullptr) {
if (for_compaction && rate_limiter_ != nullptr) {
allowed = rate_limiter_->RequestToken(
buf.Capacity() - buf.CurrentSize(), buf.Alignment(),
Env::IOPriority::IO_LOW, stats_, RateLimiter::OpType::kRead);
@ -134,7 +134,7 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
const char* res_scratch = nullptr;
while (pos < n) {
size_t allowed;
if (for_compaction_ && rate_limiter_ != nullptr) {
if (for_compaction && rate_limiter_ != nullptr) {
if (rate_limiter_->IsRateLimited(RateLimiter::OpType::kRead)) {
sw.DelayStart();
}
@ -711,7 +711,8 @@ private:
} // namespace
Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
uint64_t offset, size_t n) {
uint64_t offset, size_t n,
bool for_compaction) {
size_t alignment = reader->file()->GetRequiredBufferAlignment();
size_t offset_ = static_cast<size_t>(offset);
uint64_t rounddown_offset = Rounddown(offset_, alignment);
@ -771,7 +772,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
Slice result;
s = reader->Read(rounddown_offset + chunk_len,
static_cast<size_t>(roundup_len - chunk_len), &result,
buffer_.BufferStart() + chunk_len);
buffer_.BufferStart() + chunk_len, for_compaction);
if (s.ok()) {
buffer_offset_ = rounddown_offset;
buffer_.Size(static_cast<size_t>(chunk_len) + result.size());
@ -780,7 +781,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
}
bool FilePrefetchBuffer::TryReadFromCache(uint64_t offset, size_t n,
Slice* result) {
Slice* result, bool for_compaction) {
if (track_min_offset_ && offset < min_offset_read_) {
min_offset_read_ = static_cast<size_t>(offset);
}
@ -797,7 +798,8 @@ bool FilePrefetchBuffer::TryReadFromCache(uint64_t offset, size_t n,
assert(file_reader_ != nullptr);
assert(max_readahead_size_ >= readahead_size_);
Status s = Prefetch(file_reader_, offset, n + readahead_size_);
Status s =
Prefetch(file_reader_, offset, n + readahead_size_, for_compaction);
if (!s.ok()) {
return false;
}

@ -158,7 +158,8 @@ class RandomAccessFileReader {
RandomAccessFileReader(const RandomAccessFileReader&) = delete;
RandomAccessFileReader& operator=(const RandomAccessFileReader&) = delete;
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch,
bool for_compaction = false) const;
Status Prefetch(uint64_t offset, size_t n) const {
return file_->Prefetch(offset, n);
@ -343,7 +344,9 @@ class FilePrefetchBuffer {
// reader : the file reader.
// offset : the file offset to start reading from.
// n : the number of bytes to read.
Status Prefetch(RandomAccessFileReader* reader, uint64_t offset, size_t n);
// for_compaction : if prefetch is done for compaction read.
Status Prefetch(RandomAccessFileReader* reader, uint64_t offset, size_t n,
bool for_compaction = false);
// Tries returning the data for a file raed from this buffer, if that data is
// in the buffer.
@ -354,7 +357,9 @@ class FilePrefetchBuffer {
// offset : the file offset.
// n : the number of bytes.
// result : output buffer to put the data into.
bool TryReadFromCache(uint64_t offset, size_t n, Slice* result);
// for_compaction : if cache read is done for compaction read.
bool TryReadFromCache(uint64_t offset, size_t n, Slice* result,
bool for_compaction = false);
// The minimum `offset` ever passed to TryReadFromCache(). This will nly be
// tracked if track_min_offset = true.

Loading…
Cancel
Save