Prevent file prefetch when mmap is enabled. (#6206)

Summary:
Right now, sometimes file prefetching is still on when mmap is enabled. This causes bug of reading wrong data. In this commit, we remove all those possible paths.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6206

Test Plan: make crash_test with compaction_readahead_size, which used to fail. RUn all existing tests.

Differential Revision: D19149429

fbshipit-source-id: 9e18ea8c566e416aac9647bdd05afe596634791b
main
sdong 5 years ago committed by Facebook Github Bot
parent dfb259e48d
commit 02193ce406
  1. 3
      file/file_prefetch_buffer.cc
  2. 35
      table/block_based/block_based_table_reader.cc
  3. 7
      table/block_based/block_based_table_reader.h

@ -24,6 +24,9 @@ namespace rocksdb {
Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader, Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
uint64_t offset, size_t n, uint64_t offset, size_t n,
bool for_compaction) { bool for_compaction) {
if (!enable_ || reader == nullptr) {
return Status::OK();
}
size_t alignment = reader->file()->GetRequiredBufferAlignment(); size_t alignment = reader->file()->GetRequiredBufferAlignment();
size_t offset_ = static_cast<size_t>(offset); size_t offset_ = static_cast<size_t>(offset);
uint64_t rounddown_offset = Rounddown(offset_, alignment); uint64_t rounddown_offset = Rounddown(offset_, alignment);

@ -499,9 +499,8 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
uint64_t last_off = handle.offset() + block_size(handle); uint64_t last_off = handle.offset() + block_size(handle);
uint64_t prefetch_len = last_off - prefetch_off; uint64_t prefetch_len = last_off - prefetch_off;
std::unique_ptr<FilePrefetchBuffer> prefetch_buffer; std::unique_ptr<FilePrefetchBuffer> prefetch_buffer;
auto& file = rep->file; rep->CreateFilePrefetchBuffer(0, 0, &prefetch_buffer);
prefetch_buffer.reset(new FilePrefetchBuffer()); s = prefetch_buffer->Prefetch(rep->file.get(), prefetch_off,
s = prefetch_buffer->Prefetch(file.get(), prefetch_off,
static_cast<size_t>(prefetch_len)); static_cast<size_t>(prefetch_len));
// After prefetch, read the partitions one by one // After prefetch, read the partitions one by one
@ -1147,8 +1146,14 @@ Status BlockBasedTable::Open(
const bool prefetch_all = prefetch_index_and_filter_in_cache || level == 0; const bool prefetch_all = prefetch_index_and_filter_in_cache || level == 0;
const bool preload_all = !table_options.cache_index_and_filter_blocks; const bool preload_all = !table_options.cache_index_and_filter_blocks;
if (!ioptions.allow_mmap_reads) {
s = PrefetchTail(file.get(), file_size, tail_prefetch_stats, prefetch_all, s = PrefetchTail(file.get(), file_size, tail_prefetch_stats, prefetch_all,
preload_all, &prefetch_buffer); preload_all, &prefetch_buffer);
} else {
// Should not prefetch for mmap mode.
prefetch_buffer.reset(new FilePrefetchBuffer(
nullptr, 0, 0, false /* enable */, true /* track_min_offset */));
}
// Read in the following order: // Read in the following order:
// 1. Footer // 1. Footer
@ -1274,10 +1279,12 @@ Status BlockBasedTable::PrefetchTail(
Status s; Status s;
// TODO should not have this special logic in the future. // TODO should not have this special logic in the future.
if (!file->use_direct_io()) { if (!file->use_direct_io()) {
prefetch_buffer->reset(new FilePrefetchBuffer(nullptr, 0, 0, false, true)); prefetch_buffer->reset(new FilePrefetchBuffer(
nullptr, 0, 0, false /* enable */, true /* track_min_offset */));
s = file->Prefetch(prefetch_off, prefetch_len); s = file->Prefetch(prefetch_off, prefetch_len);
} else { } else {
prefetch_buffer->reset(new FilePrefetchBuffer(nullptr, 0, 0, true, true)); prefetch_buffer->reset(new FilePrefetchBuffer(
nullptr, 0, 0, true /* enable */, true /* track_min_offset */));
s = (*prefetch_buffer)->Prefetch(file, prefetch_off, prefetch_len); s = (*prefetch_buffer)->Prefetch(file, prefetch_off, prefetch_len);
} }
return s; return s;
@ -3016,23 +3023,23 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
} else if (rep->file->use_direct_io() && !prefetch_buffer_) { } else if (rep->file->use_direct_io() && !prefetch_buffer_) {
// Direct I/O // Direct I/O
// Let FilePrefetchBuffer take care of the readahead. // Let FilePrefetchBuffer take care of the readahead.
prefetch_buffer_.reset(new FilePrefetchBuffer( rep->CreateFilePrefetchBuffer(
rep->file.get(), BlockBasedTable::kInitAutoReadaheadSize, BlockBasedTable::kInitAutoReadaheadSize,
BlockBasedTable::kMaxAutoReadaheadSize)); BlockBasedTable::kMaxAutoReadaheadSize, &prefetch_buffer_);
} }
} }
} else if (!prefetch_buffer_) { } else if (!prefetch_buffer_) {
// Explicit user requested readahead // Explicit user requested readahead
// The actual condition is: // The actual condition is:
// if (read_options_.readahead_size != 0 && !prefetch_buffer_) // if (read_options_.readahead_size != 0 && !prefetch_buffer_)
prefetch_buffer_.reset(new FilePrefetchBuffer( rep->CreateFilePrefetchBuffer(read_options_.readahead_size,
rep->file.get(), read_options_.readahead_size, read_options_.readahead_size,
read_options_.readahead_size)); &prefetch_buffer_);
} }
} else if (!prefetch_buffer_) { } else if (!prefetch_buffer_) {
prefetch_buffer_.reset( rep->CreateFilePrefetchBuffer(compaction_readahead_size_,
new FilePrefetchBuffer(rep->file.get(), compaction_readahead_size_, compaction_readahead_size_,
compaction_readahead_size_)); &prefetch_buffer_);
} }
Status s; Status s;

@ -604,6 +604,13 @@ struct BlockBasedTable::Rep {
uint64_t sst_number_for_tracing() const { uint64_t sst_number_for_tracing() const {
return file ? TableFileNameToNumber(file->file_name()) : UINT64_MAX; return file ? TableFileNameToNumber(file->file_name()) : UINT64_MAX;
} }
void CreateFilePrefetchBuffer(
size_t readahead_size, size_t max_readahead_size,
std::unique_ptr<FilePrefetchBuffer>* fpb) const {
fpb->reset(new FilePrefetchBuffer(file.get(), readahead_size,
max_readahead_size,
!ioptions.allow_mmap_reads /* enable */));
}
}; };
// Iterates over the contents of BlockBasedTable. // Iterates over the contents of BlockBasedTable.

Loading…
Cancel
Save