From 4bf169f07ec858c97318319d79777186a621f490 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 8 May 2018 12:03:29 -0700 Subject: [PATCH] Disable readahead when using mmap for reads Summary: `ReadaheadRandomAccessFile` had an unwritten assumption, which was that its wrapped file's `Read()` function always copies into the provided scratch buffer. Actually this was not true when the wrapped file was `PosixMmapReadableFile`, whose `Read()` implementation does no copying and instead returns a `Slice` pointing directly into the `mmap`'d memory region. This PR: - prevents `ReadaheadRandomAccessFile` from ever wrapping mmap readable files - adds an assert for the assumption `ReadaheadRandomAccessFile` makes about the wrapped file's use of scratch buffer Closes https://github.com/facebook/rocksdb/pull/3813 Differential Revision: D7891513 Pulled By: ajkr fbshipit-source-id: dc64a55222d6af280c39a1852ee39e9e9d7cde7d --- db/table_cache.cc | 6 +++++- util/file_reader_writer.cc | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/db/table_cache.cc b/db/table_cache.cc index 852d048a3..2e7c58957 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -98,7 +98,11 @@ Status TableCache::GetTableReader( RecordTick(ioptions_.statistics, NO_FILE_OPENS); if (s.ok()) { - if (readahead > 0) { + if (readahead > 0 && !env_options.use_mmap_reads) { + // Not compatible with mmap files since ReadaheadRandomAccessFile requires + // its wrapped file's Read() to copy data into the provided scratch + // buffer, which mmap files don't use. + // TODO(ajkr): try madvise for mmap files in place of buffered readahead. file = NewReadaheadRandomAccessFile(std::move(file), readahead); } if (!sequential_mode && ioptions_.advise_random_on_open) { diff --git a/util/file_reader_writer.cc b/util/file_reader_writer.cc index 9d4298b1e..aa8ca30b0 100644 --- a/util/file_reader_writer.cc +++ b/util/file_reader_writer.cc @@ -622,6 +622,7 @@ class ReadaheadRandomAccessFile : public RandomAccessFile { if (s.ok()) { buffer_offset_ = offset; buffer_len_ = result.size(); + assert(buffer_.BufferStart() == result.data()); } return s; }