Remove memcpy from RandomAccessFileReader::Read in direct IO mode (#6455)

Summary:
In direct IO mode, RandomAccessFileReader::Read allocates an internal aligned buffer and then copies the result into the scratch buffer. If the result is only used temporarily inside a function, the memcpy is unnecessary: the result Slice can simply refer to the internally allocated buffer.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6455

Test Plan: make check

Differential Revision: D20106753

Pulled By: cheng-chang

fbshipit-source-id: 44f505843837bba47a56e3fa2c4dd3bd76486b58
main
Cheng Chang 5 years ago committed by Facebook Github Bot
parent f6c2777d95
commit 0a0151fb99
  1. 2
      file/file_prefetch_buffer.cc
  2. 14
      file/random_access_file_reader.cc
  3. 11
      file/random_access_file_reader.h
  4. 2
      table/block_fetcher.cc
  5. 2
      table/cuckoo/cuckoo_table_builder_test.cc
  6. 3
      table/cuckoo/cuckoo_table_reader.cc
  7. 11
      table/format.cc
  8. 2
      table/mock_table.cc
  9. 2
      table/plain/plain_table_key_coding.cc
  10. 3
      table/plain/plain_table_reader.cc
  11. 6
      table/table_test.cc
  12. 7
      util/aligned_buffer.h
  13. 13
      utilities/blob_db/blob_db_impl.cc
  14. 2
      utilities/blob_db/blob_dump_tool.cc
  15. 36
      utilities/blob_db/blob_file.cc
  16. 3
      utilities/blob_db/blob_log_reader.cc
  17. 2
      utilities/persistent_cache/block_cache_tier_file.cc
  18. 5
      utilities/trace/file_trace_reader_writer.cc

@ -88,7 +88,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
Slice result; Slice result;
s = reader->Read(rounddown_offset + chunk_len, s = reader->Read(rounddown_offset + chunk_len,
static_cast<size_t>(roundup_len - chunk_len), &result, static_cast<size_t>(roundup_len - chunk_len), &result,
buffer_.BufferStart() + chunk_len, for_compaction); buffer_.BufferStart() + chunk_len, nullptr, for_compaction);
if (s.ok()) { if (s.ok()) {
buffer_offset_ = rounddown_offset; buffer_offset_ = rounddown_offset;
buffer_.Size(static_cast<size_t>(chunk_len) + result.size()); buffer_.Size(static_cast<size_t>(chunk_len) + result.size());

@ -21,7 +21,10 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result, Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
char* scratch, bool for_compaction) const { char* scratch,
std::unique_ptr<const char[]>* internal_buf,
bool for_compaction) const {
(void) internal_buf;
Status s; Status s;
uint64_t elapsed = 0; uint64_t elapsed = 0;
{ {
@ -77,8 +80,13 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
} }
size_t res_len = 0; size_t res_len = 0;
if (s.ok() && offset_advance < buf.CurrentSize()) { if (s.ok() && offset_advance < buf.CurrentSize()) {
res_len = buf.Read(scratch, offset_advance, res_len = std::min(buf.CurrentSize() - offset_advance, n);
std::min(buf.CurrentSize() - offset_advance, n)); if (internal_buf == nullptr) {
buf.Read(scratch, offset_advance, res_len);
} else {
scratch = buf.BufferStart();
internal_buf->reset(buf.Release());
}
} }
*result = Slice(scratch, res_len); *result = Slice(scratch, res_len);
#endif // !ROCKSDB_LITE #endif // !ROCKSDB_LITE

@ -102,7 +102,18 @@ class RandomAccessFileReader {
RandomAccessFileReader(const RandomAccessFileReader&) = delete; RandomAccessFileReader(const RandomAccessFileReader&) = delete;
RandomAccessFileReader& operator=(const RandomAccessFileReader&) = delete; RandomAccessFileReader& operator=(const RandomAccessFileReader&) = delete;
// In non-direct IO mode,
// 1. if using mmap, result is stored in a buffer other than scratch;
// 2. if not using mmap, result is stored in the buffer starting from scratch.
//
// In direct IO mode, an internal aligned buffer is allocated.
// 1. If internal_buf is null, then results are copied to the buffer
// starting from scratch;
// 2. Otherwise, scratch is not used and can be null; internal_buf owns the
// internally allocated buffer on return, and the result refers to a
// region in internal_buf.
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch, Status Read(uint64_t offset, size_t n, Slice* result, char* scratch,
std::unique_ptr<const char[]>* internal_buf,
bool for_compaction = false) const; bool for_compaction = false) const;
Status MultiRead(FSReadRequest* reqs, size_t num_reqs) const; Status MultiRead(FSReadRequest* reqs, size_t num_reqs) const;

@ -216,7 +216,7 @@ Status BlockFetcher::ReadBlockContents() {
PERF_TIMER_GUARD(block_read_time); PERF_TIMER_GUARD(block_read_time);
// Actual file read // Actual file read
status_ = file_->Read(handle_.offset(), block_size_ + kBlockTrailerSize, status_ = file_->Read(handle_.offset(), block_size_ + kBlockTrailerSize,
&slice_, used_buf_, for_compaction_); &slice_, used_buf_, nullptr, for_compaction_);
} }
PERF_COUNTER_ADD(block_read_count, 1); PERF_COUNTER_ADD(block_read_count, 1);

@ -114,7 +114,7 @@ class CuckooBuilderTest : public testing::Test {
for (uint32_t i = 0; i < table_size + cuckoo_block_size - 1; ++i) { for (uint32_t i = 0; i < table_size + cuckoo_block_size - 1; ++i) {
Slice read_slice; Slice read_slice;
ASSERT_OK(file_reader->Read(i * bucket_size, bucket_size, &read_slice, ASSERT_OK(file_reader->Read(i * bucket_size, bucket_size, &read_slice,
nullptr)); nullptr, nullptr));
size_t key_idx = size_t key_idx =
std::find(expected_locations.begin(), expected_locations.end(), i) - std::find(expected_locations.begin(), expected_locations.end(), i) -
expected_locations.begin(); expected_locations.begin();

@ -137,7 +137,8 @@ CuckooTableReader::CuckooTableReader(
cuckoo_block_size_ = *reinterpret_cast<const uint32_t*>( cuckoo_block_size_ = *reinterpret_cast<const uint32_t*>(
cuckoo_block_size->second.data()); cuckoo_block_size->second.data());
cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1; cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1;
status_ = file_->Read(0, static_cast<size_t>(file_size), &file_data_, nullptr); status_ = file_->Read(0, static_cast<size_t>(file_size), &file_data_, nullptr,
nullptr);
} }
Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/, Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/,

@ -292,7 +292,8 @@ Status ReadFooterFromFile(RandomAccessFileReader* file,
file->file_name()); file->file_name());
} }
char footer_space[Footer::kMaxEncodedLength]; std::string footer_buf;
std::unique_ptr<const char[]> internal_buf;
Slice footer_input; Slice footer_input;
size_t read_offset = size_t read_offset =
(file_size > Footer::kMaxEncodedLength) (file_size > Footer::kMaxEncodedLength)
@ -302,8 +303,14 @@ Status ReadFooterFromFile(RandomAccessFileReader* file,
if (prefetch_buffer == nullptr || if (prefetch_buffer == nullptr ||
!prefetch_buffer->TryReadFromCache(read_offset, Footer::kMaxEncodedLength, !prefetch_buffer->TryReadFromCache(read_offset, Footer::kMaxEncodedLength,
&footer_input)) { &footer_input)) {
if (file->use_direct_io()) {
s = file->Read(read_offset, Footer::kMaxEncodedLength, &footer_input, s = file->Read(read_offset, Footer::kMaxEncodedLength, &footer_input,
footer_space); nullptr, &internal_buf);
} else {
footer_buf.reserve(Footer::kMaxEncodedLength);
s = file->Read(read_offset, Footer::kMaxEncodedLength, &footer_input,
&footer_buf[0], nullptr);
}
if (!s.ok()) return s; if (!s.ok()) return s;
} }

@ -114,7 +114,7 @@ uint32_t MockTableFactory::GetAndWriteNextID(WritableFileWriter* file) const {
uint32_t MockTableFactory::GetIDFromFile(RandomAccessFileReader* file) const { uint32_t MockTableFactory::GetIDFromFile(RandomAccessFileReader* file) const {
char buf[4]; char buf[4];
Slice result; Slice result;
file->Read(0, 4, &result, buf); file->Read(0, 4, &result, buf, nullptr);
assert(result.size() == 4); assert(result.size() == 4);
return DecodeFixed32(buf); return DecodeFixed32(buf);
} }

@ -208,7 +208,7 @@ bool PlainTableFileReader::ReadNonMmap(uint32_t file_offset, uint32_t len,
} }
Slice read_result; Slice read_result;
Status s = file_info_->file->Read(file_offset, size_to_read, &read_result, Status s = file_info_->file->Read(file_offset, size_to_read, &read_result,
new_buffer->buf.get()); new_buffer->buf.get(), nullptr);
if (!s.ok()) { if (!s.ok()) {
status_ = s; status_ = s;
return false; return false;

@ -288,7 +288,8 @@ void PlainTableReader::FillBloom(const std::vector<uint32_t>& prefix_hashes) {
Status PlainTableReader::MmapDataIfNeeded() { Status PlainTableReader::MmapDataIfNeeded() {
if (file_info_.is_mmap_mode) { if (file_info_.is_mmap_mode) {
// Get mmapped memory. // Get mmapped memory.
return file_info_.file->Read(0, static_cast<size_t>(file_size_), &file_info_.file_data, nullptr); return file_info_.file->Read(0, static_cast<size_t>(file_size_),
&file_info_.file_data, nullptr, nullptr);
} }
return Status::OK(); return Status::OK();
} }

@ -1251,7 +1251,8 @@ class FileChecksumTestHelper {
std::string tmp_checksum; std::string tmp_checksum;
bool first_read = true; bool first_read = true;
Status s; Status s;
s = file_reader_->Read(offset, 2048, &result, scratch.get(), false); s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr,
false);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -1264,7 +1265,8 @@ class FileChecksumTestHelper {
result.size()); result.size());
} }
offset += static_cast<uint64_t>(result.size()); offset += static_cast<uint64_t>(result.size());
s = file_reader_->Read(offset, 2048, &result, scratch.get(), false); s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr,
false);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }

@ -116,6 +116,13 @@ public:
cursize_ = 0; cursize_ = 0;
} }
// Detaches the underlying allocation from this buffer and hands it to the
// caller. The bookkeeping fields (cursize_, capacity_, bufstart_) are cleared
// first, so this object no longer refers to the released memory.
//
// Returns the raw pointer produced by buf_.release(); the caller is expected
// to take over ownership of it (e.g. by storing it in a smart pointer).
//
// NOTE(review): the returned pointer comes from buf_, not from bufstart_.
// Presumably bufstart_ is an aligned address inside that allocation, so the
// two may differ -- confirm before treating the return value as the start of
// the data.
char* Release() {
cursize_ = 0;
capacity_ = 0;
bufstart_ = nullptr;
return buf_.release();
}
void Alignment(size_t alignment) { void Alignment(size_t alignment) {
assert(alignment > 0); assert(alignment > 0);
assert((alignment & (alignment - 1)) == 0); assert((alignment & (alignment - 1)) == 0);

@ -1482,15 +1482,22 @@ Status BlobDBImpl::GetRawBlobFromFile(const Slice& key, uint64_t file_number,
const uint64_t record_size = sizeof(uint32_t) + key.size() + size; const uint64_t record_size = sizeof(uint32_t) + key.size() + size;
// Allocate the buffer. This is safe in C++11 // Allocate the buffer. This is safe in C++11
std::string buffer_str(static_cast<size_t>(record_size), static_cast<char>(0)); std::string buf;
char* buffer = &buffer_str[0]; std::unique_ptr<const char[]> internal_buf;
// A partial blob record contain checksum, key and value. // A partial blob record contain checksum, key and value.
Slice blob_record; Slice blob_record;
{ {
StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS); StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
s = reader->Read(record_offset, static_cast<size_t>(record_size), &blob_record, buffer); if (reader->use_direct_io()) {
s = reader->Read(record_offset, static_cast<size_t>(record_size),
&blob_record, nullptr, &internal_buf);
} else {
buf.reserve(static_cast<size_t>(record_size));
s = reader->Read(record_offset, static_cast<size_t>(record_size),
&blob_record, &buf[0], nullptr);
}
RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, blob_record.size()); RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, blob_record.size());
} }

@ -101,7 +101,7 @@ Status BlobDumpTool::Read(uint64_t offset, size_t size, Slice* result) {
} }
buffer_.reset(new char[buffer_size_]); buffer_.reset(new char[buffer_size_]);
} }
Status s = reader_->Read(offset, size, result, buffer_.get()); Status s = reader_->Read(offset, size, result, buffer_.get(), nullptr);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }

@ -138,9 +138,17 @@ Status BlobFile::ReadFooter(BlobLogFooter* bf) {
assert(ra_file_reader_); assert(ra_file_reader_);
Slice result; Slice result;
char scratch[BlobLogFooter::kSize + 10]; std::string buf;
Status s = ra_file_reader_->Read(footer_offset, BlobLogFooter::kSize, &result, std::unique_ptr<const char[]> internal_buf;
scratch); Status s;
if (ra_file_reader_->use_direct_io()) {
s = ra_file_reader_->Read(footer_offset, BlobLogFooter::kSize, &result,
nullptr, &internal_buf);
} else {
buf.reserve(BlobLogFooter::kSize + 10);
s = ra_file_reader_->Read(footer_offset, BlobLogFooter::kSize, &result,
&buf[0], nullptr);
}
if (!s.ok()) return s; if (!s.ok()) return s;
if (result.size() != BlobLogFooter::kSize) { if (result.size() != BlobLogFooter::kSize) {
// should not happen // should not happen
@ -254,9 +262,17 @@ Status BlobFile::ReadMetadata(Env* env, const EnvOptions& env_options) {
PathName())); PathName()));
// Read file header. // Read file header.
char header_buf[BlobLogHeader::kSize]; std::string header_buf;
std::unique_ptr<const char[]> internal_buf;
Slice header_slice; Slice header_slice;
s = file_reader->Read(0, BlobLogHeader::kSize, &header_slice, header_buf); if (file_reader->use_direct_io()) {
s = file_reader->Read(0, BlobLogHeader::kSize, &header_slice, nullptr,
&internal_buf);
} else {
header_buf.reserve(BlobLogHeader::kSize);
s = file_reader->Read(0, BlobLogHeader::kSize, &header_slice,
&header_buf[0], nullptr);
}
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_ERROR(info_log_, ROCKS_LOG_ERROR(info_log_,
"Failed to read header of blob file %" PRIu64 "Failed to read header of blob file %" PRIu64
@ -287,10 +303,16 @@ Status BlobFile::ReadMetadata(Env* env, const EnvOptions& env_options) {
assert(!footer_valid_); assert(!footer_valid_);
return Status::OK(); return Status::OK();
} }
char footer_buf[BlobLogFooter::kSize]; std::string footer_buf;
Slice footer_slice; Slice footer_slice;
if (file_reader->use_direct_io()) {
s = file_reader->Read(file_size - BlobLogFooter::kSize, BlobLogFooter::kSize,
&footer_slice, nullptr, &internal_buf);
} else {
footer_buf.reserve(BlobLogFooter::kSize);
s = file_reader->Read(file_size - BlobLogFooter::kSize, BlobLogFooter::kSize, s = file_reader->Read(file_size - BlobLogFooter::kSize, BlobLogFooter::kSize,
&footer_slice, footer_buf); &footer_slice, &footer_buf[0], nullptr);
}
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_ERROR(info_log_, ROCKS_LOG_ERROR(info_log_,
"Failed to read footer of blob file %" PRIu64 "Failed to read footer of blob file %" PRIu64

@ -26,7 +26,8 @@ Reader::Reader(std::unique_ptr<RandomAccessFileReader>&& file_reader, Env* env,
Status Reader::ReadSlice(uint64_t size, Slice* slice, char* buf) { Status Reader::ReadSlice(uint64_t size, Slice* slice, char* buf) {
StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS); StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
Status s = file_->Read(next_byte_, static_cast<size_t>(size), slice, buf); Status s =
file_->Read(next_byte_, static_cast<size_t>(size), slice, buf, nullptr);
next_byte_ += size; next_byte_ += size;
if (!s.ok()) { if (!s.ok()) {
return s; return s;

@ -235,7 +235,7 @@ bool RandomAccessCacheFile::Read(const LBA& lba, Slice* key, Slice* val,
} }
Slice result; Slice result;
Status s = freader_->Read(lba.off_, lba.size_, &result, scratch); Status s = freader_->Read(lba.off_, lba.size_, &result, scratch, nullptr);
if (!s.ok()) { if (!s.ok()) {
Error(log_, "Error reading from file %s. %s", Path().c_str(), Error(log_, "Error reading from file %s. %s", Path().c_str(),
s.ToString().c_str()); s.ToString().c_str());

@ -33,7 +33,8 @@ Status FileTraceReader::Close() {
Status FileTraceReader::Read(std::string* data) { Status FileTraceReader::Read(std::string* data) {
assert(file_reader_ != nullptr); assert(file_reader_ != nullptr);
Status s = file_reader_->Read(offset_, kTraceMetadataSize, &result_, buffer_); Status s = file_reader_->Read(offset_, kTraceMetadataSize, &result_, buffer_,
nullptr);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -57,7 +58,7 @@ Status FileTraceReader::Read(std::string* data) {
unsigned int to_read = unsigned int to_read =
bytes_to_read > kBufferSize ? kBufferSize : bytes_to_read; bytes_to_read > kBufferSize ? kBufferSize : bytes_to_read;
while (to_read > 0) { while (to_read > 0) {
s = file_reader_->Read(offset_, to_read, &result_, buffer_); s = file_reader_->Read(offset_, to_read, &result_, buffer_, nullptr);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }

Loading…
Cancel
Save