Enable per-request buffer allocation in RandomAccessFile

This change impacts only non-buffered I/O on Windows.
 Currently, there is a buffer per RandomAccessFile
 instance that is protected by a lock. The reason we
 maintain the buffer is that non-buffered I/O requires an
 aligned buffer to work.
 XPerf traces demonstrate that we accumulate a considerable
 wait time while waiting for that lock.
 This change allows the random access buffer size to be set to zero,
 which indicates a per-request allocation.
 We expect the allocation expense to be much less than
 I/O costs plus wait time, because the memory heap
 tends to re-use page-aligned allocations, especially with the
 use of Jemalloc.
 This change does not affect buffer use as a read_ahead_buffer for
 compaction purposes.
main
Dmitri Smirnov 9 years ago
parent 3b2a1ddd2e
commit 36300fbbe3
  1. 3
      include/rocksdb/options.h
  2. 53
      port/win/env_win.cc

@ -1121,6 +1121,9 @@ struct DBOptions {
// This option is currently honored only on Windows // This option is currently honored only on Windows
// //
// Default: 1 Mb // Default: 1 Mb
//
// Special value: 0 - means do not maintain per instance buffer. Allocate
// per request buffer and avoid locking.
size_t random_access_max_buffer_size; size_t random_access_max_buffer_size;
// This is the maximum buffer size that is used by WritableFileWriter. // This is the maximum buffer size that is used by WritableFileWriter.

@ -766,6 +766,18 @@ class WinRandomAccessFile : public RandomAccessFile {
return read; return read;
} }
// Derives the aligned read window for a request of bytes_requested bytes
// starting at offset, using the alignment reported by buffer_.
//   first_page_start    (out) offset truncated to the alignment boundary.
//   actual_bytes_toread (out) distance from first_page_start to the truncated
//                       position of the last requested byte, plus one
//                       alignment unit.
// bytes_requested must be non-zero: the last requested byte is computed as
// offset + bytes_requested - 1, which wraps around when both are zero.
void CalculateReadParameters(uint64_t offset, size_t bytes_requested,
                             size_t& actual_bytes_toread,
                             uint64_t& first_page_start) const {
  const size_t page_size = buffer_.Alignment();
  const uint64_t last_byte_offset = offset + bytes_requested - 1;
  first_page_start = TruncateToPageBoundary(page_size, offset);
  const uint64_t final_page_start =
      TruncateToPageBoundary(page_size, last_byte_offset);
  actual_bytes_toread = (final_page_start - first_page_start) + page_size;
}
public: public:
WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment, WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
const EnvOptions& options) const EnvOptions& options)
@ -797,15 +809,36 @@ class WinRandomAccessFile : public RandomAccessFile {
virtual Status Read(uint64_t offset, size_t n, Slice* result, virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override { char* scratch) const override {
Status s; Status s;
SSIZE_T r = -1; SSIZE_T r = -1;
size_t left = n; size_t left = n;
char* dest = scratch; char* dest = scratch;
if (n == 0) {
*result = Slice(scratch, 0);
return s;
}
// When in unbuffered mode we need to do the following changes: // When in unbuffered mode we need to do the following changes:
// - use our own aligned buffer // - use our own aligned buffer
// - always read at the offset of that is a multiple of alignment // - always read at the offset of that is a multiple of alignment
if (!use_os_buffer_) { if (!use_os_buffer_) {
uint64_t first_page_start = 0;
size_t actual_bytes_toread = 0;
size_t bytes_requested = left;
if (!read_ahead_ && random_access_max_buffer_size_ == 0) {
CalculateReadParameters(offset, bytes_requested, actual_bytes_toread,
first_page_start);
assert(actual_bytes_toread > 0);
r = ReadIntoOneShotBuffer(offset, first_page_start,
actual_bytes_toread, left, dest);
} else {
std::unique_lock<std::mutex> lock(buffer_mut_); std::unique_lock<std::mutex> lock(buffer_mut_);
// Let's see if at least some of the requested data is already // Let's see if at least some of the requested data is already
@ -824,19 +857,16 @@ class WinRandomAccessFile : public RandomAccessFile {
// Still some left or none was buffered // Still some left or none was buffered
if (left > 0) { if (left > 0) {
// Figure out the start/end offset for reading and amount to read // Figure out the start/end offset for reading and amount to read
const size_t alignment = buffer_.Alignment(); bytes_requested = left;
const size_t first_page_start =
TruncateToPageBoundary(alignment, offset);
size_t bytes_requested = left;
if (read_ahead_ && bytes_requested < compaction_readahead_size_) { if (read_ahead_ && bytes_requested < compaction_readahead_size_) {
bytes_requested = compaction_readahead_size_; bytes_requested = compaction_readahead_size_;
} }
const size_t last_page_start = CalculateReadParameters(offset, bytes_requested, actual_bytes_toread,
TruncateToPageBoundary(alignment, offset + bytes_requested - 1); first_page_start);
const size_t actual_bytes_toread =
(last_page_start - first_page_start) + alignment; assert(actual_bytes_toread > 0);
if (buffer_.Capacity() < actual_bytes_toread) { if (buffer_.Capacity() < actual_bytes_toread) {
// If we are in read-ahead mode or the requested size // If we are in read-ahead mode or the requested size
@ -848,17 +878,20 @@ class WinRandomAccessFile : public RandomAccessFile {
lock.unlock(); lock.unlock();
r = ReadIntoOneShotBuffer(offset, first_page_start, r = ReadIntoOneShotBuffer(offset, first_page_start,
actual_bytes_toread, left, dest); actual_bytes_toread, left, dest);
} else { }
else {
buffer_.AllocateNewBuffer(actual_bytes_toread); buffer_.AllocateNewBuffer(actual_bytes_toread);
r = ReadIntoInstanceBuffer(offset, first_page_start, r = ReadIntoInstanceBuffer(offset, first_page_start,
actual_bytes_toread, left, dest); actual_bytes_toread, left, dest);
} }
} else { }
else {
buffer_.Clear(); buffer_.Clear();
r = ReadIntoInstanceBuffer(offset, first_page_start, r = ReadIntoInstanceBuffer(offset, first_page_start,
actual_bytes_toread, left, dest); actual_bytes_toread, left, dest);
} }
} }
}
} else { } else {
r = pread(hFile_, scratch, left, offset); r = pread(hFile_, scratch, left, offset);
if (r > 0) { if (r > 0) {

Loading…
Cancel
Save