From 7e9f28cb232248b58f22545733169137a907a97f Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Fri, 29 Aug 2014 21:21:49 -0700 Subject: [PATCH] limit max bytes that can be read/written per pread/write syscall Summary: BlockBasedTable sst file size can grow to a large size when universal compaction is used. When the index block exceeds 2G, pread seems to fail and return truncated data, which causes a "truncated block" error. I tried to use ``` #define _FILE_OFFSET_BITS 64 ``` But the problem still persists. Splitting a big write/read into smaller batches seems to solve the problem. Test Plan: successfully compacted a case with resulting sst file at ~90G (2.1G index block size) Reviewers: yhchiang, igor, sdong Reviewed By: sdong Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D22569 --- util/env_posix.cc | 48 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/util/env_posix.cc b/util/env_posix.cc index d644e7b0e..cf917e874 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -239,11 +239,23 @@ class PosixRandomAccessFile: public RandomAccessFile { char* scratch) const { Status s; ssize_t r = -1; - do { - r = pread(fd_, scratch, n, static_cast<off_t>(offset)); - } while (r < 0 && errno == EINTR); - IOSTATS_ADD_IF_POSITIVE(bytes_read, r); - *result = Slice(scratch, (r < 0) ? 0 : r); + size_t left = n; + char* ptr = scratch; + while (left > 0) { + r = pread(fd_, ptr, left, static_cast<off_t>(offset)); + if (r <= 0) { + if (errno == EINTR) { + continue; + } + break; + } + ptr += r; + offset += r; + left -= r; + } + + IOSTATS_ADD_IF_POSITIVE(bytes_read, n - left); + *result = Slice(scratch, (r < 0) ?
0 : n - left); if (r < 0) { // An error: return a non-ok status s = IOError(filename_, errno); @@ -907,9 +919,23 @@ class PosixRandomRWFile : public RandomRWFile { virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { Status s; - ssize_t r = pread(fd_, scratch, n, static_cast<off_t>(offset)); - IOSTATS_ADD_IF_POSITIVE(bytes_read, r); - *result = Slice(scratch, (r < 0) ? 0 : r); + ssize_t r = -1; + size_t left = n; + char* ptr = scratch; + while (left > 0) { + r = pread(fd_, ptr, left, static_cast<off_t>(offset)); + if (r <= 0) { + if (errno == EINTR) { + continue; + } + break; + } + ptr += r; + offset += r; + left -= r; + } + IOSTATS_ADD_IF_POSITIVE(bytes_read, n - left); + *result = Slice(scratch, (r < 0) ? 0 : n - left); if (r < 0) { s = IOError(filename_, errno); } @@ -1018,15 +1044,12 @@ class PosixFileLock : public FileLock { std::string filename; }; - -namespace { void PthreadCall(const char* label, int result) { if (result != 0) { fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); exit(1); } } -} class PosixEnv : public Env { public: @@ -1724,12 +1747,11 @@ unsigned int PosixEnv::GetThreadPoolQueueLen(Priority pri) const { return thread_pools_[pri].GetQueueLen(); } -namespace { struct StartThreadState { void (*user_function)(void*); void* arg; }; -} + static void* StartThreadWrapper(void* arg) { StartThreadState* state = reinterpret_cast<StartThreadState*>(arg); state->user_function(state->arg);