From 3db8504cdea5920490b9bafe5b0fdf4a56e42116 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 24 May 2018 15:05:00 -0700 Subject: [PATCH] Catchup with posix features Summary: Catch up with Posix features NewWritableRWFile must fail when file does not exists Implement Env::Truncate() Adjust Env options optimization functions Implement MemoryMappedBuffer on Windows. Closes https://github.com/facebook/rocksdb/pull/3857 Differential Revision: D8053610 Pulled By: ajkr fbshipit-source-id: ccd0d46c29648a9f6f496873bc1c9d6c5547487e --- env/env_test.cc | 10 ++-- env/io_posix.cc | 2 +- include/rocksdb/env.h | 21 +++++-- port/win/env_win.cc | 135 +++++++++++++++++++++++++++++++++++++----- port/win/env_win.h | 21 ++++++- port/win/io_win.cc | 21 +++++++ port/win/io_win.h | 12 ++++ tools/db_stress.cc | 4 +- 8 files changed, 197 insertions(+), 29 deletions(-) diff --git a/env/env_test.cc b/env/env_test.cc index 6d2f0d899..8023b9d3d 100644 --- a/env/env_test.cc +++ b/env/env_test.cc @@ -232,10 +232,10 @@ TEST_F(EnvPosixTest, MemoryMappedFileBuffer) { ASSERT_OK(status); ASSERT_NE(nullptr, mmap_buffer.get()); - ASSERT_NE(nullptr, mmap_buffer->base); - ASSERT_EQ(kFileBytes, mmap_buffer->length); - std::string actual_data(static_cast(mmap_buffer->base), - mmap_buffer->length); + ASSERT_NE(nullptr, mmap_buffer->GetBase()); + ASSERT_EQ(kFileBytes, mmap_buffer->GetLen()); + std::string actual_data(reinterpret_cast(mmap_buffer->GetBase()), + mmap_buffer->GetLen()); ASSERT_EQ(expected_data, actual_data); } @@ -1437,10 +1437,8 @@ TEST_P(EnvPosixTestWithParam, PosixRandomRWFile) { std::unique_ptr file; -#ifdef OS_LINUX // Cannot open non-existing file. ASSERT_NOK(env_->NewRandomRWFile(path, &file, EnvOptions())); -#endif // Create the file using WriteableFile { diff --git a/env/io_posix.cc b/env/io_posix.cc index a411b5639..0e067b4e8 100644 --- a/env/io_posix.cc +++ b/env/io_posix.cc @@ -1054,7 +1054,7 @@ Status PosixRandomRWFile::Close() { PosixMemoryMappedFileBuffer::~PosixMemoryMappedFileBuffer() { // TODO should have error handling though not much we can do... - munmap(this->base, length); + munmap(this->base_, length_); } /* diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 1f1f06010..79ff3f38b 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -42,7 +42,7 @@ class SequentialFile; class Slice; class WritableFile; class RandomRWFile; -struct MemoryMappedFileBuffer; +class MemoryMappedFileBuffer; class Directory; struct DBOptions; struct ImmutableDBOptions; @@ -822,13 +822,24 @@ class RandomRWFile { // MemoryMappedFileBuffer object represents a memory-mapped file's raw buffer. // Subclasses should release the mapping upon destruction. -struct MemoryMappedFileBuffer { +class MemoryMappedFileBuffer { +public: MemoryMappedFileBuffer(void* _base, size_t _length) - : base(_base), length(_length) {} + : base_(_base), length_(_length) {} + virtual ~MemoryMappedFileBuffer() = 0; - void* const base; - const size_t length; + // We do not want to unmap this twice. We can make this class + // movable if desired, however, since + MemoryMappedFileBuffer(const MemoryMappedFileBuffer&) = delete; + MemoryMappedFileBuffer& operator=(const MemoryMappedFileBuffer&) = delete; + + void* GetBase() const { return base_; } + size_t GetLen() const { return length_; } + +protected: + void* base_; + const size_t length_; }; // Directory object represents collection of files and implements diff --git a/port/win/env_win.cc b/port/win/env_win.cc index 7ebdaa3be..3bce0d52f 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -112,6 +112,15 @@ Status WinEnvIO::DeleteFile(const std::string& fname) { return result; } +Status WinEnvIO::Truncate(const std::string& fname, size_t size) { + Status s; + int result = truncate(fname.c_str(), size); + if (result != 0) { + s = IOError("Failed to truncate: " + fname, errno); + } + return s; +} + Status WinEnvIO::GetCurrentTime(int64_t* unix_time) { time_t time = std::time(nullptr); if (time == (time_t)(-1)) { @@ -344,7 +353,7 @@ Status WinEnvIO::NewRandomRWFile(const std::string & fname, // Random access is to disable read-ahead as the system reads too much data DWORD desired_access = GENERIC_READ | GENERIC_WRITE; DWORD shared_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE; - DWORD creation_disposition = OPEN_ALWAYS; // Create if necessary or open existing + DWORD creation_disposition = OPEN_EXISTING; // Fail if file does not exist DWORD file_flags = FILE_FLAG_RANDOM_ACCESS; if (options.use_direct_reads && options.use_direct_writes) { @@ -380,6 +389,84 @@ Status WinEnvIO::NewRandomRWFile(const std::string & fname, return s; } +Status WinEnvIO::NewMemoryMappedFileBuffer(const std::string & fname, + std::unique_ptr* result) { + Status s; + result->reset(); + + DWORD fileFlags = FILE_ATTRIBUTE_READONLY; + + HANDLE hFile = INVALID_HANDLE_VALUE; + { + IOSTATS_TIMER_GUARD(open_nanos); + hFile = CreateFileA( + fname.c_str(), GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, + OPEN_EXISTING, // Open only if it exists + fileFlags, + NULL); + } + + if (INVALID_HANDLE_VALUE == hFile) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError("Failed to open NewMemoryMappedFileBuffer: " + fname, + lastError); + return s; + } + UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc); + + uint64_t fileSize = 0; + s = GetFileSize(fname, &fileSize); + if (!s.ok()) { + return s; + } + // Will not map empty files + if (fileSize == 0) { + return Status::NotSupported("NewMemoryMappedFileBuffer can not map zero length files: " + fname); + } + + // size_t is 32-bit with 32-bit builds + if (fileSize > std::numeric_limits::max()) { + return Status::NotSupported( + "The specified file size does not fit into 32-bit memory addressing: " + fname); + } + + HANDLE hMap = CreateFileMappingA(hFile, NULL, PAGE_READWRITE, + 0, // Whole file at its present length + 0, + NULL); // Mapping name + + if (!hMap) { + auto lastError = GetLastError(); + return IOErrorFromWindowsError( + "Failed to create file mapping for NewMemoryMappedFileBuffer: " + fname, + lastError); + } + UniqueCloseHandlePtr mapGuard(hMap, CloseHandleFunc); + + void* base = MapViewOfFileEx(hMap, FILE_MAP_WRITE, + 0, // High DWORD of access start + 0, // Low DWORD + fileSize, + NULL); // Let the OS choose the mapping + + if (!base) { + auto lastError = GetLastError(); + return IOErrorFromWindowsError( + "Failed to MapViewOfFile for NewMemoryMappedFileBuffer: " + fname, + lastError); + } + + result->reset(new WinMemoryMappedBuffer(hFile, hMap, + base, static_cast(fileSize))); + + mapGuard.release(); + fileGuard.release(); + + return s; +} + Status WinEnvIO::NewDirectory(const std::string& name, std::unique_ptr* result) { Status s; @@ -928,27 +1015,33 @@ std::string WinEnvIO::TimeToString(uint64_t secondsSince1970) { EnvOptions WinEnvIO::OptimizeForLogWrite(const EnvOptions& env_options, const DBOptions& db_options) const { - EnvOptions optimized = env_options; + EnvOptions optimized(env_options); + // These two the same as default optimizations optimized.bytes_per_sync = db_options.wal_bytes_per_sync; - optimized.use_mmap_writes = false; - // This is because we flush only whole pages on unbuffered io and - // the last records are not guaranteed to be flushed. - optimized.use_direct_writes = false; - // TODO(icanadi) it's faster if fallocate_with_keep_size is false, but it - // breaks TransactionLogIteratorStallAtLastRecord unit test. Fix the unit - // test and make this false - optimized.fallocate_with_keep_size = true; optimized.writable_file_max_buffer_size = db_options.writable_file_max_buffer_size; + + // This adversely affects %999 on windows + optimized.use_mmap_writes = false; + // Direct writes will produce a huge perf impact on + // Windows. Pre-allocate space for WAL. + optimized.use_direct_writes = false; return optimized; } EnvOptions WinEnvIO::OptimizeForManifestWrite( const EnvOptions& env_options) const { - EnvOptions optimized = env_options; + EnvOptions optimized(env_options); optimized.use_mmap_writes = false; - optimized.use_direct_writes = false; - optimized.fallocate_with_keep_size = true; + optimized.use_direct_reads = false; + return optimized; +} + +EnvOptions WinEnvIO::OptimizeForManifestRead( + const EnvOptions& env_options) const { + EnvOptions optimized(env_options); + optimized.use_mmap_writes = false; + optimized.use_direct_reads = false; return optimized; } @@ -1145,6 +1238,10 @@ Status WinEnv::DeleteFile(const std::string& fname) { return winenv_io_.DeleteFile(fname); } +Status WinEnv::Truncate(const std::string& fname, size_t size) { + return winenv_io_.Truncate(fname, size); +} + Status WinEnv::GetCurrentTime(int64_t* unix_time) { return winenv_io_.GetCurrentTime(unix_time); } @@ -1173,10 +1270,15 @@ Status WinEnv::ReopenWritableFile(const std::string& fname, } Status WinEnv::NewRandomRWFile(const std::string & fname, - unique_ptr* result, const EnvOptions & options) { + std::unique_ptr* result, const EnvOptions & options) { return winenv_io_.NewRandomRWFile(fname, result, options); } +Status WinEnv::NewMemoryMappedFileBuffer(const std::string& fname, + std::unique_ptr* result) { + return winenv_io_.NewMemoryMappedFileBuffer(fname, result); +} + Status WinEnv::NewDirectory(const std::string& name, std::unique_ptr* result) { return winenv_io_.NewDirectory(name, result); @@ -1310,6 +1412,11 @@ void WinEnv::IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) { return winenv_threads_.IncBackgroundThreadsIfNeeded(num, pri); } +EnvOptions WinEnv::OptimizeForManifestRead( + const EnvOptions& env_options) const { + return winenv_io_.OptimizeForManifestRead(env_options); +} + EnvOptions WinEnv::OptimizeForLogWrite(const EnvOptions& env_options, const DBOptions& db_options) const { return winenv_io_.OptimizeForLogWrite(env_options, db_options); diff --git a/port/win/env_win.h b/port/win/env_win.h index ef35fab3f..8968490c2 100644 --- a/port/win/env_win.h +++ b/port/win/env_win.h @@ -89,6 +89,8 @@ public: virtual Status DeleteFile(const std::string& fname); + Status Truncate(const std::string& fname, size_t size); + virtual Status GetCurrentTime(int64_t* unix_time); virtual Status NewSequentialFile(const std::string& fname, @@ -110,6 +112,10 @@ public: unique_ptr* result, const EnvOptions& options); + virtual Status NewMemoryMappedFileBuffer( + const std::string& fname, + std::unique_ptr* result); + virtual Status NewDirectory(const std::string& name, std::unique_ptr* result); @@ -168,6 +174,9 @@ public: virtual EnvOptions OptimizeForManifestWrite( const EnvOptions& env_options) const; + virtual EnvOptions OptimizeForManifestRead( + const EnvOptions& env_options) const; + size_t GetPageSize() const { return page_size_; } size_t GetAllocationGranularity() const { return allocation_granularity_; } @@ -197,6 +206,8 @@ public: Status DeleteFile(const std::string& fname) override; + Status Truncate(const std::string& fname, size_t size) override; + Status GetCurrentTime(int64_t* unix_time) override; Status NewSequentialFile(const std::string& fname, @@ -224,9 +235,13 @@ public: // The returned file will only be accessed by one thread at a time. Status NewRandomRWFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override; + Status NewMemoryMappedFileBuffer( + const std::string& fname, + std::unique_ptr* result) override; + Status NewDirectory(const std::string& name, std::unique_ptr* result) override; @@ -302,12 +317,16 @@ public: void IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) override; + EnvOptions OptimizeForManifestRead( + const EnvOptions& env_options) const override; + EnvOptions OptimizeForLogWrite(const EnvOptions& env_options, const DBOptions& db_options) const override; EnvOptions OptimizeForManifestWrite( const EnvOptions& env_options) const override; + private: WinEnvIO winenv_io_; diff --git a/port/win/io_win.cc b/port/win/io_win.cc index 9321438d7..66fe8a11e 100644 --- a/port/win/io_win.cc +++ b/port/win/io_win.cc @@ -1061,6 +1061,27 @@ Status WinRandomRWFile::Close() { return CloseImpl(); } +////////////////////////////////////////////////////////////////////////// +/// WinMemoryMappedBufer +WinMemoryMappedBuffer::~WinMemoryMappedBuffer() { + BOOL ret = FALSE; + if (base_ != nullptr) { + ret = ::UnmapViewOfFile(base_); + assert(ret); + base_ = nullptr; + } + if (map_handle_ != NULL && map_handle_ != INVALID_HANDLE_VALUE) { + ret = ::CloseHandle(map_handle_); + assert(ret); + map_handle_ = NULL; + } + if (file_handle_ != NULL && file_handle_ != INVALID_HANDLE_VALUE) { + ret = ::CloseHandle(file_handle_); + assert(ret); + file_handle_ = NULL; + } +} + ////////////////////////////////////////////////////////////////////////// /// WinDirectory diff --git a/port/win/io_win.h b/port/win/io_win.h index e177a2e3e..3b08c394f 100644 --- a/port/win/io_win.h +++ b/port/win/io_win.h @@ -411,6 +411,18 @@ class WinRandomRWFile : private WinFileData, virtual Status Close() override; }; +class WinMemoryMappedBuffer : public MemoryMappedFileBuffer { +private: + HANDLE file_handle_; + HANDLE map_handle_; +public: + WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base, size_t size) : + MemoryMappedFileBuffer(base, size), + file_handle_(file_handle), + map_handle_(map_handle) {} + ~WinMemoryMappedBuffer() override; +}; + class WinDirectory : public Directory { HANDLE handle_; public: diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 579515304..cc87f8a53 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -879,9 +879,9 @@ class SharedState { FLAGS_expected_values_path, &expected_mmap_buffer_); } if (status.ok()) { - assert(expected_mmap_buffer_->length == expected_values_size); + assert(expected_mmap_buffer_->GetLen() == expected_values_size); values_ = - static_cast*>(expected_mmap_buffer_->base); + static_cast*>(expected_mmap_buffer_->GetBase()); assert(values_ != nullptr); } else { fprintf(stderr, "Failed opening shared file '%s' with error: %s\n",