From b5c99cc908a7e34fa65d588b2706c33000786935 Mon Sep 17 00:00:00 2001 From: burtonli Date: Thu, 21 Dec 2017 18:37:27 -0800 Subject: [PATCH] Disable onboard cache for compaction output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: FILE_FLAG_WRITE_THROUGH is the Windows API flag for disabling the device on-board cache, which should be disabled if the user doesn't need the system cache. There was a perf issue related to this: we found that during memtable flush, the high-percentile latency jumps significantly. During profiling, we found that those high-latency (P99.9) read requests got queue-jumped by write requests from the memtable flush and waited 80 ms or even longer, even when the SSD's overall IO throughput was relatively low. After enabling FILE_FLAG_WRITE_THROUGH, we reran the test and found that the high-percentile latency drops a lot without observable impact on writes. Scenario 1: 40MB/s + 40MB/s R/W compaction throughput  Original | FILE_FLAG_WRITE_THROUGH | Percentage reduction --------------------------------------------------------------- P99.9 | 56.897 ms | 35.593 ms | -37.4% P99 | 3.905 ms | 3.896 ms | -2.8% Scenario 2: 14MB/s + 14MB/s R/W compaction throughput, cohosted with 100+ other rocksdb instances that have manually triggered memtable flush operations (memtable is tiny), creating a lot of randomized small-file write operations during the test. 
Original | FILE_FLAG_WRITE_THROUGH | Percentage reduction --------------------------------------------------------------- P99.9 | 86.227 ms | 50.436 ms | -41.5% P99 | 8.415 ms | 3.356 ms | -60.1% Closes https://github.com/facebook/rocksdb/pull/3225 Differential Revision: D6624174 Pulled By: miasantreble fbshipit-source-id: 321b86aee9d74470840c70e5d0d4fa9880660a91 --- port/win/env_win.cc | 2 +- port/win/io_win.cc | 8 ++++++-- port/win/io_win.h | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/port/win/env_win.cc b/port/win/env_win.cc index 87b4eb159..5073944f5 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -253,7 +253,7 @@ Status WinEnvIO::OpenWritableFile(const std::string& fname, DWORD fileFlags = FILE_ATTRIBUTE_NORMAL; if (local_options.use_direct_writes && !local_options.use_mmap_writes) { - fileFlags = FILE_FLAG_NO_BUFFERING; + fileFlags = FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; } // Desired access. We are want to write only here but if we want to memory diff --git a/port/win/io_win.cc b/port/win/io_win.cc index 9403c829d..8cdd456d6 100644 --- a/port/win/io_win.cc +++ b/port/win/io_win.cc @@ -880,7 +880,7 @@ inline Status WinWritableImpl::SyncImpl() { Status s; // Calls flush buffers - if (fsync(file_data_->GetFileHandle()) < 0) { + if (!file_data_->use_direct_io() && fsync(file_data_->GetFileHandle()) < 0) { auto lastError = GetLastError(); s = IOErrorFromWindowsError( "fsync failed at Sync() for: " + file_data_->GetName(), lastError); @@ -961,7 +961,11 @@ Status WinWritableFile::Sync() { return SyncImpl(); } -Status WinWritableFile::Fsync() { return SyncImpl(); } +Status WinWritableFile::Fsync() { + return SyncImpl(); +} + +bool WinWritableFile::IsSyncThreadSafe() const { return true; } uint64_t WinWritableFile::GetFileSize() { return GetFileNextWriteOffset(); diff --git a/port/win/io_win.h b/port/win/io_win.h index 2c1d5a1ea..74a99ec93 100644 --- a/port/win/io_win.h +++ b/port/win/io_win.h @@ -368,6 +368,8 @@ class 
WinWritableFile : private WinFileData, virtual Status Fsync() override; + virtual bool IsSyncThreadSafe() const override; + // Indicates if the class makes use of direct I/O // Use PositionedAppend virtual bool use_direct_io() const override;