From 3bdbf67e1aa25b5517e2e169a0c7602fbec6279e Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Thu, 17 Mar 2022 19:50:30 -0700 Subject: [PATCH] Fix race condition caused by concurrent accesses to forceMmapOff_ when opening Posix WritableFile (#9685) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/9685 Our TSAN reports a race condition as follows when running test ``` gtest-parallel -r 100 ./external_sst_file_test --gtest_filter=ExternalSSTFileTest.MultiThreaded ``` leads to the following ``` WARNING: ThreadSanitizer: data race (pid=2683148) Write of size 1 at 0x556fede63340 by thread T7: #0 rocksdb::(anonymous namespace)::PosixFileSystem::OpenWritableFile(std::__cxx11::basic_string, std::allocator > const&, rocksdb::FileOptions const&, bool, std::unique_ptr >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:334 (external_sst_file_test+0xb61ac4) #1 rocksdb::(anonymous namespace)::PosixFileSystem::ReopenWritableFile(std::__cxx11::basic_string, std::allocator > const&, rocksdb::FileOptions const&, std::unique_ptr >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:382 (external_sst_file_test+0xb5ba96) #2 rocksdb::CompositeEnv::ReopenWritableFile(std::__cxx11::basic_string, std::allocator > const&, std::unique_ptr >*, rocksdb::EnvOptions const&) internal_repo_rocksdb/repo/env/composite_env.cc:334 (external_sst_file_test+0xa6ab7f) #3 rocksdb::EnvWrapper::ReopenWritableFile(std::__cxx11::basic_string, std::allocator > const&, std::unique_ptr >*, rocksdb::EnvOptions const&) internal_repo_rocksdb/repo/include/rocksdb/env.h:1428 (external_sst_file_test+0x561f3e) Previous read of size 1 at 0x556fede63340 by thread T4: #0 rocksdb::(anonymous namespace)::PosixFileSystem::OpenWritableFile(std::__cxx11::basic_string, std::allocator > const&, rocksdb::FileOptions const&, bool, std::unique_ptr >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:328 (external_sst_file_test+0xb61a70) #1 rocksdb::(anonymous namespace)::PosixFileSystem::ReopenWritableFile(std::__cxx11::basic_string, std::allocator ... ``` Fix by making sure the following block gets executed only once: ``` if (!checkedDiskForMmap_) { // this will be executed once in the program's lifetime. // do not use mmapWrite on non ext-3/xfs/tmpfs systems. if (!SupportsFastAllocate(fname)) { forceMmapOff_ = true; } checkedDiskForMmap_ = true; } ``` Reviewed By: pdillinger Differential Revision: D34780308 fbshipit-source-id: b761f66b24c8b5b8389d86ea371c8542b8d869d5 --- HISTORY.md | 1 + env/fs_posix.cc | 45 ++++++++++++++++++++++----------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index c73d5da71..e683a0752 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -20,6 +20,7 @@ * Fixed a bug that `Iterator::Refresh()` reads stale keys after DeleteRange() performed. * Fixed a race condition when disable and re-enable manual compaction. * Fixed automatic error recovery failure in atomic flush. +* Fixed a race condition when mmaping a WritableFile on POSIX. ### Public API changes * Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect. diff --git a/env/fs_posix.cc b/env/fs_posix.cc index 23ef89784..cd23b26a9 100644 --- a/env/fs_posix.cc +++ b/env/fs_posix.cc @@ -325,14 +325,7 @@ class PosixFileSystem : public FileSystem { SetFD_CLOEXEC(fd, &options); if (options.use_mmap_writes) { - if (!checkedDiskForMmap_) { - // this will be executed once in the program's lifetime. - // do not use mmapWrite on non ext-3/xfs/tmpfs systems. - if (!SupportsFastAllocate(fname)) { - forceMmapOff_ = true; - } - checkedDiskForMmap_ = true; - } + MaybeForceDisableMmap(fd); } if (options.use_mmap_writes && !forceMmapOff_) { result->reset(new PosixMmapFile(fname, fd, page_size_, options)); @@ -431,14 +424,7 @@ class PosixFileSystem : public FileSystem { } if (options.use_mmap_writes) { - if (!checkedDiskForMmap_) { - // this will be executed once in the program's lifetime. - // do not use mmapWrite on non ext-3/xfs/tmpfs systems. - if (!SupportsFastAllocate(fname)) { - forceMmapOff_ = true; - } - checkedDiskForMmap_ = true; - } + MaybeForceDisableMmap(fd); } if (options.use_mmap_writes && !forceMmapOff_) { result->reset(new PosixMmapFile(fname, fd, page_size_, options)); @@ -999,8 +985,7 @@ class PosixFileSystem : public FileSystem { } #endif private: - bool checkedDiskForMmap_; - bool forceMmapOff_; // do we override Env options? + bool forceMmapOff_ = false; // do we override Env options? // Returns true iff the named directory exists and is a directory. virtual bool DirExists(const std::string& dname) { @@ -1011,10 +996,10 @@ class PosixFileSystem : public FileSystem { return false; // stat() failed return false } - bool SupportsFastAllocate(const std::string& path) { + bool SupportsFastAllocate(int fd) { #ifdef ROCKSDB_FALLOCATE_PRESENT struct statfs s; - if (statfs(path.c_str(), &s)) { + if (fstatfs(fd, &s)) { return false; } switch (s.f_type) { @@ -1028,11 +1013,26 @@ class PosixFileSystem : public FileSystem { return false; } #else - (void)path; + (void)fd; return false; #endif } + void MaybeForceDisableMmap(int fd) { + static std::once_flag s_check_disk_for_mmap_once; + assert(this == FileSystem::Default().get()); + std::call_once( + s_check_disk_for_mmap_once, + [this](int fdesc) { + // this will be executed once in the program's lifetime. + // do not use mmapWrite on non ext-3/xfs/tmpfs systems. + if (!SupportsFastAllocate(fdesc)) { + forceMmapOff_ = true; + } + }, + fd); + } + #ifdef ROCKSDB_IOURING_PRESENT bool IsIOUringEnabled() { if (RocksDbIOUringEnable && RocksDbIOUringEnable()) { @@ -1166,8 +1166,7 @@ size_t PosixFileSystem::GetLogicalBlockSizeForWriteIfNeeded( } PosixFileSystem::PosixFileSystem() - : checkedDiskForMmap_(false), - forceMmapOff_(false), + : forceMmapOff_(false), page_size_(getpagesize()), allow_non_owner_access_(true) { #if defined(ROCKSDB_IOURING_PRESENT)