From 65893ad95952d39b9503c87a816502ff561a264f Mon Sep 17 00:00:00 2001 From: Zichen Zhu Date: Wed, 1 Jun 2022 18:03:34 -0700 Subject: [PATCH] Explicitly closing all directory file descriptors (#10049) Summary: Currently, the DB directory file descriptor is left open until the deconstruction process (`DB::Close()` does not close the file descriptor). To verify this, comment out the lines between `db_ = nullptr` and `db_->Close()` (line 512, 513, 514, 515 in ldb_cmd.cc) to leak the ``db_'' object, build `ldb` tool and run ``` strace --trace=open,openat,close ./ldb --db=$TEST_TMPDIR --ignore_unknown_options put K1 V1 --create_if_missing ``` There is one directory file descriptor that is not closed in the strace log. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10049 Test Plan: Add a new unit test DBBasicTest.DBCloseAllDirectoryFDs: Open a database with different WAL directory and three different data directories, and all directory file descriptors should be closed after calling Close(). Explicitly call Close() after a directory file descriptor is not used so that the counter of directory open and close should be equivalent. Reviewed By: ajkr, hx235 Differential Revision: D36722135 Pulled By: littlepig2013 fbshipit-source-id: 07bdc2abc417c6b30997b9bbef1f79aa757b21ff --- db/column_family.cc | 13 ++++++++++++ db/db_basic_test.cc | 34 ++++++++++++++++++++++++++++++++ db/db_impl/db_impl.cc | 8 ++++++++ db/db_impl/db_impl.h | 33 +++++++++++++++++++++++++++++++ db/db_test_util.h | 1 + env/composite_env.cc | 7 +++++++ env/env.cc | 4 ++++ env/fs_posix.cc | 2 +- env/io_posix.cc | 29 +++++++++++++++++++++++---- env/io_posix.h | 5 ++++- env/mock_env.cc | 5 +++++ file/filename.cc | 3 +++ include/rocksdb/env.h | 3 +++ include/rocksdb/file_system.h | 7 +++++++ port/win/env_win.cc | 2 +- port/win/io_win.cc | 16 +++++++++++++++ port/win/io_win.h | 13 ++++++++++-- tools/ldb_cmd.cc | 2 ++ utilities/counted_fs.cc | 14 +++++++++++++ utilities/counted_fs.h | 8 +++++++- utilities/fault_injection_env.cc | 7 +++++++ utilities/fault_injection_env.h | 1 + utilities/fault_injection_fs.cc | 8 ++++++++ utilities/fault_injection_fs.h | 3 +++ 24 files changed, 218 insertions(+), 10 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index 469f0e771..e5ec0a6ef 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -675,6 +675,19 @@ ColumnFamilyData::~ColumnFamilyData() { id_, name_.c_str()); } } + + if (data_dirs_.size()) { // Explicitly close data directories + Status s = Status::OK(); + for (auto& data_dir_ptr : data_dirs_) { + if (data_dir_ptr) { + s = data_dir_ptr->Close(IOOptions(), nullptr); + if (!s.ok()) { + ROCKS_LOG_WARN(ioptions_.logger, "Ignoring error %s", + s.ToString().c_str()); + } + } + } + } } bool ColumnFamilyData::UnrefAndTryDelete() { diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index a95160a38..f13fe0df5 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -25,6 +25,7 @@ #endif #include "util/file_checksum_helper.h" #include "util/random.h" +#include "utilities/counted_fs.h" #include "utilities/fault_injection_env.h" #include "utilities/merge_operators.h" #include "utilities/merge_operators/string_append/stringappend.h" @@ -1153,6 +1154,39 @@ TEST_F(DBBasicTest, DBClose) { ASSERT_EQ(env->GetCloseCount(), 3); } +TEST_F(DBBasicTest, DBCloseAllDirectoryFDs) { + Options options = GetDefaultOptions(); + std::string dbname = test::PerThreadDBPath("db_close_all_dir_fds_test"); + // Configure a specific WAL directory + options.wal_dir = dbname + "_wal_dir"; + // Configure 3 different data directories + options.db_paths.emplace_back(dbname + "_1", 512 * 1024); + options.db_paths.emplace_back(dbname + "_2", 4 * 1024 * 1024); + options.db_paths.emplace_back(dbname + "_3", 1024 * 1024 * 1024); + + ASSERT_OK(DestroyDB(dbname, options)); + + DB* db = nullptr; + std::unique_ptr env = NewCompositeEnv( + std::make_shared(FileSystem::Default())); + options.create_if_missing = true; + options.env = env.get(); + Status s = DB::Open(options, dbname, &db); + ASSERT_OK(s); + ASSERT_TRUE(db != nullptr); + + // Explicitly close the database to ensure the open and close counter for + // directories are equivalent + s = db->Close(); + auto* counted_fs = + options.env->GetFileSystem()->CheckedCast(); + assert(counted_fs); + ASSERT_TRUE(counted_fs->counters()->dir_opens == + counted_fs->counters()->dir_closes); + ASSERT_OK(s); + delete db; +} + TEST_F(DBBasicTest, DBCloseFlushError) { std::unique_ptr fault_injection_env( new FaultInjectionTestEnv(env_)); diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 74dd533d8..42543a671 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -714,12 +714,17 @@ Status DBImpl::CloseHelper() { write_buffer_manager_->RemoveDBFromQueue(wbm_stall_.get()); } + IOStatus io_s = directories_.Close(IOOptions(), nullptr /* dbg */); + if (!io_s.ok()) { + ret = io_s; + } if (ret.IsAborted()) { // Reserve IsAborted() error for those where users didn't release // certain resource and they can release them and come back and // retry. In this case, we wrap this exception to something else. return Status::Incomplete(ret.ToString()); } + return ret; } @@ -4382,6 +4387,9 @@ Status DBImpl::RenameTempFileToOptionsFile(const std::string& file_name) { s = dir_obj->FsyncWithDirOptions(IOOptions(), nullptr, DirFsyncOptions(options_file_name)); } + if (s.ok()) { + s = dir_obj->Close(IOOptions(), nullptr); + } } if (s.ok()) { InstrumentedMutexLock l(&mutex_); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index c3a4b814c..dae0d21fb 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -113,6 +113,39 @@ class Directories { FSDirectory* GetDbDir() { return db_dir_.get(); } + IOStatus Close(const IOOptions& options, IODebugContext* dbg) { + // close all directories for all database paths + IOStatus s = IOStatus::OK(); + if (db_dir_) { + s = db_dir_->Close(options, dbg); + } + + if (!s.ok()) { + return s; + } + + if (wal_dir_) { + s = wal_dir_->Close(options, dbg); + } + + if (!s.ok()) { + return s; + } + + if (data_dirs_.size() > 0 && s.ok()) { + for (auto& data_dir_ptr : data_dirs_) { + if (data_dir_ptr) { + s = data_dir_ptr->Close(options, dbg); + if (!s.ok()) { + return s; + } + } + } + } + + return s; + } + private: std::unique_ptr db_dir_; std::vector> data_dirs_; diff --git a/db/db_test_util.h b/db/db_test_util.h index 3572a5d58..3a9592b54 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -588,6 +588,7 @@ class SpecialEnv : public EnvWrapper { ~NoopDirectory() {} Status Fsync() override { return Status::OK(); } + Status Close() override { return Status::OK(); } }; result->reset(new NoopDirectory()); diff --git a/env/composite_env.cc b/env/composite_env.cc index 6783fc17d..b93aa9fcb 100644 --- a/env/composite_env.cc +++ b/env/composite_env.cc @@ -276,6 +276,13 @@ class CompositeDirectoryWrapper : public Directory { IODebugContext dbg; return target_->FsyncWithDirOptions(io_opts, &dbg, DirFsyncOptions()); } + + Status Close() override { + IOOptions io_opts; + IODebugContext dbg; + return target_->Close(io_opts, &dbg); + } + size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } diff --git a/env/env.cc b/env/env.cc index 545136fe9..c322acde9 100644 --- a/env/env.cc +++ b/env/env.cc @@ -350,6 +350,10 @@ class LegacyDirectoryWrapper : public FSDirectory { IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Fsync()); } + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return status_to_io_status(target_->Close()); + } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } diff --git a/env/fs_posix.cc b/env/fs_posix.cc index 545dfade1..a75f22622 100644 --- a/env/fs_posix.cc +++ b/env/fs_posix.cc @@ -550,7 +550,7 @@ class PosixFileSystem : public FileSystem { if (fd < 0) { return IOError("While open directory", name, errno); } else { - result->reset(new PosixDirectory(fd)); + result->reset(new PosixDirectory(fd, name)); } return IOStatus::OK(); } diff --git a/env/io_posix.cc b/env/io_posix.cc index de941b5d7..c0860c376 100644 --- a/env/io_posix.cc +++ b/env/io_posix.cc @@ -1639,7 +1639,8 @@ PosixMemoryMappedFileBuffer::~PosixMemoryMappedFileBuffer() { // The magic number for BTRFS is fixed, if it's not defined, define it here #define BTRFS_SUPER_MAGIC 0x9123683E #endif -PosixDirectory::PosixDirectory(int fd) : fd_(fd) { +PosixDirectory::PosixDirectory(int fd, const std::string& directory_name) + : fd_(fd), directory_name_(directory_name) { is_btrfs_ = false; #ifdef OS_LINUX struct statfs buf; @@ -1649,12 +1650,28 @@ PosixDirectory::PosixDirectory(int fd) : fd_(fd) { #endif } -PosixDirectory::~PosixDirectory() { close(fd_); } +PosixDirectory::~PosixDirectory() { + if (fd_ >= 0) { + IOStatus s = PosixDirectory::Close(IOOptions(), nullptr); + s.PermitUncheckedError(); + } +} IOStatus PosixDirectory::Fsync(const IOOptions& opts, IODebugContext* dbg) { return FsyncWithDirOptions(opts, dbg, DirFsyncOptions()); } +IOStatus PosixDirectory::Close(const IOOptions& /*opts*/, + IODebugContext* /*dbg*/) { + IOStatus s = IOStatus::OK(); + if (close(fd_) < 0) { + s = IOError("While closing directory ", directory_name_, errno); + } else { + fd_ = -1; + } + return s; +} + IOStatus PosixDirectory::FsyncWithDirOptions( const IOOptions& /*opts*/, IODebugContext* /*dbg*/, const DirFsyncOptions& dir_fsync_options) { @@ -1686,15 +1703,19 @@ IOStatus PosixDirectory::FsyncWithDirOptions( } // fallback to dir-fsync for kDefault, kDirRenamed and kFileDeleted } + + // skip fsync/fcntl when fd_ == -1 since this file descriptor has been closed + // in either the de-construction or the close function, data must have been + // fsync-ed before de-construction and close is called #ifdef HAVE_FULLFSYNC // btrfs is a Linux file system, while currently F_FULLFSYNC is available on // Mac OS. assert(!is_btrfs_); - if (::fcntl(fd_, F_FULLFSYNC) < 0) { + if (fd_ != -1 && ::fcntl(fd_, F_FULLFSYNC) < 0) { return IOError("while fcntl(F_FULLFSYNC)", "a directory", errno); } #else // HAVE_FULLFSYNC - if (fsync(fd_) == -1) { + if (fd_ != -1 && fsync(fd_) == -1) { s = IOError("While fsync", "a directory", errno); } #endif // HAVE_FULLFSYNC diff --git a/env/io_posix.h b/env/io_posix.h index 1aacd75a3..0438163a3 100644 --- a/env/io_posix.h +++ b/env/io_posix.h @@ -450,10 +450,12 @@ struct PosixMemoryMappedFileBuffer : public MemoryMappedFileBuffer { class PosixDirectory : public FSDirectory { public: - explicit PosixDirectory(int fd); + explicit PosixDirectory(int fd, const std::string& directory_name); ~PosixDirectory(); virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override; + virtual IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override; + virtual IOStatus FsyncWithDirOptions( const IOOptions&, IODebugContext*, const DirFsyncOptions& dir_fsync_options) override; @@ -461,6 +463,7 @@ class PosixDirectory : public FSDirectory { private: int fd_; bool is_btrfs_; + const std::string directory_name_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/env/mock_env.cc b/env/mock_env.cc index ebdeb1b06..bfa7dc2f4 100644 --- a/env/mock_env.cc +++ b/env/mock_env.cc @@ -447,6 +447,11 @@ class MockEnvDirectory : public FSDirectory { IODebugContext* /*dbg*/) override { return IOStatus::OK(); } + + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } }; class MockEnvFileLock : public FileLock { diff --git a/file/filename.cc b/file/filename.cc index f63a469dd..e31dbb681 100644 --- a/file/filename.cc +++ b/file/filename.cc @@ -443,6 +443,9 @@ Status SetIdentityFile(Env* env, const std::string& dbname, s = dir_obj->FsyncWithDirOptions(IOOptions(), nullptr, DirFsyncOptions(identify_file_name)); } + if (s.ok()) { + s = dir_obj->Close(IOOptions(), nullptr); + } if (!s.ok()) { env->DeleteFile(tmp).PermitUncheckedError(); } diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 241c5cc05..cda120e84 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -1148,6 +1148,8 @@ class Directory { virtual ~Directory() {} // Fsync directory. Can be called concurrently from multiple threads. virtual Status Fsync() = 0; + // Close directory. + virtual Status Close() = 0; virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const { return 0; @@ -1810,6 +1812,7 @@ class DirectoryWrapper : public Directory { explicit DirectoryWrapper(Directory* target) : target_(target) {} Status Fsync() override { return target_->Fsync(); } + Status Close() override { return target_->Close(); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index 27bab5833..6eab0d84e 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -1267,6 +1267,9 @@ class FSDirectory { return Fsync(options, dbg); } + // Close directory + virtual IOStatus Close(const IOOptions& options, IODebugContext* dbg) = 0; + virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const { return 0; } @@ -1811,6 +1814,10 @@ class FSDirectoryWrapper : public FSDirectory { return target_->FsyncWithDirOptions(options, dbg, dir_fsync_options); } + IOStatus Close(const IOOptions& options, IODebugContext* dbg) override { + return target_->Close(options, dbg); + } + size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } diff --git a/port/win/env_win.cc b/port/win/env_win.cc index c5038bbfc..017e1200f 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -601,7 +601,7 @@ IOStatus WinFileSystem::NewDirectory(const std::string& name, return s; } - result->reset(new WinDirectory(handle)); + result->reset(new WinDirectory(name, handle)); return s; } diff --git a/port/win/io_win.cc b/port/win/io_win.cc index 41a6b8381..b374b6fc1 100644 --- a/port/win/io_win.cc +++ b/port/win/io_win.cc @@ -1064,6 +1064,22 @@ IOStatus WinDirectory::Fsync(const IOOptions& /*options*/, return IOStatus::OK(); } +IOStatus WinDirectory::Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + IOStatus s = IOStatus::OK(); + BOOL ret __attribute__((__unused__)); + if (handle_ != INVALID_HANDLE_VALUE) { + ret = ::CloseHandle(handle_); + if (!ret) { + auto lastError = GetLastError(); + s = IOErrorFromWindowsError("Directory closes failed for : " + GetName(), + lastError); + } + handle_ = NULL; + } + return s; +} + size_t WinDirectory::GetUniqueId(char* id, size_t max_size) const { return GetUniqueIdFromFile(handle_, id, max_size); } diff --git a/port/win/io_win.h b/port/win/io_win.h index fd6606b32..d5a079052 100644 --- a/port/win/io_win.h +++ b/port/win/io_win.h @@ -472,14 +472,23 @@ class WinMemoryMappedBuffer : public MemoryMappedFileBuffer { }; class WinDirectory : public FSDirectory { + const std::string filename_; HANDLE handle_; public: - explicit WinDirectory(HANDLE h) noexcept : handle_(h) { + explicit WinDirectory(const std::string& filename, HANDLE h) noexcept + : filename_(filename), handle_(h) { assert(handle_ != INVALID_HANDLE_VALUE); } - ~WinDirectory() { ::CloseHandle(handle_); } + ~WinDirectory() { + if (handle_ != NULL) { + IOStatus s = WinDirectory::Close(IOOptions(), nullptr); + s.PermitUncheckedError(); + } + } + const std::string& GetName() const { return filename_; } IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override; + IOStatus Close(const IOOptions& options, IODebugContext* dbg) override; size_t GetUniqueId(char* id, size_t max_size) const override; }; diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 78696f817..d391bd2bb 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -509,6 +509,8 @@ void LDBCommand::CloseDB() { for (auto& pair : cf_handles_) { delete pair.second; } + Status s = db_->Close(); + s.PermitUncheckedError(); delete db_; db_ = nullptr; } diff --git a/utilities/counted_fs.cc b/utilities/counted_fs.cc index f62158c8d..6917dc06e 100644 --- a/utilities/counted_fs.cc +++ b/utilities/counted_fs.cc @@ -224,6 +224,15 @@ class CountedDirectory : public FSDirectoryWrapper { return rv; } + IOStatus Close(const IOOptions& options, IODebugContext* dbg) override { + IOStatus rv = FSDirectoryWrapper::Close(options, dbg); + if (rv.ok()) { + fs_->counters()->closes++; + fs_->counters()->dir_closes++; + } + return rv; + } + IOStatus FsyncWithDirOptions(const IOOptions& options, IODebugContext* dbg, const DirFsyncOptions& dir_options) override { IOStatus rv = @@ -250,6 +259,10 @@ std::string FileOpCounters::PrintCounters() const { ss << "Num Dir Fsync(): " << dsyncs.load(std::memory_order_relaxed) << std::endl; ss << "Num Close(): " << closes.load(std::memory_order_relaxed) << std::endl; + ss << "Num Dir Open(): " << dir_opens.load(std::memory_order_relaxed) + << std::endl; + ss << "Num Dir Close(): " << dir_closes.load(std::memory_order_relaxed) + << std::endl; ss << "Num Read(): " << reads.ops.load(std::memory_order_relaxed) << std::endl; ss << "Num Append(): " << writes.ops.load(std::memory_order_relaxed) @@ -347,6 +360,7 @@ IOStatus CountedFileSystem::NewDirectory(const std::string& name, IOStatus s = target()->NewDirectory(name, options, &base, dbg); if (s.ok()) { counters_.opens++; + counters_.dir_opens++; result->reset(new CountedDirectory(std::move(base), this)); } return s; diff --git a/utilities/counted_fs.h b/utilities/counted_fs.h index f05a37b8d..cb8a8968f 100644 --- a/utilities/counted_fs.h +++ b/utilities/counted_fs.h @@ -46,6 +46,8 @@ struct FileOpCounters { std::atomic syncs; std::atomic dsyncs; std::atomic fsyncs; + std::atomic dir_opens; + std::atomic dir_closes; OpCounter reads; OpCounter writes; @@ -57,7 +59,9 @@ struct FileOpCounters { flushes(0), syncs(0), dsyncs(0), - fsyncs(0) {} + fsyncs(0), + dir_opens(0), + dir_closes(0) {} void Reset() { opens = 0; @@ -68,6 +72,8 @@ struct FileOpCounters { syncs = 0; dsyncs = 0; fsyncs = 0; + dir_opens = 0; + dir_closes = 0; reads.Reset(); writes.Reset(); } diff --git a/utilities/fault_injection_env.cc b/utilities/fault_injection_env.cc index 5ef206aec..b0495a8c1 100644 --- a/utilities/fault_injection_env.cc +++ b/utilities/fault_injection_env.cc @@ -107,6 +107,13 @@ Status TestDirectory::Fsync() { return dir_->Fsync(); } +Status TestDirectory::Close() { + if (!env_->IsFilesystemActive()) { + return env_->GetError(); + } + return dir_->Close(); +} + TestRandomAccessFile::TestRandomAccessFile( std::unique_ptr&& target, FaultInjectionTestEnv* env) : target_(std::move(target)), env_(env) { diff --git a/utilities/fault_injection_env.h b/utilities/fault_injection_env.h index 11d6a3053..c492f9987 100644 --- a/utilities/fault_injection_env.h +++ b/utilities/fault_injection_env.h @@ -138,6 +138,7 @@ class TestDirectory : public Directory { ~TestDirectory() {} virtual Status Fsync() override; + virtual Status Close() override; private: FaultInjectionTestEnv* env_; diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc index 161118672..5943ebb24 100644 --- a/utilities/fault_injection_fs.cc +++ b/utilities/fault_injection_fs.cc @@ -109,6 +109,14 @@ IOStatus TestFSDirectory::Fsync(const IOOptions& options, IODebugContext* dbg) { return s; } +IOStatus TestFSDirectory::Close(const IOOptions& options, IODebugContext* dbg) { + if (!fs_->IsFilesystemActive()) { + return fs_->GetError(); + } + IOStatus s = dir_->Close(options, dbg); + return s; +} + IOStatus TestFSDirectory::FsyncWithDirOptions( const IOOptions& options, IODebugContext* dbg, const DirFsyncOptions& dir_fsync_options) { diff --git a/utilities/fault_injection_fs.h b/utilities/fault_injection_fs.h index bca85ed07..886234eed 100644 --- a/utilities/fault_injection_fs.h +++ b/utilities/fault_injection_fs.h @@ -180,6 +180,9 @@ class TestFSDirectory : public FSDirectory { virtual IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override; + virtual IOStatus Close(const IOOptions& options, + IODebugContext* dbg) override; + virtual IOStatus FsyncWithDirOptions( const IOOptions& options, IODebugContext* dbg, const DirFsyncOptions& dir_fsync_options) override;