diff --git a/HISTORY.md b/HISTORY.md index ee6152d59..59b543be3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ * Fixed bug in calls to `IngestExternalFiles()` with files for multiple column families. The bug could have introduced a delay in ingested file keys becoming visible after `IngestExternalFiles()` returned. Furthermore, mutations to ingested file keys while they were invisible could have been dropped (not necessarily immediately). * Fixed a possible race condition impacting users of `WriteBufferManager` who constructed it with `allow_stall == true`. The race condition led to undefined behavior (in our experience, typically a process crash). * Fixed a bug where stalled writes would remain stalled forever after the user calls `WriteBufferManager::SetBufferSize()` with `new_size == 0` to dynamically disable memory limiting. +* Make `DB::close()` thread-safe. ### New Features * Print information about blob files when using "ldb list_live_files_metadata" @@ -24,6 +25,7 @@ * Made FileChecksumGenFactory, SstPartitionerFactory, TablePropertiesCollectorFactory, and WalFilter extend the Customizable class and added a CreateFromString method. * Some fields of SstFileMetaData are deprecated for compatibility with new base class FileStorageInfo. * Add `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of the this batch of files. +* If `DB::Close()` failed with a non aborted status, calling `DB::Close()` again will return the original status instead of Status::OK. ## 6.25.0 (2021-09-20) ### Bug Fixes diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 9ebc0d708..5d1e12c6a 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -1130,9 +1130,15 @@ TEST_F(DBBasicTest, DBCloseFlushError) { ASSERT_OK(Put("key3", "value3")); fault_injection_env->SetFilesystemActive(false); Status s = dbfull()->Close(); + ASSERT_NE(s, Status::OK()); + // retry should return the same error + s = dbfull()->Close(); + ASSERT_NE(s, Status::OK()); fault_injection_env->SetFilesystemActive(true); + // retry close() is no-op even the system is back. Could be improved if + // Close() is retry-able: #9029 + s = dbfull()->Close(); ASSERT_NE(s, Status::OK()); - Destroy(options); } @@ -2591,6 +2597,21 @@ TEST_F(DBBasicTest, ManifestChecksumMismatch) { ASSERT_TRUE(s.IsCorruption()); } +TEST_F(DBBasicTest, ConcurrentlyCloseDB) { + Options options = CurrentOptions(); + DestroyAndReopen(options); + std::vector workers; + for (int i = 0; i < 10; i++) { + workers.push_back(std::thread([&]() { + auto s = db_->Close(); + ASSERT_OK(s); + })); + } + for (auto& w : workers) { + w.join(); + } +} + #ifndef ROCKSDB_LITE class DBBasicTestTrackWal : public DBTestBase, public testing::WithParamInterface { diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 53a84e074..11b30e2df 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -718,9 +718,11 @@ Status DBImpl::CloseHelper() { Status DBImpl::CloseImpl() { return CloseHelper(); } DBImpl::~DBImpl() { + InstrumentedMutexLock closing_lock_guard(&closing_mutex_); if (!closed_) { closed_ = true; - CloseHelper().PermitUncheckedError(); + closing_status_ = CloseHelper(); + closing_status_.PermitUncheckedError(); } } @@ -4006,19 +4008,20 @@ Status DB::DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family) { DB::~DB() {} Status DBImpl::Close() { - if (!closed_) { - { - InstrumentedMutexLock l(&mutex_); - // If there is unreleased snapshot, fail the close call - if (!snapshots_.empty()) { - return Status::Aborted("Cannot close DB with unreleased snapshot."); - } + InstrumentedMutexLock closing_lock_guard(&closing_mutex_); + if (closed_) { + return closing_status_; + } + { + InstrumentedMutexLock l(&mutex_); + // If there is unreleased snapshot, fail the close call + if (!snapshots_.empty()) { + return Status::Aborted("Cannot close DB with unreleased snapshot."); } - - closed_ = true; - return CloseImpl(); } - return Status::OK(); + closing_status_ = CloseImpl(); + closed_ = true; + return closing_status_; } Status DB::ListColumnFamilies(const DBOptions& db_options, diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 2f77a32f3..c07eea23b 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2290,6 +2290,10 @@ class DBImpl : public DB { // Flag to check whether Close() has been called on this DB bool closed_; + // save the closing status, for re-calling the close() + Status closing_status_; + // mutex for DB::Close() + InstrumentedMutex closing_mutex_; // Conditional variable to coordinate installation of atomic flush results. // With atomic flush, each bg thread installs the result of flushing multiple diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 7a80c629f..c1203ec6c 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -285,9 +285,9 @@ class DB { // If the return status is Aborted(), closing fails because there is // unreleased snapshot in the system. In this case, users can release // the unreleased snapshots and try again and expect it to succeed. For - // other status, recalling Close() will be no-op. - // If the return status is NotSupported(), then the DB implementation does - // cleanup in the destructor + // other status, re-calling Close() will be no-op and return the original + // close status. If the return status is NotSupported(), then the DB + // implementation does cleanup in the destructor virtual Status Close() { return Status::NotSupported(); } // ListColumnFamilies will open the DB specified by argument name