From 97307d888f7b256b1dfc25e8942efa20203ec138 Mon Sep 17 00:00:00 2001 From: Mike Kolupaev Date: Fri, 16 Feb 2018 07:58:18 -0800 Subject: [PATCH] Fix deadlock in ColumnFamilyData::InstallSuperVersion() Summary: Deadlock: a memtable flush holds DB::mutex_ and calls ThreadLocalPtr::Scrape(), which locks ThreadLocalPtr mutex; meanwhile, a thread exit handler locks ThreadLocalPtr mutex and calls SuperVersionUnrefHandle, which tries to lock DB::mutex_. This deadlock is hit all the time on our workload. It blocks our release. In general, the problem is that ThreadLocalPtr takes an arbitrary callback and calls it while holding a lock on a global mutex. The same global mutex is (at least in some cases) locked by almost all ThreadLocalPtr methods, on any instance of ThreadLocalPtr. So, there'll be a deadlock if the callback tries to do anything to any instance of ThreadLocalPtr, or waits for another thread to do so. So, probably the only safe way to use ThreadLocalPtr callbacks is to do only do simple and lock-free things in them. This PR fixes the deadlock by making sure that local_sv_ never holds the last reference to a SuperVersion, and therefore SuperVersionUnrefHandle never has to do any nontrivial cleanup. I also searched for other uses of ThreadLocalPtr to see if they may have similar bugs. There's only one other use, in transaction_lock_mgr.cc, and it looks fine. Closes https://github.com/facebook/rocksdb/pull/3510 Reviewed By: sagar0 Differential Revision: D7005346 Pulled By: al13n321 fbshipit-source-id: 37575591b84f07a891d6659e87e784660fde815f --- db/column_family.cc | 32 +++++++++++++++++++------------- db/db_test.cc | 44 ++++++++++++++++++++++++++++++++++++++++++++ util/thread_local.h | 7 +++++++ 3 files changed, 70 insertions(+), 13 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index ea5eaa468..5824d7b54 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -344,12 +344,13 @@ void SuperVersionUnrefHandle(void* ptr) { // When latter happens, we are in ~ColumnFamilyData(), no get should happen as // well. SuperVersion* sv = static_cast(ptr); - if (sv->Unref()) { - sv->db_mutex->Lock(); - sv->Cleanup(); - sv->db_mutex->Unlock(); - delete sv; - } + bool was_last_ref __attribute__((__unused__)); + was_last_ref = sv->Unref(); + // Thread-local SuperVersions can't outlive ColumnFamilyData::super_version_. + // This is important because we can't do SuperVersion cleanup here. + // That would require locking DB mutex, which would deadlock because + // SuperVersionUnrefHandle is called with locked ThreadLocalPtr mutex. + assert(!was_last_ref); } } // anonymous namespace @@ -994,6 +995,12 @@ void ColumnFamilyData::InstallSuperVersion( RecalculateWriteStallConditions(mutable_cf_options); if (old_superversion != nullptr) { + // Reset SuperVersions cached in thread local storage. + // This should be done before old_superversion->Unref(). That's to ensure + // that local_sv_ never holds the last reference to SuperVersion, since + // it has no means to safely do SuperVersion cleanup. + ResetThreadLocalSuperVersions(); + if (old_superversion->mutable_cf_options.write_buffer_size != mutable_cf_options.write_buffer_size) { mem_->UpdateWriteBufferSize(mutable_cf_options.write_buffer_size); @@ -1009,9 +1016,6 @@ void ColumnFamilyData::InstallSuperVersion( sv_context->superversions_to_free.push_back(old_superversion); } } - - // Reset SuperVersions cached in thread local storage - ResetThreadLocalSuperVersions(); } void ColumnFamilyData::ResetThreadLocalSuperVersions() { @@ -1023,10 +1027,12 @@ void ColumnFamilyData::ResetThreadLocalSuperVersions() { continue; } auto sv = static_cast(ptr); - if (sv->Unref()) { - sv->Cleanup(); - delete sv; - } + bool was_last_ref __attribute__((__unused__)); + was_last_ref = sv->Unref(); + // sv couldn't have been the last reference because + // ResetThreadLocalSuperVersions() is called before + // unref'ing super_version_. + assert(!was_last_ref); } } diff --git a/db/db_test.cc b/db/db_test.cc index 5f2f62f99..b21150c66 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -5722,6 +5722,50 @@ TEST_F(DBTest, PauseBackgroundWorkTest) { // now it's done ASSERT_TRUE(done.load()); } + +// Keep spawning short-living threads that create an iterator and quit. +// Meanwhile in another thread keep flushing memtables. +// This used to cause a deadlock. +TEST_F(DBTest, ThreadLocalPtrDeadlock) { + std::atomic flushes_done{0}; + std::atomic threads_destroyed{0}; + auto done = [&] { + return flushes_done.load() > 10; + }; + + std::thread flushing_thread([&] { + for (int i = 0; !done(); ++i) { + ASSERT_OK(db_->Put(WriteOptions(), Slice("hi"), + Slice(std::to_string(i).c_str()))); + ASSERT_OK(db_->Flush(FlushOptions())); + int cnt = ++flushes_done; + fprintf(stderr, "Flushed %d times\n", cnt); + } + }); + + std::vector thread_spawning_threads(10); + for (auto& t: thread_spawning_threads) { + t = std::thread([&] { + while (!done()) { + { + std::thread tmp_thread([&] { + auto it = db_->NewIterator(ReadOptions()); + delete it; + }); + tmp_thread.join(); + } + ++threads_destroyed; + } + }); + } + + for (auto& t: thread_spawning_threads) { + t.join(); + } + flushing_thread.join(); + fprintf(stderr, "Done. Flushed %d times, destroyed %d threads\n", + flushes_done.load(), threads_destroyed.load()); +} } // namespace rocksdb int main(int argc, char** argv) { diff --git a/util/thread_local.h b/util/thread_local.h index 1ca5b10dd..175f1cca4 100644 --- a/util/thread_local.h +++ b/util/thread_local.h @@ -24,6 +24,13 @@ namespace rocksdb { // pointer (if not NULL) when one of the following happens: // (1) a thread terminates // (2) a ThreadLocalPtr is destroyed +// +// Warning: this function is called while holding a global mutex. The same mutex +// is used (at least in some cases) by most methods of ThreadLocalPtr, and it's +// shared across all instances of ThreadLocalPtr. Thereforere extra care +// is needed to avoid deadlocks. In particular, the handler shouldn't lock any +// mutexes and shouldn't call any methods of any ThreadLocalPtr instances, +// unless you know what you're doing. typedef void (*UnrefHandler)(void* ptr); // ThreadLocalPtr stores only values of pointer type. Different from