diff --git a/util/env_posix.cc b/util/env_posix.cc index 06de7a486..132d865dd 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -132,6 +132,7 @@ class PosixEnv : public Env { // All threads must be joined before the deletion of // thread_status_updater_. delete thread_status_updater_; + TEST_SYNC_POINT("PosixEnv::~PosixEnv():End"); } void SetFD_CLOEXEC(int fd, const EnvOptions* options) { diff --git a/util/thread_local.cc b/util/thread_local.cc index 846c33877..5f3fddae5 100644 --- a/util/thread_local.cc +++ b/util/thread_local.cc @@ -104,7 +104,6 @@ PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit; void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::StaticMeta::InitSingletons(); - ThreadLocalPtr::Instance(); } ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() { @@ -113,30 +112,46 @@ ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() { // when the function is first call. As a result, we can properly // control their construction order by properly preparing their // first function call. - static ThreadLocalPtr::StaticMeta inst; - return &inst; + // + // Note that here we decide to make "inst" a static pointer w/o deleting + // it at the end instead of a static variable. This is to avoid the following + // destruction order desester happens when a child thread using ThreadLocalPtr + // dies AFTER the main thread dies: When a child thread happens to use + // ThreadLocalPtr, it will try to delete its thread-local data on its + // OnThreadExit when the child thread dies. However, OnThreadExit depends + // on the following variable. As a result, if the main thread dies before any + // child thread happen to use ThreadLocalPtr dies, then the destruction of + // the following variable will go first, then OnThreadExit, therefore causing + // invalid access. + // + // The above problem can be solved by using thread_local to store tls_ instead + // of using __thread. The major difference between thread_local and __thread + // is that thread_local supports dynamic construction and destruction of + // non-primitive typed variables. As a result, we can guarantee the + // desturction order even when the main thread dies before any child threads. + // However, thread_local requires gcc 4.8 and is not supported in all the + // compilers that accepts -std=c++11 (e.g., the default clang on Mac), while + // the current RocksDB still accept gcc 4.7. + static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta(); + return inst; } void ThreadLocalPtr::StaticMeta::InitSingletons() { Mutex(); } -port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { - // Here we prefer function static variable instead of global - // static variable as function static variable is initialized - // when the function is first call. As a result, we can properly - // control their construction order by properly preparing their - // first function call. - static port::Mutex mutex; - return &mutex; -} +port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; } void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) { auto* tls = static_cast(ptr); assert(tls != nullptr); - auto* inst = Instance(); + // Use the cached StaticMeta::Instance() instead of directly calling + // the variable inside StaticMeta::Instance() might already go out of + // scope here in case this OnThreadExit is called after the main thread + // dies. + auto* inst = tls->inst; pthread_setspecific(inst->pthread_key_, nullptr); - MutexLock l(Mutex()); + MutexLock l(inst->MemberMutex()); inst->RemoveThreadData(tls); // Unref stored pointers of current thread from all instances uint32_t id = 0; @@ -154,7 +169,7 @@ void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) { delete tls; } -ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0) { +ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0), head_(this) { if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) { abort(); } @@ -221,7 +236,7 @@ ThreadLocalPtr::ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() { if (UNLIKELY(tls_ == nullptr)) { auto* inst = Instance(); - tls_ = new ThreadData(); + tls_ = new ThreadData(inst); { // Register it in the global chain, needs to be done before thread exit // handler registration diff --git a/util/thread_local.h b/util/thread_local.h index a4feac38d..3adf8ba85 100644 --- a/util/thread_local.h +++ b/util/thread_local.h @@ -79,6 +79,8 @@ class ThreadLocalPtr { std::atomic ptr; }; + class StaticMeta; + // This is the structure that is declared as "thread_local" storage. // The vector keep list of atomic pointer for all instances for "current" // thread. The vector is indexed by an Id that is unique in process and @@ -95,10 +97,11 @@ class ThreadLocalPtr { // | thread 3 | void* | void* | void* | <- ThreadData // --------------------------------------------------- struct ThreadData { - ThreadData() : entries() {} + explicit ThreadData(StaticMeta* _inst) : entries(), inst(_inst) {} std::vector entries; ThreadData* next; ThreadData* prev; + StaticMeta* inst; }; class StaticMeta { @@ -139,6 +142,31 @@ class ThreadLocalPtr { // initialized will be no-op. static void InitSingletons(); + // protect inst, next_instance_id_, free_instance_ids_, head_, + // ThreadData.entries + // + // Note that here we prefer function static variable instead of the usual + // global static variable. The reason is that c++ destruction order of + // static variables in the reverse order of their construction order. + // However, C++ does not guarantee any construction order when global + // static variables are defined in different files, while the function + // static variables are initialized when their function are first called. + // As a result, the construction order of the function static variables + // can be controlled by properly invoke their first function calls in + // the right order. + // + // For instance, the following function contains a function static + // variable. We place a dummy function call of this inside + // Env::Default() to ensure the construction order of the construction + // order. + static port::Mutex* Mutex(); + + // Returns the member mutex of the current StaticMeta. In general, + // Mutex() should be used instead of this one. However, in case where + // the static variable inside Instance() goes out of scope, MemberMutex() + // should be used. One example is OnThreadExit() function. + port::Mutex* MemberMutex() { return &mutex_; } + private: // Get UnrefHandler for id with acquiring mutex // REQUIRES: mutex locked @@ -169,24 +197,9 @@ class ThreadLocalPtr { std::unordered_map handler_map_; - // protect inst, next_instance_id_, free_instance_ids_, head_, - // ThreadData.entries - // - // Note that here we prefer function static variable instead of the usual - // global static variable. The reason is that c++ destruction order of - // static variables in the reverse order of their construction order. - // However, C++ does not guarantee any construction order when global - // static variables are defined in different files, while the function - // static variables are initialized when their function are first called. - // As a result, the construction order of the function static variables - // can be controlled by properly invoke their first function calls in - // the right order. - // - // For instance, the following function contains a function static - // variable. We place a dummy function call of this inside - // Env::Default() to ensure the construction order of the construction - // order. - static port::Mutex* Mutex(); + // The private mutex. Developers should always use Mutex() instead of + // using this variable directly. + port::Mutex mutex_; #if ROCKSDB_SUPPORT_THREAD_LOCAL // Thread local storage static __thread ThreadData* tls_; diff --git a/util/thread_local_test.cc b/util/thread_local_test.cc index 537737650..f531dfd0c 100644 --- a/util/thread_local_test.cc +++ b/util/thread_local_test.cc @@ -3,14 +3,17 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. +#include #include +#include #include "rocksdb/env.h" #include "port/port.h" #include "util/autovector.h" -#include "util/thread_local.h" +#include "util/sync_point.h" #include "util/testharness.h" #include "util/testutil.h" +#include "util/thread_local.h" namespace rocksdb { @@ -467,6 +470,37 @@ TEST_F(ThreadLocalTest, CompareAndSwap) { ASSERT_EQ(tls.Get(), reinterpret_cast(3)); } +namespace { + +void* AccessThreadLocal(void* arg) { + TEST_SYNC_POINT("AccessThreadLocal:Start"); + ThreadLocalPtr tlp; + tlp.Reset(new std::string("hello RocksDB")); + TEST_SYNC_POINT("AccessThreadLocal:End"); + return nullptr; +} + +} // namespace + +// The following test is disabled as it requires manual steps to run it +// correctly. +// +// Currently we have no way to acess SyncPoint w/o ASAN error when the +// child thread dies after the main thread dies. So if you manually enable +// this test and only see an ASAN error on SyncPoint, it means you pass the +// test. +TEST_F(ThreadLocalTest, DISABLED_MainThreadDiesFirst) { + rocksdb::SyncPoint::GetInstance()->LoadDependency( + {{"AccessThreadLocal:Start", "MainThreadDiesFirst:End"}, + {"PosixEnv::~PosixEnv():End", "AccessThreadLocal:End"}}); + + // Triggers the initialization of singletons. + Env::Default(); + pthread_t t; + pthread_create(&t, nullptr, &AccessThreadLocal, nullptr); + TEST_SYNC_POINT("MainThreadDiesFirst:End"); +} + } // namespace rocksdb int main(int argc, char** argv) {