From 908100399cdc8855980ca9f8c05d37836857ab79 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Wed, 10 Feb 2016 16:56:01 -0800 Subject: [PATCH] Fixed a dependency issue of ThreadLocalPtr Summary: When a child thread that uses ThreadLocalPtr, ThreadLocalPtr::OnThreadExit will be called when that child thread is destroyed. However, OnThreadExit will try to access a static singleton of ThreadLocalPtr, which will be destroyed when the main thread exit. As a result, when a child thread that uses ThreadLocalPtr exits AFTER the main thread exits, illegal memory access will occur. This diff includes a test that reproduce this legacy bug. ==2095206==ERROR: AddressSanitizer: heap-use-after-free on address 0x608000007fa0 at pc 0x959b79 bp 0x7f5fa7426b60 sp 0x7f5fa7426b58 READ of size 8 at 0x608000007fa0 thread T1 This patch fix this issue by having the thread local mutex never be deleted (but will leak small piece of memory at the end.) The patch also describe a better solution (thread_local) in the comment that requires gcc 4.8.1 and in latest clang as a future work once we agree to move toward gcc 4.8. Test Plan: COMPILE_WITH_ASAN=1 make thread_local_test -j32 ./thread_local_test --gtest_filter="*MainThreadDiesFirst" Reviewers: anthony, hermanlee4, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53013 --- util/env_posix.cc | 1 + util/thread_local.cc | 47 ++++++++++++++++++++++++------------ util/thread_local.h | 51 ++++++++++++++++++++++++--------------- util/thread_local_test.cc | 36 ++++++++++++++++++++++++++- 4 files changed, 99 insertions(+), 36 deletions(-) diff --git a/util/env_posix.cc b/util/env_posix.cc index 06de7a486..132d865dd 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -132,6 +132,7 @@ class PosixEnv : public Env { // All threads must be joined before the deletion of // thread_status_updater_. delete thread_status_updater_; + TEST_SYNC_POINT("PosixEnv::~PosixEnv():End"); } void SetFD_CLOEXEC(int fd, const EnvOptions* options) { diff --git a/util/thread_local.cc b/util/thread_local.cc index 846c33877..5f3fddae5 100644 --- a/util/thread_local.cc +++ b/util/thread_local.cc @@ -104,7 +104,6 @@ PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit; void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::StaticMeta::InitSingletons(); - ThreadLocalPtr::Instance(); } ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() { @@ -113,30 +112,46 @@ ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() { // when the function is first call. As a result, we can properly // control their construction order by properly preparing their // first function call. - static ThreadLocalPtr::StaticMeta inst; - return &inst; + // + // Note that here we decide to make "inst" a static pointer w/o deleting + // it at the end instead of a static variable. This is to avoid the following + // destruction order desester happens when a child thread using ThreadLocalPtr + // dies AFTER the main thread dies: When a child thread happens to use + // ThreadLocalPtr, it will try to delete its thread-local data on its + // OnThreadExit when the child thread dies. However, OnThreadExit depends + // on the following variable. As a result, if the main thread dies before any + // child thread happen to use ThreadLocalPtr dies, then the destruction of + // the following variable will go first, then OnThreadExit, therefore causing + // invalid access. + // + // The above problem can be solved by using thread_local to store tls_ instead + // of using __thread. The major difference between thread_local and __thread + // is that thread_local supports dynamic construction and destruction of + // non-primitive typed variables. As a result, we can guarantee the + // desturction order even when the main thread dies before any child threads. + // However, thread_local requires gcc 4.8 and is not supported in all the + // compilers that accepts -std=c++11 (e.g., the default clang on Mac), while + // the current RocksDB still accept gcc 4.7. + static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta(); + return inst; } void ThreadLocalPtr::StaticMeta::InitSingletons() { Mutex(); } -port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { - // Here we prefer function static variable instead of global - // static variable as function static variable is initialized - // when the function is first call. As a result, we can properly - // control their construction order by properly preparing their - // first function call. - static port::Mutex mutex; - return &mutex; -} +port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; } void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) { auto* tls = static_cast(ptr); assert(tls != nullptr); - auto* inst = Instance(); + // Use the cached StaticMeta::Instance() instead of directly calling + // the variable inside StaticMeta::Instance() might already go out of + // scope here in case this OnThreadExit is called after the main thread + // dies. + auto* inst = tls->inst; pthread_setspecific(inst->pthread_key_, nullptr); - MutexLock l(Mutex()); + MutexLock l(inst->MemberMutex()); inst->RemoveThreadData(tls); // Unref stored pointers of current thread from all instances uint32_t id = 0; @@ -154,7 +169,7 @@ void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) { delete tls; } -ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0) { +ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0), head_(this) { if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) { abort(); } @@ -221,7 +236,7 @@ ThreadLocalPtr::ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() { if (UNLIKELY(tls_ == nullptr)) { auto* inst = Instance(); - tls_ = new ThreadData(); + tls_ = new ThreadData(inst); { // Register it in the global chain, needs to be done before thread exit // handler registration diff --git a/util/thread_local.h b/util/thread_local.h index a4feac38d..3adf8ba85 100644 --- a/util/thread_local.h +++ b/util/thread_local.h @@ -79,6 +79,8 @@ class ThreadLocalPtr { std::atomic ptr; }; + class StaticMeta; + // This is the structure that is declared as "thread_local" storage. // The vector keep list of atomic pointer for all instances for "current" // thread. The vector is indexed by an Id that is unique in process and @@ -95,10 +97,11 @@ class ThreadLocalPtr { // | thread 3 | void* | void* | void* | <- ThreadData // --------------------------------------------------- struct ThreadData { - ThreadData() : entries() {} + explicit ThreadData(StaticMeta* _inst) : entries(), inst(_inst) {} std::vector entries; ThreadData* next; ThreadData* prev; + StaticMeta* inst; }; class StaticMeta { @@ -139,6 +142,31 @@ class ThreadLocalPtr { // initialized will be no-op. static void InitSingletons(); + // protect inst, next_instance_id_, free_instance_ids_, head_, + // ThreadData.entries + // + // Note that here we prefer function static variable instead of the usual + // global static variable. The reason is that c++ destruction order of + // static variables in the reverse order of their construction order. + // However, C++ does not guarantee any construction order when global + // static variables are defined in different files, while the function + // static variables are initialized when their function are first called. + // As a result, the construction order of the function static variables + // can be controlled by properly invoke their first function calls in + // the right order. + // + // For instance, the following function contains a function static + // variable. We place a dummy function call of this inside + // Env::Default() to ensure the construction order of the construction + // order. + static port::Mutex* Mutex(); + + // Returns the member mutex of the current StaticMeta. In general, + // Mutex() should be used instead of this one. However, in case where + // the static variable inside Instance() goes out of scope, MemberMutex() + // should be used. One example is OnThreadExit() function. + port::Mutex* MemberMutex() { return &mutex_; } + private: // Get UnrefHandler for id with acquiring mutex // REQUIRES: mutex locked @@ -169,24 +197,9 @@ class ThreadLocalPtr { std::unordered_map handler_map_; - // protect inst, next_instance_id_, free_instance_ids_, head_, - // ThreadData.entries - // - // Note that here we prefer function static variable instead of the usual - // global static variable. The reason is that c++ destruction order of - // static variables in the reverse order of their construction order. - // However, C++ does not guarantee any construction order when global - // static variables are defined in different files, while the function - // static variables are initialized when their function are first called. - // As a result, the construction order of the function static variables - // can be controlled by properly invoke their first function calls in - // the right order. - // - // For instance, the following function contains a function static - // variable. We place a dummy function call of this inside - // Env::Default() to ensure the construction order of the construction - // order. - static port::Mutex* Mutex(); + // The private mutex. Developers should always use Mutex() instead of + // using this variable directly. + port::Mutex mutex_; #if ROCKSDB_SUPPORT_THREAD_LOCAL // Thread local storage static __thread ThreadData* tls_; diff --git a/util/thread_local_test.cc b/util/thread_local_test.cc index 537737650..f531dfd0c 100644 --- a/util/thread_local_test.cc +++ b/util/thread_local_test.cc @@ -3,14 +3,17 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. +#include #include +#include #include "rocksdb/env.h" #include "port/port.h" #include "util/autovector.h" -#include "util/thread_local.h" +#include "util/sync_point.h" #include "util/testharness.h" #include "util/testutil.h" +#include "util/thread_local.h" namespace rocksdb { @@ -467,6 +470,37 @@ TEST_F(ThreadLocalTest, CompareAndSwap) { ASSERT_EQ(tls.Get(), reinterpret_cast(3)); } +namespace { + +void* AccessThreadLocal(void* arg) { + TEST_SYNC_POINT("AccessThreadLocal:Start"); + ThreadLocalPtr tlp; + tlp.Reset(new std::string("hello RocksDB")); + TEST_SYNC_POINT("AccessThreadLocal:End"); + return nullptr; +} + +} // namespace + +// The following test is disabled as it requires manual steps to run it +// correctly. +// +// Currently we have no way to acess SyncPoint w/o ASAN error when the +// child thread dies after the main thread dies. So if you manually enable +// this test and only see an ASAN error on SyncPoint, it means you pass the +// test. +TEST_F(ThreadLocalTest, DISABLED_MainThreadDiesFirst) { + rocksdb::SyncPoint::GetInstance()->LoadDependency( + {{"AccessThreadLocal:Start", "MainThreadDiesFirst:End"}, + {"PosixEnv::~PosixEnv():End", "AccessThreadLocal:End"}}); + + // Triggers the initialization of singletons. + Env::Default(); + pthread_t t; + pthread_create(&t, nullptr, &AccessThreadLocal, nullptr); + TEST_SYNC_POINT("MainThreadDiesFirst:End"); +} + } // namespace rocksdb int main(int argc, char** argv) {