From a203b913c1851897e9ebf0f669d5f56e422dac8a Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Thu, 20 Aug 2015 17:18:47 -0700 Subject: [PATCH] Fixed a rare deadlock in DBTest.ThreadStatusFlush Summary: Currently, ThreadStatusFlush uses two sync-points to ensure there's a flush currently running when calling GetThreadList(). However, one of the sync-point is inside db-mutex, which could cause deadlock in case there's a DB::Get() call. This patch fix this issue by moving the sync-point to a better place where the flush job does not hold the mutex. Test Plan: db_test Reviewers: igor, sdong, anthony, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D45045 --- db/db_test.cc | 11 +++++++---- db/flush_job.cc | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index f7f0db3e6..9412a78b0 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -6293,7 +6293,8 @@ TEST_F(DBTest, ThreadStatusFlush) { rocksdb::SyncPoint::GetInstance()->LoadDependency({ {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"}, - {"DBTest::ThreadStatusFlush:2", "FlushJob::~FlushJob()"}, + {"DBTest::ThreadStatusFlush:2", + "FlushJob::LogAndNotifyTableFileCreation()"}, }); rocksdb::SyncPoint::GetInstance()->EnableProcessing(); @@ -6305,12 +6306,14 @@ TEST_F(DBTest, ThreadStatusFlush) { VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0); Put(1, "k1", std::string(100000, 'x')); // Fill memtable - VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0); Put(1, "k2", std::string(100000, 'y')); // Trigger flush - // wait for flush to be scheduled - env_->SleepForMicroseconds(250000); + + // The first sync point is to make sure there's one flush job + // running when we perform VerifyOperationCount(). TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1"); VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1); + // This second sync point is to ensure the flush job will not + // be completed until we already perform VerifyOperationCount(). TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2"); rocksdb::SyncPoint::GetInstance()->DisableProcessing(); diff --git a/db/flush_job.cc b/db/flush_job.cc index e2e920378..2576a44ca 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -87,7 +87,6 @@ FlushJob::FlushJob(const std::string& dbname, ColumnFamilyData* cfd, } FlushJob::~FlushJob() { - TEST_SYNC_POINT("FlushJob::~FlushJob()"); ThreadStatusUtil::ResetThreadStatus(); } @@ -262,6 +261,7 @@ Status FlushJob::WriteLevel0Table(const autovector& mems, EventHelpers::LogAndNotifyTableFileCreation( event_logger_, db_options_.listeners, meta->fd, info); + TEST_SYNC_POINT("FlushJob::LogAndNotifyTableFileCreation()"); } if (!db_options_.disableDataSync && output_file_directory_ != nullptr) {