Fixed a rare deadlock in DBTest.ThreadStatusFlush

Summary:
Currently, ThreadStatusFlush uses two sync-points to ensure
there's a flush currently running when calling GetThreadList().
However, one of the sync-point is inside db-mutex, which could
cause deadlock in case there's a DB::Get() call.

This patch fix this issue by moving the sync-point to a better
place where the flush job does not hold the mutex.

Test Plan: db_test

Reviewers: igor, sdong, anthony, IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D45045
main
Yueh-Hsuan Chiang 9 years ago
parent 962aa64292
commit a203b913c1
  1. 11
      db/db_test.cc
  2. 2
      db/flush_job.cc

@ -6293,7 +6293,8 @@ TEST_F(DBTest, ThreadStatusFlush) {
rocksdb::SyncPoint::GetInstance()->LoadDependency({ rocksdb::SyncPoint::GetInstance()->LoadDependency({
{"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"}, {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"},
{"DBTest::ThreadStatusFlush:2", "FlushJob::~FlushJob()"}, {"DBTest::ThreadStatusFlush:2",
"FlushJob::LogAndNotifyTableFileCreation()"},
}); });
rocksdb::SyncPoint::GetInstance()->EnableProcessing(); rocksdb::SyncPoint::GetInstance()->EnableProcessing();
@ -6305,12 +6306,14 @@ TEST_F(DBTest, ThreadStatusFlush) {
VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0); VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
Put(1, "k1", std::string(100000, 'x')); // Fill memtable Put(1, "k1", std::string(100000, 'x')); // Fill memtable
VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
Put(1, "k2", std::string(100000, 'y')); // Trigger flush Put(1, "k2", std::string(100000, 'y')); // Trigger flush
// wait for flush to be scheduled
env_->SleepForMicroseconds(250000); // The first sync point is to make sure there's one flush job
// running when we perform VerifyOperationCount().
TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1"); TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1");
VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1); VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1);
// This second sync point is to ensure the flush job will not
// be completed until we already perform VerifyOperationCount().
TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2"); TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2");
rocksdb::SyncPoint::GetInstance()->DisableProcessing(); rocksdb::SyncPoint::GetInstance()->DisableProcessing();

@ -87,7 +87,6 @@ FlushJob::FlushJob(const std::string& dbname, ColumnFamilyData* cfd,
} }
FlushJob::~FlushJob() { FlushJob::~FlushJob() {
TEST_SYNC_POINT("FlushJob::~FlushJob()");
ThreadStatusUtil::ResetThreadStatus(); ThreadStatusUtil::ResetThreadStatus();
} }
@ -262,6 +261,7 @@ Status FlushJob::WriteLevel0Table(const autovector<MemTable*>& mems,
EventHelpers::LogAndNotifyTableFileCreation( EventHelpers::LogAndNotifyTableFileCreation(
event_logger_, db_options_.listeners, event_logger_, db_options_.listeners,
meta->fd, info); meta->fd, info);
TEST_SYNC_POINT("FlushJob::LogAndNotifyTableFileCreation()");
} }
if (!db_options_.disableDataSync && output_file_directory_ != nullptr) { if (!db_options_.disableDataSync && output_file_directory_ != nullptr) {

Loading…
Cancel
Save