Fix a flaky test DBFlushTest.SyncFail (#4633)

Summary:
There is a race condition in DBFlushTest.SyncFail, as illustrated below.
```
time         thread1                             bg_flush_thread
  |     Flush(wait=false, cfd)
  |     refs_before=cfd->current()->TEST_refs()   PickMemtable calls cfd->current()->Ref()
  V
```
The race condition between thread1 getting the ref count of cfd's current
version and bg_flush_thread incrementing the cfd's current version makes it
possible for later assertion on refs_before to fail. Therefore, we add test
sync points to enforce the order and assert on the ref count before and after
PickMemtable is called in bg_flush_thread.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4633

Differential Revision: D12967131

Pulled By: riversand963

fbshipit-source-id: a99d2bacb7869ec5d8d03b24ef2babc0e6ae1a3b
main
Yanqin Jin 6 years ago committed by Facebook Github Bot
parent 7dbee38716
commit 8d7bc76f36
  1. 12
      db/db_flush_test.cc
  2. 2
      db/db_impl_compaction_flush.cc

@ -62,7 +62,6 @@ TEST_F(DBFlushTest, FlushWhileWritingManifest) {
#endif // ROCKSDB_LITE
}
#ifndef TRAVIS
// Disable this test temporarily on Travis as it fails intermittently.
// Github issue: #4151
TEST_F(DBFlushTest, SyncFail) {
@ -73,7 +72,11 @@ TEST_F(DBFlushTest, SyncFail) {
options.env = fault_injection_env.get();
SyncPoint::GetInstance()->LoadDependency(
{{"DBFlushTest::SyncFail:1", "DBImpl::SyncClosedLogs:Start"},
{{"DBFlushTest::SyncFail:GetVersionRefCount:1",
"DBImpl::FlushMemTableToOutputFile:BeforePickMemtables"},
{"DBImpl::FlushMemTableToOutputFile:AfterPickMemtables",
"DBFlushTest::SyncFail:GetVersionRefCount:2"},
{"DBFlushTest::SyncFail:1", "DBImpl::SyncClosedLogs:Start"},
{"DBImpl::SyncClosedLogs:Failed", "DBFlushTest::SyncFail:2"}});
SyncPoint::GetInstance()->EnableProcessing();
@ -88,6 +91,10 @@ TEST_F(DBFlushTest, SyncFail) {
// Flush installs a new super-version. Get the ref count after that.
auto current_before = cfd->current();
int refs_before = cfd->current()->TEST_refs();
TEST_SYNC_POINT("DBFlushTest::SyncFail:GetVersionRefCount:1");
TEST_SYNC_POINT("DBFlushTest::SyncFail:GetVersionRefCount:2");
int refs_after_picking_memtables = cfd->current()->TEST_refs();
ASSERT_EQ(refs_before + 1, refs_after_picking_memtables);
fault_injection_env->SetFilesystemActive(false);
TEST_SYNC_POINT("DBFlushTest::SyncFail:1");
TEST_SYNC_POINT("DBFlushTest::SyncFail:2");
@ -126,7 +133,6 @@ TEST_F(DBFlushTest, SyncSkip) {
Destroy(options);
}
#endif // TRAVIS
TEST_F(DBFlushTest, FlushInLowPriThreadPool) {
// Verify setting an empty high-pri (flush) thread pool causes flushes to be

@ -135,7 +135,9 @@ Status DBImpl::FlushMemTableToOutputFile(
FileMetaData file_meta;
TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables");
flush_job.PickMemTable();
TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:AfterPickMemtables");
#ifndef ROCKSDB_LITE
// may temporarily unlock and lock the mutex.

Loading…
Cancel
Save