Fix race condition in BackupEngineTest.ChangeManifestDuringBackupCreation (#9327)

Summary:
The failure looked like this:

```
utilities/backupable/backupable_db_test.cc:3161: Failure
Value of: db_chroot_env_->FileExists(prev_manifest_path).IsNotFound()
  Actual: false
Expected: true
```

The failure could be coerced consistently with the following patch:

```
 diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc
index 80410f671..637636791 100644
 --- a/db/db_impl/db_impl_compaction_flush.cc
+++ b/db/db_impl/db_impl_compaction_flush.cc
@@ -2772,6 +2772,8 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
     if (job_context.HaveSomethingToClean() ||
         job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
       mutex_.Unlock();
+      bg_cv_.SignalAll();
+      sleep(1);
       TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FilesFound");
       // Have to flush the info logs before bg_flush_scheduled_--
       // because if bg_flush_scheduled_ becomes 0 and the lock is
```

The cause was a familiar problem, which is manual flush/compaction may
return before files they obsoleted are removed. The solution is just to
wait for "scheduled" work to complete, which includes all phases
including cleanup.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9327

Test Plan:
after this PR, even the above patch to coerce the bug cannot
cause the test to fail.

Reviewed By: riversand963

Differential Revision: D33252208

Pulled By: ajkr

fbshipit-source-id: 720a7eaca58c7247d221911fffe3d5e1dbf581e9
main
Andrew Kryczka 3 years ago committed by Facebook GitHub Bot
parent 1b076e82db
commit 538d2365e9
  1. 3
      db/db_impl/db_impl.h
  2. 6
      db/db_impl/db_impl_debug.cc
  3. 4
      utilities/backupable/backupable_db_test.cc

@ -974,6 +974,9 @@ class DBImpl : public DB {
Status TEST_AtomicFlushMemTables(const autovector<ColumnFamilyData*>& cfds, Status TEST_AtomicFlushMemTables(const autovector<ColumnFamilyData*>& cfds,
const FlushOptions& flush_opts); const FlushOptions& flush_opts);
// Wait for background threads to complete scheduled work.
Status TEST_WaitForBackgroundWork();
// Wait for memtable compaction // Wait for memtable compaction
Status TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family = nullptr); Status TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family = nullptr);

@ -156,6 +156,12 @@ Status DBImpl::TEST_AtomicFlushMemTables(
return AtomicFlushMemTables(cfds, flush_opts, FlushReason::kTest); return AtomicFlushMemTables(cfds, flush_opts, FlushReason::kTest);
} }
Status DBImpl::TEST_WaitForBackgroundWork() {
InstrumentedMutexLock l(&mutex_);
WaitForBackgroundWork();
return Status::OK();
}
Status DBImpl::TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family) { Status DBImpl::TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family) {
ColumnFamilyData* cfd; ColumnFamilyData* cfd;
if (column_family == nullptr) { if (column_family == nullptr) {

@ -3158,6 +3158,10 @@ TEST_F(BackupEngineTest, ChangeManifestDuringBackupCreation) {
FillDB(db_.get(), 0, 100, kAutoFlushOnly); FillDB(db_.get(), 0, 100, kAutoFlushOnly);
ASSERT_OK(db_chroot_env_->FileExists(prev_manifest_path)); ASSERT_OK(db_chroot_env_->FileExists(prev_manifest_path));
ASSERT_OK(db_->Flush(FlushOptions())); ASSERT_OK(db_->Flush(FlushOptions()));
// Even though manual flush completed above, the background thread may not
// have finished its cleanup work. `TEST_WaitForBackgroundWork()` will wait
// until all the background thread's work has completed, including cleanup.
ASSERT_OK(db_impl->TEST_WaitForBackgroundWork());
ASSERT_TRUE(db_chroot_env_->FileExists(prev_manifest_path).IsNotFound()); ASSERT_TRUE(db_chroot_env_->FileExists(prev_manifest_path).IsNotFound());
CloseDBAndBackupEngine(); CloseDBAndBackupEngine();

Loading…
Cancel
Save