Fix deadlock in `fs_test.WALWriteRetryableErrorAutoRecover1` (#7897)

Summary: The recovery thread could hold the db.mutex, which is needed from sync write in main thread. Make sure the write is done before recovery thread starts. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7897 Test Plan: `gtest-parallel ./error_handler_fs_test --gtest_filter=DBErrorHandlingFSTest.WALWriteRetryableErrorAutoRecover1 -r 10000 --workers=200` Reviewed By: zhichao-cao Differential Revision: D26082933 Pulled By: jay-zhuang fbshipit-source-id: 226fc49228c0e5903f86ff45cc3fed3080abdb1f
5 years ago · c6ff4c0b70
parent 4ee991b1e6
commit c6ff4c0b70
1 changed files with 3 additions and 1 deletions
--- a/db/error_handler_fs_test.cc
+++ b/db/error_handler_fs_test.cc
@ -2248,7 +2248,8 @@ TEST_F(DBErrorHandlingFSTest, WALWriteRetryableErrorAutoRecover1) {
      ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024)));
    }
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
-        {{"RecoverFromRetryableBGIOError:BeforeResume0", "WALWriteError1:0"},
+        {{"WALWriteErrorDone", "RecoverFromRetryableBGIOError:BeforeStart"},
+         {"RecoverFromRetryableBGIOError:BeforeResume0", "WALWriteError1:0"},
         {"WALWriteError1:1", "RecoverFromRetryableBGIOError:BeforeResume1"},
         {"RecoverFromRetryableBGIOError:RecoverSuccess", "WALWriteError1:2"}});

@ -2264,6 +2265,7 @@ TEST_F(DBErrorHandlingFSTest, WALWriteRetryableErrorAutoRecover1) {
    wopts.sync = true;
    s = dbfull()->Write(wopts, &batch);
    ASSERT_EQ(true, s.IsIOError());
+    TEST_SYNC_POINT("WALWriteErrorDone");

    TEST_SYNC_POINT("WALWriteError1:0");
    fault_fs_->SetFilesystemActive(true);