Deflake DBWALTest.FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL (#11016)

Summary:
**Context/Summary:**
Credit to ajkr's https://github.com/facebook/rocksdb/pull/11016#pullrequestreview-1205020134,
flaky test https://app.circleci.com/pipelines/github/facebook/rocksdb/21985/workflows/5f6cc355-78c1-46d8-89ee-0fd679725a8a/jobs/540878 is due to `Flush()` called in the test returned earlier than obsoleted WAL being found in background flush and SyncWAL() was called (i.e, "sync_point_called" sets to true).  Fix this by making checking `sync_point_called == true` after obsoleted WAL is found and `SyncWAL()` is called. Also rename the "sync_point_called" to be something more specific.

Also, fix a potential flakiness due to manually setting a log threshold to force new manifest creation. This is unreliable so I decided to use sync point to force new manifest creation.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11016

Test Plan: make check

Reviewed By: pdillinger

Differential Revision: D41717786

Pulled By: hx235

fbshipit-source-id: ad1e4701a987285bbe6c8e7d9b05c4db06b4edf4
main
Hui Xiao 2 years ago committed by Facebook GitHub Bot
parent 23af6786a9
commit 15bb4ea084
  1. 39
      db/db_wal_test.cc
  2. 3
      db/version_set.cc

@ -1602,9 +1602,6 @@ TEST_F(DBWALTest, RaceInstallFlushResultsWithWalObsoletion) {
TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL) {
Options options = CurrentOptions();
options.track_and_verify_wals_in_manifest = true;
// Set a small max_manifest_file_size to force manifest creation
// in SyncWAL() for tet purpose
options.max_manifest_file_size = 170;
DestroyAndReopen(options);
// Accumulate memtable m1 and create the 1st wal (i.e, 4.log)
@ -1619,25 +1616,26 @@ TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL) {
// active) log and release the lock
// (2) SyncWAL() proceeds with the lock. It
// creates a new manifest and syncs all the inactive wals before the latest
// (i.e, active log), which is 4.log. SyncWAL() is not aware of the fact
// that 4.log has marked as to be obseleted.
// Prior to the fix, such wal sync will then add a WAL addition record of
// 4.log to the new manifest without any special treatment.
// (i.e, active log), which is 4.log. Note that SyncWAL() is not aware of the
// fact that 4.log has marked as to be obseleted. Prior to the fix, such wal
// sync will then add a WAL addition record of 4.log to the new manifest
// without any special treatment.
// (3) BackgroundFlush() will eventually purge 4.log.
bool sync_point_called = false;
bool new_manifest_created = false;
bool wal_synced = false;
SyncPoint::GetInstance()->SetCallBack(
"FindObsoleteFiles::PostMutexUnlock", [&](void*) {
ASSERT_OK(env_->FileExists(wal_file_path));
SyncPoint::GetInstance()->SetCallBack(
"VersionSet::ProcessManifestWrites:BeforeNewManifest",
[&](void*) { new_manifest_created = true; });
"VersionSet::ProcessManifestWrites:"
"PostDecidingCreateNewManifestOrNot",
[&](void* arg) {
bool* new_descriptor_log = (bool*)arg;
*new_descriptor_log = true;
});
ASSERT_OK(db_->SyncWAL());
ASSERT_TRUE(new_manifest_created);
sync_point_called = true;
wal_synced = true;
});
SyncPoint::GetInstance()->SetCallBack(
@ -1650,21 +1648,22 @@ TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL) {
"PostDeleteWAL");
}
});
SyncPoint::GetInstance()->LoadDependency(
{{"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
{{"DBImpl::BackgroundCallFlush:FilesFound",
"PreConfrimObsoletedWALSynced"},
{"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
"PostDeleteWAL",
"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
"PreConfrimWALDeleted"}});
SyncPoint::GetInstance()->EnableProcessing();
ASSERT_OK(Flush());
ASSERT_TRUE(sync_point_called);
TEST_SYNC_POINT(
"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
"PreConfrimWALDeleted");
TEST_SYNC_POINT("PreConfrimObsoletedWALSynced");
ASSERT_TRUE(wal_synced);
TEST_SYNC_POINT("PreConfrimWALDeleted");
// BackgroundFlush() purged 4.log
// because the memtable associated with the WAL was flushed and new WAL was
// created (i.e, 8.log)

@ -4983,6 +4983,9 @@ Status VersionSet::ProcessManifestWrites(
} else {
pending_manifest_file_number_ = manifest_file_number_;
}
TEST_SYNC_POINT_CALLBACK(
"VersionSet::ProcessManifestWrites:PostDecidingCreateNewManifestOrNot",
&new_descriptor_log);
// Local cached copy of state variable(s). WriteCurrentStateToManifest()
// reads its content after releasing db mutex to avoid race with

Loading…
Cancel
Save