From 147f7b472a86d2c4facd2f1cf36d2735fc83dec0 Mon Sep 17 00:00:00 2001 From: Zitan Chen <11285749+gg814@users.noreply.github.com> Date: Fri, 3 Jul 2020 15:38:35 -0700 Subject: [PATCH] Fix flakiness of BackupableDBTest.TableFileCorruptedBeforeBackup (#7082) Summary: If the corruption of a table file is done before flushing, then db manifest may record the checksum for the corrupted table, which results in "matching checksums" when backup engine tries to verfiy the checksum, and causes a flaky test. Fix the issue by adding `Flush()` before trying to corrupt a table file in *db*. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7082 Test Plan: `buck test` Without the fix, failed 5 of 100 tests. Suspected whether the pseudo randomness causes the issue: doubling `keys_iteration` resulted in 2 of 100 tests failed; deterministically corrupting tables file also caused 2 of 100 tests to fail. With the fix, passed 200 of 200 tests. Reviewed By: pdillinger Differential Revision: D22375421 Pulled By: gg814 fbshipit-source-id: 7304618e7520684b6087e42d0b58329c5ad18329 --- utilities/backupable/backupable_db.cc | 2 +- utilities/backupable/backupable_db_test.cc | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index c85dee1fc..bb8cfde9d 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -1586,7 +1586,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem( // b) share_table_files is true and the file exists already. if (kDbFileChecksumFuncName == src_checksum_func_name) { if (src_checksum_str == kUnknownFileChecksum) { - return Status::Aborted("Unkown checksum value for " + fname); + return Status::Aborted("Unknown checksum value for " + fname); } s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, size_limit, &checksum_value); diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 46d7942a7..f33ca73a0 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -1185,6 +1185,7 @@ TEST_F(BackupableDBTest, TableFileCorruptedBeforeBackup) { OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, kNoShare); FillDB(db_.get(), 0, keys_iteration); + ASSERT_OK(db_->Flush(FlushOptions())); // corrupt a random table file in the DB directory ASSERT_OK(CorruptRandomTableFileInDB()); // file_checksum_gen_factory is null, and thus table checksum is not @@ -1199,6 +1200,7 @@ TEST_F(BackupableDBTest, TableFileCorruptedBeforeBackup) { OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, kNoShare); FillDB(db_.get(), 0, keys_iteration); + ASSERT_OK(db_->Flush(FlushOptions())); // corrupt a random table file in the DB directory ASSERT_OK(CorruptRandomTableFileInDB()); // table file checksum is enabled so we should be able to detect any @@ -1216,6 +1218,7 @@ TEST_P(BackupableDBTestWithParam, TableFileCorruptedBeforeBackup) { OpenDBAndBackupEngine(true /* destroy_old_data */); FillDB(db_.get(), 0, keys_iteration); + ASSERT_OK(db_->Flush(FlushOptions())); // corrupt a random table file in the DB directory ASSERT_OK(CorruptRandomTableFileInDB()); // cannot detect corruption since DB manifest has no table checksums @@ -1228,6 +1231,7 @@ TEST_P(BackupableDBTestWithParam, TableFileCorruptedBeforeBackup) { options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); OpenDBAndBackupEngine(true /* destroy_old_data */); FillDB(db_.get(), 0, keys_iteration); + ASSERT_OK(db_->Flush(FlushOptions())); // corrupt a random table file in the DB directory ASSERT_OK(CorruptRandomTableFileInDB()); // corruption is detected