From cc5c68084b90697a3a1dc0c91115527766b80a10 Mon Sep 17 00:00:00 2001 From: Zitan Chen <11285749+gg814@users.noreply.github.com> Date: Wed, 8 Jul 2020 12:15:02 -0700 Subject: [PATCH] Fix flaky BackupableDBTest.TableFileCorruptedBeforeBackup (#7102) Summary: The fix in PR https://github.com/facebook/rocksdb/issues/7082 is not really successful because there is still a small chance that the test will fail. In addtion to flushing, we close the DB and then reopen before corrupting a table file in the DB. Specifically, we corrupt a table file before backup takes place as follows. * Open DB * Fill DB * Flush DB (optional, no flushing here also works) * Close DB * Reopen DB * Corrupt a table file in the DB This should make the test reliable. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7102 Test Plan: `while ./backupable_db_test --gtest_filter=*TableFileCorruptedBeforeBackup*; do true; done` (kept running for an hour or so :) Reviewed By: pdillinger Differential Revision: D22432417 Pulled By: gg814 fbshipit-source-id: d407eee93ff428bb662f80cde1659fbf0149d0cd --- utilities/backupable/backupable_db_test.cc | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index f33ca73a0..677bcba56 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -594,6 +594,17 @@ class BackupableDBTest : public testing::Test { return db; } + void CloseAndReopenDB() { + // Close DB + db_.reset(); + + // Open DB + test_db_env_->SetLimitWrittenFiles(1000000); + DB* db; + ASSERT_OK(DB::Open(options_, dbname_, &db)); + db_.reset(db); + } + void OpenDBAndBackupEngine(bool destroy_old_data = false, bool dummy = false, ShareOption shared_option = kShareNoChecksum) { // reset all the defaults @@ -1186,12 +1197,14 @@ TEST_F(BackupableDBTest, TableFileCorruptedBeforeBackup) { kNoShare); FillDB(db_.get(), 0, keys_iteration); ASSERT_OK(db_->Flush(FlushOptions())); + CloseAndReopenDB(); // corrupt a random table file in the DB directory ASSERT_OK(CorruptRandomTableFileInDB()); // file_checksum_gen_factory is null, and thus table checksum is not // verified for creating a new backup; no correction is detected ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); CloseDBAndBackupEngine(); + // delete old files in db ASSERT_OK(DestroyDB(dbname_, options_)); @@ -1201,12 +1214,12 @@ TEST_F(BackupableDBTest, TableFileCorruptedBeforeBackup) { kNoShare); FillDB(db_.get(), 0, keys_iteration); ASSERT_OK(db_->Flush(FlushOptions())); + CloseAndReopenDB(); // corrupt a random table file in the DB directory ASSERT_OK(CorruptRandomTableFileInDB()); // table file checksum is enabled so we should be able to detect any // corruption ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get())); - CloseDBAndBackupEngine(); } @@ -1219,11 +1232,13 @@ TEST_P(BackupableDBTestWithParam, TableFileCorruptedBeforeBackup) { OpenDBAndBackupEngine(true /* destroy_old_data */); FillDB(db_.get(), 0, keys_iteration); ASSERT_OK(db_->Flush(FlushOptions())); + CloseAndReopenDB(); // corrupt a random table file in the DB directory ASSERT_OK(CorruptRandomTableFileInDB()); // cannot detect corruption since DB manifest has no table checksums ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); CloseDBAndBackupEngine(); + // delete old files in db ASSERT_OK(DestroyDB(dbname_, options_)); @@ -1232,11 +1247,11 @@ TEST_P(BackupableDBTestWithParam, TableFileCorruptedBeforeBackup) { OpenDBAndBackupEngine(true /* destroy_old_data */); FillDB(db_.get(), 0, keys_iteration); ASSERT_OK(db_->Flush(FlushOptions())); + CloseAndReopenDB(); // corrupt a random table file in the DB directory ASSERT_OK(CorruptRandomTableFileInDB()); // corruption is detected ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get())); - CloseDBAndBackupEngine(); }