Fix flaky BackupableDBTest.CustomChecksumTransition (#7254)

Summary:
The flaky test in the title is caused by two problems. First, there is a bug in the BackupEngine that results in skipping computing the default crc32 checksum when `share_table_files` is enabled and the table is already backed up. Second, when `RestoreDBFromBackup` fails and the backup was being restored to the DB directory, it is likely that `RestoreDBFromBackup` has cleaned up the DB directory before it fails, and therefore, files in old backups may collide with files to be backed up if `share_files_with_checksum` is not enabled.

New tests that cover the above problems are added.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7254

Test Plan: `./backupable_db_test`

Reviewed By: pdillinger

Differential Revision: D23118715

Pulled By: gg814

fbshipit-source-id: 7be8de912808944be59e93d602c7431a54c079eb
main
Zitan Chen 5 years ago committed by Facebook GitHub Bot
parent a1aa3f8385
commit 15245e9018
  1. 25
      utilities/backupable/backupable_db.cc
  2. 90
      utilities/backupable/backupable_db_test.cc

@ -1807,7 +1807,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
std::string backup_checksum_func_name = kDefaultBackupFileChecksumFuncName; std::string backup_checksum_func_name = kDefaultBackupFileChecksumFuncName;
std::string db_id; std::string db_id;
std::string db_session_id; std::string db_session_id;
// whether the checksum for a table file is available // whether a default or custom checksum for a table file is available
bool has_checksum = false; bool has_checksum = false;
// Set up the custom checksum function. // Set up the custom checksum function.
@ -1956,10 +1956,13 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
backup_env_->DeleteFile(final_dest_path); backup_env_->DeleteFile(final_dest_path);
} else { } else {
// file exists and referenced // file exists and referenced
if (!has_checksum) { if (!has_checksum || checksum_hex.empty()) {
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, // Either both checksum_hex and custom_checksum_hex need recalculating
size_limit, &checksum_hex, checksum_func, // or only checksum_hex needs recalculating
&custom_checksum_hex); s = CalculateChecksum(
src_dir + fname, db_env_, src_env_options, size_limit,
&checksum_hex, checksum_func,
checksum_hex.empty() ? nullptr : &custom_checksum_hex);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -1999,10 +2002,13 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// the file is present and referenced by a backup // the file is present and referenced by a backup
ROCKS_LOG_INFO(options_.info_log, ROCKS_LOG_INFO(options_.info_log,
"%s already present, calculate checksum", fname.c_str()); "%s already present, calculate checksum", fname.c_str());
if (!has_checksum) { if (!has_checksum || checksum_hex.empty()) {
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, // Either both checksum_hex and custom_checksum_hex need recalculating
size_limit, &checksum_hex, checksum_func, // or only checksum_hex needs recalculating
&custom_checksum_hex); s = CalculateChecksum(
src_dir + fname, db_env_, src_env_options, size_limit,
&checksum_hex, checksum_func,
checksum_hex.empty() ? nullptr : &custom_checksum_hex);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -2475,6 +2481,7 @@ Status BackupEngineImpl::BackupMeta::AddFile(
"try again."); "try again.");
} else if (IsSameChecksumFunc(itr->second->checksum_func_name, } else if (IsSameChecksumFunc(itr->second->checksum_func_name,
file_info->checksum_func_name) && file_info->checksum_func_name) &&
!itr->second->custom_checksum_hex.empty() &&
itr->second->custom_checksum_hex != itr->second->custom_checksum_hex !=
file_info->custom_checksum_hex) { file_info->custom_checksum_hex) {
return Status::Corruption( return Status::Corruption(

@ -1053,12 +1053,23 @@ TEST_F(BackupableDBTest, CustomChecksumTransition) {
sopt); sopt);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
CloseBackupEngine(); CloseBackupEngine();
++i;
// Say, we accidentally change the factory // Say, we accidentally change the factory
backupable_options_->file_checksum_gen_factory = hash32_factory; backupable_options_->file_checksum_gen_factory = hash32_factory;
OpenBackupEngine(); OpenBackupEngine();
ASSERT_NOK(backup_engine_->VerifyBackup(i + 2, true)); // Unable to verify the latest backup.
ASSERT_NOK(backup_engine_->RestoreDBFromBackup(i + 2, dbname_, dbname_)); ASSERT_NOK(backup_engine_->VerifyBackup(i + 1, true));
ASSERT_OK(backup_engine_->DeleteBackup(i + 2)); // Unable to restore the latest backup.
ASSERT_NOK(backup_engine_->RestoreDBFromBackup(i + 1, dbname_, dbname_));
CloseBackupEngine();
// Reset the factory to the same as the one used in the db.
backupable_options_->file_checksum_gen_factory = hash_factory;
OpenBackupEngine();
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
ASSERT_OK(backup_engine_->RestoreDBFromBackup(i + 1, dbname_, dbname_));
ASSERT_OK(backup_engine_->DeleteBackup(i + 1));
--i;
CloseDBAndBackupEngine(); CloseDBAndBackupEngine();
// 3) with one custom checksum function (FileHash32GenFactory) for db // 3) with one custom checksum function (FileHash32GenFactory) for db
@ -1106,6 +1117,79 @@ TEST_F(BackupableDBTest, CustomChecksumTransition) {
} }
} }
TEST_F(BackupableDBTest, CustomChecksumNoNewDbTables) {
const int keys_iteration = 5000;
std::vector<std::shared_ptr<FileChecksumGenFactory>> checksum_factories{
nullptr, GetFileChecksumGenCrc32cFactory(),
std::make_shared<FileHash32GenFactory>(),
std::make_shared<FileHashGenFactory>()};
for (const auto& sopt : kAllShareOptions) {
for (const auto& f : checksum_factories) {
options_.file_checksum_gen_factory = f;
backupable_options_->file_checksum_gen_factory = f;
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
// No new table files have been created since the last backup.
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(2, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * 2);
AssertBackupConsistency(2, 0, keys_iteration, keys_iteration * 2);
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
// No new table files have been created since the last backup and backup
// engine opening
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(3, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(3, 0, keys_iteration, keys_iteration * 2);
// delete old data
DestroyDB(dbname_, options_);
}
}
}
TEST_F(BackupableDBTest, FileCollision) {
const int keys_iteration = 5000;
for (const auto& sopt : kAllShareOptions) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
CloseDBAndBackupEngine();
// If the db directory has been cleaned up, it is sensitive to file
// collision.
DestroyDB(dbname_, options_);
// open with old backup
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
FillDB(db_.get(), 0, keys_iteration * 2);
if (sopt != kShareNoChecksum) {
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
} else {
// The new table files created in FillDB() will clash with the old
// backup and sharing tables with no checksum will have the file
// collision problem.
ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->PurgeOldBackups(0));
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
}
CloseDBAndBackupEngine();
// delete old data
DestroyDB(dbname_, options_);
}
}
// This test verifies that the verifyBackup method correctly identifies // This test verifies that the verifyBackup method correctly identifies
// invalid backups // invalid backups
TEST_P(BackupableDBTestWithParam, VerifyBackup) { TEST_P(BackupableDBTestWithParam, VerifyBackup) {

Loading…
Cancel
Save