From 1569dc48f5016eac0817f9cba72f2944a67929a7 Mon Sep 17 00:00:00 2001 From: Zitan Chen <11285749+gg814@users.noreply.github.com> Date: Fri, 26 Jun 2020 11:39:43 -0700 Subject: [PATCH] `BackupEngine::VerifyBackup` verifies checksum by default (#7014) Summary: A parameter `verify_with_checksum` is added to `BackupEngine::VerifyBackup`, which is true by default. So now `BackupEngine::VerifyBackup` verifies backup files with checksum AND file size by default. When `verify_with_checksum` is false, `BackupEngine::VerifyBackup` only compares file sizes to verify backup files. Also add a test for the case when corruption does not change the file size. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7014 Test Plan: Passed backupable_db_test Reviewed By: zhichao-cao Differential Revision: D22165590 Pulled By: gg814 fbshipit-source-id: 606a7450714e868bceb38598c89fd356c6004f4f --- HISTORY.md | 1 + include/rocksdb/utilities/backupable_db.h | 28 +++++-- utilities/backupable/backupable_db.cc | 38 +++++++-- utilities/backupable/backupable_db_test.cc | 89 +++++++++++++++++++++- 4 files changed, 143 insertions(+), 13 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 3c226aab4..1419eaca1 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -13,6 +13,7 @@ ### Public API Change * `DB::GetDbSessionId(std::string& session_id)` is added. `session_id` stores a unique identifier that gets reset every time the DB is opened. This DB session ID should be unique among all open DB instances on all hosts, and should be unique among re-openings of the same or other DBs. This identifier is recorded in the LOG file on the line starting with "DB Session ID:". * `DB::OpenForReadOnly()` now returns `Status::NotFound` when the specified DB directory does not exist. Previously the error returned depended on the underlying `Env`. This change is available in all 6.11 releases as well. +* A parameter `verify_with_checksum` is added to `BackupEngine::VerifyBackup`, which is true by default. So now `BackupEngine::VerifyBackup` verifies checksums and file sizes of backup files by default. Pass `false` for `verify_with_checksum` to maintain the previous behavior and performance of `BackupEngine::VerifyBackup`, by only verifying sizes of backup files. ### New Features * DB identity (`db_id`) and DB session identity (`db_session_id`) are added to table properties and stored in SST files. SST files generated from SstFileWriter and Repairer have DB identity “SST Writer” and “DB Repairer”, respectively. Their DB session IDs are generated in the same way as `DB::GetDbSessionId`. The session ID for SstFileWriter (resp., Repairer) resets every time `SstFileWriter::Open` (resp., `Repairer::Run`) is called. diff --git a/include/rocksdb/utilities/backupable_db.h b/include/rocksdb/utilities/backupable_db.h index f3c3780e7..76b5e549e 100644 --- a/include/rocksdb/utilities/backupable_db.h +++ b/include/rocksdb/utilities/backupable_db.h @@ -293,16 +293,23 @@ class BackupEngineReadOnly { return RestoreDBFromLatestBackup(options, db_dir, wal_dir); } + // If verify_with_checksum is true, this function + // inspects the current checksums and file sizes of backup files to see if + // they match our expectation. + // + // If verify_with_checksum is false, this function // checks that each file exists and that the size of the file matches our - // expectations. it does not check file checksum. + // expectation. It does not check file checksum. // // If this BackupEngine created the backup, it compares the files' current - // sizes against the number of bytes written to them during creation. - // Otherwise, it compares the files' current sizes against their sizes when - // the BackupEngine was opened. + // sizes (and current checksum) against the number of bytes written to + // them (and the checksum calculated) during creation. + // Otherwise, it compares the files' current sizes (and checksums) against + // their sizes (and checksums) when the BackupEngine was opened. // // Returns Status::OK() if all checks are good - virtual Status VerifyBackup(BackupID backup_id) = 0; + virtual Status VerifyBackup(BackupID backup_id, + bool verify_with_checksum = true) = 0; }; // A backup engine for creating new backups. @@ -414,10 +421,17 @@ class BackupEngine { return RestoreDBFromLatestBackup(options, db_dir, wal_dir); } + // If verify_with_checksum is true, this function + // inspects the current checksums and file sizes of backup files to see if + // they match our expectation. + // + // If verify_with_checksum is false, this function // checks that each file exists and that the size of the file matches our - // expectations. it does not check file checksum. + // expectation. It does not check file checksum. + // // Returns Status::OK() if all checks are good - virtual Status VerifyBackup(BackupID backup_id) = 0; + virtual Status VerifyBackup(BackupID backup_id, + bool verify_with_checksum = true) = 0; // Will delete any files left over from incomplete creation or deletion of // a backup. This is not normally needed as those operations also clean up diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index 50830cc0e..b0d1a6204 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -128,7 +128,8 @@ class BackupEngineImpl : public BackupEngine { wal_dir); } - Status VerifyBackup(BackupID backup_id) override; + Status VerifyBackup(BackupID backup_id, + bool verify_with_checksum = true) override; Status Initialize(); @@ -1297,7 +1298,9 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options, return s; } -Status BackupEngineImpl::VerifyBackup(BackupID backup_id) { +Status BackupEngineImpl::VerifyBackup(BackupID backup_id, + bool verify_with_checksum) { + // Check if backup_id is corrupted, or valid and registered assert(initialized_); auto corrupt_itr = corrupt_backups_.find(backup_id); if (corrupt_itr != corrupt_backups_.end()) { @@ -1316,6 +1319,7 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id) { ROCKS_LOG_INFO(options_.info_log, "Verifying backup id %u\n", backup_id); + // Find all existing backup files belong to backup_id std::unordered_map curr_abs_path_to_size; for (const auto& rel_dir : {GetPrivateFileRel(backup_id), GetSharedFileRel(), GetSharedFileWithChecksumRel()}) { @@ -1323,13 +1327,36 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id) { InsertPathnameToSizeBytes(abs_dir, backup_env_, &curr_abs_path_to_size); } + // For all files registered in backup for (const auto& file_info : backup->GetFiles()) { const auto abs_path = GetAbsolutePath(file_info->filename); + // check existence of the file if (curr_abs_path_to_size.find(abs_path) == curr_abs_path_to_size.end()) { return Status::NotFound("File missing: " + abs_path); } + // verify file size if (file_info->size != curr_abs_path_to_size[abs_path]) { - return Status::Corruption("File corrupted: " + abs_path); + std::string size_info("Expected file size is " + + ToString(file_info->size) + + " while found file size is " + + ToString(curr_abs_path_to_size[abs_path])); + return Status::Corruption("File corrupted: File size mismatch for " + + abs_path + ": " + size_info); + } + if (verify_with_checksum) { + // verify file checksum + uint32_t checksum_value = 0; + ROCKS_LOG_INFO(options_.info_log, "Verifying %s checksum...\n", + abs_path.c_str()); + CalculateChecksum(abs_path, backup_env_, EnvOptions(), 0 /* size_limit */, + &checksum_value); + if (file_info->checksum_value != checksum_value) { + std::string checksum_info( + "Expected checksum is " + ToString(file_info->checksum_value) + + " while computed checksum is " + ToString(checksum_value)); + return Status::Corruption("File corrupted: Checksum mismatch for " + + abs_path + ": " + checksum_info); + } } } return Status::OK(); @@ -2132,8 +2159,9 @@ class BackupEngineReadOnlyImpl : public BackupEngineReadOnly { return backup_engine_->RestoreDBFromLatestBackup(options, db_dir, wal_dir); } - Status VerifyBackup(BackupID backup_id) override { - return backup_engine_->VerifyBackup(backup_id); + Status VerifyBackup(BackupID backup_id, + bool verify_with_checksum = true) override { + return backup_engine_->VerifyBackup(backup_id, verify_with_checksum); } Status Initialize() { return backup_engine_->Initialize(); } diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 217e27676..cbb912e7d 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -376,6 +376,27 @@ class FileManager : public EnvWrapper { public: explicit FileManager(Env* t) : EnvWrapper(t), rnd_(5) {} + Status GetRandomFileInDir(const std::string& dir, std::string* fname, + uint64_t* fsize) { + std::vector children; + GetChildrenFileAttributes(dir, &children); + if (children.size() <= 2) { // . and .. + return Status::NotFound("Empty directory: " + dir); + } + assert(fname != nullptr); + while (true) { + int i = rnd_.Next() % children.size(); + if (children[i].name != "." && children[i].name != "..") { + fname->assign(dir + "/" + children[i].name); + *fsize = children[i].size_bytes; + return Status::OK(); + } + } + // should never get here + assert(false); + return Status::NotFound(""); + } + Status DeleteRandomFileInDir(const std::string& dir) { std::vector children; GetChildren(dir, &children); @@ -702,7 +723,6 @@ class BackupableDBTestWithParam : public BackupableDBTest, // invalid backups TEST_P(BackupableDBTestWithParam, VerifyBackup) { const int keys_iteration = 5000; - Random rnd(6); Status s; OpenDBAndBackupEngine(true); // create five backups @@ -1050,6 +1070,73 @@ TEST_F(BackupableDBTest, CorruptionsTest) { AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * 5); } +// Corrupt a file but maintain its size +TEST_F(BackupableDBTest, CorruptFileMaintainSize) { + const int keys_iteration = 5000; + Status s; + OpenDBAndBackupEngine(true); + // create a backup + FillDB(db_.get(), 0, keys_iteration); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true)); + CloseDBAndBackupEngine(); + + OpenDBAndBackupEngine(); + // verify with file size + ASSERT_OK(backup_engine_->VerifyBackup(1, false)); + // verify with file checksum + ASSERT_OK(backup_engine_->VerifyBackup(1, true)); + + std::string file_to_corrupt; + uint64_t file_size = 0; + // under normal circumstance, there should be at least one nonempty file + while (file_size == 0) { + // get a random file in /private/1 + ASSERT_OK(file_manager_->GetRandomFileInDir(backupdir_ + "/private/1", + &file_to_corrupt, &file_size)); + // corrupt the file by replacing its content by file_size random bytes + ASSERT_OK(file_manager_->CorruptFile(file_to_corrupt, file_size)); + } + // file sizes match + ASSERT_OK(backup_engine_->VerifyBackup(1, false)); + // file checksums mismatch + ASSERT_NOK(backup_engine_->VerifyBackup(1, true)); + // sanity check, use default second argument + ASSERT_NOK(backup_engine_->VerifyBackup(1)); + CloseDBAndBackupEngine(); + + // an extra challenge + // set share_files_with_checksum to true and do two more backups + // corrupt all the table files in shared_checksum but maintain their sizes + OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, + kShareWithChecksum); + // creat two backups + for (int i = 1; i < 3; ++i) { + FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1)); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true)); + } + CloseDBAndBackupEngine(); + + OpenDBAndBackupEngine(); + std::vector children; + const std::string dir = backupdir_ + "/shared_checksum"; + ASSERT_OK(file_manager_->GetChildrenFileAttributes(dir, &children)); + for (const auto& child : children) { + if (child.name == "." || child.name == ".." || child.size_bytes == 0) { + continue; + } + // corrupt the file by replacing its content by file_size random bytes + ASSERT_OK( + file_manager_->CorruptFile(dir + "/" + child.name, child.size_bytes)); + } + // file sizes match + ASSERT_OK(backup_engine_->VerifyBackup(1, false)); + ASSERT_OK(backup_engine_->VerifyBackup(2, false)); + // file checksums mismatch + ASSERT_NOK(backup_engine_->VerifyBackup(1, true)); + ASSERT_NOK(backup_engine_->VerifyBackup(2, true)); + CloseDBAndBackupEngine(); +} + TEST_F(BackupableDBTest, InterruptCreationTest) { // Interrupt backup creation by failing new writes and failing cleanup of the // partial state. Then verify a subsequent backup can still succeed.