From d9f42aa60db1dc66156a2350441129ef3d5aa626 Mon Sep 17 00:00:00 2001 From: Mayank Pundir Date: Wed, 2 Sep 2015 09:03:55 -0700 Subject: [PATCH] Adding a verifyBackup method to BackupEngine Summary: This diff adds a verifyBackup method to BackupEngine. The method verifies the name and size of each file in the backup. Test Plan: Unit test cases created and passing. Reviewers: igor, benj Subscribers: zelaine.fong, yhchiang, sdong, lgalanis, dhruba, AaronFeldman Differential Revision: https://reviews.facebook.net/D46029 --- include/rocksdb/utilities/backupable_db.h | 10 +++++ utilities/backupable/backupable_db.cc | 45 +++++++++++++++++++ utilities/backupable/backupable_db_test.cc | 51 ++++++++++++++++++++++ 3 files changed, 106 insertions(+) diff --git a/include/rocksdb/utilities/backupable_db.h b/include/rocksdb/utilities/backupable_db.h index 45c4a9bad..f08752d88 100644 --- a/include/rocksdb/utilities/backupable_db.h +++ b/include/rocksdb/utilities/backupable_db.h @@ -197,6 +197,11 @@ class BackupEngineReadOnly { virtual Status RestoreDBFromLatestBackup( const std::string& db_dir, const std::string& wal_dir, const RestoreOptions& restore_options = RestoreOptions()) = 0; + + // checks that each file exists and that the size of the file matches our + // expectations. it does not check file checksum. + // Returns Status::OK() if all checks are good + virtual Status VerifyBackup(BackupID backup_id) = 0; }; // Please see the documentation in BackupableDB and RestoreBackupableDB @@ -223,6 +228,11 @@ class BackupEngine { const std::string& db_dir, const std::string& wal_dir, const RestoreOptions& restore_options = RestoreOptions()) = 0; + // checks that each file exists and that the size of the file matches our + // expectations. it does not check file checksum. + // Returns Status::OK() if all checks are good + virtual Status VerifyBackup(BackupID backup_id) = 0; + virtual Status GarbageCollect() = 0; }; diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index cdcc0a621..a574cb4d8 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -109,6 +109,8 @@ class BackupEngineImpl : public BackupEngine { restore_options); } + virtual Status VerifyBackup(BackupID backup_id) override; + Status Initialize(); private: @@ -1006,6 +1008,45 @@ Status BackupEngineImpl::RestoreDBFromBackup( return s; } +Status BackupEngineImpl::VerifyBackup(BackupID backup_id) { + assert(initialized_); + auto corrupt_itr = corrupt_backups_.find(backup_id); + if (corrupt_itr != corrupt_backups_.end()) { + return corrupt_itr->second.first; + } + + auto backup_itr = backups_.find(backup_id); + if (backup_itr == backups_.end()) { + return Status::NotFound(); + } + + auto& backup = backup_itr->second; + if (backup->Empty()) { + return Status::NotFound(); + } + + Log(options_.info_log, "Verifying backup id %u\n", backup_id); + + uint64_t size; + Status result; + std::string file_path; + for (const auto& file_info : backup->GetFiles()) { + const std::string& file = file_info->filename; + file_path = GetAbsolutePath(file); + result = backup_env_->FileExists(file_path); + if (!result.ok()) { + return result; + } + result = backup_env_->GetFileSize(file_path, &size); + if (!result.ok()) { + return result; + } else if (size != file_info->size) { + return Status::Corruption("File corrupted: " + file); + } + } + return Status::OK(); +} + // this operation HAS to be atomic // writing 4 bytes to the file is atomic alright, but we should *never* // do something like 1. delete file, 2. write new file @@ -1585,6 +1626,10 @@ class BackupEngineReadOnlyImpl : public BackupEngineReadOnly { restore_options); } + virtual Status VerifyBackup(BackupID backup_id) override { + return backup_engine_->VerifyBackup(backup_id); + } + Status Initialize() { return backup_engine_->Initialize(); } private: diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index ba53c688a..266ce6849 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -282,6 +282,24 @@ class FileManager : public EnvWrapper { return Status::NotFound(""); } + Status AppendToRandomFileInDir(const std::string& dir, + const std::string& data) { + std::vector children; + GetChildren(dir, &children); + if (children.size() <= 2) { + return Status::NotFound(""); + } + while (true) { + int i = rnd_.Next() % children.size(); + if (children[i] != "." && children[i] != "..") { + return WriteToFile(dir + "/" + children[i], data); + } + } + // should never get here + assert(false); + return Status::NotFound(""); + } + Status CorruptFile(const std::string& fname, uint64_t bytes_to_corrupt) { std::string file_contents; Status s = ReadFileToString(this, fname, &file_contents); @@ -765,6 +783,39 @@ TEST_F(BackupableDBTest, CorruptionsTest) { AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * 5); } +// This test verifies that the verifyBackup method correctly identifies +// invalid backups +TEST_F(BackupableDBTest, VerifyBackup) { + const int keys_iteration = 5000; + Random rnd(6); + Status s; + OpenDBAndBackupEngine(true); + // create five backups + for (int i = 0; i < 5; ++i) { + FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1)); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true)); + } + CloseDBAndBackupEngine(); + + OpenDBAndBackupEngine(); + // ---------- case 1. - valid backup ----------- + ASSERT_TRUE(backup_engine_->VerifyBackup(1).ok()); + + // ---------- case 2. - delete a file -----------i + file_manager_->DeleteRandomFileInDir(backupdir_ + "/private/1"); + ASSERT_TRUE(backup_engine_->VerifyBackup(1).IsNotFound()); + + // ---------- case 3. - corrupt a file ----------- + std::string append_data = "Corrupting a random file"; + file_manager_->AppendToRandomFileInDir(backupdir_ + "/private/2", + append_data); + ASSERT_TRUE(backup_engine_->VerifyBackup(2).IsCorruption()); + + // ---------- case 4. - invalid backup ----------- + ASSERT_TRUE(backup_engine_->VerifyBackup(6).IsNotFound()); + CloseDBAndBackupEngine(); +} + // This test verifies we don't delete the latest backup when read-only option is // set TEST_F(BackupableDBTest, NoDeleteWithReadOnly) {