diff --git a/HISTORY.md b/HISTORY.md index 7e689a855..28db16720 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,10 @@ ### Bug fixes * Fix a performance regression introduced in 6.4 that makes a upper bound check for every Next() even if keys are within a data block that is within the upper bound. +### New Features +* A new option `std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory` is added to `BackupableDBOptions`. The default value for this option is `nullptr`. If this option is null, the default backup engine checksum function (crc32c) will be used for creating, verifying, or restoring backups. If it is not null and is set to the DB custom checksum factory, the custom checksum function used in DB will also be used for creating, verifying, or restoring backups, in addition to the default checksum function (crc32c). If it is not null and is set to a custom checksum factory different from the DB custom checksum factory (which may be null), BackupEngine will return `Status::InvalidArgument()`. +* A new field `std::string requested_checksum_func_name` is added to `FileChecksumGenContext`, which enables the checksum factory to create generators for a suite of different functions. 
+ ## 6.12 (2020-07-28) ### Public API Change diff --git a/db/version_edit.h b/db/version_edit.h index f93092eb0..a84e3de09 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -486,6 +486,10 @@ class VersionEdit { return is_column_family_add_ || is_column_family_drop_; } + bool IsColumnFamilyAdd() const { return is_column_family_add_; } + + bool IsColumnFamilyDrop() const { return is_column_family_drop_; } + void MarkAtomicGroup(uint32_t remaining_entries) { is_in_atomic_group_ = true; remaining_entries_ = remaining_entries; diff --git a/include/rocksdb/file_checksum.h b/include/rocksdb/file_checksum.h index 6e38528c0..37b1744ce 100644 --- a/include/rocksdb/file_checksum.h +++ b/include/rocksdb/file_checksum.h @@ -22,9 +22,18 @@ namespace ROCKSDB_NAMESPACE { constexpr char kUnknownFileChecksum[] = ""; // The unknown sst file checksum function name. constexpr char kUnknownFileChecksumFuncName[] = "Unknown"; +// The standard DB file checksum function name. +// This is the name of the checksum function returned by +// GetFileChecksumGenCrc32cFactory(); +constexpr char kStandardDbFileChecksumFuncName[] = "FileChecksumCrc32c"; struct FileChecksumGenContext { std::string file_name; + // The name of the requested checksum generator. + // Checksum factories may use or ignore requested_checksum_func_name, + // and checksum factories written before this field was available are still + // compatible. + std::string requested_checksum_func_name; }; // FileChecksumGenerator is the class to generates the checksum value diff --git a/include/rocksdb/utilities/backupable_db.h b/include/rocksdb/utilities/backupable_db.h index 91723a1d0..6c1fad22f 100644 --- a/include/rocksdb/utilities/backupable_db.h +++ b/include/rocksdb/utilities/backupable_db.h @@ -24,10 +24,8 @@ namespace ROCKSDB_NAMESPACE { -// The default DB file checksum function name. -constexpr char kDbFileChecksumFuncName[] = "FileChecksumCrc32c"; // The default BackupEngine file checksum function name. 
-constexpr char kBackupFileChecksumFuncName[] = "crc32c"; +constexpr char kDefaultBackupFileChecksumFuncName[] = "crc32c"; // BackupTableNameOption describes possible naming schemes for backup // table file names when the table files are stored in the shared_checksum @@ -165,6 +163,33 @@ struct BackupableDBOptions { // db_session_id as a fallback. BackupTableNameOption share_files_with_checksum_naming; + // Option for custom checksum functions. + // When this option is nullptr, BackupEngine will use its default crc32c as + // the checksum function. + // + // When it is not nullptr, BackupEngine will try to find in the factory the + // checksum function that DB used to calculate the file checksums. If such a + // function is found, BackupEngine will use it to create, verify, or restore + // backups, in addition to the default crc32c checksum function. If such a + // function is not found, BackupEngine will return Status::InvalidArgument(). + // Therefore, this option comes into effect only if DB has a custom checksum + // factory and this option is set to the same factory. + // + // + // Note: If share_files_with_checksum and share_table_files are true, + // the <checksum> appearing in the table filenames will be the custom checksum + // value if db session ids are available (namely, table file naming option + // is kOptionalChecksumAndDbSessionId and the db session ids obtained from + // the table files are nonempty). + // + // Note: We do not require the same setting to this option for backup + // restoration or verification as was set during backup creation but we + // strongly recommend setting it to the same as the DB file checksum function + // for all BackupEngine interactions when practical. 
+ // + // Default: nullptr + std::shared_ptr file_checksum_gen_factory; + void Dump(Logger* logger) const; explicit BackupableDBOptions( @@ -176,7 +201,9 @@ struct BackupableDBOptions { uint64_t _callback_trigger_interval_size = 4 * 1024 * 1024, int _max_valid_backups_to_open = INT_MAX, BackupTableNameOption _share_files_with_checksum_naming = - kOptionalChecksumAndDbSessionId) + kOptionalChecksumAndDbSessionId, + std::shared_ptr _file_checksum_gen_factory = + nullptr) : backup_dir(_backup_dir), backup_env(_backup_env), share_table_files(_share_table_files), @@ -190,7 +217,8 @@ struct BackupableDBOptions { max_background_operations(_max_background_operations), callback_trigger_interval_size(_callback_trigger_interval_size), max_valid_backups_to_open(_max_valid_backups_to_open), - share_files_with_checksum_naming(_share_files_with_checksum_naming) { + share_files_with_checksum_naming(_share_files_with_checksum_naming), + file_checksum_gen_factory(_file_checksum_gen_factory) { assert(share_table_files || !share_files_with_checksum); } }; @@ -327,16 +355,18 @@ class BackupEngineReadOnly { } // If verify_with_checksum is true, this function - // inspects the current checksums and file sizes of backup files to see if - // they match our expectation. + // inspects the default crc32c checksums and file sizes of backup files to + // see if they match our expectation. This function further inspects the + // custom checksums if BackupableDBOptions::file_checksum_gen_factory is + // the same as DBOptions::file_checksum_gen_factory. // // If verify_with_checksum is false, this function // checks that each file exists and that the size of the file matches our // expectation. It does not check file checksum. // // If this BackupEngine created the backup, it compares the files' current - // sizes (and current checksum) against the number of bytes written to - // them (and the checksum calculated) during creation. 
+ // sizes (and current checksums) against the number of bytes written to + // them (and the checksums calculated) during creation. // Otherwise, it compares the files' current sizes (and checksums) against // their sizes (and checksums) when the BackupEngine was opened. // @@ -456,7 +486,9 @@ class BackupEngine { // If verify_with_checksum is true, this function // inspects the current checksums and file sizes of backup files to see if - // they match our expectation. + // they match our expectation. It further inspects the custom checksums + // if BackupableDBOptions::file_checksum_gen_factory is the same as + // DBOptions::file_checksum_gen_factory. // // If verify_with_checksum is false, this function // checks that each file exists and that the size of the file matches our diff --git a/util/file_checksum_helper.h b/util/file_checksum_helper.h index d9a3c8e47..ce56f7353 100644 --- a/util/file_checksum_helper.h +++ b/util/file_checksum_helper.h @@ -49,8 +49,13 @@ class FileChecksumGenCrc32cFactory : public FileChecksumGenFactory { public: std::unique_ptr CreateFileChecksumGenerator( const FileChecksumGenContext& context) override { - return std::unique_ptr( - new FileChecksumGenCrc32c(context)); + if (context.requested_checksum_func_name.empty() || + context.requested_checksum_func_name == + kStandardDbFileChecksumFuncName) { + return std::unique_ptr( + new FileChecksumGenCrc32c(context)); + } + return nullptr; } const char* Name() const override { return "FileChecksumGenCrc32cFactory"; } diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index 413bdb6de..bcb618d8c 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -28,6 +28,7 @@ #include #include +#include "db/log_reader.h" #include "env/composite_env_wrapper.h" #include "file/filename.h" #include "file/sequence_file_reader.h" @@ -60,6 +61,22 @@ inline std::string ChecksumInt32ToHex(const uint32_t& checksum_value) { 
PutFixed32(&checksum_str, EndianSwapValue(checksum_value)); return ChecksumStrToHex(checksum_str); } +// Checks if the checksum function names are the same. Note that both the +// backup default checksum function and the db standard checksum function are +// crc32c although they have different names. So We treat the db standard +// checksum function name and the backup default checksum function name as +// the same name. +inline bool IsSameChecksumFunc(const std::string& dst_checksum_func_name, + const std::string& src_checksum_func_name) { + return (dst_checksum_func_name == src_checksum_func_name) || + ((dst_checksum_func_name == kDefaultBackupFileChecksumFuncName) && + (src_checksum_func_name == kStandardDbFileChecksumFuncName)) || + ((src_checksum_func_name == kDefaultBackupFileChecksumFuncName) && + (dst_checksum_func_name == kStandardDbFileChecksumFuncName)); +} +inline bool IsSstFile(const std::string& fname) { + return fname.length() > 4 && fname.rfind(".sst") == fname.length() - 4; +} } // namespace void BackupStatistics::IncrementNumberSuccessBackup() { @@ -166,11 +183,15 @@ class BackupEngineImpl : public BackupEngine { struct FileInfo { FileInfo(const std::string& fname, uint64_t sz, const std::string& checksum, - const std::string& id = "", const std::string& sid = "") + const std::string& custom_checksum, + const std::string& checksum_name, const std::string& id = "", + const std::string& sid = "") : refs(0), filename(fname), size(sz), checksum_hex(checksum), + custom_checksum_hex(custom_checksum), + checksum_func_name(checksum_name), db_id(id), db_session_id(sid) {} @@ -181,6 +202,8 @@ class BackupEngineImpl : public BackupEngine { const std::string filename; const uint64_t size; const std::string checksum_hex; + const std::string custom_checksum_hex; + const std::string checksum_func_name; // DB identities // db_id is obtained for potential usage in the future but not used // currently @@ -358,6 +381,78 @@ class BackupEngineImpl : public 
BackupEngine { return GetBackupMetaDir() + "/" + (tmp ? "." : "") + ROCKSDB_NAMESPACE::ToString(backup_id) + (tmp ? ".tmp" : ""); } + inline Status GetFileNameInfo(const std::string& file, + std::string& local_name, uint64_t& number, + FileType& type) const { + // 1. extract the filename + size_t last_slash = file.find_last_of('/'); + // file will either be shared/, shared_checksum/, + // shared_checksum/, shared_checksum/, + // or private// + assert(last_slash != std::string::npos); + local_name = file.substr(last_slash + 1); + + // if the file was in shared_checksum, extract the real file name + // in this case the file is __., + // _., or __. + if (file.substr(0, last_slash) == GetSharedChecksumDirRel()) { + local_name = GetFileFromChecksumFile(local_name); + } + + // 2. find the filetype + bool ok = ParseFileName(local_name, &number, &type); + if (!ok) { + return Status::Corruption("Backup corrupted: Fail to parse filename " + + local_name); + } + return Status::OK(); + } + inline bool HasCustomChecksumGenFactory() const { + return options_.file_checksum_gen_factory != nullptr; + } + // Returns nullptr if file_checksum_gen_factory is not set or + // file_checksum_gen_factory is not able to create a generator with + // name being requested_checksum_func_name + inline std::unique_ptr GetCustomChecksumGenerator( + const std::string& requested_checksum_func_name = "") const { + std::shared_ptr checksum_factory = + options_.file_checksum_gen_factory; + if (checksum_factory == nullptr) { + return nullptr; + } else { + FileChecksumGenContext gen_context; + gen_context.requested_checksum_func_name = requested_checksum_func_name; + return checksum_factory->CreateFileChecksumGenerator(gen_context); + } + } + // Set the checksum generator by the requested checksum function name + inline Status SetChecksumGenerator( + const std::string& requested_checksum_func_name, + std::unique_ptr& checksum_func) { + if (requested_checksum_func_name != 
kDefaultBackupFileChecksumFuncName) { + if (!HasCustomChecksumGenFactory()) { + // No custom checksum factory indicates users would like to use the + // backup default checksum function and accept the degraded data + // integrity checking + return Status::OK(); + } else { + checksum_func = + GetCustomChecksumGenerator(requested_checksum_func_name); + // we will use the default backup checksum function if the custom + // checksum function is the db standard checksum function but is not + // found in the checksum factory passed in; otherwise, we return + // Status::InvalidArgument() + if (checksum_func == nullptr && + requested_checksum_func_name != kStandardDbFileChecksumFuncName) { + return Status::InvalidArgument("Custom checksum function " + + requested_checksum_func_name + + " not found"); + } + } + } + // Default backup checksum function requested, or custom lookup resolved + return Status::OK(); + } // If size_limit == 0, there is no size limit, copy everything. // @@ -369,22 +464,47 @@ class BackupEngineImpl : public BackupEngine { const std::string& src, const std::string& dst, const std::string& contents, Env* src_env, Env* dst_env, const EnvOptions& src_env_options, bool sync, RateLimiter* rate_limiter, - uint64_t* size = nullptr, std::string* checksum_hex = nullptr, - uint64_t size_limit = 0, + const std::string& backup_checksum_func_name, uint64_t* size = nullptr, + std::string* checksum_hex = nullptr, + std::string* custom_checksum_hex = nullptr, uint64_t size_limit = 0, std::function progress_callback = []() {}); - Status CalculateChecksum(const std::string& src, Env* src_env, - const EnvOptions& src_env_options, - uint64_t size_limit, std::string* checksum_hex); + Status CalculateChecksum( + const std::string& src, Env* src_env, const EnvOptions& src_env_options, + uint64_t size_limit, std::string* checksum_hex, + const std::unique_ptr& checksum_func = nullptr, + std::string* custom_checksum_hex = nullptr); // Obtain db_id and db_session_id from 
the table properties of file_path Status GetFileDbIdentities(Env* src_env, const EnvOptions& src_env_options, const std::string& file_path, std::string* db_id, std::string* db_session_id); + Status GetFileChecksumsFromManifestInBackup(Env* src_env, + const BackupID& backup_id, + const BackupMeta* backup, + FileChecksumList* checksum_list); + + Status GetFileChecksumsFromManifest(Env* src_env, const std::string& abs_path, + FileChecksumList* checksum_list); + + Status VerifyFileWithCrc32c(Env* src_env, const BackupMeta* backup, + const std::string& rel_path); + + struct LogReporter : public log::Reader::Reporter { + Status* status; + virtual void Corruption(size_t /*bytes*/, const Status& s) override { + if (status->ok()) { + *status = s; + } + } + }; + struct CopyOrCreateResult { uint64_t size; std::string checksum_hex; + std::string custom_checksum_hex; + std::string checksum_func_name; std::string db_id; std::string db_session_id; Status status; @@ -408,6 +528,7 @@ class BackupEngineImpl : public BackupEngine { bool verify_checksum_after_work; std::string src_checksum_func_name; std::string src_checksum_hex; + std::string backup_checksum_func_name; std::string db_id; std::string db_session_id; @@ -424,6 +545,7 @@ class BackupEngineImpl : public BackupEngine { verify_checksum_after_work(false), src_checksum_func_name(kUnknownFileChecksumFuncName), src_checksum_hex(""), + backup_checksum_func_name(kUnknownFileChecksumFuncName), db_id(""), db_session_id("") {} @@ -449,6 +571,7 @@ class BackupEngineImpl : public BackupEngine { verify_checksum_after_work = o.verify_checksum_after_work; src_checksum_func_name = std::move(o.src_checksum_func_name); src_checksum_hex = std::move(o.src_checksum_hex); + backup_checksum_func_name = std::move(o.backup_checksum_func_name); db_id = std::move(o.db_id); db_session_id = std::move(o.db_session_id); return *this; @@ -463,6 +586,8 @@ class BackupEngineImpl : public BackupEngine { const std::string& _src_checksum_func_name = 
kUnknownFileChecksumFuncName, const std::string& _src_checksum_hex = "", + const std::string& _backup_checksum_func_name = + kUnknownFileChecksumFuncName, const std::string& _db_id = "", const std::string& _db_session_id = "") : src_path(std::move(_src_path)), dst_path(std::move(_dst_path)), @@ -477,6 +602,7 @@ class BackupEngineImpl : public BackupEngine { verify_checksum_after_work(_verify_checksum_after_work), src_checksum_func_name(_src_checksum_func_name), src_checksum_hex(_src_checksum_hex), + backup_checksum_func_name(_backup_checksum_func_name), db_id(_db_id), db_session_id(_db_session_id) {} }; @@ -858,33 +984,49 @@ Status BackupEngineImpl::Initialize() { result.status = CopyOrCreateFile( work_item.src_path, work_item.dst_path, work_item.contents, work_item.src_env, work_item.dst_env, work_item.src_env_options, - work_item.sync, work_item.rate_limiter, &result.size, - &result.checksum_hex, work_item.size_limit, - work_item.progress_callback); + work_item.sync, work_item.rate_limiter, + work_item.backup_checksum_func_name, &result.size, + &result.checksum_hex, &result.custom_checksum_hex, + work_item.size_limit, work_item.progress_callback); + result.checksum_func_name = work_item.backup_checksum_func_name; result.db_id = work_item.db_id; result.db_session_id = work_item.db_session_id; if (result.status.ok() && work_item.verify_checksum_after_work) { - // unknown checksum function name implies no db table file checksum in - // db manifest; work_item.verify_checksum_after_work being true means - // backup engine has calculated its crc32c checksum for the table - // file; therefore, we are able to compare the checksums. + // work_item.verify_checksum_after_work being true means backup engine + // has obtained its crc32c and/or custom checksum for the table file. + // Therefore, we can try to compare the checksums if possible. 
if (work_item.src_checksum_func_name == kUnknownFileChecksumFuncName || - work_item.src_checksum_func_name == kDbFileChecksumFuncName) { - if (work_item.src_checksum_hex != result.checksum_hex) { + IsSameChecksumFunc(result.checksum_func_name, + work_item.src_checksum_func_name)) { + std::string checksum_to_compare; + std::string checksum_func_name_used; + if (work_item.src_checksum_func_name == + kUnknownFileChecksumFuncName || + work_item.src_checksum_func_name == + kStandardDbFileChecksumFuncName) { + // kUnknownFileChecksumFuncName implies no table file checksums in + // db manifest, but we can compare using the crc32c checksum + checksum_to_compare = result.checksum_hex; + checksum_func_name_used = kStandardDbFileChecksumFuncName; + } else { + checksum_to_compare = result.custom_checksum_hex; + checksum_func_name_used = work_item.src_checksum_func_name; + } + if (work_item.src_checksum_hex != checksum_to_compare) { std::string checksum_info( "Expected checksum is " + work_item.src_checksum_hex + - " while computed checksum is " + result.checksum_hex); - result.status = - Status::Corruption("Checksum mismatch after copying to " + - work_item.dst_path + ": " + checksum_info); + " while computed checksum is " + checksum_to_compare); + result.status = Status::Corruption( + checksum_func_name_used + " mismatch after copying to " + + work_item.dst_path + ": " + checksum_info); } } else { std::string checksum_function_info( "Existing checksum function is " + work_item.src_checksum_func_name + " while provided checksum function is " + - kBackupFileChecksumFuncName); + result.checksum_func_name); ROCKS_LOG_INFO( options_.info_log, "Unable to verify checksum after copying to %s: %s\n", @@ -972,15 +1114,6 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( CheckpointImpl checkpoint(db); uint64_t sequence_number = 0; DBOptions db_options = db->GetDBOptions(); - FileChecksumGenFactory* db_checksum_factory = - db_options.file_checksum_gen_factory.get(); - const 
std::string kFileChecksumGenFactoryName = - "FileChecksumGenCrc32cFactory"; - bool compare_checksum = - db_checksum_factory != nullptr && - db_checksum_factory->Name() == kFileChecksumGenFactoryName - ? true - : false; EnvOptions src_raw_env_options(db_options); s = checkpoint.CreateCustomCheckpoint( db_options, @@ -1046,7 +1179,7 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( options.progress_callback, contents); } /* create_file_cb */, &sequence_number, options.flush_before_backup ? 0 : port::kMaxUint64, - compare_checksum); + db_options.file_checksum_gen_factory != nullptr); if (s.ok()) { new_backup->SetSequenceNumber(sequence_number); } @@ -1063,7 +1196,8 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( } if (item_status.ok()) { item_status = new_backup.get()->AddFile(std::make_shared( - item.dst_relative, result.size, result.checksum_hex, result.db_id, + item.dst_relative, result.size, result.checksum_hex, + result.custom_checksum_hex, result.checksum_func_name, result.db_id, result.db_session_id)); } if (!item_status.ok()) { @@ -1324,49 +1458,74 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options, DeleteChildren(db_dir); } + Status s; + // Try to obtain checksum info from backuped DB MANIFEST + // The checksum info will be used for validating the checksums of the table + // files after restoration, in addition to the default backup engine crc32c + // checksums. 
+ std::unique_ptr checksum_list(NewFileChecksumList()); + s = GetFileChecksumsFromManifestInBackup(backup_env_, backup_id, backup.get(), + checksum_list.get()); + if (!s.ok()) { + return s; + } + RateLimiter* rate_limiter = options_.restore_rate_limiter.get(); if (rate_limiter) { - copy_file_buffer_size_ = static_cast(rate_limiter->GetSingleBurstBytes()); + copy_file_buffer_size_ = + static_cast(rate_limiter->GetSingleBurstBytes()); } - Status s; std::vector restore_items_to_finish; for (const auto& file_info : backup->GetFiles()) { const std::string& file = file_info->filename; std::string dst; - // 1. extract the filename - size_t slash = file.find_last_of('/'); - // file will either be shared/, shared_checksum/, - // shared_checksum/, shared_checksum/, - // or private// - assert(slash != std::string::npos); - dst = file.substr(slash + 1); - - // if the file was in shared_checksum, extract the real file name - // in this case the file is __., - // _., or __. - if (file.substr(0, slash) == GetSharedChecksumDirRel()) { - dst = GetFileFromChecksumFile(dst); - } - - // 2. find the filetype uint64_t number; FileType type; - bool ok = ParseFileName(dst, &number, &type); - if (!ok) { - return Status::Corruption("Backup corrupted: Fail to parse filename " + - dst); + s = GetFileNameInfo(file, dst, number, type); + if (!s.ok()) { + return s; + } + + std::string src_checksum_func_name = kUnknownFileChecksumFuncName; + std::string src_checksum_str = kUnknownFileChecksum; + std::string src_checksum_hex; + bool has_manifest_checksum = false; + if (type == kTableFile) { + Status file_checksum_status = checksum_list->SearchOneFileChecksum( + number, &src_checksum_str, &src_checksum_func_name); + if (file_checksum_status.ok() && + src_checksum_str != kUnknownFileChecksum && + src_checksum_func_name != kUnknownFileChecksumFuncName) { + src_checksum_hex = ChecksumStrToHex(src_checksum_str); + has_manifest_checksum = true; + } } - // 3. 
Construct the final path + + // Construct the final path // kLogFile lives in wal_dir and all the rest live in db_dir dst = ((type == kLogFile) ? wal_dir : db_dir) + "/" + dst; ROCKS_LOG_INFO(options_.info_log, "Restoring %s to %s\n", file.c_str(), dst.c_str()); + + std::string backup_checksum_func_name = file_info->checksum_func_name; + std::unique_ptr checksum_func; + if (src_checksum_func_name != kUnknownFileChecksumFuncName) { + s = SetChecksumGenerator(src_checksum_func_name, checksum_func); + if (!s.ok()) { + return s; + } + if (checksum_func != nullptr) { + backup_checksum_func_name = checksum_func->Name(); + } + } CopyOrCreateWorkItem copy_or_create_work_item( GetAbsolutePath(file), dst, "" /* contents */, backup_env_, db_env_, EnvOptions() /* src_env_options */, false, rate_limiter, - 0 /* size_limit */); + 0 /* size_limit */, []() {} /* progress_callback */, + has_manifest_checksum, src_checksum_func_name, src_checksum_hex, + backup_checksum_func_name); RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item( copy_or_create_work_item.result.get_future(), file_info->checksum_hex); files_to_copy_or_create_.write(std::move(copy_or_create_work_item)); @@ -1384,7 +1543,11 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options, s = item_status; break; } else if (item.checksum_hex != result.checksum_hex) { - s = Status::Corruption("Checksum check failed"); + // Compare crc32c checksums (especially for non-table files) + std::string checksum_info("Expected checksum is " + item.checksum_hex + + " while computed checksum is " + + result.checksum_hex); + s = Status::Corruption("Crc32c checksum check failed: " + checksum_info); break; } } @@ -1423,6 +1586,17 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id, InsertPathnameToSizeBytes(abs_dir, backup_env_, &curr_abs_path_to_size); } + Status s; + std::unique_ptr checksum_list(NewFileChecksumList()); + if (verify_with_checksum) { + // Try to obtain checksum info from backuped DB 
MANIFEST + s = GetFileChecksumsFromManifestInBackup(backup_env_, backup_id, + backup.get(), checksum_list.get()); + if (!s.ok()) { + return s; + } + } + // For all files registered in backup for (const auto& file_info : backup->GetFiles()) { const auto abs_path = GetAbsolutePath(file_info->filename); @@ -1441,27 +1615,73 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id, } if (verify_with_checksum) { // verify file checksum - std::string checksum_hex; + // try setting checksum_func + std::unique_ptr checksum_func; + std::string src_checksum_func_name = kUnknownFileChecksumFuncName; + std::string src_checksum_str = kUnknownFileChecksum; + std::string src_checksum_hex; + if (IsSstFile(file_info->filename)) { + const std::string& file = file_info->filename; + std::string local_name; + uint64_t number; + FileType type; + s = GetFileNameInfo(file, local_name, number, type); + if (!s.ok()) { + return s; + } + assert(type == kTableFile); + + // Try to get checksum for the table file + Status file_checksum_status = checksum_list->SearchOneFileChecksum( + number, &src_checksum_str, &src_checksum_func_name); + if (file_checksum_status.ok() && + src_checksum_str != kUnknownFileChecksum && + src_checksum_func_name != kUnknownFileChecksumFuncName) { + s = SetChecksumGenerator(src_checksum_func_name, checksum_func); + if (!s.ok()) { + return s; + } + src_checksum_hex = ChecksumStrToHex(src_checksum_str); + } + } + ROCKS_LOG_INFO(options_.info_log, "Verifying %s checksum...\n", abs_path.c_str()); + std::string checksum_hex; + std::string custom_checksum_hex; - CalculateChecksum(abs_path, backup_env_, EnvOptions(), 0 /* size_limit */, - &checksum_hex); + s = CalculateChecksum(abs_path, backup_env_, EnvOptions(), + 0 /* size_limit */, &checksum_hex, checksum_func, + &custom_checksum_hex); + if (!s.ok()) { + // Report the read/IO failure itself rather than a spurious mismatch + return s; + } if (file_info->checksum_hex != checksum_hex) { std::string checksum_info( "Expected checksum is " + file_info->checksum_hex + " while computed checksum is " + checksum_hex); - return Status::Corruption("File corrupted: Checksum mismatch for " + + return
Status::Corruption("File corrupted: crc32c mismatch for " + abs_path + ": " + checksum_info); } + if (checksum_func != nullptr && src_checksum_hex != custom_checksum_hex) { + std::string checksum_info("Expected checksum is " + src_checksum_hex + + " while computed checksum is " + + custom_checksum_hex); + return Status::Corruption("File corrupted: " + src_checksum_func_name + + " mismatch for " + abs_path + ": " + + checksum_info); + } } } + return Status::OK(); } Status BackupEngineImpl::CopyOrCreateFile( const std::string& src, const std::string& dst, const std::string& contents, Env* src_env, Env* dst_env, const EnvOptions& src_env_options, bool sync, - RateLimiter* rate_limiter, uint64_t* size, std::string* checksum_hex, + RateLimiter* rate_limiter, const std::string& backup_checksum_func_name, + uint64_t* size, std::string* checksum_hex, std::string* custom_checksum_hex, uint64_t size_limit, std::function progress_callback) { assert(src.empty() != contents.empty()); Status s; @@ -1475,6 +1690,13 @@ Status BackupEngineImpl::CopyOrCreateFile( } uint32_t checksum_value = 0; + // Get custom checksum function + std::unique_ptr checksum_func; + s = SetChecksumGenerator(backup_checksum_func_name, checksum_func); + if (!s.ok()) { + return s; + } + // Check if size limit is set. 
if not, set it to very big number if (size_limit == 0) { size_limit = std::numeric_limits::max(); @@ -1529,6 +1751,10 @@ Status BackupEngineImpl::CopyOrCreateFile( if (checksum_hex != nullptr) { checksum_value = crc32c::Extend(checksum_value, data.data(), data.size()); } + if (checksum_func != nullptr && custom_checksum_hex != nullptr) { + checksum_func->Update(data.data(), data.size()); + } + s = dest_writer->Append(data); if (rate_limiter != nullptr) { rate_limiter->Request(data.size(), Env::IO_LOW, nullptr /* stats */, @@ -1541,10 +1767,14 @@ Status BackupEngineImpl::CopyOrCreateFile( } } while (s.ok() && contents.empty() && data.size() > 0 && size_limit > 0); - // Convert uint32_t checksum to hex checksum if (checksum_hex != nullptr) { + // Convert uint32_t checksum to hex checksum checksum_hex->assign(ChecksumInt32ToHex(checksum_value)); } + if (checksum_func != nullptr && custom_checksum_hex != nullptr) { + checksum_func->Finalize(); + custom_checksum_hex->assign(ChecksumStrToHex(checksum_func->GetChecksum())); + } if (s.ok() && sync) { s = dest_writer->Sync(false); @@ -1572,27 +1802,50 @@ Status BackupEngineImpl::AddBackupFileWorkItem( std::string dst_relative_tmp; Status s; std::string checksum_hex; + std::string custom_checksum_hex; + // The function name of backup checksum function. + std::string backup_checksum_func_name = kDefaultBackupFileChecksumFuncName; std::string db_id; std::string db_session_id; // whether the checksum for a table file is available bool has_checksum = false; - // Whenever a default checksum function name is passed in, we will compares - // the corresponding checksum values after copying. Note that only table files - // may have a known checksum function name passed in. + // Set up the custom checksum function. + // A nullptr checksum_func indicates the default backup checksum function + // will be used. If checksum_func is not nullptr, then both the default + // backup checksum function and checksum_func will be used. 
+ std::unique_ptr checksum_func; + if (src_checksum_func_name != kUnknownFileChecksumFuncName) { + // DB files have checksum functions + s = SetChecksumGenerator(src_checksum_func_name, checksum_func); + if (!s.ok()) { + return s; + } + if (checksum_func != nullptr) { + backup_checksum_func_name = checksum_func->Name(); + } + } + + // Whenever the db checksum function name matches the backup engine custom + // checksum function name, we will compare the checksum values after copying. + // Note that only table files may have a known checksum name passed in. // - // If no default checksum function name is passed in and db session id is not + // If the checksum function names do not match and db session id is not // available, we will calculate the checksum *before* copying in two cases // (we always calcuate checksums when copying or creating for any file types): // a) share_files_with_checksum is true and file type is table; // b) share_table_files is true and the file exists already. // - // Step 0: Check if default checksum function name is passed in - if (kDbFileChecksumFuncName == src_checksum_func_name) { + // Step 0: Check if a known checksum function name is passed in + if (IsSameChecksumFunc(backup_checksum_func_name, src_checksum_func_name)) { if (src_checksum_str == kUnknownFileChecksum) { return Status::Aborted("Unknown checksum value for " + fname); } - checksum_hex = ChecksumStrToHex(src_checksum_str); + if (checksum_func == nullptr) { + checksum_hex = ChecksumStrToHex(src_checksum_str); + } else { + custom_checksum_hex = ChecksumStrToHex(src_checksum_str); + } has_checksum = true; } @@ -1611,7 +1864,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem( // the shared_checksum directory. 
if (!has_checksum && db_session_id.empty()) { s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, - size_limit, &checksum_hex); + size_limit, &checksum_hex, checksum_func, + &custom_checksum_hex); if (!s.ok()) { return s; } @@ -1630,8 +1884,14 @@ Status BackupEngineImpl::AddBackupFileWorkItem( // shared_checksum/_.sst // Otherwise, dst_relative is of the form // shared_checksum/__.sst + // + // Also, we display custom checksums in the name if possible. dst_relative = GetSharedFileWithChecksum( - dst_relative, has_checksum, checksum_hex, size_bytes, db_session_id); + dst_relative, has_checksum, + checksum_func == nullptr || !UseSessionId(db_session_id) + ? checksum_hex + : custom_checksum_hex, + size_bytes, db_session_id); dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true); dst_relative = GetSharedFileWithChecksumRel(dst_relative, false); } else if (shared) { @@ -1698,7 +1958,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem( // file exists and referenced if (!has_checksum) { s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, - size_limit, &checksum_hex); + size_limit, &checksum_hex, checksum_func, + &custom_checksum_hex); if (!s.ok()) { return s; } @@ -1717,6 +1978,11 @@ Status BackupEngineImpl::AddBackupFileWorkItem( fname.c_str(), checksum_hex.c_str(), size_bytes); } } + if (checksum_func != nullptr) { + ROCKS_LOG_INFO(options_.info_log, "%s checksum is %s", + backup_checksum_func_name.c_str(), + custom_checksum_hex.c_str()); + } } else if (backuped_file_infos_.find(dst_relative) == backuped_file_infos_.end() && !same_path) { @@ -1735,7 +2001,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem( "%s already present, calculate checksum", fname.c_str()); if (!has_checksum) { s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, - size_limit, &checksum_hex); + size_limit, &checksum_hex, checksum_func, + &custom_checksum_hex); if (!s.ok()) { return s; } @@ -1753,7 +2020,8 @@ Status 
BackupEngineImpl::AddBackupFileWorkItem( src_dir.empty() ? "" : src_dir + fname, *copy_dest_path, contents, db_env_, backup_env_, src_env_options, options_.sync, rate_limiter, size_limit, progress_callback, has_checksum, src_checksum_func_name, - checksum_hex, db_id, db_session_id); + checksum_func == nullptr ? checksum_hex : custom_checksum_hex, + backup_checksum_func_name, db_id, db_session_id); BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item( copy_or_create_work_item.result.get_future(), shared, need_to_copy, backup_env_, temp_dest_path, final_dest_path, dst_relative); @@ -1769,6 +2037,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem( result.status = s; result.size = size_bytes; result.checksum_hex = std::move(checksum_hex); + result.custom_checksum_hex = std::move(custom_checksum_hex); + result.checksum_func_name = std::move(backup_checksum_func_name); result.db_id = std::move(db_id); result.db_session_id = std::move(db_session_id); promise_result.set_value(std::move(result)); @@ -1776,14 +2046,16 @@ Status BackupEngineImpl::AddBackupFileWorkItem( return s; } -Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env, - const EnvOptions& src_env_options, - uint64_t size_limit, - std::string* checksum_hex) { +Status BackupEngineImpl::CalculateChecksum( + const std::string& src, Env* src_env, const EnvOptions& src_env_options, + uint64_t size_limit, std::string* checksum_hex, + const std::unique_ptr& checksum_func, + std::string* custom_checksum_hex) { if (checksum_hex == nullptr) { - return Status::Aborted("Checksum pointer is null"); + return Status::InvalidArgument("Checksum pointer is null"); } uint32_t checksum_value = 0; + if (size_limit == 0) { size_limit = std::numeric_limits::max(); } @@ -1812,10 +2084,18 @@ Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env, } size_limit -= data.size(); + checksum_value = crc32c::Extend(checksum_value, data.data(), data.size()); + if (checksum_func != 
nullptr && custom_checksum_hex != nullptr) { + checksum_func->Update(data.data(), data.size()); + } } while (data.size() > 0 && size_limit > 0); checksum_hex->assign(ChecksumInt32ToHex(checksum_value)); + if (checksum_func != nullptr && custom_checksum_hex != nullptr) { + checksum_func->Finalize(); + custom_checksum_hex->assign(ChecksumStrToHex(checksum_func->GetChecksum())); + } return s; } @@ -1873,6 +2153,152 @@ Status BackupEngineImpl::GetFileDbIdentities(Env* src_env, return s; } } +Status BackupEngineImpl::GetFileChecksumsFromManifestInBackup( + Env* src_env, const BackupID& backup_id, const BackupMeta* backup, + FileChecksumList* checksum_list) { + if (checksum_list == nullptr) { + return Status::InvalidArgument("checksum_list is nullptr"); + } + + checksum_list->reset(); + Status s; + + // Read CURRENT file to get the latest DB MANIFEST filename in backup_id + // and then read the the MANIFEST file to obtain the checksum info stored + // in the file. + std::string current_rel_path = + GetPrivateFileRel(backup_id, false /* tmp */, "CURRENT"); + s = VerifyFileWithCrc32c(src_env, backup, current_rel_path); + if (!s.ok()) { + return s; + } + + std::string manifest_filename; + s = ReadFileToString(src_env, GetAbsolutePath(current_rel_path), + &manifest_filename); + if (!s.ok()) { + return s; + } + // Remove tailing '\n' if any + while (!manifest_filename.empty() && manifest_filename.back() == '\n') { + manifest_filename.pop_back(); + } + + std::string manifest_rel_path = + GetPrivateFileRel(backup_id, false /* tmp */, manifest_filename); + s = VerifyFileWithCrc32c(src_env, backup, manifest_rel_path); + if (!s.ok()) { + return s; + } + + s = GetFileChecksumsFromManifest(src_env, GetAbsolutePath(manifest_rel_path), + checksum_list); + return s; +} + +Status BackupEngineImpl::GetFileChecksumsFromManifest( + Env* src_env, const std::string& abs_path, + FileChecksumList* checksum_list) { + if (checksum_list == nullptr) { + return 
Status::InvalidArgument("checksum_list is nullptr"); + } + + checksum_list->reset(); + Status s; + + std::unique_ptr file_reader; + { + std::unique_ptr file; + const std::shared_ptr& fs = src_env->GetFileSystem(); + s = fs->NewSequentialFile(abs_path, + fs->OptimizeForManifestRead(FileOptions()), &file, + nullptr /* dbg */); + if (!s.ok()) { + return s; + } + file_reader.reset(new SequentialFileReader(std::move(file), abs_path)); + } + + LogReporter reporter; + reporter.status = &s; + log::Reader reader(nullptr, std::move(file_reader), &reporter, + true /* checksum */, 0 /* log_number */); + Slice record; + std::string scratch; + // Set of column families initialized with default CF + std::unordered_set cf_set = {0}; + while (reader.ReadRecord(&record, &scratch) && s.ok()) { + VersionEdit edit; + s = edit.DecodeFrom(record); + if (!s.ok()) { + break; + } + // Check current CF status + uint32_t column_family = edit.GetColumnFamily(); + auto cf_set_itr = cf_set.find(column_family); + bool cf_exist = (cf_set_itr != cf_set.end()); + if (edit.IsColumnFamilyAdd()) { + if (cf_exist) { + s = Status::Corruption("Manifest adding the same column family twice"); + break; + } + cf_set.insert(column_family); + } else if (edit.IsColumnFamilyDrop()) { + if (!cf_exist) { + s = Status::Corruption( + "Manifest dropping non-existing column family: " + + ToString(column_family)); + break; + } + cf_set.erase(cf_set_itr); + } else { + if (!cf_exist) { + s = Status::Corruption("Manifest referencing unknown column family: " + + ToString(column_family)); + break; + } + assert(cf_set.find(column_family) != cf_set.end()); + + // Remove the deleted files from the checksum_list + for (const auto& deleted_file : edit.GetDeletedFiles()) { + checksum_list->RemoveOneFileChecksum(deleted_file.second); + } + + // Add the new files to the checksum_list + for (const auto& new_file : edit.GetNewFiles()) { + checksum_list->InsertOneFileChecksum( + new_file.second.fd.GetNumber(), 
new_file.second.file_checksum, + new_file.second.file_checksum_func_name); + } + } + } + return s; +} + +Status BackupEngineImpl::VerifyFileWithCrc32c(Env* src_env, + const BackupMeta* backup, + const std::string& rel_path) { + const std::shared_ptr file_info = backup->GetFile(rel_path); + if (file_info == nullptr) { + return Status::Corruption(rel_path + " is missing"); + } + + std::string abs_path = GetAbsolutePath(rel_path); + std::string expected_checksum = file_info->checksum_hex; + std::string actual_checksum; + Status s = CalculateChecksum(abs_path, src_env, EnvOptions(), + 0 /* size_limit */, &actual_checksum); + if (!s.ok()) { + return s; + } + if (actual_checksum != expected_checksum) { + std::string checksum_info("Expected checksum is " + expected_checksum + + " while computed checksum is " + actual_checksum); + return Status::Corruption("crc32c mismatch for " + rel_path + ": " + + checksum_info); + } + return s; +} void BackupEngineImpl::DeleteChildren(const std::string& dir, uint32_t file_type_filter) { @@ -2047,6 +2473,13 @@ Status BackupEngineImpl::BackupMeta::AddFile( return Status::Corruption( "Checksum mismatch for existing backup file. Delete old backups and " "try again."); + } else if (IsSameChecksumFunc(itr->second->checksum_func_name, + file_info->checksum_func_name) && + itr->second->custom_checksum_hex != + file_info->custom_checksum_hex) { + return Status::Corruption( + "Custom checksum mismatch for existing backup file. 
Delete old " + "backups and try again."); } ++itr->second->refs; // increase refcount if already present } @@ -2160,13 +2593,14 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile( } uint32_t checksum_value = 0; + std::string checksum_func_name = kUnknownFileChecksumFuncName; if (line.starts_with(checksum_prefix)) { line.remove_prefix(checksum_prefix.size()); - checksum_value = static_cast( - strtoul(line.data(), nullptr, 10)); + checksum_func_name = kDefaultBackupFileChecksumFuncName; + checksum_value = static_cast(strtoul(line.data(), nullptr, 10)); if (line != ROCKSDB_NAMESPACE::ToString(checksum_value)) { - return Status::Corruption("Invalid checksum value for " + filename + - " in " + meta_filename_); + return Status::Corruption("Invalid crc32c checksum value for " + + filename + " in " + meta_filename_); } } else { return Status::Corruption("Unknown checksum type for " + filename + @@ -2174,7 +2608,8 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile( } files.emplace_back( - new FileInfo(filename, size, ChecksumInt32ToHex(checksum_value))); + new FileInfo(filename, size, ChecksumInt32ToHex(checksum_value), + "" /* custom_checksum_hex */, checksum_func_name)); } if (s.ok() && data.size() > 0) { @@ -2257,7 +2692,7 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) { len + file->filename.length() + snprintf(writelen_temp, sizeof(writelen_temp), " crc32 %u\n", ChecksumHexToInt32(file->checksum_hex)); - const char *const_write = writelen_temp; + const char* const_write = writelen_temp; if (newlen >= buf_size) { backup_meta_file->Append(Slice(buf.get(), len)); buf.reset(); diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 6a706ff76..496f1e9bc 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -38,6 +38,148 @@ namespace ROCKSDB_NAMESPACE { namespace { +class DummyFileChecksumGen : public FileChecksumGenerator { + public: + explicit 
DummyFileChecksumGen(const FileChecksumGenContext& /* context */, + bool state) { + if (state) { + checksum_ = 0; + } else { + checksum_ = 1; + } + } + + void Update(const char* /* data */, size_t /* n */) override {} + + void Finalize() override { + assert(checksum_str_.empty()); + // Store as big endian raw bytes + PutFixed32(&checksum_str_, EndianSwapValue(checksum_)); + } + + std::string GetChecksum() const override { + assert(!checksum_str_.empty()); + return checksum_str_; + } + + const char* Name() const override { return "DummyFileChecksum"; } + + private: + uint32_t checksum_; + std::string checksum_str_; +}; + +class DummyFileChecksumGenFactory : public FileChecksumGenFactory { + public: + explicit DummyFileChecksumGenFactory(bool state = false) : state_(state) {} + + std::unique_ptr CreateFileChecksumGenerator( + const FileChecksumGenContext& context) override { + if (context.requested_checksum_func_name.empty() || + context.requested_checksum_func_name == "DummyFileChecksum") { + return std::unique_ptr( + new DummyFileChecksumGen(context, state_)); + } else { + return nullptr; + } + } + + const char* Name() const override { return "DummyFileChecksumGenFactory"; } + + private: + bool state_; +}; + +class FileHash32Gen : public FileChecksumGenerator { + public: + explicit FileHash32Gen(const FileChecksumGenContext& /*context*/) { + checksum_ = 0; + } + + void Update(const char* data, size_t n) override { content_.append(data, n); } + + void Finalize() override { + assert(checksum_str_.empty()); + const char* str = content_.c_str(); + checksum_ = Hash(str, strlen(str), 1); + // Store as big endian raw bytes + PutFixed32(&checksum_str_, EndianSwapValue(checksum_)); + } + + std::string GetChecksum() const override { + assert(!checksum_str_.empty()); + return checksum_str_; + } + + const char* Name() const override { return "FileHash32"; } + + private: + std::string content_; + uint32_t checksum_; + std::string checksum_str_; +}; + +class FileHash64Gen : 
public FileChecksumGenerator { + public: + explicit FileHash64Gen(const FileChecksumGenContext& /*context*/) { + checksum_ = 0; + } + + void Update(const char* data, size_t n) override { content_.append(data, n); } + + void Finalize() override { + assert(checksum_str_.empty()); + const char* str = content_.c_str(); + checksum_ = Hash64(str, strlen(str), 1); + // Store as big endian raw bytes + PutFixed64(&checksum_str_, EndianSwapValue(checksum_)); + } + + std::string GetChecksum() const override { + assert(!checksum_str_.empty()); + return checksum_str_; + } + + const char* Name() const override { return "FileHash64"; } + + private: + std::string content_; + uint64_t checksum_; + std::string checksum_str_; +}; + +class FileHash32GenFactory : public FileChecksumGenFactory { + public: + std::unique_ptr CreateFileChecksumGenerator( + const FileChecksumGenContext& context) override { + if (context.requested_checksum_func_name.empty() || + context.requested_checksum_func_name == "FileHash32") { + return std::unique_ptr(new FileHash32Gen(context)); + } else { + return nullptr; + } + } + + const char* Name() const override { return "FileHash32GenFactory"; } +}; + +class FileHashGenFactory : public FileChecksumGenFactory { + public: + std::unique_ptr CreateFileChecksumGenerator( + const FileChecksumGenContext& context) override { + if (context.requested_checksum_func_name.empty() || + context.requested_checksum_func_name == "FileHash64") { + return std::unique_ptr(new FileHash64Gen(context)); + } else if (context.requested_checksum_func_name == "FileHash32") { + return std::unique_ptr(new FileHash32Gen(context)); + } else { + return nullptr; + } + } + + const char* Name() const override { return "FileHashGenFactory"; } +}; + class DummyDB : public StackableDB { public: /* implicit */ @@ -634,8 +776,8 @@ class BackupableDBTest : public testing::Test { backup_engine_.reset(); } - void OpenBackupEngine() { - backupable_options_->destroy_old_data = false; + void 
OpenBackupEngine(bool destroy_old_data = false) { + backupable_options_->destroy_old_data = destroy_old_data; BackupEngine* backup_engine; ASSERT_OK(BackupEngine::Open(test_db_env_.get(), *backupable_options_, &backup_engine)); @@ -766,6 +908,204 @@ class BackupableDBTestWithParam : public BackupableDBTest, } }; +TEST_F(BackupableDBTest, DbAndBackupSameCustomChecksum) { + const int keys_iteration = 5000; + options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); + // backup uses it default crc32c + for (const auto& sopt : kAllShareOptions) { + OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt); + FillDB(db_.get(), 0, keys_iteration); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); + ASSERT_OK(backup_engine_->VerifyBackup(1, false)); + ASSERT_OK(backup_engine_->VerifyBackup(1, true)); + CloseDBAndBackupEngine(); + AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1); + // delete old data + DestroyDB(dbname_, options_); + } + + // backup uses db crc32c + backupable_options_->file_checksum_gen_factory = + GetFileChecksumGenCrc32cFactory(); + for (const auto& sopt : kAllShareOptions) { + OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt); + FillDB(db_.get(), 0, keys_iteration); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); + ASSERT_OK(backup_engine_->VerifyBackup(1, false)); + ASSERT_OK(backup_engine_->VerifyBackup(1, true)); + CloseDBAndBackupEngine(); + AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1); + // delete old data + DestroyDB(dbname_, options_); + } + + std::shared_ptr hash_factory = + std::make_shared(); + options_.file_checksum_gen_factory = hash_factory; + backupable_options_->file_checksum_gen_factory = hash_factory; + for (const auto& sopt : kAllShareOptions) { + OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt); + FillDB(db_.get(), 0, keys_iteration); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); + 
ASSERT_OK(backup_engine_->VerifyBackup(1, false)); + ASSERT_OK(backup_engine_->VerifyBackup(1, true)); + CloseDBAndBackupEngine(); + AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1); + // delete old data + DestroyDB(dbname_, options_); + } + + // Mimic a checksum mismatch for custom checksum function by using a dummy + // checksum function with a state + std::shared_ptr dummy_factory_0 = + std::make_shared(false); + std::shared_ptr dummy_factory_1 = + std::make_shared(true); + FileChecksumGenContext context; + // Both factories have the same generator name + std::string dummy_checksum_function_name = + dummy_factory_0->CreateFileChecksumGenerator(context)->Name(); + options_.file_checksum_gen_factory = dummy_factory_0; + for (const auto& sopt : kAllShareOptions) { + backupable_options_->file_checksum_gen_factory = dummy_factory_1; + OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt); + FillDB(db_.get(), 0, keys_iteration); + // DB and backup engine do not have the same custom checksum function + // "state" + Status s = backup_engine_->CreateNewBackup(db_.get()); + ASSERT_NOK(s); + ASSERT_TRUE( + s.ToString().find("Corruption: " + dummy_checksum_function_name + + " mismatch") != std::string::npos); + CloseBackupEngine(); + + // Change custom checksum function and try again + backupable_options_->file_checksum_gen_factory = dummy_factory_0; + OpenBackupEngine(true /* destroy_old_data */); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); + ASSERT_OK(backup_engine_->VerifyBackup(1, true)); + ASSERT_OK(backup_engine_->RestoreDBFromBackup(1, dbname_, dbname_)); + CloseBackupEngine(); + + // Try verifying or restoring a backup using a different custom checksum + // function "state" + backupable_options_->file_checksum_gen_factory = dummy_factory_1; + OpenBackupEngine(false /* destroy_old_data */); + ASSERT_NOK(backup_engine_->VerifyBackup(1, true)); + ASSERT_NOK(backup_engine_->RestoreDBFromBackup(1, dbname_, dbname_)); + 
CloseDBAndBackupEngine(); + + // delete old data + DestroyDB(dbname_, options_); + } +} + +TEST_F(BackupableDBTest, CustomChecksumTransition) { + const int keys_iteration = 5000; + std::shared_ptr hash32_factory = + std::make_shared(); + std::shared_ptr hash_factory = + std::make_shared(); + for (const auto& sopt : kAllShareOptions) { + // 1) with one custom checksum function (FileHash32GenFactory) for both + // db and backup + int i = 0; + options_.file_checksum_gen_factory = hash32_factory; + backupable_options_->file_checksum_gen_factory = hash32_factory; + // open with old backup + OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt); + FillDB(db_.get(), 0, keys_iteration * (i + 1)); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); + // verify the backup with checksum + ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true)); + CloseDBAndBackupEngine(); + AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1), + keys_iteration * (i + 2)); + + // 2) with two custom checksum functions (FileHashGenFactory) for db + // but one custom checksum function (FileHash32GenFactory) for backup + ++i; + options_.file_checksum_gen_factory = hash_factory; + backupable_options_->file_checksum_gen_factory = hash32_factory; + // open with old backup + OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */, + sopt); + FillDB(db_.get(), 0, keys_iteration * (i + 1)); + // note that the checksum factory for backup does not know the custom + // checksum function used in the db + ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get())); + // but it knows the custom checksum function for the older backup + ASSERT_OK(backup_engine_->VerifyBackup(i, true)); + // reset the factory to nullptr and try again + CloseBackupEngine(); + backupable_options_->file_checksum_gen_factory = nullptr; + OpenBackupEngine(); + ASSERT_NOK(backup_engine_->DeleteBackup(i + 1)); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); + 
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true)); + CloseDBAndBackupEngine(); + AssertBackupConsistency(i, 0, keys_iteration * i, keys_iteration * (i + 1)); + AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1), + keys_iteration * (i + 2)); + // Now set the factory to the same as the one used in the db + backupable_options_->file_checksum_gen_factory = hash_factory; + OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */, + sopt); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); + CloseBackupEngine(); + // Say, we accidentally change the factory + backupable_options_->file_checksum_gen_factory = hash32_factory; + OpenBackupEngine(); + ASSERT_NOK(backup_engine_->VerifyBackup(i + 2, true)); + ASSERT_NOK(backup_engine_->RestoreDBFromBackup(i + 2, dbname_, dbname_)); + ASSERT_OK(backup_engine_->DeleteBackup(i + 2)); + CloseDBAndBackupEngine(); + + // 3) with one custom checksum function (FileHash32GenFactory) for db + // but two custom checksum functions (FileHashGenFactory) for backup + // note that the checksum factory for backup does know the checksum + // function in the db + ++i; + options_.file_checksum_gen_factory = hash32_factory; + backupable_options_->file_checksum_gen_factory = hash_factory; + // open with old backup + OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */, + sopt); + FillDB(db_.get(), 0, keys_iteration * (i + 1)); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); + + ASSERT_OK(backup_engine_->VerifyBackup(i - 1, true)); + ASSERT_OK(backup_engine_->VerifyBackup(i, true)); + ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true)); + CloseDBAndBackupEngine(); + AssertBackupConsistency(i - 1, 0, keys_iteration * (i - 1), + keys_iteration * i); + AssertBackupConsistency(i, 0, keys_iteration * i, keys_iteration * (i + 1)); + AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1), + keys_iteration * (i + 2)); + + // 4) no custom checksums + ++i; + options_.file_checksum_gen_factory = 
nullptr; + backupable_options_->file_checksum_gen_factory = nullptr; + OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */, + sopt); + FillDB(db_.get(), 0, keys_iteration * (i + 1)); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); + for (int j = 0; j <= i; ++j) { + ASSERT_OK(backup_engine_->VerifyBackup(j + 1, true)); + } + CloseDBAndBackupEngine(); + for (int j = 0; j <= i; ++j) { + AssertBackupConsistency(j + 1, 0, keys_iteration * (j + 1), + keys_iteration * (j + 2)); + } + + // delete old data + DestroyDB(dbname_, options_); + } +} + // This test verifies that the verifyBackup method correctly identifies // invalid backups TEST_P(BackupableDBTestWithParam, VerifyBackup) {