BackupEngine supports custom file checksums (#7085)

Summary:
A new option `std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory` is added to `BackupableDBOptions`. This allows custom checksum functions to be used for creating, verifying, or restoring backups.

Tests are added.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7085

Test Plan: Passed make check

Reviewed By: pdillinger

Differential Revision: D22390756

Pulled By: gg814

fbshipit-source-id: 3b7756ca444c2129844536b91c3ca09f53b6248f
main
Zitan Chen 4 years ago committed by Facebook GitHub Bot
parent 76609cd38a
commit b578ca2e4d
  1. 4
      HISTORY.md
  2. 4
      db/version_edit.h
  3. 9
      include/rocksdb/file_checksum.h
  4. 52
      include/rocksdb/utilities/backupable_db.h
  5. 5
      util/file_checksum_helper.h
  6. 599
      utilities/backupable/backupable_db.cc
  7. 344
      utilities/backupable/backupable_db_test.cc

@ -3,6 +3,10 @@
### Bug fixes ### Bug fixes
* Fix a performance regression introduced in 6.4 that makes a upper bound check for every Next() even if keys are within a data block that is within the upper bound. * Fix a performance regression introduced in 6.4 that makes a upper bound check for every Next() even if keys are within a data block that is within the upper bound.
### New Features
* A new option `std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory` is added to `BackupableDBOptions`. The default value for this option is `nullptr`. If this option is null, the default backup engine checksum function (crc32c) will be used for creating, verifying, or restoring backups. If it is not null and is set to the DB custom checksum factory, the custom checksum function used in DB will also be used for creating, verifying, or restoring backups, in addition to the default checksum function (crc32c). If it is not null and is set to a custom checksum factory different than the DB custom checksum factory (which may be null), BackupEngine will return `Status::InvalidArgument()`.
* A new field `std::string requested_checksum_func_name` is added to `FileChecksumGenContext`, which enables the checksum factory to create generators for a suite of different functions.
## 6.12 (2020-07-28) ## 6.12 (2020-07-28)
### Public API Change ### Public API Change

@ -486,6 +486,10 @@ class VersionEdit {
return is_column_family_add_ || is_column_family_drop_; return is_column_family_add_ || is_column_family_drop_;
} }
bool IsColumnFamilyAdd() const { return is_column_family_add_; }
bool IsColumnFamilyDrop() const { return is_column_family_drop_; }
void MarkAtomicGroup(uint32_t remaining_entries) { void MarkAtomicGroup(uint32_t remaining_entries) {
is_in_atomic_group_ = true; is_in_atomic_group_ = true;
remaining_entries_ = remaining_entries; remaining_entries_ = remaining_entries;

@ -22,9 +22,18 @@ namespace ROCKSDB_NAMESPACE {
constexpr char kUnknownFileChecksum[] = ""; constexpr char kUnknownFileChecksum[] = "";
// The unknown sst file checksum function name. // The unknown sst file checksum function name.
constexpr char kUnknownFileChecksumFuncName[] = "Unknown"; constexpr char kUnknownFileChecksumFuncName[] = "Unknown";
// The standard DB file checksum function name.
// This is the name of the checksum function returned by
// GetFileChecksumGenCrc32cFactory();
constexpr char kStandardDbFileChecksumFuncName[] = "FileChecksumCrc32c";
struct FileChecksumGenContext { struct FileChecksumGenContext {
std::string file_name; std::string file_name;
// The name of the requested checksum generator.
// Checksum factories may use or ignore requested_checksum_func_name,
// and checksum factories written before this field was available are still
// compatible.
std::string requested_checksum_func_name;
}; };
// FileChecksumGenerator is the class to generates the checksum value // FileChecksumGenerator is the class to generates the checksum value

@ -24,10 +24,8 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
// The default DB file checksum function name.
constexpr char kDbFileChecksumFuncName[] = "FileChecksumCrc32c";
// The default BackupEngine file checksum function name. // The default BackupEngine file checksum function name.
constexpr char kBackupFileChecksumFuncName[] = "crc32c"; constexpr char kDefaultBackupFileChecksumFuncName[] = "crc32c";
// BackupTableNameOption describes possible naming schemes for backup // BackupTableNameOption describes possible naming schemes for backup
// table file names when the table files are stored in the shared_checksum // table file names when the table files are stored in the shared_checksum
@ -165,6 +163,33 @@ struct BackupableDBOptions {
// db_session_id as a fallback. // db_session_id as a fallback.
BackupTableNameOption share_files_with_checksum_naming; BackupTableNameOption share_files_with_checksum_naming;
// Option for custom checksum functions.
// When this option is nullptr, BackupEngine will use its default crc32c as
// the checksum function.
//
// When it is not nullptr, BackupEngine will try to find in the factory the
// checksum function that DB used to calculate the file checksums. If such a
// function is found, BackupEngine will use it to create, verify, or restore
// backups, in addition to the default crc32c checksum function. If such a
// function is not found, BackupEngine will return Status::InvalidArgument().
// Therefore, this option comes into effect only if DB has a custom checksum
// factory and this option is set to the same factory.
//
//
// Note: If share_files_with_checksum and share_table_files are true,
// the <checksum> that appears in the table filenames will be the custom
// checksum value if db session ids are available (namely, the table file
// naming option is kOptionalChecksumAndDbSessionId and the db session ids
// obtained from the table files are nonempty).
//
// Note: We do not require the same setting to this option for backup
// restoration or verification as was set during backup creation but we
// strongly recommend setting it to the same as the DB file checksum function
// for all BackupEngine interactions when practical.
//
// Default: nullptr
std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory;
void Dump(Logger* logger) const; void Dump(Logger* logger) const;
explicit BackupableDBOptions( explicit BackupableDBOptions(
@ -176,7 +201,9 @@ struct BackupableDBOptions {
uint64_t _callback_trigger_interval_size = 4 * 1024 * 1024, uint64_t _callback_trigger_interval_size = 4 * 1024 * 1024,
int _max_valid_backups_to_open = INT_MAX, int _max_valid_backups_to_open = INT_MAX,
BackupTableNameOption _share_files_with_checksum_naming = BackupTableNameOption _share_files_with_checksum_naming =
kOptionalChecksumAndDbSessionId) kOptionalChecksumAndDbSessionId,
std::shared_ptr<FileChecksumGenFactory> _file_checksum_gen_factory =
nullptr)
: backup_dir(_backup_dir), : backup_dir(_backup_dir),
backup_env(_backup_env), backup_env(_backup_env),
share_table_files(_share_table_files), share_table_files(_share_table_files),
@ -190,7 +217,8 @@ struct BackupableDBOptions {
max_background_operations(_max_background_operations), max_background_operations(_max_background_operations),
callback_trigger_interval_size(_callback_trigger_interval_size), callback_trigger_interval_size(_callback_trigger_interval_size),
max_valid_backups_to_open(_max_valid_backups_to_open), max_valid_backups_to_open(_max_valid_backups_to_open),
share_files_with_checksum_naming(_share_files_with_checksum_naming) { share_files_with_checksum_naming(_share_files_with_checksum_naming),
file_checksum_gen_factory(_file_checksum_gen_factory) {
assert(share_table_files || !share_files_with_checksum); assert(share_table_files || !share_files_with_checksum);
} }
}; };
@ -327,16 +355,18 @@ class BackupEngineReadOnly {
} }
// If verify_with_checksum is true, this function // If verify_with_checksum is true, this function
// inspects the current checksums and file sizes of backup files to see if // inspects the default crc32c checksums and file sizes of backup files to
// they match our expectation. // see if they match our expectation. This function further inspects the
// custom checksums if BackupableDBOptions::file_checksum_gen_factory is
// the same as DBOptions::file_checksum_gen_factory.
// //
// If verify_with_checksum is false, this function // If verify_with_checksum is false, this function
// checks that each file exists and that the size of the file matches our // checks that each file exists and that the size of the file matches our
// expectation. It does not check file checksum. // expectation. It does not check file checksum.
// //
// If this BackupEngine created the backup, it compares the files' current // If this BackupEngine created the backup, it compares the files' current
// sizes (and current checksum) against the number of bytes written to // sizes (and current checksums) against the number of bytes written to
// them (and the checksum calculated) during creation. // them (and the checksums calculated) during creation.
// Otherwise, it compares the files' current sizes (and checksums) against // Otherwise, it compares the files' current sizes (and checksums) against
// their sizes (and checksums) when the BackupEngine was opened. // their sizes (and checksums) when the BackupEngine was opened.
// //
@ -456,7 +486,9 @@ class BackupEngine {
// If verify_with_checksum is true, this function // If verify_with_checksum is true, this function
// inspects the current checksums and file sizes of backup files to see if // inspects the current checksums and file sizes of backup files to see if
// they match our expectation. // they match our expectation. It further inspects the custom checksums
// if BackupableDBOptions::file_checksum_gen_factory is the same as
// DBOptions::file_checksum_gen_factory.
// //
// If verify_with_checksum is false, this function // If verify_with_checksum is false, this function
// checks that each file exists and that the size of the file matches our // checks that each file exists and that the size of the file matches our

@ -49,8 +49,13 @@ class FileChecksumGenCrc32cFactory : public FileChecksumGenFactory {
public: public:
std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator( std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
const FileChecksumGenContext& context) override { const FileChecksumGenContext& context) override {
if (context.requested_checksum_func_name.empty() ||
context.requested_checksum_func_name == "FileChecksumCrc32c") {
return std::unique_ptr<FileChecksumGenerator>( return std::unique_ptr<FileChecksumGenerator>(
new FileChecksumGenCrc32c(context)); new FileChecksumGenCrc32c(context));
} else {
return nullptr;
}
} }
const char* Name() const override { return "FileChecksumGenCrc32cFactory"; } const char* Name() const override { return "FileChecksumGenCrc32cFactory"; }

@ -28,6 +28,7 @@
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "db/log_reader.h"
#include "env/composite_env_wrapper.h" #include "env/composite_env_wrapper.h"
#include "file/filename.h" #include "file/filename.h"
#include "file/sequence_file_reader.h" #include "file/sequence_file_reader.h"
@ -60,6 +61,22 @@ inline std::string ChecksumInt32ToHex(const uint32_t& checksum_value) {
PutFixed32(&checksum_str, EndianSwapValue(checksum_value)); PutFixed32(&checksum_str, EndianSwapValue(checksum_value));
return ChecksumStrToHex(checksum_str); return ChecksumStrToHex(checksum_str);
} }
// Returns true when the two checksum function names denote the same
// function. Besides plain string equality, the backup default name
// (kDefaultBackupFileChecksumFuncName) and the db standard name
// (kStandardDbFileChecksumFuncName) are considered equal to each other,
// because both refer to crc32c even though they are spelled differently.
inline bool IsSameChecksumFunc(const std::string& dst_checksum_func_name,
                               const std::string& src_checksum_func_name) {
  if (dst_checksum_func_name == src_checksum_func_name) {
    return true;
  }
  // True when (backup_name, db_name) is exactly the pair of crc32c aliases.
  const auto is_crc32c_alias_pair = [](const std::string& backup_name,
                                       const std::string& db_name) {
    return backup_name == kDefaultBackupFileChecksumFuncName &&
           db_name == kStandardDbFileChecksumFuncName;
  };
  // The alias pair may show up in either argument order.
  return is_crc32c_alias_pair(dst_checksum_func_name,
                              src_checksum_func_name) ||
         is_crc32c_alias_pair(src_checksum_func_name,
                              dst_checksum_func_name);
}
// Returns true when fname ends with ".sst" and has at least one character
// in front of that suffix (the bare string ".sst" is not accepted).
inline bool IsSstFile(const std::string& fname) {
  static constexpr char kSstSuffix[] = ".sst";
  constexpr size_t kSstSuffixLen = sizeof(kSstSuffix) - 1;
  if (fname.size() <= kSstSuffixLen) {
    return false;
  }
  return fname.compare(fname.size() - kSstSuffixLen, kSstSuffixLen,
                       kSstSuffix) == 0;
}
} // namespace } // namespace
void BackupStatistics::IncrementNumberSuccessBackup() { void BackupStatistics::IncrementNumberSuccessBackup() {
@ -166,11 +183,15 @@ class BackupEngineImpl : public BackupEngine {
struct FileInfo { struct FileInfo {
FileInfo(const std::string& fname, uint64_t sz, const std::string& checksum, FileInfo(const std::string& fname, uint64_t sz, const std::string& checksum,
const std::string& id = "", const std::string& sid = "") const std::string& custom_checksum,
const std::string& checksum_name, const std::string& id = "",
const std::string& sid = "")
: refs(0), : refs(0),
filename(fname), filename(fname),
size(sz), size(sz),
checksum_hex(checksum), checksum_hex(checksum),
custom_checksum_hex(custom_checksum),
checksum_func_name(checksum_name),
db_id(id), db_id(id),
db_session_id(sid) {} db_session_id(sid) {}
@ -181,6 +202,8 @@ class BackupEngineImpl : public BackupEngine {
const std::string filename; const std::string filename;
const uint64_t size; const uint64_t size;
const std::string checksum_hex; const std::string checksum_hex;
const std::string custom_checksum_hex;
const std::string checksum_func_name;
// DB identities // DB identities
// db_id is obtained for potential usage in the future but not used // db_id is obtained for potential usage in the future but not used
// currently // currently
@ -358,6 +381,78 @@ class BackupEngineImpl : public BackupEngine {
return GetBackupMetaDir() + "/" + (tmp ? "." : "") + return GetBackupMetaDir() + "/" + (tmp ? "." : "") +
ROCKSDB_NAMESPACE::ToString(backup_id) + (tmp ? ".tmp" : ""); ROCKSDB_NAMESPACE::ToString(backup_id) + (tmp ? ".tmp" : "");
} }
// Derives the DB-local file name, the file number and the file type from a
// backup-relative path.
//
// file is expected to be one of shared/<file>,
// shared_checksum/<file_crc32c_size>, shared_checksum/<file_session>,
// shared_checksum/<file_crc32c_session>, or private/<number>/<file>.
//
// On return, local_name holds the part after the last '/', mapped through
// GetFileFromChecksumFile() when the file lives in the shared_checksum
// directory. number and type are filled in by ParseFileName().
// Returns Status::Corruption if local_name cannot be parsed.
inline Status GetFileNameInfo(const std::string& file,
                              std::string& local_name, uint64_t& number,
                              FileType& type) const {
  // Split off the trailing path component.
  const size_t dir_end = file.find_last_of('/');
  assert(dir_end != std::string::npos);
  local_name = file.substr(dir_end + 1);

  // Files in shared_checksum are named <number>_<checksum>_<size>.<type>,
  // <number>_<session>.<type>, or <number>_<checksum>_<session>.<type>;
  // recover the real file name from that form.
  const bool in_shared_checksum_dir =
      file.substr(0, dir_end) == GetSharedChecksumDirRel();
  if (in_shared_checksum_dir) {
    local_name = GetFileFromChecksumFile(local_name);
  }

  // Determine the file number and type from the real file name.
  if (ParseFileName(local_name, &number, &type)) {
    return Status::OK();
  }
  return Status::Corruption("Backup corrupted: Fail to parse filename " +
                            local_name);
}
// Returns true when options_.file_checksum_gen_factory is set (non-null).
inline bool HasCustomChecksumGenFactory() const {
  return static_cast<bool>(options_.file_checksum_gen_factory);
}
// Returns nullptr if file_checksum_gen_factory is not set or
// file_checksum_gen_factory is not able to create a generator with
// name being requested_checksum_func_name
inline std::unique_ptr<FileChecksumGenerator> GetCustomChecksumGenerator(
const std::string& requested_checksum_func_name = "") const {
std::shared_ptr<FileChecksumGenFactory> checksum_factory =
options_.file_checksum_gen_factory;
if (checksum_factory == nullptr) {
return nullptr;
} else {
FileChecksumGenContext gen_context;
gen_context.requested_checksum_func_name = requested_checksum_func_name;
return checksum_factory->CreateFileChecksumGenerator(gen_context);
}
}
// Sets checksum_func to a generator for requested_checksum_func_name when a
// custom generator is needed and available.
//
// checksum_func is left untouched when the requested function is the backup
// default (kDefaultBackupFileChecksumFuncName) or when no custom checksum
// factory is configured. Otherwise it is assigned the result of
// GetCustomChecksumGenerator(), which may be nullptr.
//
// Returns Status::InvalidArgument when the factory cannot provide the
// requested function and that function is not the db standard checksum
// function (kStandardDbFileChecksumFuncName); returns Status::OK otherwise.
inline Status SetChecksumGenerator(
    const std::string& requested_checksum_func_name,
    std::unique_ptr<FileChecksumGenerator>& checksum_func) {
  if (requested_checksum_func_name == kDefaultBackupFileChecksumFuncName) {
    // The requested checksum function is the default backup checksum
    // function; no custom generator is needed.
    return Status::OK();
  }
  if (!HasCustomChecksumGenFactory()) {
    // No custom checksum factory indicates users would like to use the
    // backup default checksum function and accept the degraded data
    // integrity checking.
    return Status::OK();
  }
  checksum_func = GetCustomChecksumGenerator(requested_checksum_func_name);
  // If the factory does not know the requested function, this is only
  // acceptable when the request was for the db standard checksum function,
  // in which case the default backup checksum function is used instead.
  // Any other unknown function is an error.
  if (checksum_func == nullptr &&
      requested_checksum_func_name != kStandardDbFileChecksumFuncName) {
    return Status::InvalidArgument("Checksum function " +
                                   requested_checksum_func_name +
                                   " not found");
  }
  return Status::OK();
}
// If size_limit == 0, there is no size limit, copy everything. // If size_limit == 0, there is no size limit, copy everything.
// //
@ -369,22 +464,47 @@ class BackupEngineImpl : public BackupEngine {
const std::string& src, const std::string& dst, const std::string& src, const std::string& dst,
const std::string& contents, Env* src_env, Env* dst_env, const std::string& contents, Env* src_env, Env* dst_env,
const EnvOptions& src_env_options, bool sync, RateLimiter* rate_limiter, const EnvOptions& src_env_options, bool sync, RateLimiter* rate_limiter,
uint64_t* size = nullptr, std::string* checksum_hex = nullptr, const std::string& backup_checksum_func_name, uint64_t* size = nullptr,
uint64_t size_limit = 0, std::string* checksum_hex = nullptr,
std::string* custom_checksum_hex = nullptr, uint64_t size_limit = 0,
std::function<void()> progress_callback = []() {}); std::function<void()> progress_callback = []() {});
Status CalculateChecksum(const std::string& src, Env* src_env, Status CalculateChecksum(
const EnvOptions& src_env_options, const std::string& src, Env* src_env, const EnvOptions& src_env_options,
uint64_t size_limit, std::string* checksum_hex); uint64_t size_limit, std::string* checksum_hex,
const std::unique_ptr<FileChecksumGenerator>& checksum_func = nullptr,
std::string* custom_checksum_hex = nullptr);
// Obtain db_id and db_session_id from the table properties of file_path // Obtain db_id and db_session_id from the table properties of file_path
Status GetFileDbIdentities(Env* src_env, const EnvOptions& src_env_options, Status GetFileDbIdentities(Env* src_env, const EnvOptions& src_env_options,
const std::string& file_path, std::string* db_id, const std::string& file_path, std::string* db_id,
std::string* db_session_id); std::string* db_session_id);
// Obtains the file checksum info recorded in the DB MANIFEST of the backup
// identified by backup_id / backup and stores it in checksum_list. The
// restore and verify paths call this first and return its Status unchanged
// when it is not OK.
Status GetFileChecksumsFromManifestInBackup(Env* src_env,
const BackupID& backup_id,
const BackupMeta* backup,
FileChecksumList* checksum_list);
// NOTE(review): presumably reads the MANIFEST located at abs_path and fills
// checksum_list with the checksums it records; the definition is not visible
// here -- confirm.
Status GetFileChecksumsFromManifest(Env* src_env, const std::string& abs_path,
FileChecksumList* checksum_list);
// NOTE(review): presumably checks the file at rel_path against the crc32c
// checksum recorded for it in backup; the definition is not visible here --
// confirm.
Status VerifyFileWithCrc32c(Env* src_env, const BackupMeta* backup,
const std::string& rel_path);
// log::Reader::Reporter implementation that remembers the first corruption
// reported to it.
struct LogReporter : public log::Reader::Reporter {
  // Destination for the first reported corruption. It is dereferenced
  // without a null check, so it must point to a valid Status before
  // Corruption() is called.
  Status* status;

  // Stores s in *status unless *status already holds a non-OK value, so
  // only the first reported corruption is kept.
  void Corruption(size_t /*bytes*/, const Status& s) override {
    if (!status->ok()) {
      return;
    }
    *status = s;
  }
};
struct CopyOrCreateResult { struct CopyOrCreateResult {
uint64_t size; uint64_t size;
std::string checksum_hex; std::string checksum_hex;
std::string custom_checksum_hex;
std::string checksum_func_name;
std::string db_id; std::string db_id;
std::string db_session_id; std::string db_session_id;
Status status; Status status;
@ -408,6 +528,7 @@ class BackupEngineImpl : public BackupEngine {
bool verify_checksum_after_work; bool verify_checksum_after_work;
std::string src_checksum_func_name; std::string src_checksum_func_name;
std::string src_checksum_hex; std::string src_checksum_hex;
std::string backup_checksum_func_name;
std::string db_id; std::string db_id;
std::string db_session_id; std::string db_session_id;
@ -424,6 +545,7 @@ class BackupEngineImpl : public BackupEngine {
verify_checksum_after_work(false), verify_checksum_after_work(false),
src_checksum_func_name(kUnknownFileChecksumFuncName), src_checksum_func_name(kUnknownFileChecksumFuncName),
src_checksum_hex(""), src_checksum_hex(""),
backup_checksum_func_name(kUnknownFileChecksumFuncName),
db_id(""), db_id(""),
db_session_id("") {} db_session_id("") {}
@ -449,6 +571,7 @@ class BackupEngineImpl : public BackupEngine {
verify_checksum_after_work = o.verify_checksum_after_work; verify_checksum_after_work = o.verify_checksum_after_work;
src_checksum_func_name = std::move(o.src_checksum_func_name); src_checksum_func_name = std::move(o.src_checksum_func_name);
src_checksum_hex = std::move(o.src_checksum_hex); src_checksum_hex = std::move(o.src_checksum_hex);
backup_checksum_func_name = std::move(o.backup_checksum_func_name);
db_id = std::move(o.db_id); db_id = std::move(o.db_id);
db_session_id = std::move(o.db_session_id); db_session_id = std::move(o.db_session_id);
return *this; return *this;
@ -463,6 +586,8 @@ class BackupEngineImpl : public BackupEngine {
const std::string& _src_checksum_func_name = const std::string& _src_checksum_func_name =
kUnknownFileChecksumFuncName, kUnknownFileChecksumFuncName,
const std::string& _src_checksum_hex = "", const std::string& _src_checksum_hex = "",
const std::string& _backup_checksum_func_name =
kUnknownFileChecksumFuncName,
const std::string& _db_id = "", const std::string& _db_session_id = "") const std::string& _db_id = "", const std::string& _db_session_id = "")
: src_path(std::move(_src_path)), : src_path(std::move(_src_path)),
dst_path(std::move(_dst_path)), dst_path(std::move(_dst_path)),
@ -477,6 +602,7 @@ class BackupEngineImpl : public BackupEngine {
verify_checksum_after_work(_verify_checksum_after_work), verify_checksum_after_work(_verify_checksum_after_work),
src_checksum_func_name(_src_checksum_func_name), src_checksum_func_name(_src_checksum_func_name),
src_checksum_hex(_src_checksum_hex), src_checksum_hex(_src_checksum_hex),
backup_checksum_func_name(_backup_checksum_func_name),
db_id(_db_id), db_id(_db_id),
db_session_id(_db_session_id) {} db_session_id(_db_session_id) {}
}; };
@ -858,25 +984,41 @@ Status BackupEngineImpl::Initialize() {
result.status = CopyOrCreateFile( result.status = CopyOrCreateFile(
work_item.src_path, work_item.dst_path, work_item.contents, work_item.src_path, work_item.dst_path, work_item.contents,
work_item.src_env, work_item.dst_env, work_item.src_env_options, work_item.src_env, work_item.dst_env, work_item.src_env_options,
work_item.sync, work_item.rate_limiter, &result.size, work_item.sync, work_item.rate_limiter,
&result.checksum_hex, work_item.size_limit, work_item.backup_checksum_func_name, &result.size,
work_item.progress_callback); &result.checksum_hex, &result.custom_checksum_hex,
work_item.size_limit, work_item.progress_callback);
result.checksum_func_name = work_item.backup_checksum_func_name;
result.db_id = work_item.db_id; result.db_id = work_item.db_id;
result.db_session_id = work_item.db_session_id; result.db_session_id = work_item.db_session_id;
if (result.status.ok() && work_item.verify_checksum_after_work) { if (result.status.ok() && work_item.verify_checksum_after_work) {
// unknown checksum function name implies no db table file checksum in // work_item.verify_checksum_after_work being true means backup engine
// db manifest; work_item.verify_checksum_after_work being true means // has obtained its crc32c and/or custom checksum for the table file.
// backup engine has calculated its crc32c checksum for the table // Therefore, we can try to compare the checksums if possible.
// file; therefore, we are able to compare the checksums. if (work_item.src_checksum_func_name ==
kUnknownFileChecksumFuncName ||
IsSameChecksumFunc(result.checksum_func_name,
work_item.src_checksum_func_name)) {
std::string checksum_to_compare;
std::string checksum_func_name_used;
if (work_item.src_checksum_func_name == if (work_item.src_checksum_func_name ==
kUnknownFileChecksumFuncName || kUnknownFileChecksumFuncName ||
work_item.src_checksum_func_name == kDbFileChecksumFuncName) { work_item.src_checksum_func_name ==
if (work_item.src_checksum_hex != result.checksum_hex) { kStandardDbFileChecksumFuncName) {
// kUnknownFileChecksumFuncName implies no table file checksums in
// db manifest, but we can compare using the crc32c checksum
checksum_to_compare = result.checksum_hex;
checksum_func_name_used = kStandardDbFileChecksumFuncName;
} else {
checksum_to_compare = result.custom_checksum_hex;
checksum_func_name_used = work_item.src_checksum_func_name;
}
if (work_item.src_checksum_hex != checksum_to_compare) {
std::string checksum_info( std::string checksum_info(
"Expected checksum is " + work_item.src_checksum_hex + "Expected checksum is " + work_item.src_checksum_hex +
" while computed checksum is " + result.checksum_hex); " while computed checksum is " + checksum_to_compare);
result.status = result.status = Status::Corruption(
Status::Corruption("Checksum mismatch after copying to " + checksum_func_name_used + " mismatch after copying to " +
work_item.dst_path + ": " + checksum_info); work_item.dst_path + ": " + checksum_info);
} }
} else { } else {
@ -884,7 +1026,7 @@ Status BackupEngineImpl::Initialize() {
"Existing checksum function is " + "Existing checksum function is " +
work_item.src_checksum_func_name + work_item.src_checksum_func_name +
" while provided checksum function is " + " while provided checksum function is " +
kBackupFileChecksumFuncName); result.checksum_func_name);
ROCKS_LOG_INFO( ROCKS_LOG_INFO(
options_.info_log, options_.info_log,
"Unable to verify checksum after copying to %s: %s\n", "Unable to verify checksum after copying to %s: %s\n",
@ -972,15 +1114,6 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
CheckpointImpl checkpoint(db); CheckpointImpl checkpoint(db);
uint64_t sequence_number = 0; uint64_t sequence_number = 0;
DBOptions db_options = db->GetDBOptions(); DBOptions db_options = db->GetDBOptions();
FileChecksumGenFactory* db_checksum_factory =
db_options.file_checksum_gen_factory.get();
const std::string kFileChecksumGenFactoryName =
"FileChecksumGenCrc32cFactory";
bool compare_checksum =
db_checksum_factory != nullptr &&
db_checksum_factory->Name() == kFileChecksumGenFactoryName
? true
: false;
EnvOptions src_raw_env_options(db_options); EnvOptions src_raw_env_options(db_options);
s = checkpoint.CreateCustomCheckpoint( s = checkpoint.CreateCustomCheckpoint(
db_options, db_options,
@ -1046,7 +1179,7 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
options.progress_callback, contents); options.progress_callback, contents);
} /* create_file_cb */, } /* create_file_cb */,
&sequence_number, options.flush_before_backup ? 0 : port::kMaxUint64, &sequence_number, options.flush_before_backup ? 0 : port::kMaxUint64,
compare_checksum); db_options.file_checksum_gen_factory == nullptr ? false : true);
if (s.ok()) { if (s.ok()) {
new_backup->SetSequenceNumber(sequence_number); new_backup->SetSequenceNumber(sequence_number);
} }
@ -1063,7 +1196,8 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
} }
if (item_status.ok()) { if (item_status.ok()) {
item_status = new_backup.get()->AddFile(std::make_shared<FileInfo>( item_status = new_backup.get()->AddFile(std::make_shared<FileInfo>(
item.dst_relative, result.size, result.checksum_hex, result.db_id, item.dst_relative, result.size, result.checksum_hex,
result.custom_checksum_hex, result.checksum_func_name, result.db_id,
result.db_session_id)); result.db_session_id));
} }
if (!item_status.ok()) { if (!item_status.ok()) {
@ -1324,49 +1458,74 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
DeleteChildren(db_dir); DeleteChildren(db_dir);
} }
Status s;
// Try to obtain checksum info from backuped DB MANIFEST
// The checksum info will be used for validating the checksums of the table
// files after restoration, in addition to the default backup engine crc32c
// checksums.
std::unique_ptr<FileChecksumList> checksum_list(NewFileChecksumList());
s = GetFileChecksumsFromManifestInBackup(backup_env_, backup_id, backup.get(),
checksum_list.get());
if (!s.ok()) {
return s;
}
RateLimiter* rate_limiter = options_.restore_rate_limiter.get(); RateLimiter* rate_limiter = options_.restore_rate_limiter.get();
if (rate_limiter) { if (rate_limiter) {
copy_file_buffer_size_ = static_cast<size_t>(rate_limiter->GetSingleBurstBytes()); copy_file_buffer_size_ =
static_cast<size_t>(rate_limiter->GetSingleBurstBytes());
} }
Status s;
std::vector<RestoreAfterCopyOrCreateWorkItem> restore_items_to_finish; std::vector<RestoreAfterCopyOrCreateWorkItem> restore_items_to_finish;
for (const auto& file_info : backup->GetFiles()) { for (const auto& file_info : backup->GetFiles()) {
const std::string& file = file_info->filename; const std::string& file = file_info->filename;
std::string dst; std::string dst;
// 1. extract the filename
size_t slash = file.find_last_of('/');
// file will either be shared/<file>, shared_checksum/<file_crc32c_size>,
// shared_checksum/<file_session>, shared_checksum/<file_crc32c_session>,
// or private/<number>/<file>
assert(slash != std::string::npos);
dst = file.substr(slash + 1);
// if the file was in shared_checksum, extract the real file name
// in this case the file is <number>_<checksum>_<size>.<type>,
// <number>_<session>.<type>, or <number>_<checksum>_<session>.<type>
if (file.substr(0, slash) == GetSharedChecksumDirRel()) {
dst = GetFileFromChecksumFile(dst);
}
// 2. find the filetype
uint64_t number; uint64_t number;
FileType type; FileType type;
bool ok = ParseFileName(dst, &number, &type); s = GetFileNameInfo(file, dst, number, type);
if (!ok) { if (!s.ok()) {
return Status::Corruption("Backup corrupted: Fail to parse filename " + return s;
dst);
} }
// 3. Construct the final path
std::string src_checksum_func_name = kUnknownFileChecksumFuncName;
std::string src_checksum_str = kUnknownFileChecksum;
std::string src_checksum_hex;
bool has_manifest_checksum = false;
if (type == kTableFile) {
Status file_checksum_status = checksum_list->SearchOneFileChecksum(
number, &src_checksum_str, &src_checksum_func_name);
if (file_checksum_status.ok() &&
src_checksum_str != kUnknownFileChecksum &&
src_checksum_func_name != kUnknownFileChecksumFuncName) {
src_checksum_hex = ChecksumStrToHex(src_checksum_str);
has_manifest_checksum = true;
}
}
// Construct the final path
// kLogFile lives in wal_dir and all the rest live in db_dir // kLogFile lives in wal_dir and all the rest live in db_dir
dst = ((type == kLogFile) ? wal_dir : db_dir) + dst = ((type == kLogFile) ? wal_dir : db_dir) +
"/" + dst; "/" + dst;
ROCKS_LOG_INFO(options_.info_log, "Restoring %s to %s\n", file.c_str(), ROCKS_LOG_INFO(options_.info_log, "Restoring %s to %s\n", file.c_str(),
dst.c_str()); dst.c_str());
std::string backup_checksum_func_name = file_info->checksum_func_name;
std::unique_ptr<FileChecksumGenerator> checksum_func;
if (src_checksum_func_name != kUnknownFileChecksumFuncName) {
s = SetChecksumGenerator(src_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
if (checksum_func != nullptr) {
backup_checksum_func_name = checksum_func->Name();
}
}
CopyOrCreateWorkItem copy_or_create_work_item( CopyOrCreateWorkItem copy_or_create_work_item(
GetAbsolutePath(file), dst, "" /* contents */, backup_env_, db_env_, GetAbsolutePath(file), dst, "" /* contents */, backup_env_, db_env_,
EnvOptions() /* src_env_options */, false, rate_limiter, EnvOptions() /* src_env_options */, false, rate_limiter,
0 /* size_limit */); 0 /* size_limit */, []() {} /* progress_callback */,
has_manifest_checksum, src_checksum_func_name, src_checksum_hex,
backup_checksum_func_name);
RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item( RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
copy_or_create_work_item.result.get_future(), file_info->checksum_hex); copy_or_create_work_item.result.get_future(), file_info->checksum_hex);
files_to_copy_or_create_.write(std::move(copy_or_create_work_item)); files_to_copy_or_create_.write(std::move(copy_or_create_work_item));
@ -1384,7 +1543,11 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
s = item_status; s = item_status;
break; break;
} else if (item.checksum_hex != result.checksum_hex) { } else if (item.checksum_hex != result.checksum_hex) {
s = Status::Corruption("Checksum check failed"); // Compare crc32c checksums (especially for non-table files)
std::string checksum_info("Expected checksum is " + item.checksum_hex +
" while computed checksum is " +
result.checksum_hex);
s = Status::Corruption("Crc32c checksum check failed: " + checksum_info);
break; break;
} }
} }
@ -1423,6 +1586,17 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id,
InsertPathnameToSizeBytes(abs_dir, backup_env_, &curr_abs_path_to_size); InsertPathnameToSizeBytes(abs_dir, backup_env_, &curr_abs_path_to_size);
} }
Status s;
std::unique_ptr<FileChecksumList> checksum_list(NewFileChecksumList());
if (verify_with_checksum) {
// Try to obtain checksum info from backuped DB MANIFEST
s = GetFileChecksumsFromManifestInBackup(backup_env_, backup_id,
backup.get(), checksum_list.get());
if (!s.ok()) {
return s;
}
}
// For all files registered in backup // For all files registered in backup
for (const auto& file_info : backup->GetFiles()) { for (const auto& file_info : backup->GetFiles()) {
const auto abs_path = GetAbsolutePath(file_info->filename); const auto abs_path = GetAbsolutePath(file_info->filename);
@ -1441,27 +1615,68 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id,
} }
if (verify_with_checksum) { if (verify_with_checksum) {
// verify file checksum // verify file checksum
std::string checksum_hex; // try setting checksum_func
std::unique_ptr<FileChecksumGenerator> checksum_func;
std::string src_checksum_func_name = kUnknownFileChecksumFuncName;
std::string src_checksum_str = kUnknownFileChecksum;
std::string src_checksum_hex;
if (IsSstFile(file_info->filename)) {
const std::string& file = file_info->filename;
std::string local_name;
uint64_t number;
FileType type;
s = GetFileNameInfo(file, local_name, number, type);
if (!s.ok()) {
return s;
}
assert(type == kTableFile);
// Try to get checksum for the table file
Status file_checksum_status = checksum_list->SearchOneFileChecksum(
number, &src_checksum_str, &src_checksum_func_name);
if (file_checksum_status.ok() &&
src_checksum_str != kUnknownFileChecksum &&
src_checksum_func_name != kUnknownFileChecksumFuncName) {
s = SetChecksumGenerator(src_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
src_checksum_hex = ChecksumStrToHex(src_checksum_str);
}
}
ROCKS_LOG_INFO(options_.info_log, "Verifying %s checksum...\n", ROCKS_LOG_INFO(options_.info_log, "Verifying %s checksum...\n",
abs_path.c_str()); abs_path.c_str());
std::string checksum_hex;
std::string custom_checksum_hex;
CalculateChecksum(abs_path, backup_env_, EnvOptions(), 0 /* size_limit */, CalculateChecksum(abs_path, backup_env_, EnvOptions(), 0 /* size_limit */,
&checksum_hex); &checksum_hex, checksum_func, &custom_checksum_hex);
if (file_info->checksum_hex != checksum_hex) { if (file_info->checksum_hex != checksum_hex) {
std::string checksum_info( std::string checksum_info(
"Expected checksum is " + file_info->checksum_hex + "Expected checksum is " + file_info->checksum_hex +
" while computed checksum is " + checksum_hex); " while computed checksum is " + checksum_hex);
return Status::Corruption("File corrupted: Checksum mismatch for " + return Status::Corruption("File corrupted: crc32c mismatch for " +
abs_path + ": " + checksum_info); abs_path + ": " + checksum_info);
} }
if (checksum_func != nullptr && src_checksum_hex != custom_checksum_hex) {
std::string checksum_info("Expected checksum is " + src_checksum_hex +
" while computed checksum is " +
custom_checksum_hex);
return Status::Corruption("File corrupted: " + src_checksum_func_name +
" mismatch for " + abs_path + ": " +
checksum_info);
}
} }
} }
return Status::OK(); return Status::OK();
} }
Status BackupEngineImpl::CopyOrCreateFile( Status BackupEngineImpl::CopyOrCreateFile(
const std::string& src, const std::string& dst, const std::string& contents, const std::string& src, const std::string& dst, const std::string& contents,
Env* src_env, Env* dst_env, const EnvOptions& src_env_options, bool sync, Env* src_env, Env* dst_env, const EnvOptions& src_env_options, bool sync,
RateLimiter* rate_limiter, uint64_t* size, std::string* checksum_hex, RateLimiter* rate_limiter, const std::string& backup_checksum_func_name,
uint64_t* size, std::string* checksum_hex, std::string* custom_checksum_hex,
uint64_t size_limit, std::function<void()> progress_callback) { uint64_t size_limit, std::function<void()> progress_callback) {
assert(src.empty() != contents.empty()); assert(src.empty() != contents.empty());
Status s; Status s;
@ -1475,6 +1690,13 @@ Status BackupEngineImpl::CopyOrCreateFile(
} }
uint32_t checksum_value = 0; uint32_t checksum_value = 0;
// Get custom checksum function
std::unique_ptr<FileChecksumGenerator> checksum_func;
s = SetChecksumGenerator(backup_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
// Check if size limit is set. if not, set it to very big number // Check if size limit is set. if not, set it to very big number
if (size_limit == 0) { if (size_limit == 0) {
size_limit = std::numeric_limits<uint64_t>::max(); size_limit = std::numeric_limits<uint64_t>::max();
@ -1529,6 +1751,10 @@ Status BackupEngineImpl::CopyOrCreateFile(
if (checksum_hex != nullptr) { if (checksum_hex != nullptr) {
checksum_value = crc32c::Extend(checksum_value, data.data(), data.size()); checksum_value = crc32c::Extend(checksum_value, data.data(), data.size());
} }
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Update(data.data(), data.size());
}
s = dest_writer->Append(data); s = dest_writer->Append(data);
if (rate_limiter != nullptr) { if (rate_limiter != nullptr) {
rate_limiter->Request(data.size(), Env::IO_LOW, nullptr /* stats */, rate_limiter->Request(data.size(), Env::IO_LOW, nullptr /* stats */,
@ -1541,10 +1767,14 @@ Status BackupEngineImpl::CopyOrCreateFile(
} }
} while (s.ok() && contents.empty() && data.size() > 0 && size_limit > 0); } while (s.ok() && contents.empty() && data.size() > 0 && size_limit > 0);
// Convert uint32_t checksum to hex checksum
if (checksum_hex != nullptr) { if (checksum_hex != nullptr) {
// Convert uint32_t checksum to hex checksum
checksum_hex->assign(ChecksumInt32ToHex(checksum_value)); checksum_hex->assign(ChecksumInt32ToHex(checksum_value));
} }
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Finalize();
custom_checksum_hex->assign(ChecksumStrToHex(checksum_func->GetChecksum()));
}
if (s.ok() && sync) { if (s.ok() && sync) {
s = dest_writer->Sync(false); s = dest_writer->Sync(false);
@ -1572,27 +1802,50 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
std::string dst_relative_tmp; std::string dst_relative_tmp;
Status s; Status s;
std::string checksum_hex; std::string checksum_hex;
std::string custom_checksum_hex;
// The function name of backup checksum function.
std::string backup_checksum_func_name = kDefaultBackupFileChecksumFuncName;
std::string db_id; std::string db_id;
std::string db_session_id; std::string db_session_id;
// whether the checksum for a table file is available // whether the checksum for a table file is available
bool has_checksum = false; bool has_checksum = false;
// Whenever a default checksum function name is passed in, we will compares // Set up the custom checksum function.
// the corresponding checksum values after copying. Note that only table files // A nullptr checksum_func indicates the default backup checksum function
// may have a known checksum function name passed in. // will be used. If checksum_func is not nullptr, then both the default
// backup checksum function and checksum_func will be used.
std::unique_ptr<FileChecksumGenerator> checksum_func;
if (src_checksum_func_name != kUnknownFileChecksumFuncName) {
// DB files have checksum functions
s = SetChecksumGenerator(src_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
if (checksum_func != nullptr) {
backup_checksum_func_name = checksum_func->Name();
}
}
// Whenever the db checksum function name matches the backup engine custom
// checksum function name, we will compare the checksum values after copying.
// Note that only table files may have a known checksum name passed in.
// //
// If no default checksum function name is passed in and db session id is not // If the checksum function names do not match and db session id is not
// available, we will calculate the checksum *before* copying in two cases // available, we will calculate the checksum *before* copying in two cases
// (we always calcuate checksums when copying or creating for any file types): // (we always calcuate checksums when copying or creating for any file types):
// a) share_files_with_checksum is true and file type is table; // a) share_files_with_checksum is true and file type is table;
// b) share_table_files is true and the file exists already. // b) share_table_files is true and the file exists already.
// //
// Step 0: Check if default checksum function name is passed in // Step 0: Check if a known checksum function name is passed in
if (kDbFileChecksumFuncName == src_checksum_func_name) { if (IsSameChecksumFunc(backup_checksum_func_name, src_checksum_func_name)) {
if (src_checksum_str == kUnknownFileChecksum) { if (src_checksum_str == kUnknownFileChecksum) {
return Status::Aborted("Unknown checksum value for " + fname); return Status::Aborted("Unknown checksum value for " + fname);
} }
if (checksum_func == nullptr) {
checksum_hex = ChecksumStrToHex(src_checksum_str); checksum_hex = ChecksumStrToHex(src_checksum_str);
} else {
custom_checksum_hex = ChecksumStrToHex(src_checksum_str);
}
has_checksum = true; has_checksum = true;
} }
@ -1611,7 +1864,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// the shared_checksum directory. // the shared_checksum directory.
if (!has_checksum && db_session_id.empty()) { if (!has_checksum && db_session_id.empty()) {
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
size_limit, &checksum_hex); size_limit, &checksum_hex, checksum_func,
&custom_checksum_hex);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -1630,8 +1884,14 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// shared_checksum/<file_number>_<db_session_id>.sst // shared_checksum/<file_number>_<db_session_id>.sst
// Otherwise, dst_relative is of the form // Otherwise, dst_relative is of the form
// shared_checksum/<file_number>_<checksum>_<size>.sst // shared_checksum/<file_number>_<checksum>_<size>.sst
//
// Also, we display custom checksums in the name if possible.
dst_relative = GetSharedFileWithChecksum( dst_relative = GetSharedFileWithChecksum(
dst_relative, has_checksum, checksum_hex, size_bytes, db_session_id); dst_relative, has_checksum,
checksum_func == nullptr || !UseSessionId(db_session_id)
? checksum_hex
: custom_checksum_hex,
size_bytes, db_session_id);
dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true); dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true);
dst_relative = GetSharedFileWithChecksumRel(dst_relative, false); dst_relative = GetSharedFileWithChecksumRel(dst_relative, false);
} else if (shared) { } else if (shared) {
@ -1698,7 +1958,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// file exists and referenced // file exists and referenced
if (!has_checksum) { if (!has_checksum) {
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
size_limit, &checksum_hex); size_limit, &checksum_hex, checksum_func,
&custom_checksum_hex);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -1717,6 +1978,11 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
fname.c_str(), checksum_hex.c_str(), size_bytes); fname.c_str(), checksum_hex.c_str(), size_bytes);
} }
} }
if (checksum_func != nullptr) {
ROCKS_LOG_INFO(options_.info_log, "%s checksum is %s",
backup_checksum_func_name.c_str(),
custom_checksum_hex.c_str());
}
} else if (backuped_file_infos_.find(dst_relative) == } else if (backuped_file_infos_.find(dst_relative) ==
backuped_file_infos_.end() && backuped_file_infos_.end() &&
!same_path) { !same_path) {
@ -1735,7 +2001,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
"%s already present, calculate checksum", fname.c_str()); "%s already present, calculate checksum", fname.c_str());
if (!has_checksum) { if (!has_checksum) {
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
size_limit, &checksum_hex); size_limit, &checksum_hex, checksum_func,
&custom_checksum_hex);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -1753,7 +2020,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
src_dir.empty() ? "" : src_dir + fname, *copy_dest_path, contents, src_dir.empty() ? "" : src_dir + fname, *copy_dest_path, contents,
db_env_, backup_env_, src_env_options, options_.sync, rate_limiter, db_env_, backup_env_, src_env_options, options_.sync, rate_limiter,
size_limit, progress_callback, has_checksum, src_checksum_func_name, size_limit, progress_callback, has_checksum, src_checksum_func_name,
checksum_hex, db_id, db_session_id); checksum_func == nullptr ? checksum_hex : custom_checksum_hex,
backup_checksum_func_name, db_id, db_session_id);
BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item( BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
copy_or_create_work_item.result.get_future(), shared, need_to_copy, copy_or_create_work_item.result.get_future(), shared, need_to_copy,
backup_env_, temp_dest_path, final_dest_path, dst_relative); backup_env_, temp_dest_path, final_dest_path, dst_relative);
@ -1769,6 +2037,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
result.status = s; result.status = s;
result.size = size_bytes; result.size = size_bytes;
result.checksum_hex = std::move(checksum_hex); result.checksum_hex = std::move(checksum_hex);
result.custom_checksum_hex = std::move(custom_checksum_hex);
result.checksum_func_name = std::move(backup_checksum_func_name);
result.db_id = std::move(db_id); result.db_id = std::move(db_id);
result.db_session_id = std::move(db_session_id); result.db_session_id = std::move(db_session_id);
promise_result.set_value(std::move(result)); promise_result.set_value(std::move(result));
@ -1776,14 +2046,16 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
return s; return s;
} }
Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env, Status BackupEngineImpl::CalculateChecksum(
const EnvOptions& src_env_options, const std::string& src, Env* src_env, const EnvOptions& src_env_options,
uint64_t size_limit, uint64_t size_limit, std::string* checksum_hex,
std::string* checksum_hex) { const std::unique_ptr<FileChecksumGenerator>& checksum_func,
std::string* custom_checksum_hex) {
if (checksum_hex == nullptr) { if (checksum_hex == nullptr) {
return Status::Aborted("Checksum pointer is null"); return Status::InvalidArgument("Checksum pointer is null");
} }
uint32_t checksum_value = 0; uint32_t checksum_value = 0;
if (size_limit == 0) { if (size_limit == 0) {
size_limit = std::numeric_limits<uint64_t>::max(); size_limit = std::numeric_limits<uint64_t>::max();
} }
@ -1812,10 +2084,18 @@ Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env,
} }
size_limit -= data.size(); size_limit -= data.size();
checksum_value = crc32c::Extend(checksum_value, data.data(), data.size()); checksum_value = crc32c::Extend(checksum_value, data.data(), data.size());
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Update(data.data(), data.size());
}
} while (data.size() > 0 && size_limit > 0); } while (data.size() > 0 && size_limit > 0);
checksum_hex->assign(ChecksumInt32ToHex(checksum_value)); checksum_hex->assign(ChecksumInt32ToHex(checksum_value));
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Finalize();
custom_checksum_hex->assign(ChecksumStrToHex(checksum_func->GetChecksum()));
}
return s; return s;
} }
@ -1873,6 +2153,152 @@ Status BackupEngineImpl::GetFileDbIdentities(Env* src_env,
return s; return s;
} }
} }
// Fills `checksum_list` with the file checksum records found in the DB
// MANIFEST that is stored inside backup `backup_id`.
//
// The MANIFEST is located by reading the backup's private CURRENT file. Both
// CURRENT and the MANIFEST it names are checked with VerifyFileWithCrc32c()
// against `backup` before their contents are used.
//
// Returns InvalidArgument when `checksum_list` is null; otherwise the first
// non-OK status produced while verifying or reading those two files, or the
// status of parsing the MANIFEST.
Status BackupEngineImpl::GetFileChecksumsFromManifestInBackup(
    Env* src_env, const BackupID& backup_id, const BackupMeta* backup,
    FileChecksumList* checksum_list) {
  if (checksum_list == nullptr) {
    return Status::InvalidArgument("checksum_list is nullptr");
  }
  checksum_list->reset();

  // Step 1: CURRENT holds the name of the latest MANIFEST in this backup.
  const std::string current_rel =
      GetPrivateFileRel(backup_id, false /* tmp */, "CURRENT");
  Status status = VerifyFileWithCrc32c(src_env, backup, current_rel);
  if (!status.ok()) {
    return status;
  }

  std::string manifest_name;
  status =
      ReadFileToString(src_env, GetAbsolutePath(current_rel), &manifest_name);
  if (!status.ok()) {
    return status;
  }

  // Drop any trailing '\n' characters left over from the CURRENT file.
  const std::string::size_type last_kept = manifest_name.find_last_not_of('\n');
  manifest_name.erase(last_kept == std::string::npos ? 0 : last_kept + 1);

  // Step 2: verify the MANIFEST itself, then read the checksum info out of it.
  const std::string manifest_rel =
      GetPrivateFileRel(backup_id, false /* tmp */, manifest_name);
  status = VerifyFileWithCrc32c(src_env, backup, manifest_rel);
  if (!status.ok()) {
    return status;
  }

  return GetFileChecksumsFromManifest(src_env, GetAbsolutePath(manifest_rel),
                                      checksum_list);
}
// Replays the MANIFEST at `abs_path` (opened through `src_env`'s file system)
// and rebuilds `checksum_list` from it: every file added by a version edit is
// inserted together with its checksum and checksum function name, and every
// file deleted by a version edit is removed again.
//
// While replaying, the set of known column families is tracked (starting with
// the default column family, id 0) so that inconsistent edits are reported as
// Corruption.
//
// Returns InvalidArgument when `checksum_list` is null, the open/decode error
// if the file cannot be read or an edit cannot be decoded, Corruption for
// inconsistent column family edits, and otherwise the status left behind by
// the log reader's reporter.
Status BackupEngineImpl::GetFileChecksumsFromManifest(
    Env* src_env, const std::string& abs_path,
    FileChecksumList* checksum_list) {
  if (checksum_list == nullptr) {
    return Status::InvalidArgument("checksum_list is nullptr");
  }
  checksum_list->reset();

  Status status;
  std::unique_ptr<SequentialFileReader> manifest_reader;
  {
    std::unique_ptr<FSSequentialFile> raw_file;
    const std::shared_ptr<FileSystem>& fs = src_env->GetFileSystem();
    status = fs->NewSequentialFile(abs_path,
                                   fs->OptimizeForManifestRead(FileOptions()),
                                   &raw_file, nullptr /* dbg */);
    if (!status.ok()) {
      return status;
    }
    manifest_reader.reset(
        new SequentialFileReader(std::move(raw_file), abs_path));
  }

  // The reporter writes read errors into `status`, which the loop condition
  // below checks after every record.
  LogReporter reporter;
  reporter.status = &status;
  log::Reader reader(nullptr, std::move(manifest_reader), &reporter,
                     true /* checksum */, 0 /* log_number */);

  Slice record;
  std::string scratch;
  // Ids of the column families seen so far; the default CF (0) always exists.
  std::unordered_set<uint32_t> known_cfs = {0};

  while (reader.ReadRecord(&record, &scratch) && status.ok()) {
    VersionEdit edit;
    status = edit.DecodeFrom(record);
    if (!status.ok()) {
      break;
    }

    const uint32_t cf_id = edit.GetColumnFamily();
    const bool cf_known = known_cfs.count(cf_id) > 0;

    if (edit.IsColumnFamilyAdd()) {
      if (cf_known) {
        status =
            Status::Corruption("Manifest adding the same column family twice");
        break;
      }
      known_cfs.insert(cf_id);
      continue;
    }

    if (edit.IsColumnFamilyDrop()) {
      if (!cf_known) {
        status = Status::Corruption(
            "Manifest dropping non-existing column family: " +
            ToString(cf_id));
        break;
      }
      known_cfs.erase(cf_id);
      continue;
    }

    // Ordinary edit: it must refer to a column family that currently exists.
    if (!cf_known) {
      status = Status::Corruption(
          "Manifest referencing unknown column family: " + ToString(cf_id));
      break;
    }

    // Deletions are applied before additions, in the order the edit lists
    // them.
    for (const auto& deleted_file : edit.GetDeletedFiles()) {
      checksum_list->RemoveOneFileChecksum(deleted_file.second);
    }
    for (const auto& new_file : edit.GetNewFiles()) {
      checksum_list->InsertOneFileChecksum(
          new_file.second.fd.GetNumber(), new_file.second.file_checksum,
          new_file.second.file_checksum_func_name);
    }
  }
  return status;
}
// Recomputes the checksum of the backup file at `rel_path` with
// CalculateChecksum() and compares it with the `checksum_hex` recorded for
// that file in `backup`.
//
// Returns Corruption if `backup` has no entry for `rel_path` or if the two
// checksums differ, the CalculateChecksum() error if the file cannot be
// checksummed, and OK otherwise.
Status BackupEngineImpl::VerifyFileWithCrc32c(Env* src_env,
                                              const BackupMeta* backup,
                                              const std::string& rel_path) {
  const std::shared_ptr<FileInfo> info = backup->GetFile(rel_path);
  if (!info) {
    return Status::Corruption(rel_path + " is missing");
  }

  const std::string file_path = GetAbsolutePath(rel_path);
  const std::string recorded_hex = info->checksum_hex;
  std::string computed_hex;
  Status status = CalculateChecksum(file_path, src_env, EnvOptions(),
                                    0 /* size_limit */, &computed_hex);
  if (!status.ok()) {
    return status;
  }

  if (computed_hex == recorded_hex) {
    return status;
  }

  std::string checksum_info("Expected checksum is " + recorded_hex +
                            " while computed checksum is " + computed_hex);
  return Status::Corruption("crc32c mismatch for " + rel_path + ": " +
                            checksum_info);
}
void BackupEngineImpl::DeleteChildren(const std::string& dir, void BackupEngineImpl::DeleteChildren(const std::string& dir,
uint32_t file_type_filter) { uint32_t file_type_filter) {
@ -2047,6 +2473,13 @@ Status BackupEngineImpl::BackupMeta::AddFile(
return Status::Corruption( return Status::Corruption(
"Checksum mismatch for existing backup file. Delete old backups and " "Checksum mismatch for existing backup file. Delete old backups and "
"try again."); "try again.");
} else if (IsSameChecksumFunc(itr->second->checksum_func_name,
file_info->checksum_func_name) &&
itr->second->custom_checksum_hex !=
file_info->custom_checksum_hex) {
return Status::Corruption(
"Custom checksum mismatch for existing backup file. Delete old "
"backups and try again.");
} }
++itr->second->refs; // increase refcount if already present ++itr->second->refs; // increase refcount if already present
} }
@ -2160,13 +2593,14 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
} }
uint32_t checksum_value = 0; uint32_t checksum_value = 0;
std::string checksum_func_name = kUnknownFileChecksumFuncName;
if (line.starts_with(checksum_prefix)) { if (line.starts_with(checksum_prefix)) {
line.remove_prefix(checksum_prefix.size()); line.remove_prefix(checksum_prefix.size());
checksum_value = static_cast<uint32_t>( checksum_func_name = kDefaultBackupFileChecksumFuncName;
strtoul(line.data(), nullptr, 10)); checksum_value = static_cast<uint32_t>(strtoul(line.data(), nullptr, 10));
if (line != ROCKSDB_NAMESPACE::ToString(checksum_value)) { if (line != ROCKSDB_NAMESPACE::ToString(checksum_value)) {
return Status::Corruption("Invalid checksum value for " + filename + return Status::Corruption("Invalid crc32c checksum value for " +
" in " + meta_filename_); filename + " in " + meta_filename_);
} }
} else { } else {
return Status::Corruption("Unknown checksum type for " + filename + return Status::Corruption("Unknown checksum type for " + filename +
@ -2174,7 +2608,8 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
} }
files.emplace_back( files.emplace_back(
new FileInfo(filename, size, ChecksumInt32ToHex(checksum_value))); new FileInfo(filename, size, ChecksumInt32ToHex(checksum_value),
"" /* custom_checksum_hex */, checksum_func_name));
} }
if (s.ok() && data.size() > 0) { if (s.ok() && data.size() > 0) {
@ -2257,7 +2692,7 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) {
len + file->filename.length() + len + file->filename.length() +
snprintf(writelen_temp, sizeof(writelen_temp), " crc32 %u\n", snprintf(writelen_temp, sizeof(writelen_temp), " crc32 %u\n",
ChecksumHexToInt32(file->checksum_hex)); ChecksumHexToInt32(file->checksum_hex));
const char *const_write = writelen_temp; const char* const_write = writelen_temp;
if (newlen >= buf_size) { if (newlen >= buf_size) {
backup_meta_file->Append(Slice(buf.get(), len)); backup_meta_file->Append(Slice(buf.get(), len));
buf.reset(); buf.reset();

@ -38,6 +38,148 @@ namespace ROCKSDB_NAMESPACE {
namespace { namespace {
// Test-only checksum generator whose result does not depend on the file
// content at all: Update() discards its input and the checksum value is fixed
// at construction time (0 when `state` is true, 1 when it is false). Two
// generators built with different `state` values therefore report the same
// Name() but disagree on every file.
class DummyFileChecksumGen : public FileChecksumGenerator {
 public:
  explicit DummyFileChecksumGen(const FileChecksumGenContext& /* context */,
                                bool state)
      : checksum_(state ? 0u : 1u) {}

  // File content is intentionally ignored.
  void Update(const char* /* data */, size_t /* n */) override {}

  // Serializes the fixed value; must be called exactly once.
  void Finalize() override {
    assert(checksum_str_.empty());
    // Store as big endian raw bytes
    const uint32_t swapped = EndianSwapValue(checksum_);
    PutFixed32(&checksum_str_, swapped);
  }

  // Only valid after Finalize().
  std::string GetChecksum() const override {
    assert(!checksum_str_.empty());
    return checksum_str_;
  }

  const char* Name() const override { return "DummyFileChecksum"; }

 private:
  uint32_t checksum_;
  std::string checksum_str_;
};
// Factory for DummyFileChecksumGen. `state` is forwarded to every generator it
// creates, so factories built with different `state` values hand out
// generators with the same name but different checksum values.
class DummyFileChecksumGenFactory : public FileChecksumGenFactory {
 public:
  explicit DummyFileChecksumGenFactory(bool state = false) : state_(state) {}

  // Serves requests with an empty function name or for "DummyFileChecksum";
  // any other requested name yields nullptr.
  std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
      const FileChecksumGenContext& context) override {
    const auto& requested = context.requested_checksum_func_name;
    const bool supported = requested.empty() || requested == "DummyFileChecksum";
    if (!supported) {
      return nullptr;
    }
    return std::unique_ptr<FileChecksumGenerator>(
        new DummyFileChecksumGen(context, state_));
  }

  const char* Name() const override { return "DummyFileChecksumGenFactory"; }

 private:
  bool state_;
};
// Test-only checksum generator producing a 32-bit Hash() (seed 1) of the
// whole file. Update() buffers everything it is given and the hash is
// computed in one go in Finalize(), so the result does not depend on how the
// content was split across Update() calls.
class FileHash32Gen : public FileChecksumGenerator {
 public:
  explicit FileHash32Gen(const FileChecksumGenContext& /*context*/)
      : checksum_(0) {}

  // Appends `n` bytes starting at `data` to the buffered file content.
  void Update(const char* data, size_t n) override { content_.append(data, n); }

  // Computes the hash over the buffered content; must be called exactly once.
  void Finalize() override {
    assert(checksum_str_.empty());
    // Hash all buffered bytes. The content is binary file data and may contain
    // '\0' bytes, so its length must come from the string itself: measuring it
    // with strlen() would stop at the first '\0' and leave the rest of the
    // file out of the checksum.
    checksum_ = Hash(content_.data(), content_.size(), 1);
    // Store as big endian raw bytes
    PutFixed32(&checksum_str_, EndianSwapValue(checksum_));
  }

  // Only valid after Finalize().
  std::string GetChecksum() const override {
    assert(!checksum_str_.empty());
    return checksum_str_;
  }

  const char* Name() const override { return "FileHash32"; }

 private:
  std::string content_;
  uint32_t checksum_;
  std::string checksum_str_;
};
// Test-only checksum generator producing a 64-bit Hash64() (seed 1) of the
// whole file. Update() buffers everything it is given and the hash is
// computed in one go in Finalize(), so the result does not depend on how the
// content was split across Update() calls.
class FileHash64Gen : public FileChecksumGenerator {
 public:
  explicit FileHash64Gen(const FileChecksumGenContext& /*context*/)
      : checksum_(0) {}

  // Appends `n` bytes starting at `data` to the buffered file content.
  void Update(const char* data, size_t n) override { content_.append(data, n); }

  // Computes the hash over the buffered content; must be called exactly once.
  void Finalize() override {
    assert(checksum_str_.empty());
    // Hash all buffered bytes. The content is binary file data and may contain
    // '\0' bytes, so its length must come from the string itself: measuring it
    // with strlen() would stop at the first '\0' and leave the rest of the
    // file out of the checksum.
    checksum_ = Hash64(content_.data(), content_.size(), 1);
    // Store as big endian raw bytes
    PutFixed64(&checksum_str_, EndianSwapValue(checksum_));
  }

  // Only valid after Finalize().
  std::string GetChecksum() const override {
    assert(!checksum_str_.empty());
    return checksum_str_;
  }

  const char* Name() const override { return "FileHash64"; }

 private:
  std::string content_;
  uint64_t checksum_;
  std::string checksum_str_;
};
// Factory that only knows the "FileHash32" checksum function.
class FileHash32GenFactory : public FileChecksumGenFactory {
 public:
  // Returns a FileHash32Gen when the requested function name is empty or
  // "FileHash32"; returns nullptr for any other requested name.
  std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
      const FileChecksumGenContext& context) override {
    const auto& requested = context.requested_checksum_func_name;
    if (!requested.empty() && requested != "FileHash32") {
      return nullptr;
    }
    return std::unique_ptr<FileChecksumGenerator>(new FileHash32Gen(context));
  }

  const char* Name() const override { return "FileHash32GenFactory"; }
};
class FileHashGenFactory : public FileChecksumGenFactory {
public:
std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
const FileChecksumGenContext& context) override {
if (context.requested_checksum_func_name.empty() ||
context.requested_checksum_func_name == "FileHash64") {
return std::unique_ptr<FileChecksumGenerator>(new FileHash64Gen(context));
} else if (context.requested_checksum_func_name == "FileHash32") {
return std::unique_ptr<FileChecksumGenerator>(new FileHash32Gen(context));
} else {
return nullptr;
}
}
const char* Name() const override { return "FileHashGenFactory"; }
};
class DummyDB : public StackableDB { class DummyDB : public StackableDB {
public: public:
/* implicit */ /* implicit */
@ -634,8 +776,8 @@ class BackupableDBTest : public testing::Test {
backup_engine_.reset(); backup_engine_.reset();
} }
void OpenBackupEngine() { void OpenBackupEngine(bool destroy_old_data = false) {
backupable_options_->destroy_old_data = false; backupable_options_->destroy_old_data = destroy_old_data;
BackupEngine* backup_engine; BackupEngine* backup_engine;
ASSERT_OK(BackupEngine::Open(test_db_env_.get(), *backupable_options_, ASSERT_OK(BackupEngine::Open(test_db_env_.get(), *backupable_options_,
&backup_engine)); &backup_engine));
@ -766,6 +908,204 @@ class BackupableDBTestWithParam : public BackupableDBTest,
} }
}; };
// Exercises backups when the DB and the backup engine are configured with
// matching checksum factories, and then checks that a backup engine whose
// custom checksum function has the same name but produces different values is
// rejected when creating, verifying, and restoring a backup.
// Every scenario is repeated for each entry of kAllShareOptions.
TEST_F(BackupableDBTest, DbAndBackupSameCustomChecksum) {
const int keys_iteration = 5000;
// Scenario 1: only the DB is given the crc32c factory here.
options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
// backup uses its default crc32c
for (const auto& sopt : kAllShareOptions) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
// Verify backup 1 both without and with checksum verification.
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1);
// delete old data
DestroyDB(dbname_, options_);
}
// Scenario 2: the backup engine is explicitly given the crc32c factory too.
// backup uses db crc32c
backupable_options_->file_checksum_gen_factory =
GetFileChecksumGenCrc32cFactory();
for (const auto& sopt : kAllShareOptions) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1);
// delete old data
DestroyDB(dbname_, options_);
}
// Scenario 3: DB and backup engine share one FileHashGenFactory instance.
std::shared_ptr<FileChecksumGenFactory> hash_factory =
std::make_shared<FileHashGenFactory>();
options_.file_checksum_gen_factory = hash_factory;
backupable_options_->file_checksum_gen_factory = hash_factory;
for (const auto& sopt : kAllShareOptions) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1);
// delete old data
DestroyDB(dbname_, options_);
}
// Scenario 4:
// Mimic a checksum mismatch for custom checksum function by using a dummy
// checksum function with a state
// (DummyFileChecksumGen yields checksum value 1 for state == false and 0 for
// state == true, regardless of file content.)
std::shared_ptr<FileChecksumGenFactory> dummy_factory_0 =
std::make_shared<DummyFileChecksumGenFactory>(false);
std::shared_ptr<FileChecksumGenFactory> dummy_factory_1 =
std::make_shared<DummyFileChecksumGenFactory>(true);
FileChecksumGenContext context;
// Both factories have the same generator name
std::string dummy_checksum_function_name =
dummy_factory_0->CreateFileChecksumGenerator(context)->Name();
// The DB always uses dummy_factory_0 in this scenario.
options_.file_checksum_gen_factory = dummy_factory_0;
for (const auto& sopt : kAllShareOptions) {
backupable_options_->file_checksum_gen_factory = dummy_factory_1;
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
// DB and backup engine do not have the same custom checksum function
// "state"
Status s = backup_engine_->CreateNewBackup(db_.get());
ASSERT_NOK(s);
// The failure must be reported as a mismatch of the custom checksum function.
ASSERT_TRUE(
s.ToString().find("Corruption: " + dummy_checksum_function_name +
" mismatch") != std::string::npos);
CloseBackupEngine();
// Change custom checksum function and try again
backupable_options_->file_checksum_gen_factory = dummy_factory_0;
OpenBackupEngine(true /* destroy_old_data */);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
ASSERT_OK(backup_engine_->RestoreDBFromBackup(1, dbname_, dbname_));
CloseBackupEngine();
// Try verifying or restoring a backup using a different custom checksum
// function "state"
backupable_options_->file_checksum_gen_factory = dummy_factory_1;
OpenBackupEngine(false /* destroy_old_data */);
ASSERT_NOK(backup_engine_->VerifyBackup(1, true));
ASSERT_NOK(backup_engine_->RestoreDBFromBackup(1, dbname_, dbname_));
CloseDBAndBackupEngine();
// delete old data
DestroyDB(dbname_, options_);
}
}
// Walks the DB and the backup engine through a sequence of checksum factory
// configurations (same factory, different factories, no factory) for every
// share option in kAllShareOptions, and checks at each step which of
// creating, verifying, restoring, and deleting backups succeed or fail.
TEST_F(BackupableDBTest, CustomChecksumTransition) {
  const int keys_iteration = 5000;
  std::shared_ptr<FileChecksumGenFactory> hash32_factory =
      std::make_shared<FileHash32GenFactory>();
  std::shared_ptr<FileChecksumGenFactory> hash_factory =
      std::make_shared<FileHashGenFactory>();
  for (const auto& sopt : kAllShareOptions) {
    // 1) with one custom checksum function (FileHash32GenFactory) for both
    // db and backup
    // `i` is the stage counter; backup IDs and key counts below are expressed
    // relative to it.
    int i = 0;
    options_.file_checksum_gen_factory = hash32_factory;
    backupable_options_->file_checksum_gen_factory = hash32_factory;
    // start from scratch: destroy_old_data is true for this first open
    OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
    FillDB(db_.get(), 0, keys_iteration * (i + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
    // verify the backup with checksum
    ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
    CloseDBAndBackupEngine();
    AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
                            keys_iteration * (i + 2));

    // 2) with two custom checksum functions (FileHashGenFactory) for db
    // but one custom checksum function (FileHash32GenFactory) for backup
    ++i;
    options_.file_checksum_gen_factory = hash_factory;
    backupable_options_->file_checksum_gen_factory = hash32_factory;
    // open with old backup
    OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
                          sopt);
    FillDB(db_.get(), 0, keys_iteration * (i + 1));
    // note that the checksum factory for backup does not know the custom
    // checksum function used in the db
    ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
    // but it knows the custom checksum function for the older backup
    ASSERT_OK(backup_engine_->VerifyBackup(i, true));
    // reset the factory to nullptr and try again
    CloseBackupEngine();
    backupable_options_->file_checksum_gen_factory = nullptr;
    OpenBackupEngine();
    // the failed CreateNewBackup above is expected not to have left a backup
    // i + 1 behind, so deleting it must fail
    ASSERT_NOK(backup_engine_->DeleteBackup(i + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
    ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
    CloseDBAndBackupEngine();
    AssertBackupConsistency(i, 0, keys_iteration * i, keys_iteration * (i + 1));
    AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
                            keys_iteration * (i + 2));
    // Now set the factory to the same as the one used in the db
    backupable_options_->file_checksum_gen_factory = hash_factory;
    OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
                          sopt);
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
    // only the backup engine is closed here; the DB stays open
    CloseBackupEngine();
    // Say, we accidentally change the factory
    backupable_options_->file_checksum_gen_factory = hash32_factory;
    OpenBackupEngine();
    // verifying and restoring backup i + 2 must fail with the changed factory,
    // while deleting it must still succeed
    ASSERT_NOK(backup_engine_->VerifyBackup(i + 2, true));
    ASSERT_NOK(backup_engine_->RestoreDBFromBackup(i + 2, dbname_, dbname_));
    ASSERT_OK(backup_engine_->DeleteBackup(i + 2));
    CloseDBAndBackupEngine();

    // 3) with one custom checksum function (FileHash32GenFactory) for db
    // but two custom checksum functions (FileHashGenFactory) for backup
    // note that the checksum factory for backup does know the checksum
    // function in the db
    ++i;
    options_.file_checksum_gen_factory = hash32_factory;
    backupable_options_->file_checksum_gen_factory = hash_factory;
    // open with old backup
    OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
                          sopt);
    FillDB(db_.get(), 0, keys_iteration * (i + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
    ASSERT_OK(backup_engine_->VerifyBackup(i - 1, true));
    ASSERT_OK(backup_engine_->VerifyBackup(i, true));
    ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
    CloseDBAndBackupEngine();
    AssertBackupConsistency(i - 1, 0, keys_iteration * (i - 1),
                            keys_iteration * i);
    AssertBackupConsistency(i, 0, keys_iteration * i, keys_iteration * (i + 1));
    AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
                            keys_iteration * (i + 2));

    // 4) no custom checksums
    ++i;
    options_.file_checksum_gen_factory = nullptr;
    backupable_options_->file_checksum_gen_factory = nullptr;
    OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
                          sopt);
    FillDB(db_.get(), 0, keys_iteration * (i + 1));
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
    // every backup ID from 1 to i + 1 must verify with no custom factory set
    for (int j = 0; j <= i; ++j) {
      ASSERT_OK(backup_engine_->VerifyBackup(j + 1, true));
    }
    CloseDBAndBackupEngine();
    for (int j = 0; j <= i; ++j) {
      AssertBackupConsistency(j + 1, 0, keys_iteration * (j + 1),
                              keys_iteration * (j + 2));
    }

    // delete old data; check the returned Status so that a failed cleanup
    // cannot silently leak state into the next share-option iteration
    ASSERT_OK(DestroyDB(dbname_, options_));
  }
}
// This test verifies that the VerifyBackup method correctly identifies
// invalid backups
TEST_P(BackupableDBTestWithParam, VerifyBackup) { TEST_P(BackupableDBTestWithParam, VerifyBackup) {

Loading…
Cancel
Save