diff --git a/include/utilities/backupable_db.h b/include/utilities/backupable_db.h index 7c34e08e1..617fe8aef 100644 --- a/include/utilities/backupable_db.h +++ b/include/utilities/backupable_db.h @@ -7,16 +7,19 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#ifndef ROCKSDB_LITE #pragma once -#include "utilities/stackable_db.h" -#include "rocksdb/env.h" -#include "rocksdb/status.h" +#ifndef ROCKSDB_LITE +#define __STDC_FORMAT_MACROS +#include #include #include #include +#include "utilities/stackable_db.h" +#include "rocksdb/env.h" +#include "rocksdb/status.h" + namespace rocksdb { struct BackupableDBOptions { @@ -72,6 +75,14 @@ struct BackupableDBOptions { // Default: 0 uint64_t restore_rate_limit; + // Only used if share_table_files is set to true. If true, will consider that + // backups can come from different databases, hence a sst is not uniquely + // identified by its name, but by the triple (file name, crc32, file length) + // Default: false + // Note: this is an experimental option, and you'll need to set it manually + // *turn it on only if you know what you're doing* + bool share_files_with_checksum; + void Dump(Logger* logger) const; explicit BackupableDBOptions(const std::string& _backup_dir, @@ -90,7 +101,10 @@ struct BackupableDBOptions { destroy_old_data(_destroy_old_data), backup_log_files(_backup_log_files), backup_rate_limit(_backup_rate_limit), - restore_rate_limit(_restore_rate_limit) {} + restore_rate_limit(_restore_rate_limit), + share_files_with_checksum(false) { + assert(share_table_files || !share_files_with_checksum); + } }; struct RestoreOptions { @@ -233,5 +247,5 @@ class RestoreBackupableDB { BackupEngine* backup_engine_; }; -} // rocksdb namespace +} // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index a11720165..87901e0ef 100644 --- 
a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -175,8 +175,8 @@ class BackupEngineImpl : public BackupEngine { std::unordered_map* file_infos_; Env* env_; - static const size_t max_backup_meta_file_size_ = 10 * 1024 * 1024; // 10MB - }; // BackupMeta + static const size_t max_backup_meta_file_size_ = 10 * 1024 * 1024; // 10MB + }; // BackupMeta inline std::string GetAbsolutePath( const std::string &relative_path = "") const { @@ -186,6 +186,9 @@ class BackupEngineImpl : public BackupEngine { inline std::string GetPrivateDirRel() const { return "private"; } + inline std::string GetSharedChecksumDirRel() const { + return "shared_checksum"; + } inline std::string GetPrivateFileRel(BackupID backup_id, bool tmp = false, const std::string& file = "") const { @@ -198,6 +201,41 @@ class BackupEngineImpl : public BackupEngine { assert(file.size() == 0 || file[0] != '/'); return "shared/" + file + (tmp ? ".tmp" : ""); } + inline std::string GetSharedFileWithChecksumRel(const std::string& file = "", + bool tmp = false) const { + assert(file.size() == 0 || file[0] != '/'); + return GetSharedChecksumDirRel() + "/" + file + (tmp ? ".tmp" : ""); + } + // Builds the name under which `file` is stored in the shared_checksum dir: + // "_{checksum_value}_{file_size}" is inserted in front of the extension. + // A name without an extension gets the suffix appended. + inline std::string GetSharedFileWithChecksum(const std::string& file, + const uint32_t checksum_value, + const uint64_t file_size) const { + assert(file.size() == 0 || file[0] != '/'); + size_t dot = file.find_last_of('.'); + if (dot == std::string::npos) { + // inserting at npos would throw std::out_of_range + dot = file.size(); + } + return file.substr(0, dot) + "_" + std::to_string(checksum_value) + "_" + + std::to_string(file_size) + file.substr(dot); + } + // Inverse of GetSharedFileWithChecksum(): erases everything from the first + // '_' up to (not including) the last '.'. A '_' in the original name itself + // would be mistaken for the start of the suffix. Names without any '_' are + // returned unchanged. + inline std::string GetFileFromChecksumFile(const std::string& file) const { + assert(file.size() == 0 || file[0] != '/'); + const size_t first_underscore = file.find_first_of('_'); + if (first_underscore == std::string::npos) { + // nothing was appended; erase() at npos would throw std::out_of_range + return file; + } + std::string file_copy = file; + return file_copy.erase(first_underscore, + file_copy.find_last_of('.') - first_underscore); + } inline std::string GetLatestBackupFile(bool tmp = false) const { return GetAbsolutePath(std::string("LATEST_BACKUP") + (tmp ? 
".tmp" : "")); } @@ -225,9 +249,10 @@ class BackupEngineImpl : public BackupEngine { BackupMeta* backup, bool shared, const std::string& src_dir, - const std::string& src_fname, // starts with "/" + const std::string& src_fname, // starts with "/" RateLimiter* rate_limiter, - uint64_t size_limit = 0); + uint64_t size_limit = 0, + bool shared_checksum = false); Status CalculateChecksum(const std::string& src, Env* src_env, @@ -286,9 +311,16 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env, backup_env_->CreateDirIfMissing(GetAbsolutePath()); backup_env_->NewDirectory(GetAbsolutePath(), &backup_directory_); if (options_.share_table_files) { - backup_env_->CreateDirIfMissing(GetAbsolutePath(GetSharedFileRel())); - backup_env_->NewDirectory(GetAbsolutePath(GetSharedFileRel()), - &shared_directory_); + if (options_.share_files_with_checksum) { + backup_env_->CreateDirIfMissing(GetAbsolutePath( + GetSharedFileWithChecksumRel())); + backup_env_->NewDirectory(GetAbsolutePath( + GetSharedFileWithChecksumRel()), &shared_directory_); + } else { + backup_env_->CreateDirIfMissing(GetAbsolutePath(GetSharedFileRel())); + backup_env_->NewDirectory(GetAbsolutePath(GetSharedFileRel()), + &shared_directory_); + } } backup_env_->CreateDirIfMissing(GetAbsolutePath(GetPrivateDirRel())); backup_env_->NewDirectory(GetAbsolutePath(GetPrivateDirRel()), @@ -316,7 +348,7 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env, &backuped_file_infos_, backup_env_))); } - if (options_.destroy_old_data) { // Destory old data + if (options_.destroy_old_data) { // Destroy old data assert(!read_only_); for (auto& backup : backups_) { backup.second.Delete(); @@ -326,7 +358,7 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env, // start from beginning latest_backup_id_ = 0; // GarbageCollection() will do the actual deletion - } else { // Load data from storage + } else { // Load data from storage // load the backups if any for (auto& backup : backups_) { Status s = 
backup.second.LoadFromFile(options_.backup_dir); @@ -436,7 +468,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) { type == kCurrentFile); // rules: - // * if it's kTableFile, than it's shared + // * if it's kTableFile, then it's shared // * if it's kDescriptorFile, limit the size to manifest_file_size s = BackupFile(new_backup_id, &new_backup, @@ -444,7 +476,8 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) { db->GetName(), /* src_dir */ live_files[i], /* src_fname */ rate_limiter.get(), - (type == kDescriptorFile) ? manifest_file_size : 0); + (type == kDescriptorFile) ? manifest_file_size : 0, + options_.share_files_with_checksum && type == kTableFile); } // copy WAL files @@ -467,7 +500,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) { if (s.ok()) { // move tmp private backup to real backup folder s = backup_env_->RenameFile( - GetAbsolutePath(GetPrivateFileRel(new_backup_id, true)), // tmp + GetAbsolutePath(GetPrivateFileRel(new_backup_id, true)), // tmp GetAbsolutePath(GetPrivateFileRel(new_backup_id, false))); } @@ -614,10 +647,17 @@ Status BackupEngineImpl::RestoreDBFromBackup( std::string dst; // 1. extract the filename size_t slash = file.find_last_of('/'); - // file will either be shared/ or private// + // file will either be shared/{file}, shared_checksum/{file_checksum_size} + // or private/{backup_id}/{file} assert(slash != std::string::npos); dst = file.substr(slash + 1); + // if the file was in shared_checksum, extract the real file name + // in this case the file is {name}_{checksum}_{size}.{extension} + if (file.substr(0, slash) == GetSharedChecksumDirRel()) { + dst = GetFileFromChecksumFile(dst); + } + // 2. 
find the filetype uint64_t number; FileType type; @@ -785,12 +825,33 @@ Status BackupEngineImpl::BackupFile(BackupID backup_id, BackupMeta* backup, bool shared, const std::string& src_dir, const std::string& src_fname, RateLimiter* rate_limiter, - uint64_t size_limit) { + uint64_t size_limit, + bool shared_checksum) { assert(src_fname.size() > 0 && src_fname[0] == '/'); std::string dst_relative = src_fname.substr(1); std::string dst_relative_tmp; - if (shared) { + Status s; + uint64_t size; + uint32_t checksum_value = 0; + + if (shared && shared_checksum) { + // add checksum and file length to the file name + s = CalculateChecksum(src_dir + src_fname, + db_env_, + size_limit, + &checksum_value); + if (s.ok()) { + s = db_env_->GetFileSize(src_dir + src_fname, &size); + } + if (!s.ok()) { + return s; + } + dst_relative = GetSharedFileWithChecksum(dst_relative, checksum_value, + size); + dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true); + dst_relative = GetSharedFileWithChecksumRel(dst_relative, false); + } else if (shared) { dst_relative_tmp = GetSharedFileRel(dst_relative, true); dst_relative = GetSharedFileRel(dst_relative, false); } else { @@ -799,20 +860,23 @@ Status BackupEngineImpl::BackupFile(BackupID backup_id, BackupMeta* backup, } std::string dst_path = GetAbsolutePath(dst_relative); std::string dst_path_tmp = GetAbsolutePath(dst_relative_tmp); - Status s; - uint64_t size; // if it's shared, we also need to check if it exists -- if it does, // no need to copy it again - uint32_t checksum_value = 0; if (shared && backup_env_->FileExists(dst_path)) { - backup_env_->GetFileSize(dst_path, &size); // Ignore error - Log(options_.info_log, "%s already present, calculate checksum", - src_fname.c_str()); - s = CalculateChecksum(src_dir + src_fname, - db_env_, - size_limit, - &checksum_value); + if (shared_checksum) { + Log(options_.info_log, + "%s already present, with checksum %u and size %" PRIu64, + src_fname.c_str(), checksum_value, size); + } 
else { + backup_env_->GetFileSize(dst_path, &size); // Ignore error + Log(options_.info_log, "%s already present, calculate checksum", + src_fname.c_str()); + s = CalculateChecksum(src_dir + src_fname, + db_env_, + size_limit, + &checksum_value); + } } else { Log(options_.info_log, "Copying %s", src_fname.c_str()); s = CopyFile(src_dir + src_fname, @@ -945,7 +1009,7 @@ void BackupEngineImpl::GarbageCollection(bool full_scan) { BackupID backup_id = 0; bool tmp_dir = child.find(".tmp") != std::string::npos; sscanf(child.c_str(), "%u", &backup_id); - if (!tmp_dir && // if it's tmp_dir, delete it + if (!tmp_dir && // if it's tmp_dir, delete it (backup_id == 0 || backups_.find(backup_id) != backups_.end())) { // it's either not a number or it's still alive. continue continue; @@ -989,7 +1053,7 @@ Status BackupEngineImpl::BackupMeta::AddFile(const FileInfo& file_info) { if (itr->second.checksum_value != file_info.checksum_value) { return Status::Corruption("Checksum mismatch for existing backup file"); } - ++itr->second.refs; // increase refcount if already present + ++itr->second.refs; // increase refcount if already present } return Status::OK(); @@ -999,7 +1063,7 @@ void BackupEngineImpl::BackupMeta::Delete(bool delete_meta) { for (const auto& file : files_) { auto itr = file_infos_->find(file); assert(itr != file_infos_->end()); - --(itr->second.refs); // decrease refcount + --(itr->second.refs); // decrease refcount } files_.clear(); // delete meta file @@ -1038,11 +1102,11 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile( uint32_t num_files = 0; int bytes_read = 0; sscanf(data.data(), "%" PRId64 "%n", &timestamp_, &bytes_read); - data.remove_prefix(bytes_read + 1); // +1 for '\n' + data.remove_prefix(bytes_read + 1); // +1 for '\n' sscanf(data.data(), "%" PRIu64 "%n", &sequence_number_, &bytes_read); - data.remove_prefix(bytes_read + 1); // +1 for '\n' + data.remove_prefix(bytes_read + 1); // +1 for '\n' sscanf(data.data(), "%u%n", &num_files, &bytes_read); - 
data.remove_prefix(bytes_read + 1); // +1 for '\n' + data.remove_prefix(bytes_read + 1); // +1 for '\n' std::vector files; diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 0dd75c099..ef34cf064 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -7,6 +7,10 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include +#include +#include + #include "rocksdb/types.h" #include "rocksdb/transaction_log.h" #include "utilities/utility_db.h" @@ -16,9 +20,6 @@ #include "util/testutil.h" #include "util/auto_roll_logger.h" -#include -#include - namespace rocksdb { namespace { @@ -375,7 +376,8 @@ class BackupableDBTest { } void OpenBackupableDB(bool destroy_old_data = false, bool dummy = false, - bool share_table_files = true) { + bool share_table_files = true, + bool share_with_checksums = false) { // reset all the defaults test_backup_env_->SetLimitWrittenFiles(1000000); test_db_env_->SetLimitWrittenFiles(1000000); @@ -390,6 +392,7 @@ class BackupableDBTest { } backupable_options_->destroy_old_data = destroy_old_data; backupable_options_->share_table_files = share_table_files; + backupable_options_->share_files_with_checksum = share_with_checksums; db_.reset(new BackupableDB(db, *backupable_options_)); } @@ -794,6 +797,53 @@ TEST(BackupableDBTest, NoShareTableFiles) { } } +// Verify that you can backup and restore with share_files_with_checksum on +TEST(BackupableDBTest, ShareTableFilesWithChecksums) { + const int keys_iteration = 5000; + OpenBackupableDB(true, false, true, true); + for (int i = 0; i < 5; ++i) { + FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1)); + ASSERT_OK(db_->CreateNewBackup(!!(i % 2))); + } + CloseBackupableDB(); + + for (int i = 0; i < 5; ++i) { + AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1), + keys_iteration * 
6); + } +} + +// Verify that you can backup and restore using share_files_with_checksum set to +// false and then transition this option to true +TEST(BackupableDBTest, ShareTableFilesWithChecksumsTransition) { + const int keys_iteration = 5000; + // set share_files_with_checksum to false + OpenBackupableDB(true, false, true, false); + for (int i = 0; i < 5; ++i) { + FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1)); + ASSERT_OK(db_->CreateNewBackup(true)); + } + CloseBackupableDB(); + + for (int i = 0; i < 5; ++i) { + AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1), + keys_iteration * 6); + } + + // share_files_with_checksum = true; NOTE: destroy_old_data drops backups 1-5 + OpenBackupableDB(true, false, true, true); + for (int i = 5; i < 10; ++i) { + FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1)); + ASSERT_OK(db_->CreateNewBackup(true)); + } + CloseBackupableDB(); + + for (int i = 0; i < 5; ++i) { + AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 5 + 1), + keys_iteration * 11); + } +} + TEST(BackupableDBTest, DeleteTmpFiles) { OpenBackupableDB(); CloseBackupableDB();