Add share_files_with_checksum option to BackupEngine

Summary: added a new option to BackupEngine: if share_files_with_checksum is set to true, sst files are stored in shared_checksum/ and are identified by the triple (file name, checksum, file size) instead of just the file name. This option is targeted at distributed databases that want to back up their primary replica.

Test Plan: unit tests and tested backup and restore on a distributed rocksdb

Reviewers: igor

Reviewed By: igor

Differential Revision: https://reviews.facebook.net/D18393
main
Benjamin Renard 10 years ago
parent 77edbfd642
commit 41e5cf2392
  1. 26
      include/utilities/backupable_db.h
  2. 126
      utilities/backupable/backupable_db.cc
  3. 58
      utilities/backupable/backupable_db_test.cc

@ -7,16 +7,19 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef ROCKSDB_LITE
#pragma once
#include "utilities/stackable_db.h"
#include "rocksdb/env.h"
#include "rocksdb/status.h"
#ifndef ROCKSDB_LITE
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <string>
#include <map>
#include <vector>
#include "utilities/stackable_db.h"
#include "rocksdb/env.h"
#include "rocksdb/status.h"
namespace rocksdb {
struct BackupableDBOptions {
@ -72,6 +75,14 @@ struct BackupableDBOptions {
// Default: 0
uint64_t restore_rate_limit;
// Only used if share_table_files is set to true. If true, will consider that
// backups can come from different databases, hence a sst is not uniquely
// identified by its name, but by the triple (file name, crc32, file length)
// Default: false
// Note: this is an experimental option, and you'll need to set it manually
// *turn it on only if you know what you're doing*
bool share_files_with_checksum;
void Dump(Logger* logger) const;
explicit BackupableDBOptions(const std::string& _backup_dir,
@ -90,7 +101,10 @@ struct BackupableDBOptions {
destroy_old_data(_destroy_old_data),
backup_log_files(_backup_log_files),
backup_rate_limit(_backup_rate_limit),
restore_rate_limit(_restore_rate_limit) {}
restore_rate_limit(_restore_rate_limit),
share_files_with_checksum(false) {
assert(share_table_files || !share_files_with_checksum);
}
};
struct RestoreOptions {
@ -233,5 +247,5 @@ class RestoreBackupableDB {
BackupEngine* backup_engine_;
};
} // rocksdb namespace
} // namespace rocksdb
#endif // ROCKSDB_LITE

@ -175,8 +175,8 @@ class BackupEngineImpl : public BackupEngine {
std::unordered_map<std::string, FileInfo>* file_infos_;
Env* env_;
static const size_t max_backup_meta_file_size_ = 10 * 1024 * 1024; // 10MB
}; // BackupMeta
static const size_t max_backup_meta_file_size_ = 10 * 1024 * 1024; // 10MB
}; // BackupMeta
inline std::string GetAbsolutePath(
const std::string &relative_path = "") const {
@ -186,6 +186,9 @@ class BackupEngineImpl : public BackupEngine {
// Returns the relative name of the directory that holds private backup
// files. Callers turn it into a full path with GetAbsolutePath().
inline std::string GetPrivateDirRel() const {
  const std::string private_dir("private");
  return private_dir;
}
// Returns the relative name of the directory that holds shared files whose
// names carry a checksum and size suffix.
inline std::string GetSharedChecksumDirRel() const {
  const std::string shared_checksum_dir("shared_checksum");
  return shared_checksum_dir;
}
inline std::string GetPrivateFileRel(BackupID backup_id,
bool tmp = false,
const std::string& file = "") const {
@ -198,6 +201,27 @@ class BackupEngineImpl : public BackupEngine {
assert(file.size() == 0 || file[0] != '/');
return "shared/" + file + (tmp ? ".tmp" : "");
}
// Builds the relative path of `file` inside the shared-with-checksum
// directory.
//   file: file name without a leading '/'; may be empty, in which case the
//         result is the directory name followed by '/'.
//   tmp:  when true, ".tmp" is appended to the resulting path.
inline std::string GetSharedFileWithChecksumRel(const std::string& file = "",
                                                bool tmp = false) const {
  assert(file.empty() || file.front() != '/');
  std::string rel_path = GetSharedChecksumDirRel();
  rel_path += "/";
  rel_path += file;
  if (tmp) {
    rel_path += ".tmp";
  }
  return rel_path;
}
// Returns `file` with "_<checksum_value>_<file_size>" inserted right before
// its last '.', e.g. "000010.sst" -> "000010_<checksum>_<size>.sst".
//   file:           file name without a leading '/'.
//   checksum_value: checksum to embed in the name.
//   file_size:      file size to embed in the name.
// If `file` contains no '.', the suffix is appended at the end of the name
// instead; std::string::insert() would otherwise be called with npos and
// throw std::out_of_range.
inline std::string GetSharedFileWithChecksum(const std::string& file,
                                             const uint32_t checksum_value,
                                             const uint64_t file_size) const {
  assert(file.size() == 0 || file[0] != '/');
  const std::string suffix = "_" + std::to_string(checksum_value) + "_" +
                             std::to_string(file_size);
  std::string file_copy = file;
  size_t insert_pos = file_copy.find_last_of('.');
  if (insert_pos == std::string::npos) {
    // no extension: put the suffix at the very end
    insert_pos = file_copy.size();
  }
  file_copy.insert(insert_pos, suffix);
  return file_copy;
}
// Strips the checksum/size decoration from a file name, i.e. removes
// everything from the first '_' up to (but not including) the last '.':
// "<number>_<checksum>_<size>.<type>" -> "<number>.<type>".
//   file: file name without a leading '/'.
// If `file` contains no '_' it is returned unchanged; std::string::erase()
// would otherwise be called with npos and throw std::out_of_range.
// If there is no '.' after the first '_', everything from the '_' to the end
// of the name is removed (this was already the effective behavior, via
// unsigned wrap-around of the erase count).
inline std::string GetFileFromChecksumFile(const std::string& file) const {
  assert(file.size() == 0 || file[0] != '/');
  std::string file_copy = file;
  const size_t first_underscore = file_copy.find_first_of('_');
  if (first_underscore == std::string::npos) {
    // nothing to strip
    return file_copy;
  }
  const size_t last_dot = file_copy.find_last_of('.');
  size_t erase_count = std::string::npos;  // default: erase to the end
  if (last_dot != std::string::npos && last_dot > first_underscore) {
    erase_count = last_dot - first_underscore;
  }
  file_copy.erase(first_underscore, erase_count);
  return file_copy;
}
// Returns the result of GetAbsolutePath() for the "LATEST_BACKUP" file.
//   tmp: when true, the ".tmp" variant of the file name is used.
inline std::string GetLatestBackupFile(bool tmp = false) const {
  std::string latest_name("LATEST_BACKUP");
  if (tmp) {
    latest_name += ".tmp";
  }
  return GetAbsolutePath(latest_name);
}
@ -225,9 +249,10 @@ class BackupEngineImpl : public BackupEngine {
BackupMeta* backup,
bool shared,
const std::string& src_dir,
const std::string& src_fname, // starts with "/"
const std::string& src_fname, // starts with "/"
RateLimiter* rate_limiter,
uint64_t size_limit = 0);
uint64_t size_limit = 0,
bool shared_checksum = false);
Status CalculateChecksum(const std::string& src,
Env* src_env,
@ -286,9 +311,16 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
backup_env_->CreateDirIfMissing(GetAbsolutePath());
backup_env_->NewDirectory(GetAbsolutePath(), &backup_directory_);
if (options_.share_table_files) {
backup_env_->CreateDirIfMissing(GetAbsolutePath(GetSharedFileRel()));
backup_env_->NewDirectory(GetAbsolutePath(GetSharedFileRel()),
&shared_directory_);
if (options_.share_files_with_checksum) {
backup_env_->CreateDirIfMissing(GetAbsolutePath(
GetSharedFileWithChecksumRel()));
backup_env_->NewDirectory(GetAbsolutePath(
GetSharedFileWithChecksumRel()), &shared_directory_);
} else {
backup_env_->CreateDirIfMissing(GetAbsolutePath(GetSharedFileRel()));
backup_env_->NewDirectory(GetAbsolutePath(GetSharedFileRel()),
&shared_directory_);
}
}
backup_env_->CreateDirIfMissing(GetAbsolutePath(GetPrivateDirRel()));
backup_env_->NewDirectory(GetAbsolutePath(GetPrivateDirRel()),
@ -316,7 +348,7 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
&backuped_file_infos_, backup_env_)));
}
if (options_.destroy_old_data) { // Destory old data
if (options_.destroy_old_data) { // Destory old data
assert(!read_only_);
for (auto& backup : backups_) {
backup.second.Delete();
@ -326,7 +358,7 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
// start from beginning
latest_backup_id_ = 0;
// GarbageCollection() will do the actual deletion
} else { // Load data from storage
} else { // Load data from storage
// load the backups if any
for (auto& backup : backups_) {
Status s = backup.second.LoadFromFile(options_.backup_dir);
@ -436,7 +468,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
type == kCurrentFile);
// rules:
// * if it's kTableFile, than it's shared
// * if it's kTableFile, then it's shared
// * if it's kDescriptorFile, limit the size to manifest_file_size
s = BackupFile(new_backup_id,
&new_backup,
@ -444,7 +476,8 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
db->GetName(), /* src_dir */
live_files[i], /* src_fname */
rate_limiter.get(),
(type == kDescriptorFile) ? manifest_file_size : 0);
(type == kDescriptorFile) ? manifest_file_size : 0,
options_.share_files_with_checksum && type == kTableFile);
}
// copy WAL files
@ -467,7 +500,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
if (s.ok()) {
// move tmp private backup to real backup folder
s = backup_env_->RenameFile(
GetAbsolutePath(GetPrivateFileRel(new_backup_id, true)), // tmp
GetAbsolutePath(GetPrivateFileRel(new_backup_id, true)), // tmp
GetAbsolutePath(GetPrivateFileRel(new_backup_id, false)));
}
@ -614,10 +647,17 @@ Status BackupEngineImpl::RestoreDBFromBackup(
std::string dst;
// 1. extract the filename
size_t slash = file.find_last_of('/');
// file will either be shared/<file> or private/<number>/<file>
// file will either be shared/<file>, shared_checksum/<file_crc32_size>
// or private/<number>/<file>
assert(slash != std::string::npos);
dst = file.substr(slash + 1);
// if the file was in shared_checksum, extract the real file name
// in this case the file is <number>_<checksum>_<size>.<type>
if (file.substr(0, slash) == GetSharedChecksumDirRel()) {
dst = GetFileFromChecksumFile(dst);
}
// 2. find the filetype
uint64_t number;
FileType type;
@ -785,12 +825,33 @@ Status BackupEngineImpl::BackupFile(BackupID backup_id, BackupMeta* backup,
bool shared, const std::string& src_dir,
const std::string& src_fname,
RateLimiter* rate_limiter,
uint64_t size_limit) {
uint64_t size_limit,
bool shared_checksum) {
assert(src_fname.size() > 0 && src_fname[0] == '/');
std::string dst_relative = src_fname.substr(1);
std::string dst_relative_tmp;
if (shared) {
Status s;
uint64_t size;
uint32_t checksum_value = 0;
if (shared && shared_checksum) {
// add checksum and file length to the file name
s = CalculateChecksum(src_dir + src_fname,
db_env_,
size_limit,
&checksum_value);
if (s.ok()) {
s = db_env_->GetFileSize(src_dir + src_fname, &size);
}
if (!s.ok()) {
return s;
}
dst_relative = GetSharedFileWithChecksum(dst_relative, checksum_value,
size);
dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true);
dst_relative = GetSharedFileWithChecksumRel(dst_relative, false);
} else if (shared) {
dst_relative_tmp = GetSharedFileRel(dst_relative, true);
dst_relative = GetSharedFileRel(dst_relative, false);
} else {
@ -799,20 +860,23 @@ Status BackupEngineImpl::BackupFile(BackupID backup_id, BackupMeta* backup,
}
std::string dst_path = GetAbsolutePath(dst_relative);
std::string dst_path_tmp = GetAbsolutePath(dst_relative_tmp);
Status s;
uint64_t size;
// if it's shared, we also need to check if it exists -- if it does,
// no need to copy it again
uint32_t checksum_value = 0;
if (shared && backup_env_->FileExists(dst_path)) {
backup_env_->GetFileSize(dst_path, &size); // Ignore error
Log(options_.info_log, "%s already present, calculate checksum",
src_fname.c_str());
s = CalculateChecksum(src_dir + src_fname,
db_env_,
size_limit,
&checksum_value);
if (shared_checksum) {
Log(options_.info_log,
"%s already present, with checksum %u and size %" PRIu64,
src_fname.c_str(), checksum_value, size);
} else {
backup_env_->GetFileSize(dst_path, &size); // Ignore error
Log(options_.info_log, "%s already present, calculate checksum",
src_fname.c_str());
s = CalculateChecksum(src_dir + src_fname,
db_env_,
size_limit,
&checksum_value);
}
} else {
Log(options_.info_log, "Copying %s", src_fname.c_str());
s = CopyFile(src_dir + src_fname,
@ -945,7 +1009,7 @@ void BackupEngineImpl::GarbageCollection(bool full_scan) {
BackupID backup_id = 0;
bool tmp_dir = child.find(".tmp") != std::string::npos;
sscanf(child.c_str(), "%u", &backup_id);
if (!tmp_dir && // if it's tmp_dir, delete it
if (!tmp_dir && // if it's tmp_dir, delete it
(backup_id == 0 || backups_.find(backup_id) != backups_.end())) {
// it's either not a number or it's still alive. continue
continue;
@ -989,7 +1053,7 @@ Status BackupEngineImpl::BackupMeta::AddFile(const FileInfo& file_info) {
if (itr->second.checksum_value != file_info.checksum_value) {
return Status::Corruption("Checksum mismatch for existing backup file");
}
++itr->second.refs; // increase refcount if already present
++itr->second.refs; // increase refcount if already present
}
return Status::OK();
@ -999,7 +1063,7 @@ void BackupEngineImpl::BackupMeta::Delete(bool delete_meta) {
for (const auto& file : files_) {
auto itr = file_infos_->find(file);
assert(itr != file_infos_->end());
--(itr->second.refs); // decrease refcount
--(itr->second.refs); // decrease refcount
}
files_.clear();
// delete meta file
@ -1038,11 +1102,11 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
uint32_t num_files = 0;
int bytes_read = 0;
sscanf(data.data(), "%" PRId64 "%n", &timestamp_, &bytes_read);
data.remove_prefix(bytes_read + 1); // +1 for '\n'
data.remove_prefix(bytes_read + 1); // +1 for '\n'
sscanf(data.data(), "%" PRIu64 "%n", &sequence_number_, &bytes_read);
data.remove_prefix(bytes_read + 1); // +1 for '\n'
data.remove_prefix(bytes_read + 1); // +1 for '\n'
sscanf(data.data(), "%u%n", &num_files, &bytes_read);
data.remove_prefix(bytes_read + 1); // +1 for '\n'
data.remove_prefix(bytes_read + 1); // +1 for '\n'
std::vector<FileInfo> files;

@ -7,6 +7,10 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <string>
#include <algorithm>
#include <iostream>
#include "rocksdb/types.h"
#include "rocksdb/transaction_log.h"
#include "utilities/utility_db.h"
@ -16,9 +20,6 @@
#include "util/testutil.h"
#include "util/auto_roll_logger.h"
#include <string>
#include <algorithm>
namespace rocksdb {
namespace {
@ -375,7 +376,8 @@ class BackupableDBTest {
}
void OpenBackupableDB(bool destroy_old_data = false, bool dummy = false,
bool share_table_files = true) {
bool share_table_files = true,
bool share_with_checksums = false) {
// reset all the defaults
test_backup_env_->SetLimitWrittenFiles(1000000);
test_db_env_->SetLimitWrittenFiles(1000000);
@ -390,6 +392,7 @@ class BackupableDBTest {
}
backupable_options_->destroy_old_data = destroy_old_data;
backupable_options_->share_table_files = share_table_files;
backupable_options_->share_files_with_checksum = share_with_checksums;
db_.reset(new BackupableDB(db, *backupable_options_));
}
@ -794,6 +797,53 @@ TEST(BackupableDBTest, NoShareTableFiles) {
}
}
// Backup and restore must work when share_files_with_checksum is enabled.
TEST(BackupableDBTest, ShareTableFilesWithChecksums) {
  const int keys_iteration = 5000;
  OpenBackupableDB(true /* destroy_old_data */, false /* dummy */,
                   true /* share_table_files */,
                   true /* share_with_checksums */);
  // create five backups, each one adding another batch of keys
  for (int round = 0; round < 5; ++round) {
    const int first_key = keys_iteration * round;
    const int end_key = keys_iteration * (round + 1);
    FillDB(db_.get(), first_key, end_key);
    // the argument alternates: false on even rounds, true on odd rounds
    const bool odd_round = (round % 2) != 0;
    ASSERT_OK(db_->CreateNewBackup(odd_round));
  }
  CloseBackupableDB();

  // check every backup (ids start at 1) against the keys written so far
  for (int round = 0; round < 5; ++round) {
    AssertBackupConsistency(round + 1, 0, keys_iteration * (round + 1),
                            keys_iteration * 6);
  }
}
// Backup and restore must work first with share_files_with_checksum turned
// off, and then again after the option has been switched on.
TEST(BackupableDBTest, ShareTableFilesWithChecksumsTransition) {
  const int keys_iteration = 5000;

  // phase 1: share_files_with_checksum == false
  OpenBackupableDB(true /* destroy_old_data */, false /* dummy */,
                   true /* share_table_files */,
                   false /* share_with_checksums */);
  for (int round = 0; round < 5; ++round) {
    const int first_key = keys_iteration * round;
    const int end_key = keys_iteration * (round + 1);
    FillDB(db_.get(), first_key, end_key);
    ASSERT_OK(db_->CreateNewBackup(true));
  }
  CloseBackupableDB();

  for (int round = 0; round < 5; ++round) {
    AssertBackupConsistency(round + 1, 0, keys_iteration * (round + 1),
                            keys_iteration * 6);
  }

  // phase 2: share_files_with_checksum == true, five more backups
  OpenBackupableDB(true /* destroy_old_data */, false /* dummy */,
                   true /* share_table_files */,
                   true /* share_with_checksums */);
  for (int round = 5; round < 10; ++round) {
    const int first_key = keys_iteration * round;
    const int end_key = keys_iteration * (round + 1);
    FillDB(db_.get(), first_key, end_key);
    ASSERT_OK(db_->CreateNewBackup(true));
  }
  CloseBackupableDB();

  for (int round = 0; round < 5; ++round) {
    AssertBackupConsistency(round + 1, 0, keys_iteration * (round + 5 + 1),
                            keys_iteration * 11);
  }
}
TEST(BackupableDBTest, DeleteTmpFiles) {
OpenBackupableDB();
CloseBackupableDB();

Loading…
Cancel
Save