Add BackupEngine API for backup file details (#8042)

Summary:
This API can be used for things like determining how much space
can be freed up by deleting a particular backup, etc.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8042

Test Plan:
validation of the API added to many existing backup unit
tests

Reviewed By: mrambacher

Differential Revision: D26936577

Pulled By: pdillinger

fbshipit-source-id: f0bbd90f0917b9781a6837652fb4616d9247816a
main
Peter Dillinger 4 years ago committed by Facebook GitHub Bot
parent 82b3888433
commit 589ea6bec2
  1. 1
      HISTORY.md
  2. 33
      include/rocksdb/utilities/backupable_db.h
  3. 54
      utilities/backupable/backupable_db.cc
  4. 67
      utilities/backupable/backupable_db_test.cc

@ -17,6 +17,7 @@
### New Features ### New Features
* Support compaction filters for the new implementation of BlobDB. Add `FilterBlobByKey()` to `CompactionFilter`. Subclasses can override this method so that compaction filters can determine whether the actual blob value has to be read during compaction. Use a new `kUndetermined` in `CompactionFilter::Decision` to indicate that further action is necessary for compaction filter to make a decision. * Support compaction filters for the new implementation of BlobDB. Add `FilterBlobByKey()` to `CompactionFilter`. Subclasses can override this method so that compaction filters can determine whether the actual blob value has to be read during compaction. Use a new `kUndetermined` in `CompactionFilter::Decision` to indicate that further action is necessary for compaction filter to make a decision.
* Add support to extend retrieval of checksums for blob files from the MANIFEST when checkpointing. During backup, rocksdb can detect corruption in blob files during file copies. * Add support to extend retrieval of checksums for blob files from the MANIFEST when checkpointing. During backup, rocksdb can detect corruption in blob files during file copies.
* Add an option to BackupEngine::GetBackupInfo to include the name and size of each backed-up file. Especially in the presence of file sharing among backups, this offers detailed insight into backup space usage.
* Enable backward iteration on keys with user-defined timestamps. * Enable backward iteration on keys with user-defined timestamps.
## 6.18.0 (02/19/2021) ## 6.18.0 (02/19/2021)

@ -269,16 +269,35 @@ struct RestoreOptions {
: keep_log_files(_keep_log_files) {} : keep_log_files(_keep_log_files) {}
}; };
// Describes one file that belongs to a backup. Instances are reported in
// BackupInfo::file_details when file details are requested from
// GetBackupInfo().
struct BackupFileInfo {
  // File name and path relative to the backup_dir directory.
  std::string relative_filename;

  // Size of the file in bytes, not including filesystem overheads.
  // Defaults to 0 so that a default-constructed BackupFileInfo never exposes
  // an indeterminate value.
  uint64_t size = 0;
};
typedef uint32_t BackupID; typedef uint32_t BackupID;
struct BackupInfo { struct BackupInfo {
BackupID backup_id; BackupID backup_id;
// Creation time, according to GetCurrentTime
int64_t timestamp; int64_t timestamp;
// Total size in bytes (based on file payloads, not including filesystem
// overheads or backup meta file)
uint64_t size; uint64_t size;
// Number of backed up files, some of which might be shared with other
// backups. Does not include backup meta file.
uint32_t number_files; uint32_t number_files;
// Backup API user metadata
std::string app_metadata; std::string app_metadata;
// Backup file details, if requested
std::vector<BackupFileInfo> file_details;
BackupInfo() {} BackupInfo() {}
BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size, BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size,
@ -334,12 +353,15 @@ class BackupEngineReadOnly {
// Returns info about backups in backup_info // Returns info about backups in backup_info
// You can GetBackupInfo safely, even with other BackupEngine performing // You can GetBackupInfo safely, even with other BackupEngine performing
// backups on the same directory // backups on the same directory.
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0; // Setting include_file_details=true provides information about each
// backed-up file in BackupInfo::file_details.
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info,
bool include_file_details = false) const = 0;
// Returns info about corrupt backups in corrupt_backups // Returns info about corrupt backups in corrupt_backups
virtual void GetCorruptedBackups( virtual void GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) = 0; std::vector<BackupID>* corrupt_backup_ids) const = 0;
// Restoring DB from backup is NOT safe when there is another BackupEngine // Restoring DB from backup is NOT safe when there is another BackupEngine
// running that might call DeleteBackup() or PurgeOldBackups(). It is caller's // running that might call DeleteBackup() or PurgeOldBackups(). It is caller's
@ -457,11 +479,12 @@ class BackupEngine {
virtual void StopBackup() = 0; virtual void StopBackup() = 0;
// Returns info about backups in backup_info // Returns info about backups in backup_info
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0; virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info,
bool include_file_details = false) const = 0;
// Returns info about corrupt backups in corrupt_backups // Returns info about corrupt backups in corrupt_backups
virtual void GetCorruptedBackups( virtual void GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) = 0; std::vector<BackupID>* corrupt_backup_ids) const = 0;
// restore from backup with backup_id // restore from backup with backup_id
// IMPORTANT -- if options_.share_table_files == true, // IMPORTANT -- if options_.share_table_files == true,

@ -130,9 +130,11 @@ class BackupEngineImpl : public BackupEngine {
// The returned BackupInfos are in chronological order, which means the // The returned BackupInfos are in chronological order, which means the
// latest backup comes last. // latest backup comes last.
void GetBackupInfo(std::vector<BackupInfo>* backup_info) override; void GetBackupInfo(std::vector<BackupInfo>* backup_info,
bool include_file_details) const override;
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) override; void GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) const override;
using BackupEngine::RestoreDBFromBackup; using BackupEngine::RestoreDBFromBackup;
Status RestoreDBFromBackup(const RestoreOptions& options, BackupID backup_id, Status RestoreDBFromBackup(const RestoreOptions& options, BackupID backup_id,
@ -223,13 +225,13 @@ class BackupEngineImpl : public BackupEngine {
uint64_t GetSize() const { uint64_t GetSize() const {
return size_; return size_;
} }
uint32_t GetNumberFiles() { return static_cast<uint32_t>(files_.size()); } uint32_t GetNumberFiles() const {
return static_cast<uint32_t>(files_.size());
}
void SetSequenceNumber(uint64_t sequence_number) { void SetSequenceNumber(uint64_t sequence_number) {
sequence_number_ = sequence_number; sequence_number_ = sequence_number;
} }
uint64_t GetSequenceNumber() { uint64_t GetSequenceNumber() const { return sequence_number_; }
return sequence_number_;
}
const std::string& GetAppMetadata() const { return app_metadata_; } const std::string& GetAppMetadata() const { return app_metadata_; }
@ -241,9 +243,7 @@ class BackupEngineImpl : public BackupEngine {
Status Delete(bool delete_meta = true); Status Delete(bool delete_meta = true);
bool Empty() { bool Empty() const { return files_.empty(); }
return files_.empty();
}
std::shared_ptr<FileInfo> GetFile(const std::string& filename) const { std::shared_ptr<FileInfo> GetFile(const std::string& filename) const {
auto it = file_infos_->find(filename); auto it = file_infos_->find(filename);
@ -252,7 +252,7 @@ class BackupEngineImpl : public BackupEngine {
return it->second; return it->second;
} }
const std::vector<std::shared_ptr<FileInfo>>& GetFiles() { const std::vector<std::shared_ptr<FileInfo>>& GetFiles() const {
return files_; return files_;
} }
@ -1278,21 +1278,31 @@ Status BackupEngineImpl::DeleteBackupInternal(BackupID backup_id) {
return Status::OK(); return Status::OK();
} }
void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info) { void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info,
bool include_file_details) const {
assert(initialized_); assert(initialized_);
backup_info->reserve(backups_.size()); backup_info->reserve(backups_.size());
for (auto& backup : backups_) { for (auto& backup : backups_) {
if (!backup.second->Empty()) { const BackupMeta& meta = *backup.second;
backup_info->push_back(BackupInfo( if (!meta.Empty()) {
backup.first, backup.second->GetTimestamp(), backup.second->GetSize(), backup_info->push_back(BackupInfo(backup.first, meta.GetTimestamp(),
backup.second->GetNumberFiles(), backup.second->GetAppMetadata())); meta.GetSize(), meta.GetNumberFiles(),
meta.GetAppMetadata()));
if (include_file_details) {
auto& file_details = backup_info->back().file_details;
file_details.reserve(meta.GetFiles().size());
for (auto& file_ptr : meta.GetFiles()) {
BackupFileInfo& info = *file_details.emplace(file_details.end());
info.relative_filename = file_ptr->filename;
info.size = file_ptr->size;
}
}
} }
} }
} }
void void BackupEngineImpl::GetCorruptedBackups(
BackupEngineImpl::GetCorruptedBackups( std::vector<BackupID>* corrupt_backup_ids) const {
std::vector<BackupID>* corrupt_backup_ids) {
assert(initialized_); assert(initialized_);
corrupt_backup_ids->reserve(corrupt_backups_.size()); corrupt_backup_ids->reserve(corrupt_backups_.size());
for (auto& backup : corrupt_backups_) { for (auto& backup : corrupt_backups_) {
@ -2305,11 +2315,13 @@ class BackupEngineReadOnlyImpl : public BackupEngineReadOnly {
// The returned BackupInfos are in chronological order, which means the // The returned BackupInfos are in chronological order, which means the
// latest backup comes last. // latest backup comes last.
void GetBackupInfo(std::vector<BackupInfo>* backup_info) override { void GetBackupInfo(std::vector<BackupInfo>* backup_info,
backup_engine_->GetBackupInfo(backup_info); bool include_file_details) const override {
backup_engine_->GetBackupInfo(backup_info, include_file_details);
} }
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) override { void GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) const override {
backup_engine_->GetCorruptedBackups(corrupt_backup_ids); backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
} }

@ -709,6 +709,69 @@ class BackupableDBTest : public testing::Test {
void CloseBackupEngine() { backup_engine_.reset(nullptr); } void CloseBackupEngine() { backup_engine_.reset(nullptr); }
// cross-cutting test of GetBackupInfo
void AssertBackupInfoConsistency() {
std::vector<BackupInfo> backup_info;
backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ true);
std::map<std::string, uint64_t> file_sizes;
// Find the files that are supposed to be there
for (auto& backup : backup_info) {
uint64_t sum_for_backup = 0;
for (auto& file : backup.file_details) {
auto e = file_sizes.find(file.relative_filename);
if (e == file_sizes.end()) {
// fprintf(stderr, "Adding %s -> %u\n",
// file.relative_filename.c_str(), (unsigned)file.size);
file_sizes[file.relative_filename] = file.size;
} else {
ASSERT_EQ(file_sizes[file.relative_filename], file.size);
}
sum_for_backup += file.size;
}
ASSERT_EQ(backup.size, sum_for_backup);
}
std::vector<BackupID> corrupt_backup_ids;
backup_engine_->GetCorruptedBackups(&corrupt_backup_ids);
bool has_corrupt = corrupt_backup_ids.size() > 0;
// Compare with what's in backup dir
std::vector<std::string> child_dirs;
ASSERT_OK(
test_backup_env_->GetChildren(backupdir_ + "/private", &child_dirs));
for (auto& dir : child_dirs) {
dir = "private/" + dir;
}
child_dirs.push_back("shared"); // might not exist
child_dirs.push_back("shared_checksum"); // might not exist
for (auto& dir : child_dirs) {
std::vector<std::string> children;
test_backup_env_->GetChildren(backupdir_ + "/" + dir, &children)
.PermitUncheckedError();
// fprintf(stderr, "ls %s\n", (backupdir_ + "/" + dir).c_str());
for (auto& file : children) {
uint64_t size;
size = UINT64_MAX; // appease clang-analyze
std::string rel_file = dir + "/" + file;
// fprintf(stderr, "stat %s\n", (backupdir_ + "/" + rel_file).c_str());
ASSERT_OK(
test_backup_env_->GetFileSize(backupdir_ + "/" + rel_file, &size));
auto e = file_sizes.find(rel_file);
if (e == file_sizes.end()) {
// The only case in which we should find files not reported
ASSERT_TRUE(has_corrupt);
} else {
ASSERT_EQ(e->second, size);
file_sizes.erase(e);
}
}
}
// Everything should have been matched
ASSERT_EQ(file_sizes.size(), 0);
}
// restores backup backup_id and asserts the existence of // restores backup backup_id and asserts the existence of
// [start_exist, end_exist> and not-existence of // [start_exist, end_exist> and not-existence of
// [end_exist, end> // [end_exist, end>
@ -724,6 +787,9 @@ class BackupableDBTest : public testing::Test {
opened_backup_engine = true; opened_backup_engine = true;
OpenBackupEngine(); OpenBackupEngine();
} }
AssertBackupInfoConsistency();
// Now perform restore
if (backup_id > 0) { if (backup_id > 0) {
ASSERT_OK(backup_engine_->RestoreDBFromBackup(backup_id, dbname_, dbname_, ASSERT_OK(backup_engine_->RestoreDBFromBackup(backup_id, dbname_, dbname_,
restore_options)); restore_options));
@ -732,6 +798,7 @@ class BackupableDBTest : public testing::Test {
restore_options)); restore_options));
} }
DB* db = OpenDB(); DB* db = OpenDB();
// Check DB contents
AssertExists(db, start_exist, end_exist); AssertExists(db, start_exist, end_exist);
if (end != 0) { if (end != 0) {
AssertEmpty(db, end_exist, end); AssertEmpty(db, end_exist, end);

Loading…
Cancel
Save