Misc Backup API enhancements (#8170)

Summary:
* CreateNewBackup(WithMetadata) returning the BackupID of new backup
through optional new output param. This is especially useful with the
new multithreading support, so that you can transactionally determine the
ID of a backup you create.
* GetBackupInfo / GetLatestBackupInfo for individual backups, so that
you don't have to comb through a vector of backups if you don't want to.

Updated HISTORY.md (including re: BlobDB support as new feature)

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8170

Test Plan:
Added test logic to existing tests, to minimize increase in
cost of running tests

Reviewed By: zhichao-cao

Differential Revision: D27680410

Pulled By: pdillinger

fbshipit-source-id: 1fc45b73d81aae293ccd4a43d9583d7fd915d3eb
main
Peter Dillinger 4 years ago committed by Facebook GitHub Bot
parent 8972dd1ffa
commit bb75092574
  1. 4
      HISTORY.md
  2. 26
      include/rocksdb/utilities/backupable_db.h
  3. 103
      utilities/backupable/backupable_db.cc
  4. 81
      utilities/backupable/backupable_db_test.cc

@ -18,11 +18,13 @@
### Public API change ### Public API change
* Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead. * Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead.
* Update DB::StartIOTrace and remove Env object from the arguments as it's redundant and DB already has Env object that is passed down to IOTracer::StartIOTrace * Update DB::StartIOTrace and remove Env object from the arguments as it's redundant and DB already has Env object that is passed down to IOTracer::StartIOTrace
* For new integrated BlobDB, add support for blob files for backup/restore like table files. Because of current limitations, blob files always use the kLegacyCrc32cAndFileSize naming scheme, and incremental backups must read and checksum all blob files in a DB, even for files that are already backed up.
* Added `FlushReason::kWalFull`, which is reported when a memtable is flushed due to the WAL reaching its size limit; those flushes were previously reported as `FlushReason::kWriteBufferManager`. Also, changed the reason for flushes triggered by the write buffer manager to `FlushReason::kWriteBufferManager`; they were previously reported as `FlushReason::kWriteBufferFull`. * Added `FlushReason::kWalFull`, which is reported when a memtable is flushed due to the WAL reaching its size limit; those flushes were previously reported as `FlushReason::kWriteBufferManager`. Also, changed the reason for flushes triggered by the write buffer manager to `FlushReason::kWriteBufferManager`; they were previously reported as `FlushReason::kWriteBufferFull`.
### New Features ### New Features
* Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true. * Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true.
* Added BackupEngine support for integrated BlobDB, with blob files shared between backups when table files are shared. Because of current limitations, blob files always use the kLegacyCrc32cAndFileSize naming scheme, and incremental backups must read and checksum all blob files in a DB, even for files that are already backed up.
* Added an optional output parameter to BackupEngine::CreateNewBackup(WithMetadata) to return the BackupID of the new backup.
* Added BackupEngine::GetBackupInfo / GetLatestBackupInfo for querying individual backups.
## 6.19.0 (03/21/2021) ## 6.19.0 (03/21/2021)
### Bug Fixes ### Bug Fixes

@ -371,6 +371,21 @@ class BackupEngineReadOnlyBase {
public: public:
virtual ~BackupEngineReadOnlyBase() {} virtual ~BackupEngineReadOnlyBase() {}
// Returns info about the latest good backup in backup_info, or NotFound
// if no good backup exists.
// Setting include_file_details=true provides information about each
// backed-up file in BackupInfo::file_details and more.
virtual Status GetLatestBackupInfo(
BackupInfo* backup_info, bool include_file_details = false) const = 0;
// Returns info about a specific backup in backup_info, or NotFound
// or Corruption status if the requested backup id does not exist or is
// known corrupt.
// Setting include_file_details=true provides information about each
// backed-up file in BackupInfo::file_details and more.
virtual Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info,
bool include_file_details = false) const = 0;
// Returns info about backups in backup_info // Returns info about backups in backup_info
// Setting include_file_details=true provides information about each // Setting include_file_details=true provides information about each
// backed-up file in BackupInfo::file_details and more. // backed-up file in BackupInfo::file_details and more.
@ -439,7 +454,7 @@ class BackupEngineAppendOnlyBase {
// same as CreateNewBackup, but stores extra application metadata. // same as CreateNewBackup, but stores extra application metadata.
virtual Status CreateNewBackupWithMetadata( virtual Status CreateNewBackupWithMetadata(
const CreateBackupOptions& options, DB* db, const CreateBackupOptions& options, DB* db,
const std::string& app_metadata) = 0; const std::string& app_metadata, BackupID* new_backup_id = nullptr) = 0;
// keep here for backward compatibility. // keep here for backward compatibility.
virtual Status CreateNewBackupWithMetadata( virtual Status CreateNewBackupWithMetadata(
@ -451,9 +466,12 @@ class BackupEngineAppendOnlyBase {
return CreateNewBackupWithMetadata(options, db, app_metadata); return CreateNewBackupWithMetadata(options, db, app_metadata);
} }
// Captures the state of the database by creating a new (latest) backup // Captures the state of the database by creating a new (latest) backup.
virtual Status CreateNewBackup(const CreateBackupOptions& options, DB* db) { // On success (OK status), the BackupID of the new backup is saved to
return CreateNewBackupWithMetadata(options, db, ""); // *new_backup_id when not nullptr.
virtual Status CreateNewBackup(const CreateBackupOptions& options, DB* db,
BackupID* new_backup_id = nullptr) {
return CreateNewBackupWithMetadata(options, db, "", new_backup_id);
} }
// keep here for backward compatibility. // keep here for backward compatibility.

@ -129,7 +129,8 @@ class BackupEngineImpl {
~BackupEngineImpl(); ~BackupEngineImpl();
Status CreateNewBackupWithMetadata(const CreateBackupOptions& options, DB* db, Status CreateNewBackupWithMetadata(const CreateBackupOptions& options, DB* db,
const std::string& app_metadata); const std::string& app_metadata,
BackupID* new_backup_id_ptr);
Status PurgeOldBackups(uint32_t num_backups_to_keep); Status PurgeOldBackups(uint32_t num_backups_to_keep);
@ -144,6 +145,9 @@ class BackupEngineImpl {
void GetBackupInfo(std::vector<BackupInfo>* backup_info, void GetBackupInfo(std::vector<BackupInfo>* backup_info,
bool include_file_details) const; bool include_file_details) const;
Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info,
bool include_file_details = false) const;
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) const; void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) const;
Status RestoreDBFromBackup(const RestoreOptions& options, BackupID backup_id, Status RestoreDBFromBackup(const RestoreOptions& options, BackupID backup_id,
@ -460,6 +464,10 @@ class BackupEngineImpl {
mutable std::shared_ptr<Env> env_for_open_; mutable std::shared_ptr<Env> env_for_open_;
}; // BackupMeta }; // BackupMeta
void SetBackupInfoFromBackupMeta(BackupID id, const BackupMeta& meta,
BackupInfo* backup_info,
bool include_file_details) const;
inline std::string GetAbsolutePath( inline std::string GetAbsolutePath(
const std::string &relative_path = "") const { const std::string &relative_path = "") const {
assert(relative_path.size() == 0 || relative_path[0] != '/'); assert(relative_path.size() == 0 || relative_path[0] != '/');
@ -802,9 +810,11 @@ class BackupEngineImplThreadSafe : public BackupEngine,
using BackupEngine::CreateNewBackupWithMetadata; using BackupEngine::CreateNewBackupWithMetadata;
Status CreateNewBackupWithMetadata(const CreateBackupOptions& options, DB* db, Status CreateNewBackupWithMetadata(const CreateBackupOptions& options, DB* db,
const std::string& app_metadata) override { const std::string& app_metadata,
BackupID* new_backup_id) override {
WriteLock lock(&mutex_); WriteLock lock(&mutex_);
return impl_.CreateNewBackupWithMetadata(options, db, app_metadata); return impl_.CreateNewBackupWithMetadata(options, db, app_metadata,
new_backup_id);
} }
Status PurgeOldBackups(uint32_t num_backups_to_keep) override { Status PurgeOldBackups(uint32_t num_backups_to_keep) override {
@ -827,6 +837,19 @@ class BackupEngineImplThreadSafe : public BackupEngine,
return impl_.GarbageCollect(); return impl_.GarbageCollect();
} }
// Locked entry point for querying the latest backup: holds a read lock on
// mutex_ for the duration of the call and forwards to impl_.GetBackupInfo,
// passing kLatestBackupIDMarker in place of a concrete backup id.
// The Status produced by impl_ is returned unchanged.
Status GetLatestBackupInfo(BackupInfo* backup_info,
                           bool include_file_details = false) const override {
  ReadLock read_guard(&mutex_);
  return impl_.GetBackupInfo(kLatestBackupIDMarker, backup_info,
                             include_file_details);
}
// Locked entry point for querying one backup by id: holds a read lock on
// mutex_ for the duration of the call and forwards all arguments to
// impl_.GetBackupInfo. The Status produced by impl_ is returned unchanged.
Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info,
                     bool include_file_details = false) const override {
  ReadLock read_guard(&mutex_);
  return impl_.GetBackupInfo(backup_id, backup_info, include_file_details);
}
void GetBackupInfo(std::vector<BackupInfo>* backup_info, void GetBackupInfo(std::vector<BackupInfo>* backup_info,
bool include_file_details) const override { bool include_file_details) const override {
ReadLock lock(&mutex_); ReadLock lock(&mutex_);
@ -1181,8 +1204,8 @@ Status BackupEngineImpl::Initialize() {
} }
Status BackupEngineImpl::CreateNewBackupWithMetadata( Status BackupEngineImpl::CreateNewBackupWithMetadata(
const CreateBackupOptions& options, DB* db, const CreateBackupOptions& options, DB* db, const std::string& app_metadata,
const std::string& app_metadata) { BackupID* new_backup_id_ptr) {
assert(initialized_); assert(initialized_);
assert(!read_only_); assert(!read_only_);
if (app_metadata.size() > kMaxAppMetaSize) { if (app_metadata.size() > kMaxAppMetaSize) {
@ -1417,6 +1440,9 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
// in the LATEST_BACKUP file // in the LATEST_BACKUP file
latest_backup_id_ = new_backup_id; latest_backup_id_ = new_backup_id;
latest_valid_backup_id_ = new_backup_id; latest_valid_backup_id_ = new_backup_id;
if (new_backup_id_ptr) {
*new_backup_id_ptr = new_backup_id;
}
ROCKS_LOG_INFO(options_.info_log, "Backup DONE. All is good"); ROCKS_LOG_INFO(options_.info_log, "Backup DONE. All is good");
// backup_speed is in byte/second // backup_speed is in byte/second
@ -1546,30 +1572,61 @@ Status BackupEngineImpl::DeleteBackupNoGC(BackupID backup_id) {
return Status::OK(); return Status::OK();
} }
// Overwrites *backup_info with a BackupInfo built from `meta` for backup `id`
// (timestamp, size, number of files, app metadata).
//
// When include_file_details is true, additionally:
//  * appends one BackupFileInfo (relative filename + size) per file in `meta`,
//  * sets name_for_open to the absolute private path of the backup with its
//    final character removed,
//  * copies the env_for_open handle from `meta`.
// When false, those fields keep whatever the freshly constructed BackupInfo
// gave them.
void BackupEngineImpl::SetBackupInfoFromBackupMeta(
    BackupID id, const BackupMeta& meta, BackupInfo* backup_info,
    bool include_file_details) const {
  *backup_info = BackupInfo(id, meta.GetTimestamp(), meta.GetSize(),
                            meta.GetNumberFiles(), meta.GetAppMetadata());
  if (!include_file_details) {
    return;
  }

  const auto& backed_up_files = meta.GetFiles();
  auto& details = backup_info->file_details;
  details.reserve(backed_up_files.size());
  for (const auto& file : backed_up_files) {
    details.emplace_back();
    BackupFileInfo& entry = details.back();
    entry.relative_filename = file->filename;
    entry.size = file->size;
  }

  backup_info->name_for_open = GetAbsolutePath(GetPrivateFileRel(id));
  // The path comes back with a trailing '/', which is dropped here.
  backup_info->name_for_open.pop_back();
  backup_info->env_for_open = meta.GetEnvForOpen();
}
// Looks up a single backup and fills *backup_info for it.
//
// backup_id may be kLatestBackupIDMarker, in which case the id stored in
// latest_valid_backup_id_ is used instead.
//
// Returns:
//  * Corruption (carrying the recorded status text) if the id is listed in
//    corrupt_backups_,
//  * NotFound("Backup not found") if the id is absent from backups_ or its
//    entry reports Empty(),
//  * OK otherwise, after populating *backup_info (with per-file details when
//    include_file_details is true).
// On a non-OK return *backup_info is not written.
Status BackupEngineImpl::GetBackupInfo(BackupID backup_id,
                                       BackupInfo* backup_info,
                                       bool include_file_details) const {
  assert(initialized_);

  // Note: latest_valid_backup_id_ is read here, i.e. inside of the lock.
  const BackupID target_id = (backup_id == kLatestBackupIDMarker)
                                 ? latest_valid_backup_id_
                                 : backup_id;

  const auto corrupt_pos = corrupt_backups_.find(target_id);
  if (corrupt_pos != corrupt_backups_.end()) {
    return Status::Corruption(corrupt_pos->second.first.ToString());
  }

  const auto found = backups_.find(target_id);
  if (found == backups_.end() || found->second->Empty()) {
    return Status::NotFound("Backup not found");
  }

  SetBackupInfoFromBackupMeta(target_id, *found->second, backup_info,
                              include_file_details);
  return Status::OK();
}
void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info, void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info,
bool include_file_details) const { bool include_file_details) const {
assert(initialized_); assert(initialized_);
backup_info->reserve(backups_.size()); backup_info->resize(backups_.size());
size_t i = 0;
for (auto& backup : backups_) { for (auto& backup : backups_) {
const BackupMeta& meta = *backup.second; const BackupMeta& meta = *backup.second;
if (!meta.Empty()) { if (!meta.Empty()) {
backup_info->push_back(BackupInfo(backup.first, meta.GetTimestamp(), SetBackupInfoFromBackupMeta(backup.first, meta, &backup_info->at(i++),
meta.GetSize(), meta.GetNumberFiles(), include_file_details);
meta.GetAppMetadata()));
BackupInfo& binfo = backup_info->back();
if (include_file_details) {
auto& file_details = binfo.file_details;
file_details.reserve(meta.GetFiles().size());
for (auto& file_ptr : meta.GetFiles()) {
BackupFileInfo& finfo = *file_details.emplace(file_details.end());
finfo.relative_filename = file_ptr->filename;
finfo.size = file_ptr->size;
}
binfo.name_for_open =
GetAbsolutePath(GetPrivateFileRel(binfo.backup_id));
binfo.name_for_open.pop_back(); // remove trailing '/'
binfo.env_for_open = meta.GetEnvForOpen();
}
} }
} }
} }

@ -2646,17 +2646,17 @@ TEST_F(BackupableDBTest, OpenBackupAsReadOnlyDB) {
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), /*flush*/ false)); ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), /*flush*/ false));
db_.reset(); // CloseDB db_.reset(); // CloseDB
DestroyDB(dbname_, options_); DestroyDB(dbname_, options_);
std::vector<BackupInfo> backup_info; BackupInfo backup_info;
// First, check that we get empty fields without include_file_details // First, check that we get empty fields without include_file_details
backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ false); ASSERT_OK(backup_engine_->GetBackupInfo(/*id*/ 1U, &backup_info,
ASSERT_EQ(backup_info.size(), 2); /*with file details*/ false));
ASSERT_EQ(backup_info[0].name_for_open, ""); ASSERT_EQ(backup_info.name_for_open, "");
ASSERT_FALSE(backup_info[0].env_for_open); ASSERT_FALSE(backup_info.env_for_open);
// Now for the real test // Now for the real test
backup_info.clear(); backup_info = BackupInfo();
backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ true); ASSERT_OK(backup_engine_->GetBackupInfo(/*id*/ 1U, &backup_info,
ASSERT_EQ(backup_info.size(), 2); /*with file details*/ true));
// Caution: DBOptions only holds a raw pointer to Env, so something else // Caution: DBOptions only holds a raw pointer to Env, so something else
// must keep it alive. // must keep it alive.
@ -2668,9 +2668,9 @@ TEST_F(BackupableDBTest, OpenBackupAsReadOnlyDB) {
opts.create_if_missing = false; opts.create_if_missing = false;
opts.info_log.reset(); opts.info_log.reset();
opts.env = backup_info[0].env_for_open.get(); opts.env = backup_info.env_for_open.get();
std::string name = backup_info[0].name_for_open; std::string name = backup_info.name_for_open;
backup_info.clear(); backup_info = BackupInfo();
ASSERT_OK(DB::OpenForReadOnly(opts, name, &db)); ASSERT_OK(DB::OpenForReadOnly(opts, name, &db));
AssertExists(db, 0, 100); AssertExists(db, 0, 100);
@ -2680,13 +2680,13 @@ TEST_F(BackupableDBTest, OpenBackupAsReadOnlyDB) {
db = nullptr; db = nullptr;
// Case 2: Keeping BackupInfo alive rather than BackupEngine also suffices // Case 2: Keeping BackupInfo alive rather than BackupEngine also suffices
backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ true); ASSERT_OK(backup_engine_->GetBackupInfo(/*id*/ 2U, &backup_info,
ASSERT_EQ(backup_info.size(), 2); /*with file details*/ true));
CloseBackupEngine(); CloseBackupEngine();
opts.create_if_missing = true; // check also OK (though pointless) opts.create_if_missing = true; // check also OK (though pointless)
opts.env = backup_info[1].env_for_open.get(); opts.env = backup_info.env_for_open.get();
name = backup_info[1].name_for_open; name = backup_info.name_for_open;
// Note: keeping backup_info[1] alive // Note: keeping backup_info alive
ASSERT_OK(DB::OpenForReadOnly(opts, name, &db)); ASSERT_OK(DB::OpenForReadOnly(opts, name, &db));
AssertExists(db, 0, 200); AssertExists(db, 0, 200);
@ -2848,17 +2848,32 @@ TEST_F(BackupableDBTest, BackupWithMetadata) {
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
const std::string metadata = std::to_string(i); const std::string metadata = std::to_string(i);
FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1)); FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
ASSERT_OK( // Here also test CreateNewBackupWithMetadata with CreateBackupOptions
backup_engine_->CreateNewBackupWithMetadata(db_.get(), metadata, true)); // and outputting saved BackupID.
CreateBackupOptions opts;
opts.flush_before_backup = true;
BackupID new_id = 0;
ASSERT_OK(backup_engine_->CreateNewBackupWithMetadata(opts, db_.get(),
metadata, &new_id));
ASSERT_EQ(new_id, static_cast<BackupID>(i + 1));
} }
CloseDBAndBackupEngine(); CloseDBAndBackupEngine();
OpenDBAndBackupEngine(); OpenDBAndBackupEngine();
std::vector<BackupInfo> backup_infos; { // Verify in bulk BackupInfo
backup_engine_->GetBackupInfo(&backup_infos); std::vector<BackupInfo> backup_infos;
ASSERT_EQ(5, backup_infos.size()); backup_engine_->GetBackupInfo(&backup_infos);
ASSERT_EQ(5, backup_infos.size());
for (int i = 0; i < 5; i++) {
ASSERT_EQ(std::to_string(i), backup_infos[i].app_metadata);
}
}
// Also verify in individual BackupInfo
for (int i = 0; i < 5; i++) { for (int i = 0; i < 5; i++) {
ASSERT_EQ(std::to_string(i), backup_infos[i].app_metadata); BackupInfo backup_info;
ASSERT_OK(backup_engine_->GetBackupInfo(static_cast<BackupID>(i + 1),
&backup_info));
ASSERT_EQ(std::to_string(i), backup_info.app_metadata);
} }
CloseDBAndBackupEngine(); CloseDBAndBackupEngine();
DestroyDB(dbname_, options_); DestroyDB(dbname_, options_);
@ -3285,6 +3300,8 @@ TEST_F(BackupableDBTest, CreateWhenLatestBackupCorrupted) {
// succeed even when latest backup is corrupted. // succeed even when latest backup is corrupted.
const int kNumKeys = 5000; const int kNumKeys = 5000;
OpenDBAndBackupEngine(true /* destroy_old_data */); OpenDBAndBackupEngine(true /* destroy_old_data */);
BackupInfo backup_info;
ASSERT_TRUE(backup_engine_->GetLatestBackupInfo(&backup_info).IsNotFound());
FillDB(db_.get(), 0 /* from */, kNumKeys); FillDB(db_.get(), 0 /* from */, kNumKeys);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
true /* flush_before_backup */)); true /* flush_before_backup */));
@ -3293,12 +3310,26 @@ TEST_F(BackupableDBTest, CreateWhenLatestBackupCorrupted) {
CloseDBAndBackupEngine(); CloseDBAndBackupEngine();
OpenDBAndBackupEngine(); OpenDBAndBackupEngine();
ASSERT_TRUE(backup_engine_->GetLatestBackupInfo(&backup_info).IsNotFound());
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
true /* flush_before_backup */)); true /* flush_before_backup */));
ASSERT_TRUE(backup_engine_->GetLatestBackupInfo(&backup_info).ok());
ASSERT_EQ(2, backup_info.backup_id);
std::vector<BackupInfo> backup_infos; std::vector<BackupInfo> backup_infos;
backup_engine_->GetBackupInfo(&backup_infos); backup_engine_->GetBackupInfo(&backup_infos);
ASSERT_EQ(1, backup_infos.size()); ASSERT_EQ(1, backup_infos.size());
ASSERT_EQ(2, backup_infos[0].backup_id); ASSERT_EQ(2, backup_infos[0].backup_id);
// Verify individual GetBackupInfo by ID
ASSERT_TRUE(backup_engine_->GetBackupInfo(0U, &backup_info).IsNotFound());
ASSERT_TRUE(backup_engine_->GetBackupInfo(1U, &backup_info).IsCorruption());
ASSERT_TRUE(backup_engine_->GetBackupInfo(2U, &backup_info).ok());
ASSERT_TRUE(backup_engine_->GetBackupInfo(3U, &backup_info).IsNotFound());
ASSERT_TRUE(
backup_engine_->GetBackupInfo(999999U, &backup_info).IsNotFound());
} }
TEST_F(BackupableDBTest, WriteOnlyEngineNoSharedFileDeletion) { TEST_F(BackupableDBTest, WriteOnlyEngineNoSharedFileDeletion) {
@ -3456,7 +3487,11 @@ TEST_F(BackupableDBTest, BackgroundThreadCpuPriority) {
CreateBackupOptions options; CreateBackupOptions options;
options.decrease_background_thread_cpu_priority = true; options.decrease_background_thread_cpu_priority = true;
options.background_thread_cpu_priority = CpuPriority::kIdle; options.background_thread_cpu_priority = CpuPriority::kIdle;
ASSERT_OK(backup_engine_->CreateNewBackup(options, db_.get()));
// Also check output backup_id with CreateNewBackup
BackupID new_id = 0;
ASSERT_OK(backup_engine_->CreateNewBackup(options, db_.get(), &new_id));
ASSERT_EQ(new_id, 5U);
ASSERT_EQ(priority, CpuPriority::kNormal); ASSERT_EQ(priority, CpuPriority::kNormal);
} }

Loading…
Cancel
Save