From b55e2165bed986d2353ebd85494461edbab6920b Mon Sep 17 00:00:00 2001 From: Uddipta Maity Date: Fri, 1 Apr 2016 10:56:52 -0700 Subject: [PATCH] Rocksdb backup can store optional application specific metadata Summary: Rocksdb backup engine maintains metadata about backups in separate files. But, there was no way to add extra application specific data to it. Adding support for that. In some use cases, applications decide to restore a backup based on some metadata. This will help those cases to cheaply decide whether to restore or not. Test Plan: Added a unit test. Existing ones are passing Sample meta file for BinaryMetadata test- ``` 1459454043 0 metadata 6162630A64656600676869 2 private/1/MANIFEST-000001 crc32 1184723444 private/1/CURRENT crc32 3505765120 ``` Reviewers: sdong, ldemailly, andrewkr Reviewed By: andrewkr Subscribers: andrewkr, dhruba, ldemailly Differential Revision: https://reviews.facebook.net/D56007 --- include/rocksdb/utilities/backupable_db.h | 22 ++++++-- utilities/backupable/backupable_db.cc | 60 ++++++++++++++++++---- utilities/backupable/backupable_db_test.cc | 49 ++++++++++++++++++ 3 files changed, 117 insertions(+), 14 deletions(-) diff --git a/include/rocksdb/utilities/backupable_db.h b/include/rocksdb/utilities/backupable_db.h index 06caa5bb0..f77de6749 100644 --- a/include/rocksdb/utilities/backupable_db.h +++ b/include/rocksdb/utilities/backupable_db.h @@ -143,13 +143,17 @@ struct BackupInfo { uint64_t size; uint32_t number_files; + std::string app_metadata; BackupInfo() {} BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size, - uint32_t _number_files) - : backup_id(_backup_id), timestamp(_timestamp), size(_size), - number_files(_number_files) {} + uint32_t _number_files, const std::string& _app_metadata) + : backup_id(_backup_id), + timestamp(_timestamp), + size(_size), + number_files(_number_files), + app_metadata(_app_metadata) {} }; class BackupStatistics { @@ -218,9 +222,17 @@ class BackupEngine { const 
BackupableDBOptions& options, BackupEngine** backup_engine_ptr); - virtual Status CreateNewBackup( - DB* db, bool flush_before_backup = false, + /// same as CreateNewBackup, but stores extra application metadata + virtual Status CreateNewBackupWithMetadata( + DB* db, const std::string& app_metadata, bool flush_before_backup = false, std::function<void()> progress_callback = []() {}) = 0; + + virtual Status CreateNewBackup(DB* db, bool flush_before_backup = false, + std::function<void()> progress_callback = + []() {}) { + return CreateNewBackupWithMetadata(db, "", flush_before_backup, + progress_callback); + } virtual Status PurgeOldBackups(uint32_t num_backups_to_keep) = 0; virtual Status DeleteBackup(BackupID backup_id) = 0; virtual void StopBackup() = 0; diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index dd544871d..da6972d94 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -88,9 +88,10 @@ class BackupEngineImpl : public BackupEngine { BackupEngineImpl(Env* db_env, const BackupableDBOptions& options, bool read_only = false); ~BackupEngineImpl(); - Status CreateNewBackup(DB* db, bool flush_before_backup = false, - std::function<void()> progress_callback = []() { - }) override; + Status CreateNewBackupWithMetadata(DB* db, const std::string& app_metadata, + bool flush_before_backup = false, + std::function<void()> progress_callback = + []() {}) override; Status PurgeOldBackups(uint32_t num_backups_to_keep) override; Status DeleteBackup(BackupID backup_id) override; void StopBackup() override { @@ -166,6 +167,12 @@ class BackupEngineImpl : public BackupEngine { return sequence_number_; } + const std::string& GetAppMetadata() const { return app_metadata_; } + + void SetAppMetadata(const std::string& app_metadata) { + app_metadata_ = app_metadata; + } + Status AddFile(std::shared_ptr<FileInfo> file_info); Status Delete(bool delete_meta = true); @@ -212,6 +219,7 @@ class BackupEngineImpl : public BackupEngine { // by
clients uint64_t sequence_number_; uint64_t size_; + std::string app_metadata_; std::string const meta_filename_; // files with relative paths (without "/" prefix!!) std::vector<std::shared_ptr<FileInfo>> files_; @@ -464,6 +472,7 @@ class BackupEngineImpl : public BackupEngine { size_t copy_file_buffer_size_; bool read_only_; BackupStatistics backup_statistics_; + static const size_t kMaxAppMetaSize = 1024 * 1024; // 1MB }; Status BackupEngine::Open(Env* env, const BackupableDBOptions& options, @@ -641,10 +650,14 @@ Status BackupEngineImpl::Initialize() { return Status::OK(); } -Status BackupEngineImpl::CreateNewBackup( - DB* db, bool flush_before_backup, std::function<void()> progress_callback) { +Status BackupEngineImpl::CreateNewBackupWithMetadata( + DB* db, const std::string& app_metadata, bool flush_before_backup, + std::function<void()> progress_callback) { assert(initialized_); assert(!read_only_); + if (app_metadata.size() > kMaxAppMetaSize) { + return Status::InvalidArgument("App metadata too large"); + } Status s; std::vector<std::string> live_files; VectorLogPtr live_wal_files; @@ -678,6 +691,7 @@ Status BackupEngineImpl::CreateNewBackup( auto& new_backup = ret.first->second; new_backup->RecordTimestamp(); new_backup->SetSequenceNumber(sequence_number); + new_backup->SetAppMetadata(app_metadata); auto start_backup = backup_env_-> NowMicros(); @@ -957,10 +971,9 @@ void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info) { backup_info->reserve(backups_.size()); for (auto& backup : backups_) { if (!backup.second->Empty()) { - backup_info->push_back(BackupInfo( - backup.first, backup.second->GetTimestamp(), - backup.second->GetSize(), - backup.second->GetNumberFiles())); + backup_info->push_back(BackupInfo( + backup.first, backup.second->GetTimestamp(), backup.second->GetSize(), + backup.second->GetNumberFiles(), backup.second->GetAppMetadata())); } } } @@ -1562,9 +1575,12 @@ Status BackupEngineImpl::BackupMeta::Delete(bool delete_meta) { return s; } +Slice kMetaDataPrefix("metadata "); + // each backup
meta file is of the format: // <timestamp> // <seq number> +// <metadata(literal string)> <metadata> (optional) // <number of files> // <file1> <crc32(literal string)> <crc32_value> @@ -1597,6 +1613,18 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile( data.remove_prefix(next - data.data() + 1); // +1 for '\n' sequence_number_ = strtoull(data.data(), &next, 10); data.remove_prefix(next - data.data() + 1); // +1 for '\n' + + if (data.starts_with(kMetaDataPrefix)) { + // app metadata present + data.remove_prefix(kMetaDataPrefix.size()); + Slice hex_encoded_metadata = GetSliceUntil(&data, '\n'); + bool decode_success = hex_encoded_metadata.DecodeHex(&app_metadata_); + if (!decode_success) { + return Status::Corruption( + "Failed to decode stored hex encoded app metadata"); + } + } + num_files = static_cast<uint32_t>(strtoul(data.data(), &next, 10)); data.remove_prefix(next - data.data() + 1); // +1 for '\n' @@ -1678,6 +1706,20 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) { len += snprintf(buf.get(), buf_size, "%" PRId64 "\n", timestamp_); len += snprintf(buf.get() + len, buf_size - len, "%" PRIu64 "\n", sequence_number_); + if (!app_metadata_.empty()) { + std::string hex_encoded_metadata = + Slice(app_metadata_).ToString(/* hex */ true); + if (hex_encoded_metadata.size() + kMetaDataPrefix.size() + 1 > + (size_t)(buf_size - len)) { + return Status::Corruption("Buffer too small to fit backup metadata"); + } + memcpy(buf.get() + len, kMetaDataPrefix.data(), kMetaDataPrefix.size()); + len += kMetaDataPrefix.size(); + memcpy(buf.get() + len, hex_encoded_metadata.data(), + hex_encoded_metadata.size()); + len += hex_encoded_metadata.size(); + buf[len++] = '\n'; + } len += snprintf(buf.get() + len, buf_size - len, "%" ROCKSDB_PRIszt "\n", files_.size()); for (const auto& file : files_) { diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 0a23f4df0..d79746a9f 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -1348,6 +1348,55 @@ TEST_F(BackupableDBTest, Issue921Test) { delete
backup_engine; } +TEST_F(BackupableDBTest, BackupWithMetadata) { + const int keys_iteration = 5000; + OpenDBAndBackupEngine(true); + // create five backups + for (int i = 0; i < 5; ++i) { + const std::string metadata = std::to_string(i); + FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1)); + ASSERT_OK( + backup_engine_->CreateNewBackupWithMetadata(db_.get(), metadata, true)); + } + CloseDBAndBackupEngine(); + + OpenDBAndBackupEngine(); + std::vector<BackupInfo> backup_infos; + backup_engine_->GetBackupInfo(&backup_infos); + ASSERT_EQ(5, backup_infos.size()); + for (int i = 0; i < 5; i++) { + ASSERT_EQ(std::to_string(i), backup_infos[i].app_metadata); + } + CloseDBAndBackupEngine(); + DestroyDB(dbname_, Options()); +} + +TEST_F(BackupableDBTest, BinaryMetadata) { + OpenDBAndBackupEngine(true); + std::string binaryMetadata = "abc\ndef"; + binaryMetadata.push_back('\0'); + binaryMetadata.append("ghi"); + ASSERT_OK( + backup_engine_->CreateNewBackupWithMetadata(db_.get(), binaryMetadata)); + CloseDBAndBackupEngine(); + + OpenDBAndBackupEngine(); + std::vector<BackupInfo> backup_infos; + backup_engine_->GetBackupInfo(&backup_infos); + ASSERT_EQ(1, backup_infos.size()); + ASSERT_EQ(binaryMetadata, backup_infos[0].app_metadata); + CloseDBAndBackupEngine(); + DestroyDB(dbname_, Options()); +} + +TEST_F(BackupableDBTest, MetadataTooLarge) { + OpenDBAndBackupEngine(true); + std::string largeMetadata(1024 * 1024 + 1, 0); + ASSERT_NOK( + backup_engine_->CreateNewBackupWithMetadata(db_.get(), largeMetadata)); + CloseDBAndBackupEngine(); + DestroyDB(dbname_, Options()); +} } // anon namespace } // namespace rocksdb