From f8e90e8753c70f06d22a6e2669349e97e18d7bff Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 1 Mar 2016 19:33:33 -0800 Subject: [PATCH] Get file attributes in bulk for VerifyBackup and CreateNewBackup Summary: For VerifyBackup(), backup files can be spread across "shared/", "shared_checksum/", and "private/" subdirectories, so we have to bulk get all three. For CreateNewBackup(), we make two separate bulk calls: one for the data files and one for WAL files. There is also a new helper function, ExtendPathnameToSizeBytes(), that translates the file attributes vector to a map. I decided to leave GetChildrenFileAttributes()'s (from D53781) return type as vector to keep it consistent with GetChildren(). Depends on D53781. Test Plan: verified relevant unit tests $ ./backupable_db_test Reviewers: IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D53919 --- utilities/backupable/backupable_db.cc | 147 +++++++++++++++------ utilities/backupable/backupable_db_test.cc | 22 +++ 2 files changed, 126 insertions(+), 43 deletions(-) diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index 024b789ca..63665e9a6 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -117,6 +117,12 @@ class BackupEngineImpl : public BackupEngine { private: void DeleteChildren(const std::string& dir, uint32_t file_type_filter = 0); + // Extends the "result" map with pathname->size mappings for the contents of + // "dir". Pathnames are prefixed with "dir". + Status InsertPathnameToSizeBytes( + const std::string& dir, + std::unordered_map* result); + struct FileInfo { FileInfo(const std::string& fname, uint64_t sz, uint32_t checksum) : refs(0), filename(fname), size(sz), checksum_value(checksum) {} @@ -179,8 +185,10 @@ class BackupEngineImpl : public BackupEngine { return files_; } - Status LoadFromFile(const std::string& backup_dir, - bool use_size_in_file_name); + // @param abs_path_to_size Pre-fetched file sizes (bytes). + Status LoadFromFile( + const std::string& backup_dir, bool use_size_in_file_name, + const std::unordered_map& abs_path_to_size); Status StoreToFile(bool sync); std::string GetInfoString() { @@ -427,7 +435,7 @@ class BackupEngineImpl : public BackupEngine { std::vector& backup_items_to_finish, BackupID backup_id, bool shared, const std::string& src_dir, const std::string& fname, // starts with "/" - RateLimiter* rate_limiter, uint64_t size_limit = 0, + RateLimiter* rate_limiter, uint64_t size_bytes, uint64_t size_limit = 0, bool shared_checksum = false, std::function progress_callback = []() {}, const std::string& contents = std::string()); @@ -573,10 +581,19 @@ Status BackupEngineImpl::Initialize() { return s; } } else { // Load data from storage + std::unordered_map abs_path_to_size; + for (const auto& rel_dir : + {GetSharedFileRel(), GetSharedFileWithChecksumRel()}) { + const auto abs_dir = GetAbsolutePath(rel_dir); + InsertPathnameToSizeBytes(abs_dir, &abs_path_to_size); + } // load the backups if any for (auto& backup : backups_) { + InsertPathnameToSizeBytes( + GetAbsolutePath(GetPrivateFileRel(backup.first)), &abs_path_to_size); Status s = backup.second->LoadFromFile( - options_.backup_dir, options_.use_file_size_in_file_name); + options_.backup_dir, options_.use_file_size_in_file_name, + abs_path_to_size); if (!s.ok()) { Log(options_.info_log, "Backup %u corrupted -- %s", backup.first, s.ToString().c_str()); @@ -685,6 +702,12 @@ Status BackupEngineImpl::CreateNewBackup( std::unordered_set live_dst_paths; live_dst_paths.reserve(live_files.size() + live_wal_files.size()); + // Pre-fetch sizes for data files + std::unordered_map data_path_to_size; + if (s.ok()) { + s = InsertPathnameToSizeBytes(db->GetName(), &data_path_to_size); + } + std::vector backup_items_to_finish; // Add a CopyOrCreateWorkItem to the channel for each live file std::string manifest_fname, current_fname; @@ -709,13 +732,19 @@ Status BackupEngineImpl::CreateNewBackup( manifest_fname = live_files[i]; } + auto data_path_to_size_iter = + data_path_to_size.find(db->GetName() + live_files[i]); + uint64_t size_bytes = data_path_to_size_iter == data_path_to_size.end() + ? port::kMaxUint64 + : data_path_to_size_iter->second; + // rules: // * if it's kTableFile, then it's shared // * if it's kDescriptorFile, limit the size to manifest_file_size s = AddBackupFileWorkItem( live_dst_paths, backup_items_to_finish, new_backup_id, options_.share_table_files && type == kTableFile, db->GetName(), - live_files[i], rate_limiter.get(), + live_files[i], rate_limiter.get(), size_bytes, (type == kDescriptorFile) ? manifest_file_size : 0, options_.share_files_with_checksum && type == kTableFile, progress_callback); @@ -725,21 +754,37 @@ Status BackupEngineImpl::CreateNewBackup( s = AddBackupFileWorkItem( live_dst_paths, backup_items_to_finish, new_backup_id, false /* shared */, "" /* src_dir */, CurrentFileName(""), - rate_limiter.get(), 0 /* size_limit */, false /* shared_checksum */, - progress_callback, manifest_fname.substr(1) + "\n"); + rate_limiter.get(), manifest_fname.size(), 0 /* size_limit */, + false /* shared_checksum */, progress_callback, + manifest_fname.substr(1) + "\n"); } + + // Pre-fetch sizes for WAL files + std::unordered_map wal_path_to_size; + if (s.ok()) { + if (db->GetOptions().wal_dir != "") { + s = InsertPathnameToSizeBytes(db->GetOptions().wal_dir, + &wal_path_to_size); + } else { + wal_path_to_size = std::move(data_path_to_size); + } + } + // Add a CopyOrCreateWorkItem to the channel for each WAL file for (size_t i = 0; s.ok() && i < live_wal_files.size(); ++i) { + auto wal_path_to_size_iter = + wal_path_to_size.find(live_wal_files[i]->PathName()); + uint64_t size_bytes = wal_path_to_size_iter == wal_path_to_size.end() + ? port::kMaxUint64 + : wal_path_to_size_iter->second; if (live_wal_files[i]->Type() == kAliveLogFile) { // we only care about live log files // copy the file into backup_dir/files// - s = AddBackupFileWorkItem(live_dst_paths, - backup_items_to_finish, - new_backup_id, - false, /* not shared */ + s = AddBackupFileWorkItem(live_dst_paths, backup_items_to_finish, + new_backup_id, false, /* not shared */ db->GetOptions().wal_dir, live_wal_files[i]->PathName(), - rate_limiter.get()); + rate_limiter.get(), size_bytes); } } @@ -1070,21 +1115,20 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id) { Log(options_.info_log, "Verifying backup id %u\n", backup_id); - uint64_t size; - Status result; - std::string file_path; + std::unordered_map curr_abs_path_to_size; + for (const auto& rel_dir : {GetPrivateFileRel(backup_id), GetSharedFileRel(), + GetSharedFileWithChecksumRel()}) { + const auto abs_dir = GetAbsolutePath(rel_dir); + InsertPathnameToSizeBytes(abs_dir, &curr_abs_path_to_size); + } + for (const auto& file_info : backup->GetFiles()) { - const std::string& file = file_info->filename; - file_path = GetAbsolutePath(file); - result = backup_env_->FileExists(file_path); - if (!result.ok()) { - return result; + const auto abs_path = GetAbsolutePath(file_info->filename); + if (curr_abs_path_to_size.find(abs_path) == curr_abs_path_to_size.end()) { + return Status::NotFound("File missing: " + abs_path); } - result = backup_env_->GetFileSize(file_path, &size); - if (!result.ok()) { - return result; - } else if (size != file_info->size) { - return Status::Corruption("File corrupted: " + file); + if (file_info->size != curr_abs_path_to_size[abs_path]) { + return Status::Corruption("File corrupted: " + abs_path); } } return Status::OK(); @@ -1219,30 +1263,29 @@ Status BackupEngineImpl::AddBackupFileWorkItem( std::unordered_set& live_dst_paths, std::vector& backup_items_to_finish, BackupID backup_id, bool shared, const std::string& src_dir, - const std::string& fname, RateLimiter* rate_limiter, uint64_t size_limit, - bool shared_checksum, std::function progress_callback, - const std::string& contents) { + const std::string& fname, RateLimiter* rate_limiter, uint64_t size_bytes, + uint64_t size_limit, bool shared_checksum, + std::function progress_callback, const std::string& contents) { assert(!fname.empty() && fname[0] == '/'); assert(contents.empty() != src_dir.empty()); std::string dst_relative = fname.substr(1); std::string dst_relative_tmp; Status s; - uint64_t size; uint32_t checksum_value = 0; if (shared && shared_checksum) { // add checksum and file length to the file name s = CalculateChecksum(src_dir + fname, db_env_, size_limit, &checksum_value); - if (s.ok()) { - s = db_env_->GetFileSize(src_dir + fname, &size); - } if (!s.ok()) { - return s; + return s; + } + if (size_bytes == port::kMaxUint64) { + return Status::NotFound("File missing: " + src_dir + fname); } - dst_relative = GetSharedFileWithChecksum(dst_relative, checksum_value, - size); + dst_relative = + GetSharedFileWithChecksum(dst_relative, checksum_value, size_bytes); dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true); dst_relative = GetSharedFileWithChecksumRel(dst_relative, false); } else if (shared) { @@ -1282,7 +1325,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem( if (shared_checksum) { Log(options_.info_log, "%s already present, with checksum %u and size %" PRIu64, - fname.c_str(), checksum_value, size); + fname.c_str(), checksum_value, size_bytes); } else if (backuped_file_infos_.find(dst_relative) == backuped_file_infos_.end() && !same_path) { // file already exists, but it's not referenced by any backup. overwrite @@ -1295,7 +1338,6 @@ Status BackupEngineImpl::AddBackupFileWorkItem( backup_env_->DeleteFile(dst_path); } else { // the file is present and referenced by a backup - db_env_->GetFileSize(src_dir + fname, &size); // Ignore error Log(options_.info_log, "%s already present, calculate checksum", fname.c_str()); s = CalculateChecksum(src_dir + fname, db_env_, size_limit, @@ -1324,7 +1366,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem( backup_items_to_finish.push_back(std::move(after_copy_or_create_work_item)); CopyOrCreateResult result; result.status = s; - result.size = size; + result.size = size_bytes; result.checksum_value = checksum_value; promise_result.set_value(std::move(result)); } @@ -1390,6 +1432,22 @@ void BackupEngineImpl::DeleteChildren(const std::string& dir, } } +Status BackupEngineImpl::InsertPathnameToSizeBytes( + const std::string& dir, std::unordered_map* result) { + assert(result != nullptr); + std::vector files_attrs; + Status status = backup_env_->GetChildrenFileAttributes(dir, &files_attrs); + if (!status.ok()) { + return status; + } + const bool slash_needed = dir.empty() || dir.back() != '/'; + for (const auto& file_attrs : files_attrs) { + result->emplace(dir + (slash_needed ? "/" : "") + file_attrs.name, + file_attrs.size_bytes); + } + return Status::OK(); +} + Status BackupEngineImpl::GarbageCollect() { assert(!read_only_); Log(options_.info_log, "Starting garbage collection"); @@ -1561,8 +1619,9 @@ bool TEST_GetFileSizeFromBackupFileName(const std::string full_name, // // // ... -Status BackupEngineImpl::BackupMeta::LoadFromFile(const std::string& backup_dir, - bool use_size_in_file_name) { +Status BackupEngineImpl::BackupMeta::LoadFromFile( + const std::string& backup_dir, bool use_size_in_file_name, + const std::unordered_map& abs_path_to_size) { assert(Empty()); Status s; unique_ptr backup_meta_file; @@ -1606,9 +1665,11 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(const std::string& backup_dir, } else { if (!use_size_in_file_name || !GetFileSizeFromBackupFileName(filename, &size)) { - s = env_->GetFileSize(backup_dir + "/" + filename, &size); - if (!s.ok()) { - return s; + std::string abs_path = backup_dir + "/" + filename; + try { + size = abs_path_to_size.at(abs_path); + } catch (std::out_of_range& e) { + return Status::NotFound("Size missing for pathname: " + abs_path); } } } diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 95810533e..0379cdd07 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -232,6 +232,23 @@ class TestEnv : public EnvWrapper { return EnvWrapper::GetChildren(dir, r); } + // Some test cases do not actually create the test files (e.g., see + // DummyDB::live_files_) - for those cases, we mock those files' attributes + // so CreateNewBackup() can get their attributes. + void SetFilenamesForMockedAttrs(const std::vector& filenames) { + filenames_for_mocked_attrs_ = filenames; + } + Status GetChildrenFileAttributes( + const std::string& dir, std::vector* r) override { + if (filenames_for_mocked_attrs_.size() > 0) { + for (const auto& filename : filenames_for_mocked_attrs_) { + r->push_back({dir + filename, 10 /* size_bytes */}); + } + return Status::OK(); + } + return EnvWrapper::GetChildrenFileAttributes(dir, r); + } + void SetCreateDirIfMissingFailure(bool fail) { create_dir_if_missing_failure_ = fail; } @@ -255,6 +272,7 @@ class TestEnv : public EnvWrapper { port::Mutex mutex_; bool dummy_sequential_file_ = false; std::vector written_files_; + std::vector filenames_for_mocked_attrs_; uint64_t limit_written_files_ = 1000000; uint64_t limit_delete_files_ = 1000000; @@ -780,6 +798,7 @@ TEST_F(BackupableDBTest, NoDoubleCopy) { dummy_db_->live_files_ = { "/00010.sst", "/00011.sst", "/CURRENT", "/MANIFEST-01" }; dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}}; + test_backup_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_); ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); std::vector should_have_written = { "/shared/00010.sst.tmp", "/shared/00011.sst.tmp", @@ -796,6 +815,7 @@ TEST_F(BackupableDBTest, NoDoubleCopy) { dummy_db_->live_files_ = { "/00010.sst", "/00015.sst", "/CURRENT", "/MANIFEST-01" }; dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}}; + test_backup_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_); ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); // should not open 00010.sst - it's already there should_have_written = { @@ -846,6 +866,7 @@ TEST_F(BackupableDBTest, DifferentEnvs) { dummy_db_->live_files_ = { "/00010.sst", "/00011.sst", "/CURRENT", "/MANIFEST-01" }; dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}}; + test_backup_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_); ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); CloseDBAndBackupEngine(); @@ -857,6 +878,7 @@ TEST_F(BackupableDBTest, DifferentEnvs) { CloseDBAndBackupEngine(); DestroyDB(dbname_, Options()); + test_backup_env_->SetFilenamesForMockedAttrs({}); AssertBackupConsistency(0, 0, 100, 500); }