Avoid directory renames in BackupEngine

Summary:
We used to name private directories like "1.tmp" while BackupEngine populated them, and then rename without the ".tmp" suffix (i.e., rename "1.tmp" to "1") after all files were copied. On glusterfs, directory renames like this require operations across many hosts, and partial failures have caused operational problems.

Fortunately we don't need to rename private directories. We already have a meta-file that uses the tempfile-rename pattern to commit a backup atomically after all its files have been successfully copied. So we can copy private files directly to their final location, so now there's no directory rename.
Closes https://github.com/facebook/rocksdb/pull/3749

Differential Revision: D7705610

Pulled By: ajkr

fbshipit-source-id: fd724a28dd2bf993ce323a5f2cb7e7d6980cc346
main
Andrew Kryczka 7 years ago committed by Facebook Github Bot
parent 2e72a5899b
commit a8a28da215
  1. 76
      utilities/backupable/backupable_db.cc
  2. 15
      utilities/backupable/backupable_db_test.cc

@ -747,6 +747,19 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
BackupID new_backup_id = latest_backup_id_ + 1;
assert(backups_.find(new_backup_id) == backups_.end());
auto private_dir = GetAbsolutePath(GetPrivateFileRel(new_backup_id));
Status s = backup_env_->FileExists(private_dir);
if (s.ok()) {
// maybe last backup failed and left partial state behind, clean it up.
// need to do this before updating backups_ such that a private dir
// named after new_backup_id will be cleaned up
s = GarbageCollect();
} else if (s.IsNotFound()) {
// normal case, the new backup's private dir doesn't exist yet
s = Status::OK();
}
auto ret = backups_.insert(std::make_pair(
new_backup_id, unique_ptr<BackupMeta>(new BackupMeta(
GetBackupMetaFile(new_backup_id, false /* tmp */),
@ -762,18 +775,8 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
ROCKS_LOG_INFO(options_.info_log,
"Started the backup process -- creating backup %u",
new_backup_id);
auto private_tmp_dir = GetAbsolutePath(GetPrivateFileRel(new_backup_id, true));
Status s = backup_env_->FileExists(private_tmp_dir);
if (s.ok()) {
// maybe last backup failed and left partial state behind, clean it up
s = GarbageCollect();
} else if (s.IsNotFound()) {
// normal case, the new backup's private dir doesn't exist yet
s = Status::OK();
}
if (s.ok()) {
s = backup_env_->CreateDir(private_tmp_dir);
s = backup_env_->CreateDir(private_dir);
}
RateLimiter* rate_limiter = options_.backup_rate_limiter.get();
@ -859,18 +862,6 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
// we copied all the files, enable file deletions
db->EnableFileDeletions(false);
if (s.ok()) {
// move tmp private backup to real backup folder
ROCKS_LOG_INFO(
options_.info_log,
"Moving tmp backup directory to the real one: %s -> %s\n",
GetAbsolutePath(GetPrivateFileRel(new_backup_id, true)).c_str(),
GetAbsolutePath(GetPrivateFileRel(new_backup_id, false)).c_str());
s = backup_env_->RenameFile(
GetAbsolutePath(GetPrivateFileRel(new_backup_id, true)), // tmp
GetAbsolutePath(GetPrivateFileRel(new_backup_id, false)));
}
auto backup_time = backup_env_->NowMicros() - start_backup;
if (s.ok()) {
@ -1314,22 +1305,33 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
dst_relative_tmp = GetSharedFileRel(dst_relative, true);
dst_relative = GetSharedFileRel(dst_relative, false);
} else {
dst_relative_tmp = GetPrivateFileRel(backup_id, true, dst_relative);
dst_relative = GetPrivateFileRel(backup_id, false, dst_relative);
}
std::string dst_path = GetAbsolutePath(dst_relative);
std::string dst_path_tmp = GetAbsolutePath(dst_relative_tmp);
// We copy into `temp_dest_path` and, once finished, rename it to
// `final_dest_path`. This allows files to atomically appear at
// `final_dest_path`. We can copy directly to the final path when atomicity
// is unnecessary, like for files in private backup directories.
const std::string* copy_dest_path;
std::string temp_dest_path;
std::string final_dest_path = GetAbsolutePath(dst_relative);
if (!dst_relative_tmp.empty()) {
temp_dest_path = GetAbsolutePath(dst_relative_tmp);
copy_dest_path = &temp_dest_path;
} else {
copy_dest_path = &final_dest_path;
}
// if it's shared, we also need to check if it exists -- if it does, no need
// to copy it again.
bool need_to_copy = true;
// true if dst_path is the same path as another live file
// true if final_dest_path is the same path as another live file
const bool same_path =
live_dst_paths.find(dst_path) != live_dst_paths.end();
live_dst_paths.find(final_dest_path) != live_dst_paths.end();
bool file_exists = false;
if (shared && !same_path) {
Status exist = backup_env_->FileExists(dst_path);
Status exist = backup_env_->FileExists(final_dest_path);
if (exist.ok()) {
file_exists = true;
} else if (exist.IsNotFound()) {
@ -1358,7 +1360,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
"overwrite the file.",
fname.c_str());
need_to_copy = true;
backup_env_->DeleteFile(dst_path);
backup_env_->DeleteFile(final_dest_path);
} else {
// the file is present and referenced by a backup
ROCKS_LOG_INFO(options_.info_log,
@ -1367,25 +1369,25 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
&checksum_value);
}
}
live_dst_paths.insert(dst_path);
live_dst_paths.insert(final_dest_path);
if (!contents.empty() || need_to_copy) {
ROCKS_LOG_INFO(options_.info_log, "Copying %s to %s", fname.c_str(),
dst_path_tmp.c_str());
copy_dest_path->c_str());
CopyOrCreateWorkItem copy_or_create_work_item(
src_dir.empty() ? "" : src_dir + fname, dst_path_tmp, contents, db_env_,
backup_env_, options_.sync, rate_limiter, size_limit,
src_dir.empty() ? "" : src_dir + fname, *copy_dest_path, contents,
db_env_, backup_env_, options_.sync, rate_limiter, size_limit,
progress_callback);
BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
copy_or_create_work_item.result.get_future(), shared, need_to_copy,
backup_env_, dst_path_tmp, dst_path, dst_relative);
backup_env_, temp_dest_path, final_dest_path, dst_relative);
files_to_copy_or_create_.write(std::move(copy_or_create_work_item));
backup_items_to_finish.push_back(std::move(after_copy_or_create_work_item));
} else {
std::promise<CopyOrCreateResult> promise_result;
BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
promise_result.get_future(), shared, need_to_copy, backup_env_,
dst_path_tmp, dst_path, dst_relative);
temp_dest_path, final_dest_path, dst_relative);
backup_items_to_finish.push_back(std::move(after_copy_or_create_work_item));
CopyOrCreateResult result;
result.status = s;
@ -1551,7 +1553,7 @@ Status BackupEngineImpl::GarbageCollect() {
}
// here we have to delete the dir and all its children
std::string full_private_path =
GetAbsolutePath(GetPrivateFileRel(backup_id, tmp_dir));
GetAbsolutePath(GetPrivateFileRel(backup_id));
std::vector<std::string> subchildren;
backup_env_->GetChildren(full_private_path, &subchildren);
for (auto& subchild : subchildren) {

@ -811,9 +811,8 @@ TEST_F(BackupableDBTest, NoDoubleCopy) {
test_db_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false));
std::vector<std::string> should_have_written = {
"/shared/.00010.sst.tmp", "/shared/.00011.sst.tmp",
"/private/1.tmp/CURRENT", "/private/1.tmp/MANIFEST-01",
"/private/1.tmp/00011.log", "/meta/.1.tmp"};
"/shared/.00010.sst.tmp", "/shared/.00011.sst.tmp", "/private/1/CURRENT",
"/private/1/MANIFEST-01", "/private/1/00011.log", "/meta/.1.tmp"};
AppendPath(backupdir_, should_have_written);
test_backup_env_->AssertWrittenFiles(should_have_written);
@ -829,9 +828,9 @@ TEST_F(BackupableDBTest, NoDoubleCopy) {
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false));
// should not open 00010.sst - it's already there
should_have_written = {"/shared/.00015.sst.tmp", "/private/2.tmp/CURRENT",
"/private/2.tmp/MANIFEST-01",
"/private/2.tmp/00011.log", "/meta/.2.tmp"};
should_have_written = {"/shared/.00015.sst.tmp", "/private/2/CURRENT",
"/private/2/MANIFEST-01", "/private/2/00011.log",
"/meta/.2.tmp"};
AppendPath(backupdir_, should_have_written);
test_backup_env_->AssertWrittenFiles(should_have_written);
@ -976,7 +975,7 @@ TEST_F(BackupableDBTest, InterruptCreationTest) {
backup_engine_->CreateNewBackup(db_.get(), !!(rnd.Next() % 2)).ok());
CloseDBAndBackupEngine();
// should also fail cleanup so the tmp directory stays behind
ASSERT_OK(backup_chroot_env_->FileExists(backupdir_ + "/private/1.tmp/"));
ASSERT_OK(backup_chroot_env_->FileExists(backupdir_ + "/private/1/"));
OpenDBAndBackupEngine(false /* destroy_old_data */);
test_backup_env_->SetLimitWrittenFiles(1000000);
@ -1171,7 +1170,7 @@ TEST_F(BackupableDBTest, DeleteTmpFiles) {
shared_tmp += "/shared";
}
shared_tmp += "/.00006.sst.tmp";
std::string private_tmp_dir = backupdir_ + "/private/10.tmp";
std::string private_tmp_dir = backupdir_ + "/private/10";
std::string private_tmp_file = private_tmp_dir + "/00003.sst";
file_manager_->WriteToFile(shared_tmp, "tmp");
file_manager_->CreateDir(private_tmp_dir);

Loading…
Cancel
Save