SetOptions Backup Race Condition (#4108)

Summary:
Prior to this PR, there was a race condition between `DBImpl::SetOptions` and `BackupEngine::CreateNewBackup`, as illustrated below.
```
Time                  thread 1                           thread 2
  |   CreateNewBackup -> GetLiveFiles
  |                                         SetOptions -> RenameTempFileToOptionsFile
  |                                         SetOptions -> RenameTempFileToOptionsFile
  |                                         SetOptions -> RenameTempFileToOptionsFile // unlink oldest OPTIONS file
  |   copy the oldest OPTIONS // IO error!
  V
```
Proposed fix is to check the value of `DBImpl::disable_obsolete_files_deletion_` before calling `DeleteObsoleteOptionsFiles`.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4108

Differential Revision: D8796360

Pulled By: riversand963

fbshipit-source-id: 02045317f793ea4c7d4400a5bf333b8502fa3e82
main
Yanqin Jin 6 years ago committed by Facebook Github Bot
parent 440621aab8
commit 331cb63641
  1. 4
      db/db_impl.cc
  2. 35
      db/db_impl_files.cc
  3. 41
      db/obsolete_files_test.cc
  4. 27
      utilities/backupable/backupable_db_test.cc

@ -2735,7 +2735,9 @@ Status DBImpl::RenameTempFileToOptionsFile(const std::string& file_name) {
// Retry if the file name happen to conflict with an existing one.
s = GetEnv()->RenameFile(file_name, options_file_name);
DeleteObsoleteOptionsFiles();
if (0 == disable_delete_obsolete_files_) {
DeleteObsoleteOptionsFiles();
}
return s;
#else
(void)file_name;

@ -350,9 +350,32 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
std::vector<std::string> old_info_log_files;
InfoLogPrefix info_log_prefix(!immutable_db_options_.db_log_dir.empty(),
dbname_);
// File numbers of most recent two OPTIONS file in candidate_files (found in
// previos FindObsoleteFiles(full_scan=true))
// At this point, there must not be any duplicate file numbers in
// candidate_files.
uint64_t optsfile_num1 = std::numeric_limits<uint64_t>::min();
uint64_t optsfile_num2 = std::numeric_limits<uint64_t>::min();
for (const auto& candidate_file : candidate_files) {
const std::string& fname = candidate_file.file_name;
uint64_t number;
FileType type;
if (!ParseFileName(fname, &number, info_log_prefix.prefix, &type) ||
type != kOptionsFile) {
continue;
}
if (number > optsfile_num1) {
optsfile_num2 = optsfile_num1;
optsfile_num1 = number;
} else if (number > optsfile_num2) {
optsfile_num2 = number;
}
}
std::unordered_set<uint64_t> files_to_del;
for (const auto& candidate_file : candidate_files) {
std::string to_delete = candidate_file.file_name;
const std::string& to_delete = candidate_file.file_name;
uint64_t number;
FileType type;
// Ignore file if we cannot recognize it.
@ -401,11 +424,19 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
old_info_log_files.push_back(to_delete);
}
break;
case kOptionsFile:
keep = (number >= optsfile_num2);
TEST_SYNC_POINT_CALLBACK(
"DBImpl::PurgeObsoleteFiles:CheckOptionsFiles:1",
reinterpret_cast<void*>(&number));
TEST_SYNC_POINT_CALLBACK(
"DBImpl::PurgeObsoleteFiles:CheckOptionsFiles:2",
reinterpret_cast<void*>(&keep));
break;
case kCurrentFile:
case kDBLockFile:
case kIdentityFile:
case kMetaDatabase:
case kOptionsFile:
case kBlobFile:
keep = true;
break;

@ -198,6 +198,47 @@ TEST_F(ObsoleteFilesTest, RaceForObsoleteFileDeletion) {
CloseDB();
}
TEST_F(ObsoleteFilesTest, DeleteObsoleteOptionsFile) {
createLevel0Files(2, 50000);
CheckFileTypeCounts(options_.wal_dir, 1, 0, 0);
std::vector<uint64_t> optsfiles_nums;
std::vector<bool> optsfiles_keep;
SyncPoint::GetInstance()->SetCallBack(
"DBImpl::PurgeObsoleteFiles:CheckOptionsFiles:1", [&](void* arg) {
optsfiles_nums.push_back(*reinterpret_cast<uint64_t*>(arg));
});
SyncPoint::GetInstance()->SetCallBack(
"DBImpl::PurgeObsoleteFiles:CheckOptionsFiles:2", [&](void* arg) {
optsfiles_keep.push_back(*reinterpret_cast<bool*>(arg));
});
SyncPoint::GetInstance()->EnableProcessing();
DBImpl* dbi = static_cast<DBImpl*>(db_);
ASSERT_OK(dbi->DisableFileDeletions());
for (int i = 0; i != 4; ++i) {
if (i % 2) {
ASSERT_OK(dbi->SetOptions(dbi->DefaultColumnFamily(),
{{"paranoid_file_checks", "false"}}));
} else {
ASSERT_OK(dbi->SetOptions(dbi->DefaultColumnFamily(),
{{"paranoid_file_checks", "true"}}));
}
}
ASSERT_OK(dbi->EnableFileDeletions(true /* force */));
ASSERT_EQ(optsfiles_nums.size(), optsfiles_keep.size());
int size = static_cast<int>(optsfiles_nums.size());
int kept_opts_files_count = 0;
for (int i = 0; i != size; ++i) {
if (optsfiles_keep[i]) {
++kept_opts_files_count;
}
}
ASSERT_EQ(2, kept_opts_files_count);
CloseDB();
}
} //namespace rocksdb
int main(int argc, char** argv) {

@ -1019,6 +1019,33 @@ TEST_F(BackupableDBTest, BackupOptions) {
CloseDBAndBackupEngine();
}
TEST_F(BackupableDBTest, SetOptionsBackupRaceCondition) {
OpenDBAndBackupEngine(true);
SyncPoint::GetInstance()->LoadDependency(
{{"CheckpointImpl::CreateCheckpoint:SavedLiveFiles1",
"BackupableDBTest::SetOptionsBackupRaceCondition:BeforeSetOptions"},
{"BackupableDBTest::SetOptionsBackupRaceCondition:AfterSetOptions",
"CheckpointImpl::CreateCheckpoint:SavedLiveFiles2"}});
SyncPoint::GetInstance()->EnableProcessing();
rocksdb::port::Thread setoptions_thread{[this]() {
TEST_SYNC_POINT(
"BackupableDBTest::SetOptionsBackupRaceCondition:BeforeSetOptions");
DBImpl* dbi = static_cast<DBImpl*>(db_.get());
// Change arbitrary option to trigger OPTIONS file deletion
ASSERT_OK(dbi->SetOptions(dbi->DefaultColumnFamily(),
{{"paranoid_file_checks", "false"}}));
ASSERT_OK(dbi->SetOptions(dbi->DefaultColumnFamily(),
{{"paranoid_file_checks", "true"}}));
ASSERT_OK(dbi->SetOptions(dbi->DefaultColumnFamily(),
{{"paranoid_file_checks", "false"}}));
TEST_SYNC_POINT(
"BackupableDBTest::SetOptionsBackupRaceCondition:AfterSetOptions");
}};
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
setoptions_thread.join();
CloseDBAndBackupEngine();
}
// This test verifies we don't delete the latest backup when read-only option is
// set
TEST_F(BackupableDBTest, NoDeleteWithReadOnly) {

Loading…
Cancel
Save