Add APIs PauseBackgroundWork() and ContinueBackgroundWork()

Summary:
To support a new MongoDB capability, we need to make sure that we don't do any IO for a short period of time. For background, see:
* https://jira.mongodb.org/browse/SERVER-20704
* https://jira.mongodb.org/browse/SERVER-18899

To implement that, I add a new API calls PauseBackgroundWork() and ContinueBackgroundWork() which reuse the capability we already have in place for RefitLevel() function.

Test Plan: Added a new test in db_test. Made sure that test fails when PauseBackgroundWork() is commented out.

Reviewers: IslamAbdelRahman, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D47901
main
Igor Canadi 9 years ago
parent a39897369a
commit 115427ef63
  1. 75
      db/db_impl.cc
  2. 7
      db/db_impl.h
  3. 35
      db/db_test.cc
  4. 7
      include/rocksdb/db.h
  5. 7
      include/rocksdb/utilities/stackable_db.h

@ -260,7 +260,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname)
wal_manager_(db_options_, env_options_), wal_manager_(db_options_, env_options_),
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
event_logger_(db_options_.info_log.get()), event_logger_(db_options_.info_log.get()),
bg_work_gate_closed_(false), bg_work_paused_(0),
refitting_level_(false), refitting_level_(false),
opened_successfully_(false) { opened_successfully_(false) {
env_->GetAbsolutePath(dbname, &db_absolute_path_); env_->GetAbsolutePath(dbname, &db_absolute_path_);
@ -1548,8 +1548,14 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options,
} }
if (options.change_level) { if (options.change_level) {
Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
"[RefitLevel] waiting for background threads to stop");
s = PauseBackgroundWork();
if (s.ok()) {
s = ReFitLevel(cfd, final_output_level, options.target_level); s = ReFitLevel(cfd, final_output_level, options.target_level);
} }
ContinueBackgroundWork();
}
LogFlush(db_options_.info_log); LogFlush(db_options_.info_log);
{ {
@ -1747,6 +1753,25 @@ Status DBImpl::CompactFilesImpl(
} }
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
Status DBImpl::PauseBackgroundWork() {
InstrumentedMutexLock guard_lock(&mutex_);
bg_work_paused_++;
while (bg_compaction_scheduled_ > 0 || bg_flush_scheduled_ > 0) {
bg_cv_.Wait();
}
return Status::OK();
}
Status DBImpl::ContinueBackgroundWork() {
InstrumentedMutexLock guard_lock(&mutex_);
assert(bg_work_paused_ > 0);
bg_work_paused_--;
if (bg_work_paused_ == 0) {
MaybeScheduleFlushOrCompaction();
}
return Status::OK();
}
void DBImpl::NotifyOnCompactionCompleted( void DBImpl::NotifyOnCompactionCompleted(
ColumnFamilyData* cfd, Compaction *c, const Status &st, ColumnFamilyData* cfd, Compaction *c, const Status &st,
const CompactionJobStats& compaction_job_stats, const CompactionJobStats& compaction_job_stats,
@ -1857,14 +1882,18 @@ int DBImpl::FindMinimumEmptyLevelFitting(ColumnFamilyData* cfd,
return minimum_level; return minimum_level;
} }
// REQUIREMENT: block all background work by calling PauseBackgroundWork()
// before calling this function
Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
assert(level < cfd->NumberLevels()); assert(level < cfd->NumberLevels());
if (target_level >= cfd->NumberLevels()) { if (target_level >= cfd->NumberLevels()) {
return Status::InvalidArgument("Target level exceeds number of levels"); return Status::InvalidArgument("Target level exceeds number of levels");
} }
SuperVersion* superversion_to_free = nullptr; std::unique_ptr<SuperVersion> superversion_to_free;
SuperVersion* new_superversion = new SuperVersion(); std::unique_ptr<SuperVersion> new_superversion(new SuperVersion());
Status status;
InstrumentedMutexLock guard_lock(&mutex_); InstrumentedMutexLock guard_lock(&mutex_);
@ -1872,40 +1901,26 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
if (refitting_level_) { if (refitting_level_) {
Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log, Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
"[ReFitLevel] another thread is refitting"); "[ReFitLevel] another thread is refitting");
delete new_superversion;
return Status::NotSupported("another thread is refitting"); return Status::NotSupported("another thread is refitting");
} }
refitting_level_ = true; refitting_level_ = true;
// wait for all background threads to stop const MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions();
bg_work_gate_closed_ = true;
while (bg_compaction_scheduled_ > 0 || bg_flush_scheduled_) {
Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
"[RefitLevel] waiting for background threads to stop: %d %d",
bg_compaction_scheduled_, bg_flush_scheduled_);
bg_cv_.Wait();
}
const MutableCFOptions mutable_cf_options =
*cfd->GetLatestMutableCFOptions();
// move to a smaller level // move to a smaller level
int to_level = target_level; int to_level = target_level;
if (target_level < 0) { if (target_level < 0) {
to_level = FindMinimumEmptyLevelFitting(cfd, mutable_cf_options, level); to_level = FindMinimumEmptyLevelFitting(cfd, mutable_cf_options, level);
} }
Status status;
auto* vstorage = cfd->current()->storage_info(); auto* vstorage = cfd->current()->storage_info();
if (to_level > level) { if (to_level > level) {
if (level == 0) { if (level == 0) {
delete new_superversion;
return Status::NotSupported( return Status::NotSupported(
"Cannot change from level 0 to other levels."); "Cannot change from level 0 to other levels.");
} }
// Check levels are empty for a trivial move // Check levels are empty for a trivial move
for (int l = level + 1; l <= to_level; l++) { for (int l = level + 1; l <= to_level; l++) {
if (vstorage->NumLevelFiles(l) > 0) { if (vstorage->NumLevelFiles(l) > 0) {
delete new_superversion;
return Status::NotSupported( return Status::NotSupported(
"Levels between source and target are not empty for a move."); "Levels between source and target are not empty for a move.");
} }
@ -1913,8 +1928,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
} }
if (to_level != level) { if (to_level != level) {
Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log, Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log,
"[%s] Before refitting:\n%s", "[%s] Before refitting:\n%s", cfd->GetName().c_str(),
cfd->GetName().c_str(), cfd->current()->DebugString().data()); cfd->current()->DebugString().data());
VersionEdit edit; VersionEdit edit;
edit.SetColumnFamily(cfd->GetID()); edit.SetColumnFamily(cfd->GetID());
@ -1926,14 +1941,13 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
f->marked_for_compaction); f->marked_for_compaction);
} }
Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log, Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log,
"[%s] Apply version edit:\n%s", "[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
cfd->GetName().c_str(), edit.DebugString().data()); edit.DebugString().data());
status = versions_->LogAndApply(cfd, mutable_cf_options, &edit, &mutex_, status = versions_->LogAndApply(cfd, mutable_cf_options, &edit, &mutex_,
directories_.GetDbDir()); directories_.GetDbDir());
superversion_to_free = InstallSuperVersionAndScheduleWork( superversion_to_free.reset(InstallSuperVersionAndScheduleWork(
cfd, new_superversion, mutable_cf_options); cfd, new_superversion.release(), mutable_cf_options));
new_superversion = nullptr;
Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log, Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log,
"[%s] LogAndApply: %s\n", cfd->GetName().c_str(), "[%s] LogAndApply: %s\n", cfd->GetName().c_str(),
@ -1941,16 +1955,13 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
if (status.ok()) { if (status.ok()) {
Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log, Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log,
"[%s] After refitting:\n%s", "[%s] After refitting:\n%s", cfd->GetName().c_str(),
cfd->GetName().c_str(), cfd->current()->DebugString().data()); cfd->current()->DebugString().data());
} }
} }
refitting_level_ = false; refitting_level_ = false;
bg_work_gate_closed_ = false;
delete superversion_to_free;
delete new_superversion;
return status; return status;
} }
@ -2203,8 +2214,8 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
// Compaction may introduce data race to DB open // Compaction may introduce data race to DB open
return; return;
} }
if (bg_work_gate_closed_) { if (bg_work_paused_ > 0) {
// gate closed for background work // we paused the background work
return; return;
} else if (shutting_down_.load(std::memory_order_acquire)) { } else if (shutting_down_.load(std::memory_order_acquire)) {
// DB is being deleted; no more background compactions // DB is being deleted; no more background compactions

@ -139,6 +139,9 @@ class DBImpl : public DB {
const int output_level, const int output_level,
const int output_path_id = -1) override; const int output_path_id = -1) override;
virtual Status PauseBackgroundWork() override;
virtual Status ContinueBackgroundWork() override;
using DB::SetOptions; using DB::SetOptions;
Status SetOptions( Status SetOptions(
ColumnFamilyHandle* column_family, ColumnFamilyHandle* column_family,
@ -746,8 +749,8 @@ class DBImpl : public DB {
// Unified interface for logging events // Unified interface for logging events
EventLogger event_logger_; EventLogger event_logger_;
// A value of true temporarily disables scheduling of background work // A value of >0 temporarily disables scheduling of background work
bool bg_work_gate_closed_; int bg_work_paused_;
// Guard against multiple concurrent refitting // Guard against multiple concurrent refitting
bool refitting_level_; bool refitting_level_;

@ -5664,6 +5664,14 @@ class ModelDB: public DB {
return Status::NotSupported("Not supported operation."); return Status::NotSupported("Not supported operation.");
} }
Status PauseBackgroundWork() override {
return Status::NotSupported("Not supported operation.");
}
Status ContinueBackgroundWork() override {
return Status::NotSupported("Not supported operation.");
}
using DB::NumberLevels; using DB::NumberLevels;
virtual int NumberLevels(ColumnFamilyHandle* column_family) override { virtual int NumberLevels(ColumnFamilyHandle* column_family) override {
return 1; return 1;
@ -9634,6 +9642,33 @@ TEST_F(DBTest, AddExternalSstFileMultiThreaded) {
INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam, INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam,
::testing::Values(1, 4)); ::testing::Values(1, 4));
TEST_F(DBTest, PauseBackgroundWorkTest) {
Options options;
options.write_buffer_size = 100000; // Small write buffer
options = CurrentOptions(options);
Reopen(options);
std::vector<std::thread> threads;
std::atomic<bool> done;
db_->PauseBackgroundWork();
threads.emplace_back([&]() {
Random rnd(301);
for (int i = 0; i < 10000; ++i) {
Put(RandomString(&rnd, 10), RandomString(&rnd, 10));
}
done.store(true);
});
env_->SleepForMicroseconds(200000);
// make sure the thread is not done
ASSERT_EQ(false, done.load());
db_->ContinueBackgroundWork();
for (auto& t : threads) {
t.join();
}
// now it's done
ASSERT_EQ(true, done.load());
}
} // namespace rocksdb } // namespace rocksdb
#endif #endif

@ -524,6 +524,13 @@ class DB {
return CompactFiles(compact_options, DefaultColumnFamily(), return CompactFiles(compact_options, DefaultColumnFamily(),
input_file_names, output_level, output_path_id); input_file_names, output_level, output_path_id);
} }
// This function will wait until all currently running background processes
// finish. After it returns, no background process will be run until
// UnblockBackgroundWork is called
virtual Status PauseBackgroundWork() = 0;
virtual Status ContinueBackgroundWork() = 0;
// Number of levels used for this DB. // Number of levels used for this DB.
virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0; virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); } virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }

@ -169,6 +169,13 @@ class StackableDB : public DB {
output_level, output_path_id); output_level, output_path_id);
} }
virtual Status PauseBackgroundWork() override {
return db_->PauseBackgroundWork();
}
virtual Status ContinueBackgroundWork() override {
return db_->ContinueBackgroundWork();
}
using DB::NumberLevels; using DB::NumberLevels;
virtual int NumberLevels(ColumnFamilyHandle* column_family) override { virtual int NumberLevels(ColumnFamilyHandle* column_family) override {
return db_->NumberLevels(column_family); return db_->NumberLevels(column_family);

Loading…
Cancel
Save