From 91ddd587cc658b2899a4fe2b2b734e71e41c29a6 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Mon, 2 Jun 2014 17:23:55 -0700 Subject: [PATCH] Only signal cond variable if need to Summary: At the end of BackgroundCallCompaction(), we call SignalAll(), even though we don't need to. If compaction hasn't done anything and there's another compaction running, there is no need to signal on the condition variable. Doing so creates a tight feedback loop which results in log files like: wait for memtable flush compaction nothing to do wait for memtable flush compaction nothing to do This change eliminates that Test Plan: make check Also: icanadi@dev1440 ~ $ grep "nothing to do" /fast-rocksdb-tmp/rocksdb_test/column_family_test/LOG | wc -l 7435 icanadi@dev1440 ~ $ grep "nothing to do" /fast-rocksdb-tmp/rocksdb_test/column_family_test/LOG | wc -l 372 First version is before the change, second version is after the change. Reviewers: dhruba, ljin, haobo, yhchiang, sdong Reviewed By: sdong Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D18855 --- db/db_impl.cc | 19 +++++++++++++++++-- db/db_impl.h | 11 ++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 536a52e9f..326abc8db 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -2068,7 +2068,15 @@ void DBImpl::BackgroundCallCompaction() { if (madeProgress || bg_schedule_needed_) { MaybeScheduleFlushOrCompaction(); } - bg_cv_.SignalAll(); + if (madeProgress || bg_compaction_scheduled_ == 0 || bg_manual_only_ > 0) { + // signal if + // * madeProgress -- need to wakeup MakeRoomForWrite + // * bg_compaction_scheduled_ == 0 -- need to wakeup ~DBImpl + // * bg_manual_only_ > 0 -- need to wakeup RunManualCompaction + // If none of this is true, there is no need to signal since nobody is + // waiting for it + bg_cv_.SignalAll(); + } // IMPORTANT: there should be no code after calling SignalAll. This call may // signal the DB destructor that it's OK to proceed with destruction. In // that case, all DB variables will be dealloacated and referencing them @@ -3930,6 +3938,10 @@ Status DBImpl::MakeRoomForWrite( uint64_t rate_limit_delay_millis = 0; Status s; double score; + // Once we schedule background work, we shouldn't schedule it again, since it + // might generate a tight feedback loop, constantly scheduling more background + // work, even if additional background work is not needed + bool schedule_background_work = true; while (true) { if (!bg_error_.ok()) { @@ -3973,7 +3985,10 @@ Status DBImpl::MakeRoomForWrite( DelayLoggingAndReset(); Log(options_.info_log, "[%s] wait for memtable flush...\n", cfd->GetName().c_str()); - MaybeScheduleFlushOrCompaction(); + if (schedule_background_work) { + MaybeScheduleFlushOrCompaction(); + schedule_background_work = false; + } uint64_t stall; { StopWatch sw(env_, options_.statistics.get(), diff --git a/db/db_impl.h b/db/db_impl.h index faa5c4c63..3c1bc5fc5 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -450,7 +450,16 @@ class DBImpl : public DB { // State below is protected by mutex_ port::Mutex mutex_; port::AtomicPointer shutting_down_; - port::CondVar bg_cv_; // Signalled when background work finishes + // This condition variable is signaled on these conditions: + // * whenever bg_compaction_scheduled_ goes down to 0 + // * if bg_manual_only_ > 0, whenever a compaction finishes, even if it hasn't + // made any progress + // * whenever a compaction made any progress + // * whenever bg_flush_scheduled_ value decreases (i.e. whenever a flush is + // done, even if it didn't make any progress) + // * whenever there is an error in background flush or compaction + // * whenever bg_logstats_scheduled_ turns to false + port::CondVar bg_cv_; uint64_t logfile_number_; unique_ptr log_; bool log_empty_;