From d52f334cbd4bceee23f13470f137220c8a75686b Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Thu, 16 Mar 2017 18:10:37 -0700 Subject: [PATCH] Break stalls when no bg work is happening Summary: The current stall keeps sleeping even if there are no flushes/compactions to wait for. I changed the logic to break the stall if we are not flushing or compacting. db_bench command used: ``` # fillrandom # memtable size = 10MB # value size = 1 MB # num = 1000 # use /dev/shm ./db_bench --benchmarks="fillrandom,stats" --value_size=1048576 --write_buffer_size=10485760 --num=1000 --delayed_write_rate=XXXXX --db="/dev/shm/new_stall" | grep "Cumulative stall" ``` ``` Current results # delayed_write_rate = 1000 Kb/sec Cumulative stall: 00:00:9.031 H:M:S # delayed_write_rate = 200 Kb/sec Cumulative stall: 00:00:22.314 H:M:S # delayed_write_rate = 100 Kb/sec Cumulative stall: 00:00:42.784 H:M:S # delayed_write_rate = 50 Kb/sec Cumulative stall: 00:01:23.785 H:M:S # delayed_write_rate = 25 Kb/sec Cumulative stall: 00:02:45.702 H:M:S ``` ``` New results # delayed_write_rate = 1000 Kb/sec Cumulative stall: 00:00:9.017 H:M:S # delayed_write_rate = 200 Kb/sec Cumulative stall: 00 Closes https://github.com/facebook/rocksdb/pull/1884 Differential Revision: D4585439 Pulled By: IslamAbdelRahman fbshipit-source-id: aed2198 --- db/db_impl.cc | 23 ++++++++++++++++++----- db/write_controller.cc | 4 ++-- db/write_controller.h | 5 +++-- util/stop_watch.h | 2 ++ 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 138cf024b..852e16deb 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -5134,17 +5134,30 @@ Status DBImpl::DelayWrite(uint64_t num_bytes, bool delayed = false; { StopWatch sw(env_, stats_, WRITE_STALL, &time_delayed); - auto delay = write_controller_.GetDelay(env_, num_bytes); + uint64_t delay = write_controller_.GetDelay(env_, num_bytes); if (delay > 0) { if (write_options.no_slowdown) { return Status::Incomplete(); } - mutex_.Unlock(); - 
delayed = true; TEST_SYNC_POINT("DBImpl::DelayWrite:Sleep"); - // hopefully we don't have to sleep more than 2 billion microseconds - env_->SleepForMicroseconds(static_cast<int>(delay)); + + mutex_.Unlock(); + // We will delay the write until we have slept for `delay` microseconds or + // we don't need a delay anymore + const uint64_t kDelayInterval = 1000; + uint64_t stall_end = sw.start_time() + delay; + while (write_controller_.NeedsDelay()) { + if (env_->NowMicros() >= stall_end) { + // We already delayed this write `delay` microseconds + break; + } + + delayed = true; + // Sleep for 0.001 seconds + env_->SleepForMicroseconds(kDelayInterval); + } mutex_.Lock(); + } while (bg_error_.ok() && write_controller_.IsStopped()) { diff --git a/db/write_controller.cc b/db/write_controller.cc index d6c379fd6..2b5d8bf8c 100644 --- a/db/write_controller.cc +++ b/db/write_controller.cc @@ -44,7 +44,7 @@ uint64_t WriteController::GetDelay(Env* env, uint64_t num_bytes) { if (total_stopped_ > 0) { return 0; } - if (total_delayed_ == 0) { + if (total_delayed_.load() == 0) { return 0; } @@ -115,7 +115,7 @@ StopWriteToken::~StopWriteToken() { DelayWriteToken::~DelayWriteToken() { controller_->total_delayed_--; - assert(controller_->total_delayed_ >= 0); + assert(controller_->total_delayed_.load() >= 0); } CompactionPressureToken::~CompactionPressureToken() { diff --git a/db/write_controller.h b/db/write_controller.h index b84092ca6..b3f6ba8c7 100644 --- a/db/write_controller.h +++ b/db/write_controller.h @@ -7,6 +7,7 @@ #include <stdint.h> +#include <atomic> #include <memory> namespace rocksdb { @@ -45,7 +46,7 @@ class WriteController { // these three metods are querying the state of the WriteController bool IsStopped() const; - bool NeedsDelay() const { return total_delayed_ > 0; } + bool NeedsDelay() const { return total_delayed_.load() > 0; } bool NeedSpeedupCompaction() const { return IsStopped() || NeedsDelay() || total_compaction_pressure_ > 0; } @@ -87,7 +88,7 @@ class WriteController { friend class 
CompactionPressureToken; int total_stopped_; - int total_delayed_; + std::atomic<int> total_delayed_; int total_compaction_pressure_; uint64_t bytes_left_; uint64_t last_refill_time_; diff --git a/util/stop_watch.h b/util/stop_watch.h index bf2f6ebd0..de1772809 100644 --- a/util/stop_watch.h +++ b/util/stop_watch.h @@ -40,6 +40,8 @@ class StopWatch { } } + uint64_t start_time() const { return start_time_; } + private: Env* const env_; Statistics* statistics_;