From a3594867bacc7e78a7c35a7b32a13d933e8d238d Mon Sep 17 00:00:00 2001 From: Stanislau Hlebik Date: Thu, 26 Jun 2014 16:45:27 -0700 Subject: [PATCH] Cache some conditions for DBImpl::MakeRoomForWrite Summary: Task 4580155. Some conditions in DBImpl::MakeRoomForWrite can be cached in ColumnFamilyData, because theirs value can be changed only during compaction, adding new memtable and/or add recalculation of compaction score. These conditions are: cfd->imm()->size() == cfd->options()->max_write_buffer_number - 1 cfd->current()->NumLevelFiles(0) >= cfd->options()->level0_stop_writes_trigger cfd->options()->soft_rate_limit > 0.0 && (score = cfd->current()->MaxCompactionScore()) > cfd->options()->soft_rate_limit cfd->options()->hard_rate_limit > 1.0 && (score = cfd->current()->MaxCompactionScore()) > cfd->options()->hard_rate_limit P.S. As it's my first diff, Siying suggested to add everybody as a reviewers for this diff. Sorry, if I forgot someone or add someone by mistake. Test Plan: make all check Reviewers: haobo, xjin, dhruba, yhchiang, zagfox, ljin, sdong Reviewed By: sdong Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D19311 --- db/column_family.cc | 38 +++++++++++++++++++++++++++++++++++++- db/column_family.h | 40 ++++++++++++++++++++++++++++++++++++++++ db/db_impl.cc | 19 ++++++------------- 3 files changed, 83 insertions(+), 14 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index c4132b367..2d7ac23ae 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -243,6 +243,8 @@ ColumnFamilyData::ColumnFamilyData(const std::string& dbname, uint32_t id, const ColumnFamilyOptions* cf_options = &options_; cf_options->Dump(options_.info_log.get()); } + + RecalculateWriteStallConditions(); } // DB mutex held @@ -295,6 +297,35 @@ ColumnFamilyData::~ColumnFamilyData() { } } +void ColumnFamilyData::RecalculateWriteStallConditions() { + need_wait_for_num_memtables_ = + (imm()->size() == options()->max_write_buffer_number - 1); + + if (current_ != nullptr) { + need_wait_for_num_level0_files_ = + (current_->NumLevelFiles(0) >= options()->level0_stop_writes_trigger); + } else { + need_wait_for_num_level0_files_ = false; + } + + RecalculateWriteStallRateLimitsConditions(); +} + +void ColumnFamilyData::RecalculateWriteStallRateLimitsConditions() { + if (current_ != nullptr) { + exceeds_hard_rate_limit_ = + (options()->hard_rate_limit > 1.0 && + current_->MaxCompactionScore() > options()->hard_rate_limit); + + exceeds_soft_rate_limit_ = + (options()->soft_rate_limit > 0.0 && + current_->MaxCompactionScore() > options()->soft_rate_limit); + } else { + exceeds_hard_rate_limit_ = false; + exceeds_soft_rate_limit_ = false; + } +} + const EnvOptions* ColumnFamilyData::soptions() const { return &(column_family_set_->storage_options_); } @@ -316,7 +347,9 @@ void ColumnFamilyData::CreateNewMemtable() { } Compaction* ColumnFamilyData::PickCompaction(LogBuffer* log_buffer) { - return compaction_picker_->PickCompaction(current_, log_buffer); + auto result = compaction_picker_->PickCompaction(current_, log_buffer); + RecalculateWriteStallRateLimitsConditions(); + return result; } Compaction* ColumnFamilyData::CompactRange(int input_level, int output_level, @@ -420,6 +453,9 @@ SuperVersion* ColumnFamilyData::InstallSuperVersion( if (column_family_set_->db_options_->allow_thread_local) { ResetThreadLocalSuperVersions(); } + + RecalculateWriteStallConditions(); + if (old_superversion != nullptr && old_superversion->Unref()) { old_superversion->Cleanup(); return old_superversion; // will let caller delete outside of mutex diff --git a/db/column_family.h b/db/column_family.h index 991bb0112..826fcc669 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -229,6 +229,22 @@ class ColumnFamilyData { return need_slowdown_for_num_level0_files_; } + bool NeedWaitForNumLevel0Files() const { + return need_wait_for_num_level0_files_; + } + + bool NeedWaitForNumMemtables() const { + return need_wait_for_num_memtables_; + } + + bool ExceedsSoftRateLimit() const { + return exceeds_soft_rate_limit_; + } + + bool ExceedsHardRateLimit() const { + return exceeds_hard_rate_limit_; + } + private: friend class ColumnFamilySet; ColumnFamilyData(const std::string& dbname, uint32_t id, @@ -238,6 +254,14 @@ class ColumnFamilyData { const EnvOptions& storage_options, ColumnFamilySet* column_family_set); + // Recalculate some small conditions, which are changed only during + // compaction, adding new memtable and/or + // recalculation of compaction score. These values are used in + // DBImpl::MakeRoomForWrite function to decide, if it need to make + // a write stall + void RecalculateWriteStallConditions(); + void RecalculateWriteStallRateLimitsConditions(); + uint32_t id_; const std::string name_; Version* dummy_versions_; // Head of circular doubly-linked list of versions. @@ -282,6 +306,22 @@ class ColumnFamilyData { // we have too many level 0 files bool need_slowdown_for_num_level0_files_; + // These 4 variables are updated only after compaction, + // adding new memtable, flushing memtables to files + // and/or add recalculation of compaction score. + // That's why theirs values are cached in ColumnFamilyData. + // Recalculation is made by RecalculateWriteStallConditions and + // RecalculateWriteStallRateLimitsConditions function. They are used + // in DBImpl::MakeRoomForWrite function to decide, if it need + // to sleep during write operation + bool need_wait_for_num_memtables_; + + bool need_wait_for_num_level0_files_; + + bool exceeds_hard_rate_limit_; + + bool exceeds_soft_rate_limit_; + // An object that keeps all the compaction stats // and picks the next compaction std::unique_ptr compaction_picker_; diff --git a/db/db_impl.cc b/db/db_impl.cc index 0fb8271bc..562f68917 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -4026,8 +4026,7 @@ Status DBImpl::MakeRoomForWrite( DelayLoggingAndReset(); } break; - } else if (cfd->imm()->size() == - cfd->options()->max_write_buffer_number - 1) { + } else if (cfd->NeedWaitForNumMemtables()) { // We have filled up the current memtable, but the previous // ones are still being flushed, so we wait. DelayLoggingAndReset(); @@ -4048,9 +4047,7 @@ Status DBImpl::MakeRoomForWrite( STALL_MEMTABLE_COMPACTION_MICROS, stall); cfd->internal_stats()->RecordWriteStall( InternalStats::MEMTABLE_COMPACTION, stall); - } else if (cfd->current()->NumLevelFiles(0) >= - cfd->options()->level0_stop_writes_trigger) { - // There are too many level-0 files. + } else if (cfd->NeedWaitForNumLevel0Files()) { DelayLoggingAndReset(); Log(options_.info_log, "[%s] wait for fewer level0 files...\n", cfd->GetName().c_str()); @@ -4064,12 +4061,10 @@ Status DBImpl::MakeRoomForWrite( RecordTick(options_.statistics.get(), STALL_L0_NUM_FILES_MICROS, stall); cfd->internal_stats()->RecordWriteStall(InternalStats::LEVEL0_NUM_FILES, stall); - } else if (allow_hard_rate_limit_delay && - cfd->options()->hard_rate_limit > 1.0 && - (score = cfd->current()->MaxCompactionScore()) > - cfd->options()->hard_rate_limit) { + } else if (allow_hard_rate_limit_delay && cfd->ExceedsHardRateLimit()) { // Delay a write when the compaction score for any level is too large. int max_level = cfd->current()->MaxCompactionScoreLevel(); + score = cfd->current()->MaxCompactionScore(); mutex_.Unlock(); uint64_t delayed; { @@ -4090,10 +4085,8 @@ Status DBImpl::MakeRoomForWrite( allow_hard_rate_limit_delay = false; } mutex_.Lock(); - } else if (allow_soft_rate_limit_delay && - cfd->options()->soft_rate_limit > 0.0 && - (score = cfd->current()->MaxCompactionScore()) > - cfd->options()->soft_rate_limit) { + } else if (allow_soft_rate_limit_delay && cfd->ExceedsSoftRateLimit()) { + score = cfd->current()->MaxCompactionScore(); // Delay a write when the compaction score for any level is too large. // TODO: add statistics mutex_.Unlock();