From 56e77f09676d2e169e817090b889fc6418d34f37 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 18 Nov 2015 18:10:20 -0800 Subject: [PATCH] Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit Summary: Deprecate options.soft_rate_limit, which is hard to tune, with options.soft_pending_compaction_bytes_limit, which would trigger the slowdown if estimated pending compaction bytes exceeds the threshold. The hope is to make it more striaght-forward to tune. Test Plan: Modify DBTest.SoftLimit to cover options.soft_pending_compaction_bytes_limit instead; run all unit tests. Reviewers: IslamAbdelRahman, yhchiang, rven, kradhakrishnan, igor, anthony Reviewed By: anthony Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D51117 --- HISTORY.md | 6 +- db/column_family.cc | 19 ++--- db/db_bench.cc | 5 +- db/db_test.cc | 156 ++++++++++++++++++------------------- db/internal_stats.cc | 60 ++++++-------- db/internal_stats.h | 21 +---- db/version_set.cc | 11 --- db/version_set.h | 8 -- include/rocksdb/options.h | 9 ++- util/mutable_cf_options.cc | 4 +- util/mutable_cf_options.h | 7 +- util/options.cc | 7 +- util/options_helper.cc | 6 +- util/options_helper.h | 3 + util/testutil.h | 2 +- 15 files changed, 149 insertions(+), 175 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 58475daa2..c66fec379 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,10 @@ # Rocksdb Change Log +## Unreleased +### Public API Changes +* Change names in CompactionPri and add a new one. +* Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit. + ## 4.3.0 (12/8/2015) ### New Features * CompactionFilter has new member function called IgnoreSnapshots which allows CompactionFilter to be called even if there are snapshots later than the key. @@ -9,7 +14,6 @@ ### Public API Changes * When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families. -* Change names in CompactionPri and add a new one. ## 4.2.0 (11/9/2015) ### New Features diff --git a/db/column_family.cc b/db/column_family.cc index 60baac775..a6fae98f3 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -437,8 +437,6 @@ void ColumnFamilyData::RecalculateWriteStallConditions( const MutableCFOptions& mutable_cf_options) { if (current_ != nullptr) { auto* vstorage = current_->storage_info(); - const double score = vstorage->max_compaction_score(); - const int max_level = vstorage->max_compaction_score_level(); auto write_controller = column_family_set_->write_controller_; if (imm()->NumNotFlushed() >= mutable_cf_options.max_write_buffer_number) { @@ -467,8 +465,8 @@ void ColumnFamilyData::RecalculateWriteStallConditions( internal_stats_->AddCFStats( InternalStats::HARD_PENDING_COMPACTION_BYTES_LIMIT, 1); Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log, - "[%s] Stopping writes because estimated pending compaction " - "bytes exceed %" PRIu64, + "[%s] Stopping writes because of estimated pending compaction " + "bytes %" PRIu64, name_.c_str(), vstorage->estimated_compaction_needed_bytes()); } else if (mutable_cf_options.level0_slowdown_writes_trigger >= 0 && vstorage->l0_delay_trigger_count() >= @@ -482,13 +480,16 @@ void ColumnFamilyData::RecalculateWriteStallConditions( Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log, "[%s] Stalling writes because we have %d level-0 files", name_.c_str(), vstorage->l0_delay_trigger_count()); - } else if (mutable_cf_options.soft_rate_limit > 0.0 && - score > mutable_cf_options.soft_rate_limit) { + } else if (mutable_cf_options.soft_pending_compaction_bytes_limit > 0 && + vstorage->estimated_compaction_needed_bytes() >= + mutable_cf_options.soft_pending_compaction_bytes_limit) { write_controller_token_ = write_controller->GetDelayToken(); - internal_stats_->RecordLevelNSlowdown(max_level, true); + internal_stats_->AddCFStats( + InternalStats::SOFT_PENDING_COMPACTION_BYTES_LIMIT, 1); Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log, - "[%s] Stalling writes because we hit soft limit on level %d", - name_.c_str(), max_level); + "[%s] Stalling writes because of estimated pending compaction " + "bytes %" PRIu64, + name_.c_str(), vstorage->estimated_compaction_needed_bytes()); } else { write_controller_token_.reset(); } diff --git a/db/db_bench.cc b/db/db_bench.cc index de0bf9271..cb3d13bef 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -632,10 +632,13 @@ static bool ValidateRateLimit(const char* flagname, double value) { } return true; } -DEFINE_double(soft_rate_limit, 0.0, ""); +DEFINE_double(soft_rate_limit, 0.0, "DEPRECATED"); DEFINE_double(hard_rate_limit, 0.0, "DEPRECATED"); +DEFINE_uint64(soft_pending_compaction_bytes_limit, 64ull * 1024 * 1024 * 1024, + "Slowdown writes if pending compaction bytes exceed this number"); + DEFINE_uint64(hard_pending_compaction_bytes_limit, 128ull * 1024 * 1024 * 1024, "Stop writes if pending compaction bytes exceed this number"); diff --git a/db/db_test.cc b/db/db_test.cc index f1a6313f3..7bcd3a1fb 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -7470,7 +7470,7 @@ TEST_F(DBTest, DynamicLevelCompressionPerLevel2) { options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 2; options.level0_stop_writes_trigger = 2; - options.soft_rate_limit = 1.1; + options.soft_pending_compaction_bytes_limit = 1024 * 1024; // Use file size to distinguish levels // L1: 10, L2: 20, L3 40, L4 80 @@ -7586,7 +7586,7 @@ TEST_F(DBTest, DynamicCompactionOptions) { options.env = env_; options.create_if_missing = true; options.compression = kNoCompression; - options.soft_rate_limit = 1.1; + options.soft_pending_compaction_bytes_limit = 1024 * 1024; options.write_buffer_size = k64KB; options.arena_block_size = 4 * k4KB; options.max_write_buffer_number = 2; @@ -8491,7 +8491,7 @@ TEST_F(DBTest, TablePropertiesNeedCompactTest) { options.target_file_size_base = 2048; options.max_bytes_for_level_base = 10240; options.max_bytes_for_level_multiplier = 4; - options.soft_rate_limit = 1.1; + options.soft_pending_compaction_bytes_limit = 1024 * 1024; options.num_levels = 8; std::shared_ptr collector_factory = @@ -8995,118 +8995,116 @@ TEST_F(DBTest, SoftLimit) { options = CurrentOptions(options); options.write_buffer_size = 100000; // Small write buffer options.max_write_buffer_number = 256; - options.level0_file_num_compaction_trigger = 3; + options.level0_file_num_compaction_trigger = 1; options.level0_slowdown_writes_trigger = 3; options.level0_stop_writes_trigger = 999999; - options.delayed_write_rate = 200000; // About 200KB/s limited rate - options.soft_rate_limit = 1.1; + options.delayed_write_rate = 20000; // About 200KB/s limited rate + options.soft_pending_compaction_bytes_limit = 200000; options.target_file_size_base = 99999999; // All into one file options.max_bytes_for_level_base = 50000; + options.max_bytes_for_level_multiplier = 10; + options.max_background_compactions = 1; options.compression = kNoCompression; Reopen(options); Put(Key(0), ""); - // Only allow two compactions - port::Mutex mut; - port::CondVar cv(&mut); - std::atomic compaction_cnt(0); - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void* arg) { - // Three flushes and the first compaction, - // three flushes and the second compaction go through. - MutexLock l(&mut); - while (compaction_cnt.load() >= 8) { - cv.Wait(); - } - compaction_cnt.fetch_add(1); - }); - - std::atomic sleep_count(0); - rocksdb::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DelayWrite:Sleep", [&](void* arg) { sleep_count.fetch_add(1); }); - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + test::SleepingBackgroundTask sleeping_task_low; + // Block compactions + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + sleeping_task_low.WaitUntilSleeping(); + // Create 3 L0 files, making score of L0 to be 3. for (int i = 0; i < 3; i++) { Put(Key(i), std::string(5000, 'x')); Put(Key(100 - i), std::string(5000, 'x')); + // Flush the file. File size is around 30KB. Flush(); } - while (compaction_cnt.load() < 4 || NumTableFilesAtLevel(0) > 0) { - env_->SleepForMicroseconds(1000); - } + ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); + + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilDone(); + sleeping_task_low.Reset(); + dbfull()->TEST_WaitForCompact(); + // Now there is one L1 file but doesn't trigger soft_rate_limit + // The L1 file size is around 30KB. ASSERT_EQ(NumTableFilesAtLevel(1), 1); - ASSERT_EQ(sleep_count.load(), 0); + ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); + + // Only allow one compactin going through. + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "BackgroundCallCompaction:0", [&](void* arg) { + // Schedule a sleeping task. + sleeping_task_low.Reset(); + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, + &sleeping_task_low, Env::Priority::LOW); + }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + sleeping_task_low.WaitUntilSleeping(); + // Create 3 L0 files, making score of L0 to be 3 for (int i = 0; i < 3; i++) { Put(Key(10 + i), std::string(5000, 'x')); Put(Key(90 - i), std::string(5000, 'x')); + // Flush the file. File size is around 30KB. Flush(); } - while (compaction_cnt.load() < 8 || NumTableFilesAtLevel(0) > 0) { - env_->SleepForMicroseconds(1000); - } + + // Wake up sleep task to enable compaction to run and waits + // for it to go to sleep state again to make sure one compaction + // goes through. + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilSleeping(); + + // Now there is one L1 file (around 60KB) which exceeds 50KB base by 10KB + // Given level multiplier 10, estimated pending compaction is around 100KB + // doesn't trigger soft_pending_compaction_bytes_limit ASSERT_EQ(NumTableFilesAtLevel(1), 1); - ASSERT_EQ(sleep_count.load(), 0); + ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - // Slowdown is triggered now - for (int i = 0; i < 10; i++) { - Put(Key(i), std::string(100, 'x')); + // Create 3 L0 files, making score of L0 to be 3, higher than L0. + for (int i = 0; i < 3; i++) { + Put(Key(20 + i), std::string(5000, 'x')); + Put(Key(80 - i), std::string(5000, 'x')); + // Flush the file. File size is around 30KB. + Flush(); } - ASSERT_GT(sleep_count.load(), 0); + // Wake up sleep task to enable compaction to run and waits + // for it to go to sleep state again to make sure one compaction + // goes through. + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilSleeping(); - { - MutexLock l(&mut); - compaction_cnt.store(7); - cv.SignalAll(); - } - while (NumTableFilesAtLevel(1) > 0) { - env_->SleepForMicroseconds(1000); - } + // Now there is one L1 file (around 90KB) which exceeds 50KB base by 40KB + // Given level multiplier 10, estimated pending compaction is around 400KB + // triggerring soft_pending_compaction_bytes_limit + ASSERT_EQ(NumTableFilesAtLevel(1), 1); + ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - // Slowdown is not triggered any more. - sleep_count.store(0); - // Slowdown is not triggered now - for (int i = 0; i < 10; i++) { - Put(Key(i), std::string(100, 'x')); - } - ASSERT_EQ(sleep_count.load(), 0); + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilSleeping(); + + ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); // shrink level base so L2 will hit soft limit easier. ASSERT_OK(dbfull()->SetOptions({ {"max_bytes_for_level_base", "5000"}, })); - compaction_cnt.store(7); - Flush(); - - while (NumTableFilesAtLevel(0) == 0) { - env_->SleepForMicroseconds(1000); - } - - // Slowdown is triggered now - for (int i = 0; i < 10; i++) { - Put(Key(i), std::string(100, 'x')); - } - ASSERT_GT(sleep_count.load(), 0); - - { - MutexLock l(&mut); - compaction_cnt.store(7); - cv.SignalAll(); - } - while (NumTableFilesAtLevel(2) != 0) { - env_->SleepForMicroseconds(1000); - } + Put("", ""); + Flush(); + ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - // Slowdown is not triggered anymore - sleep_count.store(0); - for (int i = 0; i < 10; i++) { - Put(Key(i), std::string(100, 'x')); - } - ASSERT_EQ(sleep_count.load(), 0); + sleeping_task_low.WaitUntilSleeping(); rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilDone(); } #endif // ROCKSDB_LITE diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 6bf19701e..417c88ce3 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -35,8 +35,7 @@ void PrintLevelStatsHeader(char* buf, size_t len, const std::string& cf_name) { "\n** Compaction Stats [%s] **\n" "Level Files Size(MB) Score Read(GB) Rn(GB) Rnp1(GB) " "Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) " - "Comp(sec) Comp(cnt) Avg(sec) " - "Stall(cnt) KeyIn KeyDrop\n" + "Comp(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop\n" "--------------------------------------------------------------------" "-----------------------------------------------------------" "--------------------------------------\n", @@ -44,9 +43,9 @@ void PrintLevelStatsHeader(char* buf, size_t len, const std::string& cf_name) { } void PrintLevelStats(char* buf, size_t len, const std::string& name, - int num_files, int being_compacted, double total_file_size, double score, - double w_amp, uint64_t stalls, - const InternalStats::CompactionStats& stats) { + int num_files, int being_compacted, double total_file_size, + double score, double w_amp, + const InternalStats::CompactionStats& stats) { uint64_t bytes_read = stats.bytes_read_non_output_levels + stats.bytes_read_output_level; int64_t bytes_new = @@ -70,10 +69,8 @@ void PrintLevelStats(char* buf, size_t len, const std::string& name, "%9.0f " /* Comp(sec) */ "%9d " /* Comp(cnt) */ "%8.3f " /* Avg(sec) */ - "%10" PRIu64 - " " /* Stall(cnt) */ - "%7s " /* KeyIn */ - "%6s\n", /* KeyDrop */ + "%7s " /* KeyIn */ + "%6s\n", /* KeyDrop */ name.c_str(), num_files, being_compacted, total_file_size / kMB, score, bytes_read / kGB, stats.bytes_read_non_output_levels / kGB, @@ -82,7 +79,7 @@ void PrintLevelStats(char* buf, size_t len, const std::string& name, bytes_read / kMB / elapsed, stats.bytes_written / kMB / elapsed, stats.micros / kMicrosInSec, stats.count, stats.count == 0 ? 0 : stats.micros / kMicrosInSec / stats.count, - stalls, num_input_records.c_str(), num_dropped_records.c_str()); + num_input_records.c_str(), num_dropped_records.c_str()); } } @@ -677,27 +674,13 @@ void InternalStats::DumpCFStats(std::string* value) { int total_files = 0; int total_files_being_compacted = 0; double total_file_size = 0; - uint64_t total_slowdown_count_soft = 0; - uint64_t total_slowdown_count_hard = 0; - uint64_t total_stall_count = 0; for (int level = 0; level < number_levels_; level++) { int files = vstorage->NumLevelFiles(level); total_files += files; total_files_being_compacted += files_being_compacted[level]; if (comp_stats_[level].micros > 0 || files > 0) { - uint64_t stalls = - level == 0 ? (cf_stats_count_[LEVEL0_SLOWDOWN_TOTAL] + - cf_stats_count_[LEVEL0_NUM_FILES_TOTAL] + - cf_stats_count_[HARD_PENDING_COMPACTION_BYTES_LIMIT] + - cf_stats_count_[MEMTABLE_COMPACTION]) - : (stall_leveln_slowdown_count_soft_[level] + - stall_leveln_slowdown_count_hard_[level]); - stats_sum.Add(comp_stats_[level]); total_file_size += vstorage->NumLevelBytes(level); - total_stall_count += stalls; - total_slowdown_count_soft += stall_leveln_slowdown_count_soft_[level]; - total_slowdown_count_hard += stall_leveln_slowdown_count_hard_[level]; double w_amp = (comp_stats_[level].bytes_read_non_output_levels == 0) ? 0.0 : static_cast(comp_stats_[level].bytes_written) / @@ -705,17 +688,23 @@ void InternalStats::DumpCFStats(std::string* value) { PrintLevelStats(buf, sizeof(buf), "L" + ToString(level), files, files_being_compacted[level], vstorage->NumLevelBytes(level), compaction_score[level], - w_amp, stalls, comp_stats_[level]); + w_amp, comp_stats_[level]); value->append(buf); } } uint64_t curr_ingest = cf_stats_value_[BYTES_FLUSHED]; // Cumulative summary double w_amp = stats_sum.bytes_written / static_cast(curr_ingest + 1); + uint64_t total_stall_count = + cf_stats_count_[LEVEL0_SLOWDOWN_TOTAL] + + cf_stats_count_[LEVEL0_NUM_FILES_TOTAL] + + cf_stats_count_[SOFT_PENDING_COMPACTION_BYTES_LIMIT] + + cf_stats_count_[HARD_PENDING_COMPACTION_BYTES_LIMIT] + + cf_stats_count_[MEMTABLE_COMPACTION]; // Stats summary across levels PrintLevelStats(buf, sizeof(buf), "Sum", total_files, - total_files_being_compacted, total_file_size, 0, w_amp, - total_stall_count, stats_sum); + total_files_being_compacted, total_file_size, 0, w_amp, + stats_sum); value->append(buf); // Interval summary uint64_t interval_ingest = @@ -723,9 +712,7 @@ void InternalStats::DumpCFStats(std::string* value) { CompactionStats interval_stats(stats_sum); interval_stats.Subtract(cf_stats_snapshot_.comp_stats); w_amp = interval_stats.bytes_written / static_cast(interval_ingest); - PrintLevelStats(buf, sizeof(buf), "Int", 0, 0, 0, 0, - w_amp, total_stall_count - cf_stats_snapshot_.stall_count, - interval_stats); + PrintLevelStats(buf, sizeof(buf), "Int", 0, 0, 0, 0, w_amp, interval_stats); value->append(buf); snprintf(buf, sizeof(buf), @@ -742,19 +729,20 @@ void InternalStats::DumpCFStats(std::string* value) { "%" PRIu64 " level0_numfiles_with_compaction, " "%" PRIu64 - " pending_compaction_bytes, " + " stop for pending_compaction_bytes, " "%" PRIu64 - " memtable_compaction, " + " slowdown for pending_compaction_bytes, " "%" PRIu64 - " leveln_slowdown_soft, " - "%" PRIu64 " leveln_slowdown_hard\n", + " memtable_compaction, " + "interval %" PRIu64 " total count\n", cf_stats_count_[LEVEL0_SLOWDOWN_TOTAL], cf_stats_count_[LEVEL0_SLOWDOWN_WITH_COMPACTION], cf_stats_count_[LEVEL0_NUM_FILES_TOTAL], cf_stats_count_[LEVEL0_NUM_FILES_WITH_COMPACTION], cf_stats_count_[HARD_PENDING_COMPACTION_BYTES_LIMIT], - cf_stats_count_[MEMTABLE_COMPACTION], total_slowdown_count_soft, - total_slowdown_count_hard); + cf_stats_count_[SOFT_PENDING_COMPACTION_BYTES_LIMIT], + cf_stats_count_[MEMTABLE_COMPACTION], + total_stall_count - cf_stats_snapshot_.stall_count); value->append(buf); cf_stats_snapshot_.ingest_bytes = curr_ingest; diff --git a/db/internal_stats.h b/db/internal_stats.h index 0e3413bc7..b22c79951 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -88,6 +88,7 @@ class InternalStats { MEMTABLE_COMPACTION, LEVEL0_NUM_FILES_TOTAL, LEVEL0_NUM_FILES_WITH_COMPACTION, + SOFT_PENDING_COMPACTION_BYTES_LIMIT, HARD_PENDING_COMPACTION_BYTES_LIMIT, WRITE_STALLS_ENUM_MAX, BYTES_FLUSHED, @@ -111,8 +112,6 @@ class InternalStats { cf_stats_value_(INTERNAL_CF_STATS_ENUM_MAX), cf_stats_count_(INTERNAL_CF_STATS_ENUM_MAX), comp_stats_(num_levels), - stall_leveln_slowdown_count_hard_(num_levels), - stall_leveln_slowdown_count_soft_(num_levels), file_read_latency_(num_levels), bg_error_count_(0), number_levels_(num_levels), @@ -126,10 +125,6 @@ class InternalStats { cf_stats_value_[i] = 0; cf_stats_count_[i] = 0; } - for (int i = 0; i < num_levels; ++i) { - stall_leveln_slowdown_count_hard_[i] = 0; - stall_leveln_slowdown_count_soft_[i] = 0; - } } // Per level compaction stats. comp_stats_[level] stores the stats for @@ -237,14 +232,6 @@ class InternalStats { comp_stats_[level].bytes_moved += amount; } - void RecordLevelNSlowdown(int level, bool soft) { - if (soft) { - ++stall_leveln_slowdown_count_soft_[level]; - } else { - ++stall_leveln_slowdown_count_hard_[level]; - } - } - void AddCFStats(InternalCFStatsType type, uint64_t value) { cf_stats_value_[type] += value; ++cf_stats_count_[type]; @@ -282,9 +269,6 @@ class InternalStats { std::vector cf_stats_count_; // Per-ColumnFamily/level compaction stats std::vector comp_stats_; - // These count the number of microseconds for which MakeRoomForWrite stalls. - std::vector stall_leveln_slowdown_count_hard_; - std::vector stall_leveln_slowdown_count_soft_; std::vector file_read_latency_; // Used to compute per-interval statistics @@ -361,6 +345,7 @@ class InternalStats { MEMTABLE_COMPACTION, LEVEL0_NUM_FILES_TOTAL, LEVEL0_NUM_FILES_WITH_COMPACTION, + SOFT_PENDING_COMPACTION_BYTES_LIMIT, HARD_PENDING_COMPACTION_BYTES_LIMIT, WRITE_STALLS_ENUM_MAX, BYTES_FLUSHED, @@ -407,8 +392,6 @@ class InternalStats { void IncBytesMoved(int level, uint64_t amount) {} - void RecordLevelNSlowdown(int level, bool soft) {} - void AddCFStats(InternalCFStatsType type, uint64_t value) {} void AddDBStats(InternalDBStatsType type, uint64_t value) {} diff --git a/db/version_set.cc b/db/version_set.cc index a62eeb05a..a9422a84b 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1156,9 +1156,6 @@ void VersionStorageInfo::EstimateCompactionBytesNeeded( void VersionStorageInfo::ComputeCompactionScore( const MutableCFOptions& mutable_cf_options, const CompactionOptionsFIFO& compaction_options_fifo) { - double max_score = 0; - int max_score_level = 0; - for (int level = 0; level <= MaxInputLevel(); level++) { double score; if (level == 0) { @@ -1209,19 +1206,11 @@ void VersionStorageInfo::ComputeCompactionScore( } score = static_cast(level_bytes_no_compacting) / MaxBytesForLevel(level); - if (max_score < score) { - max_score = score; - max_score_level = level; - } } compaction_level_[level] = level; compaction_score_[level] = score; } - // update the max compaction score in levels 1 to n-1 - max_compaction_score_ = max_score; - max_compaction_score_level_ = max_score_level; - // sort all the levels based on their score. Higher scores get listed // first. Use bubble sort because the number of entries are small. for (int i = 0; i < num_levels() - 2; i++) { diff --git a/db/version_set.h b/db/version_set.h index 8d3d2c9c8..b7482499f 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -146,12 +146,6 @@ class VersionStorageInfo { int MaxInputLevel() const; - // Returns the maxmimum compaction score for levels 1 to max - double max_compaction_score() const { return max_compaction_score_; } - - // See field declaration - int max_compaction_score_level() const { return max_compaction_score_level_; } - // Return level number that has idx'th highest score int CompactionScoreLevel(int idx) const { return compaction_level_[idx]; } @@ -387,8 +381,6 @@ class VersionStorageInfo { // These are used to pick the best compaction level std::vector compaction_score_; std::vector compaction_level_; - double max_compaction_score_ = 0.0; // max score in l1 to ln-1 - int max_compaction_score_level_ = 0; // level on which max score occurs int l0_delay_trigger_count_ = 0; // Count used to trigger slow down and stop // for number of L0 files. diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index ab120b6ce..5e82b2523 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -505,6 +505,7 @@ struct ColumnFamilyOptions { // Dynamically changeable through SetOptions() API int max_grandparent_overlap_factor; + // DEPRECATED -- this options is no longer used // Puts are delayed to options.delayed_write_rate when any level has a // compaction score that exceeds soft_rate_limit. This is ignored when == 0.0. // @@ -513,9 +514,15 @@ struct ColumnFamilyOptions { // Dynamically changeable through SetOptions() API double soft_rate_limit; - // DEPRECATED -- this options is no longer usde + // DEPRECATED -- this options is no longer used double hard_rate_limit; + // All writes will be slowed down to at least delayed_write_rate if estimated + // bytes needed to be compaction exceed this threshold. + // + // Default: 0 (disabled) + uint64_t soft_pending_compaction_bytes_limit; + // All writes are stopped if estimated bytes needed to be compaction exceed // this threshold. // diff --git a/util/mutable_cf_options.cc b/util/mutable_cf_options.cc index fafd15415..582c0eadf 100644 --- a/util/mutable_cf_options.cc +++ b/util/mutable_cf_options.cc @@ -82,8 +82,8 @@ void MutableCFOptions::Dump(Logger* log) const { filter_deletes); Log(log, " disable_auto_compactions: %d", disable_auto_compactions); - Log(log, " soft_rate_limit: %lf", - soft_rate_limit); + Log(log, " soft_pending_compaction_bytes_limit: %" PRIu64, + soft_pending_compaction_bytes_limit); Log(log, " hard_pending_compaction_bytes_limit: %" PRIu64, hard_pending_compaction_bytes_limit); Log(log, " level0_file_num_compaction_trigger: %d", diff --git a/util/mutable_cf_options.h b/util/mutable_cf_options.h index 9b1dc0c74..209aa3d51 100644 --- a/util/mutable_cf_options.h +++ b/util/mutable_cf_options.h @@ -24,7 +24,8 @@ struct MutableCFOptions { filter_deletes(options.filter_deletes), inplace_update_num_locks(options.inplace_update_num_locks), disable_auto_compactions(options.disable_auto_compactions), - soft_rate_limit(options.soft_rate_limit), + soft_pending_compaction_bytes_limit( + options.soft_pending_compaction_bytes_limit), hard_pending_compaction_bytes_limit( options.hard_pending_compaction_bytes_limit), level0_file_num_compaction_trigger( @@ -62,7 +63,7 @@ struct MutableCFOptions { filter_deletes(false), inplace_update_num_locks(0), disable_auto_compactions(false), - soft_rate_limit(0), + soft_pending_compaction_bytes_limit(0), hard_pending_compaction_bytes_limit(0), level0_file_num_compaction_trigger(0), level0_slowdown_writes_trigger(0), @@ -114,7 +115,7 @@ struct MutableCFOptions { // Compaction related options bool disable_auto_compactions; - double soft_rate_limit; + uint64_t soft_pending_compaction_bytes_limit; uint64_t hard_pending_compaction_bytes_limit; int level0_file_num_compaction_trigger; int level0_slowdown_writes_trigger; diff --git a/util/options.cc b/util/options.cc index d4e1a19c9..2230b774e 100644 --- a/util/options.cc +++ b/util/options.cc @@ -104,6 +104,7 @@ ColumnFamilyOptions::ColumnFamilyOptions() max_grandparent_overlap_factor(10), soft_rate_limit(0.0), hard_rate_limit(0.0), + soft_pending_compaction_bytes_limit(0), hard_pending_compaction_bytes_limit(0), rate_limit_delay_max_milliseconds(1000), arena_block_size(0), @@ -164,6 +165,8 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options) source_compaction_factor(options.source_compaction_factor), max_grandparent_overlap_factor(options.max_grandparent_overlap_factor), soft_rate_limit(options.soft_rate_limit), + soft_pending_compaction_bytes_limit( + options.soft_pending_compaction_bytes_limit), hard_pending_compaction_bytes_limit( options.hard_pending_compaction_bytes_limit), rate_limit_delay_max_milliseconds( @@ -515,8 +518,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const { Header(log, " Options.arena_block_size: %" ROCKSDB_PRIszt, arena_block_size); - Header(log, " Options.soft_rate_limit: %.2f", - soft_rate_limit); + Header(log, " Options.soft_pending_compaction_bytes_limit: %" PRIu64, + soft_pending_compaction_bytes_limit); Header(log, " Options.hard_pending_compaction_bytes_limit: %" PRIu64, hard_pending_compaction_bytes_limit); Header(log, " Options.rate_limit_delay_max_milliseconds: %u", diff --git a/util/options_helper.cc b/util/options_helper.cc index 4d7bc347c..c397c2f56 100644 --- a/util/options_helper.cc +++ b/util/options_helper.cc @@ -539,7 +539,10 @@ bool ParseCompactionOptions(const std::string& name, const std::string& value, if (name == "disable_auto_compactions") { new_options->disable_auto_compactions = ParseBoolean(name, value); } else if (name == "soft_rate_limit") { - new_options->soft_rate_limit = ParseDouble(value); + // Deprecated options but still leave it here to avoid older options + // strings can be consumed. + } else if (name == "soft_pending_compaction_bytes_limit") { + new_options->soft_pending_compaction_bytes_limit = ParseUint64(value); } else if (name == "hard_pending_compaction_bytes_limit") { new_options->hard_pending_compaction_bytes_limit = ParseUint64(value); } else if (name == "hard_rate_limit") { @@ -1386,7 +1389,6 @@ ColumnFamilyOptions BuildColumnFamilyOptions( // Compaction related options cf_opts.disable_auto_compactions = mutable_cf_options.disable_auto_compactions; - cf_opts.soft_rate_limit = mutable_cf_options.soft_rate_limit; cf_opts.level0_file_num_compaction_trigger = mutable_cf_options.level0_file_num_compaction_trigger; cf_opts.level0_slowdown_writes_trigger = diff --git a/util/options_helper.h b/util/options_helper.h index 373ffc6be..84d547cfc 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -328,6 +328,9 @@ static std::unordered_map cf_options_type_info = { {"verify_checksums_in_compaction", {offsetof(struct ColumnFamilyOptions, verify_checksums_in_compaction), OptionType::kBoolean, OptionVerificationType::kNormal}}, + {"soft_pending_compaction_bytes_limit", + {offsetof(struct ColumnFamilyOptions, soft_pending_compaction_bytes_limit), + OptionType::kUInt64T, OptionVerificationType::kNormal}}, {"hard_pending_compaction_bytes_limit", {offsetof(struct ColumnFamilyOptions, hard_pending_compaction_bytes_limit), OptionType::kUInt64T, OptionVerificationType::kNormal}}, diff --git a/util/testutil.h b/util/testutil.h index 641091c82..68e2f8c9e 100644 --- a/util/testutil.h +++ b/util/testutil.h @@ -316,7 +316,7 @@ class SleepingBackgroundTask { } void WaitUntilSleeping() { MutexLock l(&mutex_); - while (!sleeping_) { + while (!sleeping_ || !should_sleep_) { bg_cv_.Wait(); } }