Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit

Summary: Deprecate options.soft_rate_limit, which is hard to tune, with options.soft_pending_compaction_bytes_limit, which would trigger the slowdown if estimated pending compaction bytes exceeds the threshold. The hope is to make it more striaght-forward to tune.

Test Plan: Modify DBTest.SoftLimit to cover options.soft_pending_compaction_bytes_limit instead; run all unit tests.

Reviewers: IslamAbdelRahman, yhchiang, rven, kradhakrishnan, igor, anthony

Reviewed By: anthony

Subscribers: leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D51117
main
sdong 9 years ago
parent d6e1035a1f
commit 56e77f0967
  1. 6
      HISTORY.md
  2. 19
      db/column_family.cc
  3. 5
      db/db_bench.cc
  4. 156
      db/db_test.cc
  5. 52
      db/internal_stats.cc
  6. 21
      db/internal_stats.h
  7. 11
      db/version_set.cc
  8. 8
      db/version_set.h
  9. 9
      include/rocksdb/options.h
  10. 4
      util/mutable_cf_options.cc
  11. 7
      util/mutable_cf_options.h
  12. 7
      util/options.cc
  13. 6
      util/options_helper.cc
  14. 3
      util/options_helper.h
  15. 2
      util/testutil.h

@ -1,5 +1,10 @@
# Rocksdb Change Log # Rocksdb Change Log
## Unreleased
### Public API Changes
* Change names in CompactionPri and add a new one.
* Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit.
## 4.3.0 (12/8/2015) ## 4.3.0 (12/8/2015)
### New Features ### New Features
* CompactionFilter has new member function called IgnoreSnapshots which allows CompactionFilter to be called even if there are snapshots later than the key. * CompactionFilter has new member function called IgnoreSnapshots which allows CompactionFilter to be called even if there are snapshots later than the key.
@ -9,7 +14,6 @@
### Public API Changes ### Public API Changes
* When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families. * When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families.
* Change names in CompactionPri and add a new one.
## 4.2.0 (11/9/2015) ## 4.2.0 (11/9/2015)
### New Features ### New Features

@ -437,8 +437,6 @@ void ColumnFamilyData::RecalculateWriteStallConditions(
const MutableCFOptions& mutable_cf_options) { const MutableCFOptions& mutable_cf_options) {
if (current_ != nullptr) { if (current_ != nullptr) {
auto* vstorage = current_->storage_info(); auto* vstorage = current_->storage_info();
const double score = vstorage->max_compaction_score();
const int max_level = vstorage->max_compaction_score_level();
auto write_controller = column_family_set_->write_controller_; auto write_controller = column_family_set_->write_controller_;
if (imm()->NumNotFlushed() >= mutable_cf_options.max_write_buffer_number) { if (imm()->NumNotFlushed() >= mutable_cf_options.max_write_buffer_number) {
@ -467,8 +465,8 @@ void ColumnFamilyData::RecalculateWriteStallConditions(
internal_stats_->AddCFStats( internal_stats_->AddCFStats(
InternalStats::HARD_PENDING_COMPACTION_BYTES_LIMIT, 1); InternalStats::HARD_PENDING_COMPACTION_BYTES_LIMIT, 1);
Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log, Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
"[%s] Stopping writes because estimated pending compaction " "[%s] Stopping writes because of estimated pending compaction "
"bytes exceed %" PRIu64, "bytes %" PRIu64,
name_.c_str(), vstorage->estimated_compaction_needed_bytes()); name_.c_str(), vstorage->estimated_compaction_needed_bytes());
} else if (mutable_cf_options.level0_slowdown_writes_trigger >= 0 && } else if (mutable_cf_options.level0_slowdown_writes_trigger >= 0 &&
vstorage->l0_delay_trigger_count() >= vstorage->l0_delay_trigger_count() >=
@ -482,13 +480,16 @@ void ColumnFamilyData::RecalculateWriteStallConditions(
Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log, Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
"[%s] Stalling writes because we have %d level-0 files", "[%s] Stalling writes because we have %d level-0 files",
name_.c_str(), vstorage->l0_delay_trigger_count()); name_.c_str(), vstorage->l0_delay_trigger_count());
} else if (mutable_cf_options.soft_rate_limit > 0.0 && } else if (mutable_cf_options.soft_pending_compaction_bytes_limit > 0 &&
score > mutable_cf_options.soft_rate_limit) { vstorage->estimated_compaction_needed_bytes() >=
mutable_cf_options.soft_pending_compaction_bytes_limit) {
write_controller_token_ = write_controller->GetDelayToken(); write_controller_token_ = write_controller->GetDelayToken();
internal_stats_->RecordLevelNSlowdown(max_level, true); internal_stats_->AddCFStats(
InternalStats::SOFT_PENDING_COMPACTION_BYTES_LIMIT, 1);
Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log, Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
"[%s] Stalling writes because we hit soft limit on level %d", "[%s] Stalling writes because of estimated pending compaction "
name_.c_str(), max_level); "bytes %" PRIu64,
name_.c_str(), vstorage->estimated_compaction_needed_bytes());
} else { } else {
write_controller_token_.reset(); write_controller_token_.reset();
} }

@ -632,10 +632,13 @@ static bool ValidateRateLimit(const char* flagname, double value) {
} }
return true; return true;
} }
DEFINE_double(soft_rate_limit, 0.0, ""); DEFINE_double(soft_rate_limit, 0.0, "DEPRECATED");
DEFINE_double(hard_rate_limit, 0.0, "DEPRECATED"); DEFINE_double(hard_rate_limit, 0.0, "DEPRECATED");
DEFINE_uint64(soft_pending_compaction_bytes_limit, 64ull * 1024 * 1024 * 1024,
"Slowdown writes if pending compaction bytes exceed this number");
DEFINE_uint64(hard_pending_compaction_bytes_limit, 128ull * 1024 * 1024 * 1024, DEFINE_uint64(hard_pending_compaction_bytes_limit, 128ull * 1024 * 1024 * 1024,
"Stop writes if pending compaction bytes exceed this number"); "Stop writes if pending compaction bytes exceed this number");

@ -7470,7 +7470,7 @@ TEST_F(DBTest, DynamicLevelCompressionPerLevel2) {
options.level0_file_num_compaction_trigger = 2; options.level0_file_num_compaction_trigger = 2;
options.level0_slowdown_writes_trigger = 2; options.level0_slowdown_writes_trigger = 2;
options.level0_stop_writes_trigger = 2; options.level0_stop_writes_trigger = 2;
options.soft_rate_limit = 1.1; options.soft_pending_compaction_bytes_limit = 1024 * 1024;
// Use file size to distinguish levels // Use file size to distinguish levels
// L1: 10, L2: 20, L3 40, L4 80 // L1: 10, L2: 20, L3 40, L4 80
@ -7586,7 +7586,7 @@ TEST_F(DBTest, DynamicCompactionOptions) {
options.env = env_; options.env = env_;
options.create_if_missing = true; options.create_if_missing = true;
options.compression = kNoCompression; options.compression = kNoCompression;
options.soft_rate_limit = 1.1; options.soft_pending_compaction_bytes_limit = 1024 * 1024;
options.write_buffer_size = k64KB; options.write_buffer_size = k64KB;
options.arena_block_size = 4 * k4KB; options.arena_block_size = 4 * k4KB;
options.max_write_buffer_number = 2; options.max_write_buffer_number = 2;
@ -8491,7 +8491,7 @@ TEST_F(DBTest, TablePropertiesNeedCompactTest) {
options.target_file_size_base = 2048; options.target_file_size_base = 2048;
options.max_bytes_for_level_base = 10240; options.max_bytes_for_level_base = 10240;
options.max_bytes_for_level_multiplier = 4; options.max_bytes_for_level_multiplier = 4;
options.soft_rate_limit = 1.1; options.soft_pending_compaction_bytes_limit = 1024 * 1024;
options.num_levels = 8; options.num_levels = 8;
std::shared_ptr<TablePropertiesCollectorFactory> collector_factory = std::shared_ptr<TablePropertiesCollectorFactory> collector_factory =
@ -8995,118 +8995,116 @@ TEST_F(DBTest, SoftLimit) {
options = CurrentOptions(options); options = CurrentOptions(options);
options.write_buffer_size = 100000; // Small write buffer options.write_buffer_size = 100000; // Small write buffer
options.max_write_buffer_number = 256; options.max_write_buffer_number = 256;
options.level0_file_num_compaction_trigger = 3; options.level0_file_num_compaction_trigger = 1;
options.level0_slowdown_writes_trigger = 3; options.level0_slowdown_writes_trigger = 3;
options.level0_stop_writes_trigger = 999999; options.level0_stop_writes_trigger = 999999;
options.delayed_write_rate = 200000; // About 200KB/s limited rate options.delayed_write_rate = 20000; // About 200KB/s limited rate
options.soft_rate_limit = 1.1; options.soft_pending_compaction_bytes_limit = 200000;
options.target_file_size_base = 99999999; // All into one file options.target_file_size_base = 99999999; // All into one file
options.max_bytes_for_level_base = 50000; options.max_bytes_for_level_base = 50000;
options.max_bytes_for_level_multiplier = 10;
options.max_background_compactions = 1;
options.compression = kNoCompression; options.compression = kNoCompression;
Reopen(options); Reopen(options);
Put(Key(0), ""); Put(Key(0), "");
// Only allow two compactions test::SleepingBackgroundTask sleeping_task_low;
port::Mutex mut; // Block compactions
port::CondVar cv(&mut); env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
std::atomic<int> compaction_cnt(0); Env::Priority::LOW);
rocksdb::SyncPoint::GetInstance()->SetCallBack( sleeping_task_low.WaitUntilSleeping();
"VersionSet::LogAndApply:WriteManifest", [&](void* arg) {
// Three flushes and the first compaction,
// three flushes and the second compaction go through.
MutexLock l(&mut);
while (compaction_cnt.load() >= 8) {
cv.Wait();
}
compaction_cnt.fetch_add(1);
});
std::atomic<int> sleep_count(0);
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::DelayWrite:Sleep", [&](void* arg) { sleep_count.fetch_add(1); });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
// Create 3 L0 files, making score of L0 to be 3.
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
Put(Key(i), std::string(5000, 'x')); Put(Key(i), std::string(5000, 'x'));
Put(Key(100 - i), std::string(5000, 'x')); Put(Key(100 - i), std::string(5000, 'x'));
// Flush the file. File size is around 30KB.
Flush(); Flush();
} }
while (compaction_cnt.load() < 4 || NumTableFilesAtLevel(0) > 0) { ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
env_->SleepForMicroseconds(1000);
} sleeping_task_low.WakeUp();
sleeping_task_low.WaitUntilDone();
sleeping_task_low.Reset();
dbfull()->TEST_WaitForCompact();
// Now there is one L1 file but doesn't trigger soft_rate_limit // Now there is one L1 file but doesn't trigger soft_rate_limit
// The L1 file size is around 30KB.
ASSERT_EQ(NumTableFilesAtLevel(1), 1); ASSERT_EQ(NumTableFilesAtLevel(1), 1);
ASSERT_EQ(sleep_count.load(), 0); ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
// Only allow one compactin going through.
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"BackgroundCallCompaction:0", [&](void* arg) {
// Schedule a sleeping task.
sleeping_task_low.Reset();
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
&sleeping_task_low, Env::Priority::LOW);
});
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
Env::Priority::LOW);
sleeping_task_low.WaitUntilSleeping();
// Create 3 L0 files, making score of L0 to be 3
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
Put(Key(10 + i), std::string(5000, 'x')); Put(Key(10 + i), std::string(5000, 'x'));
Put(Key(90 - i), std::string(5000, 'x')); Put(Key(90 - i), std::string(5000, 'x'));
// Flush the file. File size is around 30KB.
Flush(); Flush();
} }
while (compaction_cnt.load() < 8 || NumTableFilesAtLevel(0) > 0) {
env_->SleepForMicroseconds(1000); // Wake up sleep task to enable compaction to run and waits
} // for it to go to sleep state again to make sure one compaction
// goes through.
sleeping_task_low.WakeUp();
sleeping_task_low.WaitUntilSleeping();
// Now there is one L1 file (around 60KB) which exceeds 50KB base by 10KB
// Given level multiplier 10, estimated pending compaction is around 100KB
// doesn't trigger soft_pending_compaction_bytes_limit
ASSERT_EQ(NumTableFilesAtLevel(1), 1); ASSERT_EQ(NumTableFilesAtLevel(1), 1);
ASSERT_EQ(sleep_count.load(), 0); ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
// Slowdown is triggered now // Create 3 L0 files, making score of L0 to be 3, higher than L0.
for (int i = 0; i < 10; i++) { for (int i = 0; i < 3; i++) {
Put(Key(i), std::string(100, 'x')); Put(Key(20 + i), std::string(5000, 'x'));
Put(Key(80 - i), std::string(5000, 'x'));
// Flush the file. File size is around 30KB.
Flush();
} }
ASSERT_GT(sleep_count.load(), 0); // Wake up sleep task to enable compaction to run and waits
// for it to go to sleep state again to make sure one compaction
// goes through.
sleeping_task_low.WakeUp();
sleeping_task_low.WaitUntilSleeping();
{ // Now there is one L1 file (around 90KB) which exceeds 50KB base by 40KB
MutexLock l(&mut); // Given level multiplier 10, estimated pending compaction is around 400KB
compaction_cnt.store(7); // triggerring soft_pending_compaction_bytes_limit
cv.SignalAll(); ASSERT_EQ(NumTableFilesAtLevel(1), 1);
} ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
while (NumTableFilesAtLevel(1) > 0) {
env_->SleepForMicroseconds(1000);
}
// Slowdown is not triggered any more. sleeping_task_low.WakeUp();
sleep_count.store(0); sleeping_task_low.WaitUntilSleeping();
// Slowdown is not triggered now
for (int i = 0; i < 10; i++) { ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
Put(Key(i), std::string(100, 'x'));
}
ASSERT_EQ(sleep_count.load(), 0);
// shrink level base so L2 will hit soft limit easier. // shrink level base so L2 will hit soft limit easier.
ASSERT_OK(dbfull()->SetOptions({ ASSERT_OK(dbfull()->SetOptions({
{"max_bytes_for_level_base", "5000"}, {"max_bytes_for_level_base", "5000"},
})); }));
compaction_cnt.store(7);
Flush();
while (NumTableFilesAtLevel(0) == 0) {
env_->SleepForMicroseconds(1000);
}
// Slowdown is triggered now
for (int i = 0; i < 10; i++) {
Put(Key(i), std::string(100, 'x'));
}
ASSERT_GT(sleep_count.load(), 0);
{
MutexLock l(&mut);
compaction_cnt.store(7);
cv.SignalAll();
}
while (NumTableFilesAtLevel(2) != 0) { Put("", "");
env_->SleepForMicroseconds(1000); Flush();
} ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
// Slowdown is not triggered anymore sleeping_task_low.WaitUntilSleeping();
sleep_count.store(0);
for (int i = 0; i < 10; i++) {
Put(Key(i), std::string(100, 'x'));
}
ASSERT_EQ(sleep_count.load(), 0);
rocksdb::SyncPoint::GetInstance()->DisableProcessing(); rocksdb::SyncPoint::GetInstance()->DisableProcessing();
sleeping_task_low.WakeUp();
sleeping_task_low.WaitUntilDone();
} }
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE

@ -35,8 +35,7 @@ void PrintLevelStatsHeader(char* buf, size_t len, const std::string& cf_name) {
"\n** Compaction Stats [%s] **\n" "\n** Compaction Stats [%s] **\n"
"Level Files Size(MB) Score Read(GB) Rn(GB) Rnp1(GB) " "Level Files Size(MB) Score Read(GB) Rn(GB) Rnp1(GB) "
"Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) " "Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) "
"Comp(sec) Comp(cnt) Avg(sec) " "Comp(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop\n"
"Stall(cnt) KeyIn KeyDrop\n"
"--------------------------------------------------------------------" "--------------------------------------------------------------------"
"-----------------------------------------------------------" "-----------------------------------------------------------"
"--------------------------------------\n", "--------------------------------------\n",
@ -44,8 +43,8 @@ void PrintLevelStatsHeader(char* buf, size_t len, const std::string& cf_name) {
} }
void PrintLevelStats(char* buf, size_t len, const std::string& name, void PrintLevelStats(char* buf, size_t len, const std::string& name,
int num_files, int being_compacted, double total_file_size, double score, int num_files, int being_compacted, double total_file_size,
double w_amp, uint64_t stalls, double score, double w_amp,
const InternalStats::CompactionStats& stats) { const InternalStats::CompactionStats& stats) {
uint64_t bytes_read = uint64_t bytes_read =
stats.bytes_read_non_output_levels + stats.bytes_read_output_level; stats.bytes_read_non_output_levels + stats.bytes_read_output_level;
@ -70,8 +69,6 @@ void PrintLevelStats(char* buf, size_t len, const std::string& name,
"%9.0f " /* Comp(sec) */ "%9.0f " /* Comp(sec) */
"%9d " /* Comp(cnt) */ "%9d " /* Comp(cnt) */
"%8.3f " /* Avg(sec) */ "%8.3f " /* Avg(sec) */
"%10" PRIu64
" " /* Stall(cnt) */
"%7s " /* KeyIn */ "%7s " /* KeyIn */
"%6s\n", /* KeyDrop */ "%6s\n", /* KeyDrop */
name.c_str(), name.c_str(),
@ -82,7 +79,7 @@ void PrintLevelStats(char* buf, size_t len, const std::string& name,
bytes_read / kMB / elapsed, stats.bytes_written / kMB / elapsed, bytes_read / kMB / elapsed, stats.bytes_written / kMB / elapsed,
stats.micros / kMicrosInSec, stats.count, stats.micros / kMicrosInSec, stats.count,
stats.count == 0 ? 0 : stats.micros / kMicrosInSec / stats.count, stats.count == 0 ? 0 : stats.micros / kMicrosInSec / stats.count,
stalls, num_input_records.c_str(), num_dropped_records.c_str()); num_input_records.c_str(), num_dropped_records.c_str());
} }
} }
@ -677,27 +674,13 @@ void InternalStats::DumpCFStats(std::string* value) {
int total_files = 0; int total_files = 0;
int total_files_being_compacted = 0; int total_files_being_compacted = 0;
double total_file_size = 0; double total_file_size = 0;
uint64_t total_slowdown_count_soft = 0;
uint64_t total_slowdown_count_hard = 0;
uint64_t total_stall_count = 0;
for (int level = 0; level < number_levels_; level++) { for (int level = 0; level < number_levels_; level++) {
int files = vstorage->NumLevelFiles(level); int files = vstorage->NumLevelFiles(level);
total_files += files; total_files += files;
total_files_being_compacted += files_being_compacted[level]; total_files_being_compacted += files_being_compacted[level];
if (comp_stats_[level].micros > 0 || files > 0) { if (comp_stats_[level].micros > 0 || files > 0) {
uint64_t stalls =
level == 0 ? (cf_stats_count_[LEVEL0_SLOWDOWN_TOTAL] +
cf_stats_count_[LEVEL0_NUM_FILES_TOTAL] +
cf_stats_count_[HARD_PENDING_COMPACTION_BYTES_LIMIT] +
cf_stats_count_[MEMTABLE_COMPACTION])
: (stall_leveln_slowdown_count_soft_[level] +
stall_leveln_slowdown_count_hard_[level]);
stats_sum.Add(comp_stats_[level]); stats_sum.Add(comp_stats_[level]);
total_file_size += vstorage->NumLevelBytes(level); total_file_size += vstorage->NumLevelBytes(level);
total_stall_count += stalls;
total_slowdown_count_soft += stall_leveln_slowdown_count_soft_[level];
total_slowdown_count_hard += stall_leveln_slowdown_count_hard_[level];
double w_amp = double w_amp =
(comp_stats_[level].bytes_read_non_output_levels == 0) ? 0.0 (comp_stats_[level].bytes_read_non_output_levels == 0) ? 0.0
: static_cast<double>(comp_stats_[level].bytes_written) / : static_cast<double>(comp_stats_[level].bytes_written) /
@ -705,17 +688,23 @@ void InternalStats::DumpCFStats(std::string* value) {
PrintLevelStats(buf, sizeof(buf), "L" + ToString(level), files, PrintLevelStats(buf, sizeof(buf), "L" + ToString(level), files,
files_being_compacted[level], files_being_compacted[level],
vstorage->NumLevelBytes(level), compaction_score[level], vstorage->NumLevelBytes(level), compaction_score[level],
w_amp, stalls, comp_stats_[level]); w_amp, comp_stats_[level]);
value->append(buf); value->append(buf);
} }
} }
uint64_t curr_ingest = cf_stats_value_[BYTES_FLUSHED]; uint64_t curr_ingest = cf_stats_value_[BYTES_FLUSHED];
// Cumulative summary // Cumulative summary
double w_amp = stats_sum.bytes_written / static_cast<double>(curr_ingest + 1); double w_amp = stats_sum.bytes_written / static_cast<double>(curr_ingest + 1);
uint64_t total_stall_count =
cf_stats_count_[LEVEL0_SLOWDOWN_TOTAL] +
cf_stats_count_[LEVEL0_NUM_FILES_TOTAL] +
cf_stats_count_[SOFT_PENDING_COMPACTION_BYTES_LIMIT] +
cf_stats_count_[HARD_PENDING_COMPACTION_BYTES_LIMIT] +
cf_stats_count_[MEMTABLE_COMPACTION];
// Stats summary across levels // Stats summary across levels
PrintLevelStats(buf, sizeof(buf), "Sum", total_files, PrintLevelStats(buf, sizeof(buf), "Sum", total_files,
total_files_being_compacted, total_file_size, 0, w_amp, total_files_being_compacted, total_file_size, 0, w_amp,
total_stall_count, stats_sum); stats_sum);
value->append(buf); value->append(buf);
// Interval summary // Interval summary
uint64_t interval_ingest = uint64_t interval_ingest =
@ -723,9 +712,7 @@ void InternalStats::DumpCFStats(std::string* value) {
CompactionStats interval_stats(stats_sum); CompactionStats interval_stats(stats_sum);
interval_stats.Subtract(cf_stats_snapshot_.comp_stats); interval_stats.Subtract(cf_stats_snapshot_.comp_stats);
w_amp = interval_stats.bytes_written / static_cast<double>(interval_ingest); w_amp = interval_stats.bytes_written / static_cast<double>(interval_ingest);
PrintLevelStats(buf, sizeof(buf), "Int", 0, 0, 0, 0, PrintLevelStats(buf, sizeof(buf), "Int", 0, 0, 0, 0, w_amp, interval_stats);
w_amp, total_stall_count - cf_stats_snapshot_.stall_count,
interval_stats);
value->append(buf); value->append(buf);
snprintf(buf, sizeof(buf), snprintf(buf, sizeof(buf),
@ -742,19 +729,20 @@ void InternalStats::DumpCFStats(std::string* value) {
"%" PRIu64 "%" PRIu64
" level0_numfiles_with_compaction, " " level0_numfiles_with_compaction, "
"%" PRIu64 "%" PRIu64
" pending_compaction_bytes, " " stop for pending_compaction_bytes, "
"%" PRIu64 "%" PRIu64
" memtable_compaction, " " slowdown for pending_compaction_bytes, "
"%" PRIu64 "%" PRIu64
" leveln_slowdown_soft, " " memtable_compaction, "
"%" PRIu64 " leveln_slowdown_hard\n", "interval %" PRIu64 " total count\n",
cf_stats_count_[LEVEL0_SLOWDOWN_TOTAL], cf_stats_count_[LEVEL0_SLOWDOWN_TOTAL],
cf_stats_count_[LEVEL0_SLOWDOWN_WITH_COMPACTION], cf_stats_count_[LEVEL0_SLOWDOWN_WITH_COMPACTION],
cf_stats_count_[LEVEL0_NUM_FILES_TOTAL], cf_stats_count_[LEVEL0_NUM_FILES_TOTAL],
cf_stats_count_[LEVEL0_NUM_FILES_WITH_COMPACTION], cf_stats_count_[LEVEL0_NUM_FILES_WITH_COMPACTION],
cf_stats_count_[HARD_PENDING_COMPACTION_BYTES_LIMIT], cf_stats_count_[HARD_PENDING_COMPACTION_BYTES_LIMIT],
cf_stats_count_[MEMTABLE_COMPACTION], total_slowdown_count_soft, cf_stats_count_[SOFT_PENDING_COMPACTION_BYTES_LIMIT],
total_slowdown_count_hard); cf_stats_count_[MEMTABLE_COMPACTION],
total_stall_count - cf_stats_snapshot_.stall_count);
value->append(buf); value->append(buf);
cf_stats_snapshot_.ingest_bytes = curr_ingest; cf_stats_snapshot_.ingest_bytes = curr_ingest;

@ -88,6 +88,7 @@ class InternalStats {
MEMTABLE_COMPACTION, MEMTABLE_COMPACTION,
LEVEL0_NUM_FILES_TOTAL, LEVEL0_NUM_FILES_TOTAL,
LEVEL0_NUM_FILES_WITH_COMPACTION, LEVEL0_NUM_FILES_WITH_COMPACTION,
SOFT_PENDING_COMPACTION_BYTES_LIMIT,
HARD_PENDING_COMPACTION_BYTES_LIMIT, HARD_PENDING_COMPACTION_BYTES_LIMIT,
WRITE_STALLS_ENUM_MAX, WRITE_STALLS_ENUM_MAX,
BYTES_FLUSHED, BYTES_FLUSHED,
@ -111,8 +112,6 @@ class InternalStats {
cf_stats_value_(INTERNAL_CF_STATS_ENUM_MAX), cf_stats_value_(INTERNAL_CF_STATS_ENUM_MAX),
cf_stats_count_(INTERNAL_CF_STATS_ENUM_MAX), cf_stats_count_(INTERNAL_CF_STATS_ENUM_MAX),
comp_stats_(num_levels), comp_stats_(num_levels),
stall_leveln_slowdown_count_hard_(num_levels),
stall_leveln_slowdown_count_soft_(num_levels),
file_read_latency_(num_levels), file_read_latency_(num_levels),
bg_error_count_(0), bg_error_count_(0),
number_levels_(num_levels), number_levels_(num_levels),
@ -126,10 +125,6 @@ class InternalStats {
cf_stats_value_[i] = 0; cf_stats_value_[i] = 0;
cf_stats_count_[i] = 0; cf_stats_count_[i] = 0;
} }
for (int i = 0; i < num_levels; ++i) {
stall_leveln_slowdown_count_hard_[i] = 0;
stall_leveln_slowdown_count_soft_[i] = 0;
}
} }
// Per level compaction stats. comp_stats_[level] stores the stats for // Per level compaction stats. comp_stats_[level] stores the stats for
@ -237,14 +232,6 @@ class InternalStats {
comp_stats_[level].bytes_moved += amount; comp_stats_[level].bytes_moved += amount;
} }
void RecordLevelNSlowdown(int level, bool soft) {
if (soft) {
++stall_leveln_slowdown_count_soft_[level];
} else {
++stall_leveln_slowdown_count_hard_[level];
}
}
void AddCFStats(InternalCFStatsType type, uint64_t value) { void AddCFStats(InternalCFStatsType type, uint64_t value) {
cf_stats_value_[type] += value; cf_stats_value_[type] += value;
++cf_stats_count_[type]; ++cf_stats_count_[type];
@ -282,9 +269,6 @@ class InternalStats {
std::vector<uint64_t> cf_stats_count_; std::vector<uint64_t> cf_stats_count_;
// Per-ColumnFamily/level compaction stats // Per-ColumnFamily/level compaction stats
std::vector<CompactionStats> comp_stats_; std::vector<CompactionStats> comp_stats_;
// These count the number of microseconds for which MakeRoomForWrite stalls.
std::vector<uint64_t> stall_leveln_slowdown_count_hard_;
std::vector<uint64_t> stall_leveln_slowdown_count_soft_;
std::vector<HistogramImpl> file_read_latency_; std::vector<HistogramImpl> file_read_latency_;
// Used to compute per-interval statistics // Used to compute per-interval statistics
@ -361,6 +345,7 @@ class InternalStats {
MEMTABLE_COMPACTION, MEMTABLE_COMPACTION,
LEVEL0_NUM_FILES_TOTAL, LEVEL0_NUM_FILES_TOTAL,
LEVEL0_NUM_FILES_WITH_COMPACTION, LEVEL0_NUM_FILES_WITH_COMPACTION,
SOFT_PENDING_COMPACTION_BYTES_LIMIT,
HARD_PENDING_COMPACTION_BYTES_LIMIT, HARD_PENDING_COMPACTION_BYTES_LIMIT,
WRITE_STALLS_ENUM_MAX, WRITE_STALLS_ENUM_MAX,
BYTES_FLUSHED, BYTES_FLUSHED,
@ -407,8 +392,6 @@ class InternalStats {
void IncBytesMoved(int level, uint64_t amount) {} void IncBytesMoved(int level, uint64_t amount) {}
void RecordLevelNSlowdown(int level, bool soft) {}
void AddCFStats(InternalCFStatsType type, uint64_t value) {} void AddCFStats(InternalCFStatsType type, uint64_t value) {}
void AddDBStats(InternalDBStatsType type, uint64_t value) {} void AddDBStats(InternalDBStatsType type, uint64_t value) {}

@ -1156,9 +1156,6 @@ void VersionStorageInfo::EstimateCompactionBytesNeeded(
void VersionStorageInfo::ComputeCompactionScore( void VersionStorageInfo::ComputeCompactionScore(
const MutableCFOptions& mutable_cf_options, const MutableCFOptions& mutable_cf_options,
const CompactionOptionsFIFO& compaction_options_fifo) { const CompactionOptionsFIFO& compaction_options_fifo) {
double max_score = 0;
int max_score_level = 0;
for (int level = 0; level <= MaxInputLevel(); level++) { for (int level = 0; level <= MaxInputLevel(); level++) {
double score; double score;
if (level == 0) { if (level == 0) {
@ -1209,19 +1206,11 @@ void VersionStorageInfo::ComputeCompactionScore(
} }
score = static_cast<double>(level_bytes_no_compacting) / score = static_cast<double>(level_bytes_no_compacting) /
MaxBytesForLevel(level); MaxBytesForLevel(level);
if (max_score < score) {
max_score = score;
max_score_level = level;
}
} }
compaction_level_[level] = level; compaction_level_[level] = level;
compaction_score_[level] = score; compaction_score_[level] = score;
} }
// update the max compaction score in levels 1 to n-1
max_compaction_score_ = max_score;
max_compaction_score_level_ = max_score_level;
// sort all the levels based on their score. Higher scores get listed // sort all the levels based on their score. Higher scores get listed
// first. Use bubble sort because the number of entries are small. // first. Use bubble sort because the number of entries are small.
for (int i = 0; i < num_levels() - 2; i++) { for (int i = 0; i < num_levels() - 2; i++) {

@ -146,12 +146,6 @@ class VersionStorageInfo {
int MaxInputLevel() const; int MaxInputLevel() const;
// Returns the maxmimum compaction score for levels 1 to max
double max_compaction_score() const { return max_compaction_score_; }
// See field declaration
int max_compaction_score_level() const { return max_compaction_score_level_; }
// Return level number that has idx'th highest score // Return level number that has idx'th highest score
int CompactionScoreLevel(int idx) const { return compaction_level_[idx]; } int CompactionScoreLevel(int idx) const { return compaction_level_[idx]; }
@ -387,8 +381,6 @@ class VersionStorageInfo {
// These are used to pick the best compaction level // These are used to pick the best compaction level
std::vector<double> compaction_score_; std::vector<double> compaction_score_;
std::vector<int> compaction_level_; std::vector<int> compaction_level_;
double max_compaction_score_ = 0.0; // max score in l1 to ln-1
int max_compaction_score_level_ = 0; // level on which max score occurs
int l0_delay_trigger_count_ = 0; // Count used to trigger slow down and stop int l0_delay_trigger_count_ = 0; // Count used to trigger slow down and stop
// for number of L0 files. // for number of L0 files.

@ -505,6 +505,7 @@ struct ColumnFamilyOptions {
// Dynamically changeable through SetOptions() API // Dynamically changeable through SetOptions() API
int max_grandparent_overlap_factor; int max_grandparent_overlap_factor;
// DEPRECATED -- this options is no longer used
// Puts are delayed to options.delayed_write_rate when any level has a // Puts are delayed to options.delayed_write_rate when any level has a
// compaction score that exceeds soft_rate_limit. This is ignored when == 0.0. // compaction score that exceeds soft_rate_limit. This is ignored when == 0.0.
// //
@ -513,9 +514,15 @@ struct ColumnFamilyOptions {
// Dynamically changeable through SetOptions() API // Dynamically changeable through SetOptions() API
double soft_rate_limit; double soft_rate_limit;
// DEPRECATED -- this options is no longer usde // DEPRECATED -- this options is no longer used
double hard_rate_limit; double hard_rate_limit;
// All writes will be slowed down to at least delayed_write_rate if estimated
// bytes needed to be compaction exceed this threshold.
//
// Default: 0 (disabled)
uint64_t soft_pending_compaction_bytes_limit;
// All writes are stopped if estimated bytes needed to be compaction exceed // All writes are stopped if estimated bytes needed to be compaction exceed
// this threshold. // this threshold.
// //

@ -82,8 +82,8 @@ void MutableCFOptions::Dump(Logger* log) const {
filter_deletes); filter_deletes);
Log(log, " disable_auto_compactions: %d", Log(log, " disable_auto_compactions: %d",
disable_auto_compactions); disable_auto_compactions);
Log(log, " soft_rate_limit: %lf", Log(log, " soft_pending_compaction_bytes_limit: %" PRIu64,
soft_rate_limit); soft_pending_compaction_bytes_limit);
Log(log, " hard_pending_compaction_bytes_limit: %" PRIu64, Log(log, " hard_pending_compaction_bytes_limit: %" PRIu64,
hard_pending_compaction_bytes_limit); hard_pending_compaction_bytes_limit);
Log(log, " level0_file_num_compaction_trigger: %d", Log(log, " level0_file_num_compaction_trigger: %d",

@ -24,7 +24,8 @@ struct MutableCFOptions {
filter_deletes(options.filter_deletes), filter_deletes(options.filter_deletes),
inplace_update_num_locks(options.inplace_update_num_locks), inplace_update_num_locks(options.inplace_update_num_locks),
disable_auto_compactions(options.disable_auto_compactions), disable_auto_compactions(options.disable_auto_compactions),
soft_rate_limit(options.soft_rate_limit), soft_pending_compaction_bytes_limit(
options.soft_pending_compaction_bytes_limit),
hard_pending_compaction_bytes_limit( hard_pending_compaction_bytes_limit(
options.hard_pending_compaction_bytes_limit), options.hard_pending_compaction_bytes_limit),
level0_file_num_compaction_trigger( level0_file_num_compaction_trigger(
@ -62,7 +63,7 @@ struct MutableCFOptions {
filter_deletes(false), filter_deletes(false),
inplace_update_num_locks(0), inplace_update_num_locks(0),
disable_auto_compactions(false), disable_auto_compactions(false),
soft_rate_limit(0), soft_pending_compaction_bytes_limit(0),
hard_pending_compaction_bytes_limit(0), hard_pending_compaction_bytes_limit(0),
level0_file_num_compaction_trigger(0), level0_file_num_compaction_trigger(0),
level0_slowdown_writes_trigger(0), level0_slowdown_writes_trigger(0),
@ -114,7 +115,7 @@ struct MutableCFOptions {
// Compaction related options // Compaction related options
bool disable_auto_compactions; bool disable_auto_compactions;
double soft_rate_limit; uint64_t soft_pending_compaction_bytes_limit;
uint64_t hard_pending_compaction_bytes_limit; uint64_t hard_pending_compaction_bytes_limit;
int level0_file_num_compaction_trigger; int level0_file_num_compaction_trigger;
int level0_slowdown_writes_trigger; int level0_slowdown_writes_trigger;

@ -104,6 +104,7 @@ ColumnFamilyOptions::ColumnFamilyOptions()
max_grandparent_overlap_factor(10), max_grandparent_overlap_factor(10),
soft_rate_limit(0.0), soft_rate_limit(0.0),
hard_rate_limit(0.0), hard_rate_limit(0.0),
soft_pending_compaction_bytes_limit(0),
hard_pending_compaction_bytes_limit(0), hard_pending_compaction_bytes_limit(0),
rate_limit_delay_max_milliseconds(1000), rate_limit_delay_max_milliseconds(1000),
arena_block_size(0), arena_block_size(0),
@ -164,6 +165,8 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
source_compaction_factor(options.source_compaction_factor), source_compaction_factor(options.source_compaction_factor),
max_grandparent_overlap_factor(options.max_grandparent_overlap_factor), max_grandparent_overlap_factor(options.max_grandparent_overlap_factor),
soft_rate_limit(options.soft_rate_limit), soft_rate_limit(options.soft_rate_limit),
soft_pending_compaction_bytes_limit(
options.soft_pending_compaction_bytes_limit),
hard_pending_compaction_bytes_limit( hard_pending_compaction_bytes_limit(
options.hard_pending_compaction_bytes_limit), options.hard_pending_compaction_bytes_limit),
rate_limit_delay_max_milliseconds( rate_limit_delay_max_milliseconds(
@ -515,8 +518,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
Header(log, Header(log,
" Options.arena_block_size: %" ROCKSDB_PRIszt, " Options.arena_block_size: %" ROCKSDB_PRIszt,
arena_block_size); arena_block_size);
Header(log, " Options.soft_rate_limit: %.2f", Header(log, " Options.soft_pending_compaction_bytes_limit: %" PRIu64,
soft_rate_limit); soft_pending_compaction_bytes_limit);
Header(log, " Options.hard_pending_compaction_bytes_limit: %" PRIu64, Header(log, " Options.hard_pending_compaction_bytes_limit: %" PRIu64,
hard_pending_compaction_bytes_limit); hard_pending_compaction_bytes_limit);
Header(log, " Options.rate_limit_delay_max_milliseconds: %u", Header(log, " Options.rate_limit_delay_max_milliseconds: %u",

@ -539,7 +539,10 @@ bool ParseCompactionOptions(const std::string& name, const std::string& value,
if (name == "disable_auto_compactions") { if (name == "disable_auto_compactions") {
new_options->disable_auto_compactions = ParseBoolean(name, value); new_options->disable_auto_compactions = ParseBoolean(name, value);
} else if (name == "soft_rate_limit") { } else if (name == "soft_rate_limit") {
new_options->soft_rate_limit = ParseDouble(value); // Deprecated options but still leave it here to avoid older options
// strings can be consumed.
} else if (name == "soft_pending_compaction_bytes_limit") {
new_options->soft_pending_compaction_bytes_limit = ParseUint64(value);
} else if (name == "hard_pending_compaction_bytes_limit") { } else if (name == "hard_pending_compaction_bytes_limit") {
new_options->hard_pending_compaction_bytes_limit = ParseUint64(value); new_options->hard_pending_compaction_bytes_limit = ParseUint64(value);
} else if (name == "hard_rate_limit") { } else if (name == "hard_rate_limit") {
@ -1386,7 +1389,6 @@ ColumnFamilyOptions BuildColumnFamilyOptions(
// Compaction related options // Compaction related options
cf_opts.disable_auto_compactions = cf_opts.disable_auto_compactions =
mutable_cf_options.disable_auto_compactions; mutable_cf_options.disable_auto_compactions;
cf_opts.soft_rate_limit = mutable_cf_options.soft_rate_limit;
cf_opts.level0_file_num_compaction_trigger = cf_opts.level0_file_num_compaction_trigger =
mutable_cf_options.level0_file_num_compaction_trigger; mutable_cf_options.level0_file_num_compaction_trigger;
cf_opts.level0_slowdown_writes_trigger = cf_opts.level0_slowdown_writes_trigger =

@ -328,6 +328,9 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
{"verify_checksums_in_compaction", {"verify_checksums_in_compaction",
{offsetof(struct ColumnFamilyOptions, verify_checksums_in_compaction), {offsetof(struct ColumnFamilyOptions, verify_checksums_in_compaction),
OptionType::kBoolean, OptionVerificationType::kNormal}}, OptionType::kBoolean, OptionVerificationType::kNormal}},
{"soft_pending_compaction_bytes_limit",
{offsetof(struct ColumnFamilyOptions, soft_pending_compaction_bytes_limit),
OptionType::kUInt64T, OptionVerificationType::kNormal}},
{"hard_pending_compaction_bytes_limit", {"hard_pending_compaction_bytes_limit",
{offsetof(struct ColumnFamilyOptions, hard_pending_compaction_bytes_limit), {offsetof(struct ColumnFamilyOptions, hard_pending_compaction_bytes_limit),
OptionType::kUInt64T, OptionVerificationType::kNormal}}, OptionType::kUInt64T, OptionVerificationType::kNormal}},

@ -316,7 +316,7 @@ class SleepingBackgroundTask {
} }
void WaitUntilSleeping() { void WaitUntilSleeping() {
MutexLock l(&mutex_); MutexLock l(&mutex_);
while (!sleeping_) { while (!sleeping_ || !should_sleep_) {
bg_cv_.Wait(); bg_cv_.Wait();
} }
} }

Loading…
Cancel
Save