From 439e36db21ca3ac637fce484c39601f402f1056c Mon Sep 17 00:00:00 2001 From: Mark Callaghan Date: Thu, 16 Jan 2014 18:44:23 -0800 Subject: [PATCH 1/2] Fix SlowdownAmount Summary: This had a few bugs. 1) bottom and top were reversed. top is for the max value but the callers were passing the max value to bottom. The result is that the max sleep is used when n >= bottom. 2) one of the callers passed values with type double and these values are frequently between 1.0 and 2.0 so rounding will do some bad things 3) sometimes the function returned 0 when there should be a stall With this change and one other diff (out for review soon) there are slightly fewer stalls on one workload. With the fix. Stalls(secs): 160.166 level0_slowdown, 0.000 level0_numfiles, 0.000 memtable_compaction, 58.495 leveln_slowdown Stalls(count): 910261 level0_slowdown, 0 level0_numfiles, 0 memtable_compaction, 54526 leveln_slowdown Without the fix. Stalls(secs): 172.227 level0_slowdown, 0.000 level0_numfiles, 0.000 memtable_compaction, 56.538 leveln_slowdown Stalls(count): 160831 level0_slowdown, 0 level0_numfiles, 0 memtable_compaction, 52845 leveln_slowdown Task ID: # Blame Rev: Test Plan: run db_bench for --benchmarks=overwrite with IO-bound database Revert Plan: Database Impact: Memcache Impact: Other Notes: EImportant: - begin *PUBLIC* platform impact section - Bugzilla: # - end platform impact - Reviewers: haobo Reviewed By: haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D15243 --- db/db_impl.cc | 8 ++++---- db/db_impl.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index e84817b9b..0cafc269c 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3155,7 +3155,7 @@ void DBImpl::BuildBatchGroup(Writer** last_writer, // The goal of this formula is to gradually increase the rate at which writes // are slowed. We also tried linear delay (r * 1000), but it seemed to do // slightly worse. There is no other particular reason for choosing quadratic. -uint64_t DBImpl::SlowdownAmount(int n, int top, int bottom) { +uint64_t DBImpl::SlowdownAmount(int n, double bottom, double top) { uint64_t delay; if (n >= top) { delay = 1000; @@ -3167,10 +3167,10 @@ uint64_t DBImpl::SlowdownAmount(int n, int top, int bottom) { // If we are here, we know that: // level0_start_slowdown <= n < level0_slowdown // since the previous two conditions are false. - float how_much = - (float) (n - bottom) / + double how_much = + (double) (n - bottom) / (top - bottom); - delay = how_much * how_much * 1000; + delay = std::max(how_much * how_much * 1000, 100.0); } assert(delay <= 1000); return delay; diff --git a/db/db_impl.h b/db/db_impl.h index 214affac7..3eebaf4a7 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -293,7 +293,7 @@ class DBImpl : public DB { Status WriteLevel0Table(std::vector &mems, VersionEdit* edit, uint64_t* filenumber); - uint64_t SlowdownAmount(int n, int top, int bottom); + uint64_t SlowdownAmount(int n, double bottom, double top); // MakeRoomForWrite will return superversion_to_free through an arugment, // which the caller needs to delete. We do it because caller can delete // the superversion outside of mutex From 0f4a75b710d616b08a06321ae29979d05cbf1f76 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Fri, 17 Jan 2014 12:02:03 -0800 Subject: [PATCH 2/2] Fix SIGSEGV in compaction picker Summary: The SIGSEGV was introduced by https://reviews.facebook.net/D15171 I also fixed ExpandWhileOverlapping() which returned the failure by setting it's own stack variable to nullptr (!). This bug is present in 2.6 release, so I guess ExpandWhileOverlapping never fails :) Test Plan: `make check`. Also MarkCallaghan confirmed it fixed the SIGSEGV he reported. Reviewers: MarkCallaghan, kailiu, sdong, dhruba, haobo Reviewed By: kailiu CC: leveldb Differential Revision: https://reviews.facebook.net/D15261 --- db/compaction_picker.cc | 36 +++++++++++++++--------------------- db/compaction_picker.h | 12 +++++++++++- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index fa2fbc663..8dd3c03bf 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -132,26 +132,16 @@ void CompactionPicker::GetRange(const std::vector& inputs1, GetRange(all, smallest, largest); } -// Add more files to the inputs on "level" to make sure that -// no newer version of a key is compacted to "level+1" while leaving an older -// version in a "level". Otherwise, any Get() will search "level" first, -// and will likely return an old/stale value for the key, since it always -// searches in increasing order of level to find the value. This could -// also scramble the order of merge operands. This function should be -// called any time a new Compaction is created, and its inputs_[0] are -// populated. -// -// Will set c to nullptr if it is impossible to apply this compaction. -void CompactionPicker::ExpandWhileOverlapping(Compaction* c) { +bool CompactionPicker::ExpandWhileOverlapping(Compaction* c) { // If inputs are empty then there is nothing to expand. if (!c || c->inputs_[0].empty()) { - return; + return true; } // GetOverlappingInputs will always do the right thing for level-0. // So we don't need to do any expansion if level == 0. if (c->level() == 0) { - return; + return true; } const int level = c->level(); @@ -182,9 +172,9 @@ void CompactionPicker::ExpandWhileOverlapping(Compaction* c) { &parent_index))) { c->inputs_[0].clear(); c->inputs_[1].clear(); - delete c; - c = nullptr; + return false; } + return true; } uint64_t CompactionPicker::ExpandedCompactionByteSizeLimit(int level) { @@ -341,8 +331,8 @@ Compaction* CompactionPicker::CompactRange(Version* version, int input_level, MaxGrandParentOverlapBytes(input_level)); c->inputs_[0] = inputs; - ExpandWhileOverlapping(c); - if (c == nullptr) { + if (ExpandWhileOverlapping(c) == false) { + delete c; Log(options_->info_log, "Could not compact due to expansion failure.\n"); return nullptr; } @@ -383,8 +373,10 @@ Compaction* LevelCompactionPicker::PickCompaction(Version* version) { level = version->compaction_level_[i]; if ((version->compaction_score_[i] >= 1)) { c = PickCompactionBySize(version, level, version->compaction_score_[i]); - ExpandWhileOverlapping(c); - if (c != nullptr) { + if (ExpandWhileOverlapping(c) == false) { + delete c; + c = nullptr; + } else { break; } } @@ -408,7 +400,9 @@ Compaction* LevelCompactionPicker::PickCompaction(Version* version) { c->inputs_[0].push_back(f); c->parent_index_ = parent_index; c->input_version_->file_to_compact_ = nullptr; - ExpandWhileOverlapping(c); + if (ExpandWhileOverlapping(c) == false) { + return nullptr; + } } } } @@ -528,7 +522,7 @@ Compaction* LevelCompactionPicker::PickCompactionBySize(Version* version, } // store where to start the iteration in the next call to PickCompaction - c->input_version_->next_file_to_compact_by_size_[level] = nextIndex; + version->next_file_to_compact_by_size_[level] = nextIndex; return c; } diff --git a/db/compaction_picker.h b/db/compaction_picker.h index 980c60013..0fe086a18 100644 --- a/db/compaction_picker.h +++ b/db/compaction_picker.h @@ -85,7 +85,17 @@ class CompactionPicker { const std::vector& inputs2, InternalKey* smallest, InternalKey* largest); - void ExpandWhileOverlapping(Compaction* c); + // Add more files to the inputs on "level" to make sure that + // no newer version of a key is compacted to "level+1" while leaving an older + // version in a "level". Otherwise, any Get() will search "level" first, + // and will likely return an old/stale value for the key, since it always + // searches in increasing order of level to find the value. This could + // also scramble the order of merge operands. This function should be + // called any time a new Compaction is created, and its inputs_[0] are + // populated. + // + // Will return false if it is impossible to apply this compaction. + bool ExpandWhileOverlapping(Compaction* c); uint64_t ExpandedCompactionByteSizeLimit(int level);