From 7632fdb5cbe9d554cb6c2f4f2f81bfc509e61420 Mon Sep 17 00:00:00 2001 From: Dhruba Borthakur Date: Tue, 20 Nov 2012 23:07:41 -0800 Subject: [PATCH] Support taking a configurable number of files from the same level to compact in a single compaction run. Summary: The compaction process takes some files from LevelK and merges them into LevelK+1. The number of files it picks from LevelK was capped in such a way that the total amount of data picked did not exceed the maxfilesize of that level. This essentially meant that only one file from LevelK was picked for a single compaction. For bulk loads, we would like to take many files from LevelK and compact them using a single compaction run. This patch introduces an option called 'source_compaction_factor' (similar to expanded_compaction_factor). It is a multiplier that is multiplied by the maxfilesize of that level to arrive at the limit that is used to throttle the number of source files from LevelK. For bulk loads, set source_compaction_factor to a very high number so that multiple files from the same level are picked for compaction in a single compaction. The default value of source_compaction_factor is 1, so that we can keep backward compatibility with existing compaction semantics. 
Test Plan: make clean check Reviewers: emayanke, sheki Reviewed By: emayanke CC: leveldb Differential Revision: https://reviews.facebook.net/D6867 --- db/db_bench.cc | 8 ++++++++ db/version_set.cc | 5 +++-- include/leveldb/options.h | 9 +++++++++ util/options.cc | 3 +++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/db/db_bench.cc b/db/db_bench.cc index 96bb44d67..0ed57d461 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -221,6 +221,10 @@ static bool FLAGS_read_only = false; // Do not auto trigger compactions static bool FLAGS_disable_auto_compactions = false; +// Cap the size of data in levelK for a compaction run +// that compacts Levelk with LevelK+1 +static int FLAGS_source_compaction_factor = 1; + extern bool useOsBuffer; extern bool useFsReadAhead; extern bool useMmapRead; @@ -978,6 +982,7 @@ class Benchmark { options.max_grandparent_overlap_factor = FLAGS_max_grandparent_overlap_factor; options.disable_auto_compactions = FLAGS_disable_auto_compactions; + options.source_compaction_factor = FLAGS_source_compaction_factor; Status s; if(FLAGS_read_only) { s = DB::OpenForReadOnly(options, FLAGS_db, &db_); @@ -1431,6 +1436,9 @@ int main(int argc, char** argv) { } else if (sscanf(argv[i], "--disable_auto_compactions=%d%c", &n, &junk) == 1 && (n == 0 || n ==1)) { FLAGS_disable_auto_compactions = n; + } else if (sscanf(argv[i], "--source_compaction_factor=%d%c", + &n, &junk) == 1 && n > 0) { + FLAGS_source_compaction_factor = n; } else { fprintf(stderr, "Invalid flag '%s'\n", argv[i]); exit(1); diff --git a/db/version_set.cc b/db/version_set.cc index 47d82c7b0..dd33fe21b 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2028,7 +2028,8 @@ Compaction* VersionSet::CompactRange( } // Avoid compacting too much in one shot in case the range is large. 
- const uint64_t limit = MaxFileSizeForLevel(level); + const uint64_t limit = MaxFileSizeForLevel(level) * + options_->source_compaction_factor; uint64_t total = 0; for (size_t i = 0; i < inputs.size(); i++) { uint64_t s = inputs[i]->file_size; @@ -2039,7 +2040,7 @@ Compaction* VersionSet::CompactRange( } } - Compaction* c = new Compaction(level, limit, + Compaction* c = new Compaction(level, MaxFileSizeForLevel(level), MaxGrandParentOverlapBytes(level), NumberLevels()); c->input_version_ = current_; c->input_version_->Ref(); diff --git a/include/leveldb/options.h b/include/leveldb/options.h index 2ae8c31bb..b89322440 100644 --- a/include/leveldb/options.h +++ b/include/leveldb/options.h @@ -238,6 +238,15 @@ struct Options { // (expanded_compaction_factor * targetFileSizeLevel()) many bytes. int expanded_compaction_factor; + // Maximum number of bytes in all source files to be compacted in a + // single compaction run. We avoid picking too many files in the + // source level so that we do not exceed the total source bytes + // for compaction to exceed + // (source_compaction_factor * targetFileSizeLevel()) many bytes. + // Default:1, i.e. pick maxfilesize amount of data as the source of + // a compaction. + int source_compaction_factor; + // Control maximum bytes of overlaps in grandparent (i.e., level+2) before we // stop building a single file in a level->level+1 compaction. 
int max_grandparent_overlap_factor; diff --git a/util/options.cc b/util/options.cc index 4907c45af..58981a2ba 100644 --- a/util/options.cc +++ b/util/options.cc @@ -37,6 +37,7 @@ Options::Options() max_bytes_for_level_base(10 * 1048576), max_bytes_for_level_multiplier(10), expanded_compaction_factor(25), + source_compaction_factor(1), max_grandparent_overlap_factor(10), statistics(NULL), disableDataSync(false), @@ -115,6 +116,8 @@ Options::Dump( max_bytes_for_level_multiplier); Log(log," Options.expanded_compaction_factor: %d", expanded_compaction_factor); + Log(log," Options.source_compaction_factor: %d", + source_compaction_factor); Log(log," Options.max_grandparent_overlap_factor: %d", max_grandparent_overlap_factor); Log(log," Options.db_log_dir: %s",