Support taking a configurable number of files from the same level to compact in a single compaction run.

Summary:
The compaction process takes some files from LevelK and
merges them into LevelK+1. The number of files it picks from
LevelK was capped in such a way that the total amount of
data picked does not exceed the maxfilesize of that level.
This essentially meant that only one file from LevelK
is picked for a single compaction.

For bulkloads, we would like to take many files from
LevelK and compact them using a single compaction run.

This patch introduces an option called the 'source_compaction_factor'
(similar to expanded_compaction_factor). It is a multiplier
that is multiplied by the maxfilesize of that level to arrive
at the limit that is used to throttle the number of source
files from LevelK.  For bulk loads, set source_compaction_factor
to a very high number so that multiple files from the same
level are picked for compaction in a single compaction.

The default value of source_compaction_factor is 1, so that
we can keep backward compatibility with existing compaction semantics.

Test Plan: make clean check

Reviewers: emayanke, sheki

Reviewed By: emayanke

CC: leveldb

Differential Revision: https://reviews.facebook.net/D6867
main
Dhruba Borthakur 12 years ago
parent fbb73a4ac3
commit 7632fdb5cb
  1. 8
      db/db_bench.cc
  2. 5
      db/version_set.cc
  3. 9
      include/leveldb/options.h
  4. 3
      util/options.cc

@ -221,6 +221,10 @@ static bool FLAGS_read_only = false;
// Do not auto trigger compactions // Do not auto trigger compactions
static bool FLAGS_disable_auto_compactions = false; static bool FLAGS_disable_auto_compactions = false;
// Cap the size of data in levelK for a compaction run
// that compacts Levelk with LevelK+1
static int FLAGS_source_compaction_factor = 1;
extern bool useOsBuffer; extern bool useOsBuffer;
extern bool useFsReadAhead; extern bool useFsReadAhead;
extern bool useMmapRead; extern bool useMmapRead;
@ -978,6 +982,7 @@ class Benchmark {
options.max_grandparent_overlap_factor = options.max_grandparent_overlap_factor =
FLAGS_max_grandparent_overlap_factor; FLAGS_max_grandparent_overlap_factor;
options.disable_auto_compactions = FLAGS_disable_auto_compactions; options.disable_auto_compactions = FLAGS_disable_auto_compactions;
options.source_compaction_factor = FLAGS_source_compaction_factor;
Status s; Status s;
if(FLAGS_read_only) { if(FLAGS_read_only) {
s = DB::OpenForReadOnly(options, FLAGS_db, &db_); s = DB::OpenForReadOnly(options, FLAGS_db, &db_);
@ -1431,6 +1436,9 @@ int main(int argc, char** argv) {
} else if (sscanf(argv[i], "--disable_auto_compactions=%d%c", } else if (sscanf(argv[i], "--disable_auto_compactions=%d%c",
&n, &junk) == 1 && (n == 0 || n ==1)) { &n, &junk) == 1 && (n == 0 || n ==1)) {
FLAGS_disable_auto_compactions = n; FLAGS_disable_auto_compactions = n;
} else if (sscanf(argv[i], "--source_compaction_factor=%d%c",
&n, &junk) == 1 && n > 0) {
FLAGS_source_compaction_factor = n;
} else { } else {
fprintf(stderr, "Invalid flag '%s'\n", argv[i]); fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
exit(1); exit(1);

@ -2028,7 +2028,8 @@ Compaction* VersionSet::CompactRange(
} }
// Avoid compacting too much in one shot in case the range is large. // Avoid compacting too much in one shot in case the range is large.
const uint64_t limit = MaxFileSizeForLevel(level); const uint64_t limit = MaxFileSizeForLevel(level) *
options_->source_compaction_factor;
uint64_t total = 0; uint64_t total = 0;
for (size_t i = 0; i < inputs.size(); i++) { for (size_t i = 0; i < inputs.size(); i++) {
uint64_t s = inputs[i]->file_size; uint64_t s = inputs[i]->file_size;
@ -2039,7 +2040,7 @@ Compaction* VersionSet::CompactRange(
} }
} }
Compaction* c = new Compaction(level, limit, Compaction* c = new Compaction(level, MaxFileSizeForLevel(level),
MaxGrandParentOverlapBytes(level), NumberLevels()); MaxGrandParentOverlapBytes(level), NumberLevels());
c->input_version_ = current_; c->input_version_ = current_;
c->input_version_->Ref(); c->input_version_->Ref();

@ -238,6 +238,15 @@ struct Options {
// (expanded_compaction_factor * targetFileSizeLevel()) many bytes. // (expanded_compaction_factor * targetFileSizeLevel()) many bytes.
int expanded_compaction_factor; int expanded_compaction_factor;
// Maximum number of bytes in all source files to be compacted in a
// single compaction run. We avoid picking too many files in the
// source level so that we do not exceed the total source bytes
// for compaction to exceed
// (source_compaction_factor * targetFileSizeLevel()) many bytes.
// Default:1, i.e. pick maxfilesize amount of data as the source of
// a compaction.
int source_compaction_factor;
// Control maximum bytes of overlaps in grandparent (i.e., level+2) before we // Control maximum bytes of overlaps in grandparent (i.e., level+2) before we
// stop building a single file in a level->level+1 compaction. // stop building a single file in a level->level+1 compaction.
int max_grandparent_overlap_factor; int max_grandparent_overlap_factor;

@ -37,6 +37,7 @@ Options::Options()
max_bytes_for_level_base(10 * 1048576), max_bytes_for_level_base(10 * 1048576),
max_bytes_for_level_multiplier(10), max_bytes_for_level_multiplier(10),
expanded_compaction_factor(25), expanded_compaction_factor(25),
source_compaction_factor(1),
max_grandparent_overlap_factor(10), max_grandparent_overlap_factor(10),
statistics(NULL), statistics(NULL),
disableDataSync(false), disableDataSync(false),
@ -115,6 +116,8 @@ Options::Dump(
max_bytes_for_level_multiplier); max_bytes_for_level_multiplier);
Log(log," Options.expanded_compaction_factor: %d", Log(log," Options.expanded_compaction_factor: %d",
expanded_compaction_factor); expanded_compaction_factor);
Log(log," Options.source_compaction_factor: %d",
source_compaction_factor);
Log(log," Options.max_grandparent_overlap_factor: %d", Log(log," Options.max_grandparent_overlap_factor: %d",
max_grandparent_overlap_factor); max_grandparent_overlap_factor);
Log(log," Options.db_log_dir: %s", Log(log," Options.db_log_dir: %s",

Loading…
Cancel
Save