From d1aaaf718c04e87eb3599744eb850d8f30134323 Mon Sep 17 00:00:00 2001 From: Dhruba Borthakur Date: Tue, 21 May 2013 11:37:06 -0700 Subject: [PATCH] Ability to set different size fanout multipliers for every level. Summary: There is an existing field Options.max_bytes_for_level_multiplier that sets the multiplier for the size of each level in the database. This patch introduces the ability to set different multipliers for every level in the database. The size of a level is determined by using both max_bytes_for_level_multiplier as well as the per-level fanout. size of level[i] = size of level[i-1] * max_bytes_for_level_multiplier * fanout[i-1] The default value of fanout is 1, so that it is backward compatible. Test Plan: make check Reviewers: haobo, emayanke Reviewed By: emayanke CC: leveldb Differential Revision: https://reviews.facebook.net/D10863 --- db/db_bench.cc | 23 +++++++++++++++++++++++ db/version_set.cc | 3 ++- include/leveldb/options.h | 6 ++++++ util/ldb_cmd.cc | 10 ---------- util/ldb_cmd.h | 1 + util/options.cc | 5 +++++ util/string_util.cc | 23 +++++++++++++++++++++++ util/string_util.h | 12 ++++++++++++ 8 files changed, 72 insertions(+), 11 deletions(-) create mode 100644 util/string_util.cc create mode 100644 util/string_util.h diff --git a/db/db_bench.cc b/db/db_bench.cc index 621198e89..8cfdfcb82 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -20,6 +20,7 @@ #include "util/mutexlock.h" #include "util/random.h" #include "util/stack_trace.h" +#include "util/string_util.h" #include "util/testutil.h" #include "hdfs/env_hdfs.h" @@ -202,6 +203,9 @@ static uint64_t FLAGS_max_bytes_for_level_base = 10 * 1048576; // A multiplier to compute max bytes for level-N static int FLAGS_max_bytes_for_level_multiplier = 10; +// A vector that specifies additional fanout per level +static std::vector FLAGS_max_bytes_for_level_multiplier_additional; + // Number of files in level-0 that will trigger put stop. 
static int FLAGS_level0_stop_writes_trigger = 12; @@ -1089,6 +1093,16 @@ unique_ptr GenerateKeyFromInt(int v, const char* suffix = "") options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base; options.max_bytes_for_level_multiplier = FLAGS_max_bytes_for_level_multiplier; + if (FLAGS_max_bytes_for_level_multiplier_additional.size() > 0) { + if (FLAGS_max_bytes_for_level_multiplier_additional.size() != + (unsigned int)FLAGS_num_levels) { + fprintf(stderr, "Insufficient number of fanouts specified %d\n", + (int)FLAGS_max_bytes_for_level_multiplier_additional.size()); + exit(1); + } + options.max_bytes_for_level_multiplier_additional = + FLAGS_max_bytes_for_level_multiplier_additional; + } options.level0_stop_writes_trigger = FLAGS_level0_stop_writes_trigger; options.level0_file_num_compaction_trigger = FLAGS_level0_file_num_compaction_trigger; @@ -1718,6 +1732,7 @@ int main(int argc, char** argv) { long l; char junk; char hdfsname[2048]; + char str[512]; if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) { FLAGS_benchmarks = argv[i] + strlen("--benchmarks="); } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) { @@ -1853,6 +1868,14 @@ int main(int argc, char** argv) { } else if (sscanf(argv[i],"--level0_stop_writes_trigger=%d%c", &n, &junk) == 1) { FLAGS_level0_stop_writes_trigger = n; + } else if (sscanf(argv[i], + "--max_bytes_for_level_multiplier_additional=%511s%c", + str, &junk) == 1) { /* %511s: never write past char str[512]; a longer value fails the == 1 match */ + std::vector fanout = leveldb::stringSplit(str, ','); + for (unsigned int j= 0; j < fanout.size(); j++) { + FLAGS_max_bytes_for_level_multiplier_additional.push_back( + std::stoi(fanout[j])); + } } else if (sscanf(argv[i],"--level0_slowdown_writes_trigger=%d%c", &n, &junk) == 1) { FLAGS_level0_slowdown_writes_trigger = n; diff --git a/db/version_set.cc b/db/version_set.cc index a621be967..381ca6b60 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1009,7 +1009,8 @@ void VersionSet::Init(int num_levels) { for (int i = 0; i < num_levels; 
i++) { if (i > 1) { max_file_size_[i] = max_file_size_[i-1] * target_file_size_multiplier; - level_max_bytes_[i] = level_max_bytes_[i-1] * max_bytes_multiplier; + level_max_bytes_[i] = level_max_bytes_[i-1] * max_bytes_multiplier * + (i-1 < static_cast<int>(options_->max_bytes_for_level_multiplier_additional.size()) ? options_->max_bytes_for_level_multiplier_additional[i-1] : 1); /* vector may be shorter than num_levels; missing entries use the documented default of 1 */ } else { max_file_size_[i] = options_->target_file_size_base; level_max_bytes_[i] = options_->max_bytes_for_level_base; diff --git a/include/leveldb/options.h b/include/leveldb/options.h index e435e5865..f180c74eb 100644 --- a/include/leveldb/options.h +++ b/include/leveldb/options.h @@ -255,6 +255,12 @@ struct Options { // by default 'max_bytes_for_level_base' is 10. int max_bytes_for_level_multiplier; + // Different max-size multipliers for different levels. + // These are multiplied by max_bytes_for_level_multiplier to arrive + // at the max-size of each level. + // Default: 1 + std::vector max_bytes_for_level_multiplier_additional; + // Maximum number of bytes in all compacted files. We avoid expanding // the lower level file set of a compaction if it would make the // total compaction cover more than diff --git a/util/ldb_cmd.cc b/util/ldb_cmd.cc index b4e3beec5..46dc7edc2 100644 --- a/util/ldb_cmd.cc +++ b/util/ldb_cmd.cc @@ -19,16 +19,6 @@ namespace leveldb { using namespace std; -vector stringSplit(string arg, char delim) { - vector splits; - stringstream ss(arg); - string item; - while(getline(ss, item, delim)) { - splits.push_back(item); - } - return splits; -} - const string LDBCommand::ARG_DB = "db"; const string LDBCommand::ARG_HEX = "hex"; const string LDBCommand::ARG_KEY_HEX = "key_hex"; diff --git a/util/ldb_cmd.h b/util/ldb_cmd.h index 2899ce4ce..1fc51c4e3 100644 --- a/util/ldb_cmd.h +++ b/util/ldb_cmd.h @@ -19,6 +19,7 @@ #include "leveldb/slice.h" #include "util/logging.h" #include "util/ldb_cmd_execute_result.h" +#include "util/string_util.h" #include "utilities/utility_db.h" using std::string; diff --git a/util/options.cc b/util/options.cc index 
689aa3df2..6ac122218 100644 --- a/util/options.cc +++ b/util/options.cc @@ -40,6 +40,7 @@ Options::Options() target_file_size_multiplier(1), max_bytes_for_level_base(10 * 1048576), max_bytes_for_level_multiplier(10), + max_bytes_for_level_multiplier_additional(num_levels, 1), expanded_compaction_factor(25), source_compaction_factor(1), max_grandparent_overlap_factor(10), @@ -142,6 +143,10 @@ Options::Dump(Logger* log) const max_bytes_for_level_base); Log(log," Options.max_bytes_for_level_multiplier: %d", max_bytes_for_level_multiplier); + for (int i = 0; i < static_cast<int>(max_bytes_for_level_multiplier_additional.size()); i++) { /* iterate the vector's own size; it need not equal num_levels */ + Log(log,"Options.max_bytes_for_level_multiplier_addtl[%d]: %d", + i, max_bytes_for_level_multiplier_additional[i]); + } Log(log," Options.expanded_compaction_factor: %d", expanded_compaction_factor); Log(log," Options.source_compaction_factor: %d", diff --git a/util/string_util.cc b/util/string_util.cc new file mode 100644 index 000000000..b7529f636 --- /dev/null +++ b/util/string_util.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2013 Facebook. + +#include +#include +#include + +namespace leveldb { + +using namespace std; +using std::string; +using std::vector; +using std::stringstream; + +vector stringSplit(string arg, char delim) { + vector splits; + stringstream ss(arg); + string item; + while(getline(ss, item, delim)) { + splits.push_back(item); + } + return splits; +} +} diff --git a/util/string_util.h b/util/string_util.h new file mode 100644 index 000000000..8c70d3e9b --- /dev/null +++ b/util/string_util.h @@ -0,0 +1,12 @@ +// Copyright (c) 2013 Facebook. + +#ifndef STORAGE_LEVELDB_UTIL_STRINGUTIL_H_ +#define STORAGE_LEVELDB_UTIL_STRINGUTIL_H_ + +namespace leveldb { + +extern std::vector stringSplit(std::string arg, char delim); + +} + +#endif // STORAGE_LEVELDB_UTIL_STRINGUTIL_H_