From 18d2e4beb77abcacd5d89ee45be8c51b4851bd1f Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 1 Mar 2019 11:14:28 -0800 Subject: [PATCH] Run db_bench on database generated externally (#5017) Summary: Added an option, `-use_existing_keys`, which can be set to run benchmarks against an arbitrary existing database. Now users can benchmark against their actual database rather than synthetic data. Before the run begins, it loads all the keys into memory, then uses that set of keys rather than synthesizing new ones in `GenerateKeyFromInt`. This is mainly intended for small-scale DBs where the memory consumption is not a concern. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5017 Differential Revision: D14270303 Pulled By: riversand963 fbshipit-source-id: 6328df9dffb5e19170270dd00a69f4bbe424e5ed --- tools/db_bench_tool.cc | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 1b2e5c208..445e12134 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -523,6 +523,14 @@ DEFINE_bool(use_existing_db, false, "If true, do not destroy the existing" " database. If you set this flag and also specify a benchmark that" " wants a fresh database, that benchmark will fail."); +DEFINE_bool(use_existing_keys, false, + "If true, uses existing keys in the DB, " + "rather than generating new ones. This involves some startup " + "latency to load all keys into memory. It is supported for the " + "same read/overwrite benchmarks as `-use_existing_db=true`, which " + "must also be set for this flag to be enabled. 
When this flag is " + "set, the value for `-num` will be ignored."); + DEFINE_bool(show_table_properties, false, "If true, then per-level table" " properties will be printed on every stats-interval when" @@ -700,6 +708,7 @@ DEFINE_string( "RocksDB options related command-line arguments, all other arguments " "that are related to RocksDB options will be ignored:\n" "\t--use_existing_db\n" + "\t--use_existing_keys\n" "\t--statistics\n" "\t--row_cache_size\n" "\t--row_cache_numshardbits\n" @@ -2051,6 +2060,7 @@ class Benchmark { int64_t merge_keys_; bool report_file_operations_; bool use_blob_db_; + std::vector<std::string> keys_; class ErrorHandlerListener : public EventListener { public: @@ -2470,6 +2480,13 @@ class Benchmark { // | key 00000 | // ---------------------------- void GenerateKeyFromInt(uint64_t v, int64_t num_keys, Slice* key) { + if (!keys_.empty()) { + assert(FLAGS_use_existing_keys); + assert(keys_.size() == static_cast<size_t>(num_keys)); + assert(v < static_cast<uint64_t>(num_keys)); + *key = keys_[v]; + return; + } char* start = const_cast<char*>(key->data()); char* pos = start; if (keys_per_prefix_ > 0) { @@ -3654,6 +3671,19 @@ void VerifyDBFromDB(std::string& truth_db_name) { options.compaction_filter = new KeepFilter(); fprintf(stdout, "A noop compaction filter is used\n"); } + + if (FLAGS_use_existing_keys) { + // Only work on single database + assert(db_.db != nullptr); + ReadOptions read_opts; + read_opts.total_order_seek = true; + Iterator* iter = db_.db->NewIterator(read_opts); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + keys_.emplace_back(iter->key().ToString()); + } + delete iter; + FLAGS_num = keys_.size(); + } } void Open(Options* opts) { @@ -6113,6 +6143,13 @@ int db_bench_tool(int argc, char** argv) { } } #endif // ROCKSDB_LITE + if (FLAGS_use_existing_keys && !FLAGS_use_existing_db) { + fprintf(stderr, + "`-use_existing_db` must be true for `-use_existing_keys` to be " + "settable\n"); + exit(1); + } + if (!FLAGS_hdfs.empty()) { FLAGS_env = new
rocksdb::HdfsEnv(FLAGS_hdfs); }