From 1a130fa3c11ac9b93cd138ec28bb319ce37326b1 Mon Sep 17 00:00:00 2001 From: Mark Callaghan Date: Fri, 25 Mar 2022 10:12:27 -0700 Subject: [PATCH] db_bench should use a good seed when --seed is not set or set to 0 (#9740) Summary: This is for https://github.com/facebook/rocksdb/issues/9737 I have wasted more than a few hours running db_bench benchmarks where --seed was not set and getting better than expected results because cache hit rates are great because multiple invocations of db_bench used the same value for --seed or did not set it, and then all used 0. The result is that all see the same sequence of keys. Others have done the same. The problem is worse in that it is easy to miss and the result is a benchmark with results that are misleading. A good way to avoid this is to set it to the equivalent of gettimeofday() when either --seed is not set or it is set to 0 (the default). With this change the actual seed is printed when it was 0 at process start: Set seed to 1647992570365606 because --seed was 0 Pull Request resolved: https://github.com/facebook/rocksdb/pull/9740 Test Plan: Perf results: ./db_bench --benchmarks=fillseq,readrandom --num=1000000 --reads=4000000 readrandom : 6.469 micros/op 154583 ops/sec; 17.1 MB/s (4000000 of 4000000 found) ./db_bench --benchmarks=fillseq,readrandom --num=1000000 --reads=4000000 --seed=0 readrandom : 6.565 micros/op 152321 ops/sec; 16.9 MB/s (4000000 of 4000000 found) ./db_bench --benchmarks=fillseq,readrandom --num=1000000 --reads=4000000 --seed=1 readrandom : 6.461 micros/op 154777 ops/sec; 17.1 MB/s (4000000 of 4000000 found) ./db_bench --benchmarks=fillseq,readrandom --num=1000000 --reads=4000000 --seed=2 readrandom : 6.525 micros/op 153244 ops/sec; 17.0 MB/s (4000000 of 4000000 found) Reviewed By: jay-zhuang Differential Revision: D35145361 Pulled By: mdcallag fbshipit-source-id: 2b35b153ccec46b27d7c9405997523555fc51267 --- tools/db_bench_tool.cc | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index eee9c8ba2..fb2270adb 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -285,8 +285,10 @@ DEFINE_int64(deletes, -1, "Number of delete operations to do. " DEFINE_int32(bloom_locality, 0, "Control bloom filter probes locality"); -DEFINE_int64(seed, 0, "Seed base for random number generators. " - "When 0 it is deterministic."); +DEFINE_int64(seed, 0, + "Seed base for random number generators. " + "When 0 it is derived from the current time."); +static int64_t seed_base; DEFINE_int32(threads, 1, "Number of concurrent threads to run."); @@ -2425,8 +2427,7 @@ struct ThreadState { Stats stats; SharedState* shared; - explicit ThreadState(int index) - : tid(index), rand((FLAGS_seed ? FLAGS_seed : 1000) + index) {} + explicit ThreadState(int index) : tid(index), rand(seed_base + index) {} }; class Duration { @@ -4604,7 +4605,7 @@ class Benchmark { values_[i] = i; } RandomShuffle(values_.begin(), values_.end(), - static_cast(FLAGS_seed)); + static_cast(seed_base)); } } @@ -4717,7 +4718,7 @@ class Benchmark { // Default_random_engine provides slightly // improved throughput over mt19937. std::default_random_engine overwrite_gen{ - static_cast(FLAGS_seed)}; + static_cast(seed_base)}; std::bernoulli_distribution overwrite_decider(p); // Inserted key window is filled with the last N @@ -4727,7 +4728,7 @@ class Benchmark { // - random access is O(1) // - insertion/removal at beginning/end is also O(1). std::deque inserted_key_window; - Random64 reservoir_id_gen(FLAGS_seed); + Random64 reservoir_id_gen(seed_base); // --- Variables used in disposable/persistent keys simulation: // The following variables are used when @@ -4764,7 +4765,7 @@ class Benchmark { ErrorExit(); } } - Random rnd_disposable_entry(static_cast(FLAGS_seed)); + Random rnd_disposable_entry(static_cast(seed_base)); std::string random_value; // Queue that stores scheduled timestamp of disposable entries deletes, // along with starting index of disposable entry keys to delete. @@ -8105,6 +8106,15 @@ int db_bench_tool(int argc, char** argv) { FLAGS_use_existing_db |= FLAGS_readonly; #endif // ROCKSDB_LITE + if (!FLAGS_seed) { + uint64_t now = FLAGS_env->GetSystemClock()->NowMicros(); + seed_base = static_cast(now); + fprintf(stdout, "Set seed to %" PRIu64 " because --seed was 0\n", + seed_base); + } else { + seed_base = FLAGS_seed; + } + if (FLAGS_use_existing_keys && !FLAGS_use_existing_db) { fprintf(stderr, "`-use_existing_db` must be true for `-use_existing_keys` to be "