From 28087acd796b2d14f35e4e49224a91ebea4c57d6 Mon Sep 17 00:00:00 2001
From: Amy Tai
Date: Fri, 13 Apr 2018 22:10:00 -0700
Subject: [PATCH] =?UTF-8?q?Implemented=20Knuth=20shuffle=20to=20construct?=
 =?UTF-8?q?=20permutation=20for=20selecting=20no=5Fo=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
…verwrite_keys. Also changed each no_overwrite_key set to an unordered set, otherwise Knuth shuffle only gets you 2x time improvement, because insertion (and subsequent internal sorting) into an ordered set is the bottleneck.

With this change, each iteration of permutation construction and prefix selection takes around 40 secs, as opposed to 360 secs previously. However, this still means that with the default 10 CF per blackbox test case, the test is going to time out given the default interval of 200 secs.

Also, there is currently an assertion error affecting all blackbox tests in db_crashtest.py; this assertion error will be fixed in a future PR.
Closes https://github.com/facebook/rocksdb/pull/3699

Differential Revision: D7624616

Pulled By: amytai

fbshipit-source-id: ea64fbe83407ff96c1c0ecabbc6c830576939393
---
 tools/db_stress.cc | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/tools/db_stress.cc b/tools/db_stress.cc
index c4d5418f1..186f0ae7b 100644
--- a/tools/db_stress.cc
+++ b/tools/db_stress.cc
@@ -791,18 +791,34 @@ class SharedState {
     // overwrite
 
     printf("Choosing random keys with no overwrite\n");
-    Random rnd(seed_);
-    size_t num_no_overwrite_keys = (max_key_ * FLAGS_nooverwritepercent) / 100;
+    Random64 rnd(seed_);
+    // Start with the identity permutation. Each pass of the for loop below
+    // keeps shuffling the permutation left behind by the previous pass.
+    int64_t *permutation = new int64_t[max_key_];
+    for (int64_t i = 0; i < max_key_; i++) {
+      permutation[i] = i;
+    }
+
     for (auto& cf_ids : no_overwrite_ids_) {
-      for (size_t i = 0; i < num_no_overwrite_keys; i++) {
-        size_t rand_key;
-        do {
-          rand_key = rnd.Next() % max_key_;
-        } while (cf_ids.find(rand_key) != cf_ids.end());
-        cf_ids.insert(rand_key);
+      // Now do the (partial) Knuth shuffle
+      int64_t num_no_overwrite_keys = (max_key_ * FLAGS_nooverwritepercent) / 100;
+      // Only need to figure out first num_no_overwrite_keys of permutation
+      for (int64_t i = 0; i < num_no_overwrite_keys; i++) {
+        int64_t rand_index = i + rnd.Next() % (max_key_ - i);
+        // Swap i and rand_index; rand_index lies in [i, max_key_)
+        int64_t temp = permutation[i];
+        permutation[i] = permutation[rand_index];
+        permutation[rand_index] = temp;
+      }
+
+      // Now fill cf_ids with the first num_no_overwrite_keys of permutation
+      cf_ids.reserve(num_no_overwrite_keys);
+      for (int64_t i = 0; i < num_no_overwrite_keys; i++) {
+        cf_ids.insert(permutation[i]);
       }
-      assert(cf_ids.size() == num_no_overwrite_keys);
+      assert(cf_ids.size() == static_cast<size_t>(num_no_overwrite_keys));
     }
+    delete[] permutation;  // allocated with new[] above
 
     if (FLAGS_test_batches_snapshots) {
       fprintf(stdout, "No lock creation because test_batches_snapshots set\n");
@@ -979,7 +995,7 @@ class SharedState {
   std::atomic<bool> verification_failure_;
 
   // Keys that should not be overwritten
-  std::vector<std::set<size_t> > no_overwrite_ids_;
+  std::vector<std::unordered_set<size_t> > no_overwrite_ids_;
 
   std::vector<std::vector<uint32_t>> values_;
   // Has to make it owned by a smart ptr as port::Mutex is not copyable