From ed75dddc35714bf44e1bc6ed3d80e2d19f3df023 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 1 Feb 2022 11:46:12 -0800 Subject: [PATCH] Optimize db_stress setup phase (#9475) Summary: It is too slow that our `db_crashtest.py` often kills `db_stress` before the setup phase completes. Profiled it and found a few ways to optimize. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9475 Test Plan: Measured setup phase time reduced 22% (36 -> 28 seconds) for first run, and 36% (38 -> 24 seconds) for non-first run on empty-ish DB. - first run benchmark command: `rm -rf /dev/shm/dbstress*/ && mkdir -p /dev/shm/dbstress_expected/ && ./db_stress -max_key=100000000 -destroy_db_initially=1 -expected_values_dir=/dev/shm/dbstress_expected/ -db=/dev/shm/dbstress/ --clear_column_family_one_in=0 --reopen=0 --nooverwritepercent=1` output before this PR: ``` 2022/01/31-11:14:05 Initializing db_stress ... 2022/01/31-11:14:41 Starting database operations ``` output after this PR: ``` ... 2022/01/31-11:12:23 Initializing db_stress ... 2022/01/31-11:12:51 Starting database operations ``` - non-first run benchmark command: `./db_stress -max_key=100000000 -destroy_db_initially=0 -expected_values_dir=/dev/shm/dbstress_expected/ -db=/dev/shm/dbstress/ --clear_column_family_one_in=0 --reopen=0 --nooverwritepercent=1` output before this PR: ``` 2022/01/31-11:20:45 Initializing db_stress ... 2022/01/31-11:21:23 Starting database operations ``` output after this PR: ``` 2022/01/31-11:22:02 Initializing db_stress ... 2022/01/31-11:22:26 Starting database operations ``` - ran minified crash test a while: `DEBUG_LEVEL=0 TEST_TMPDIR=/dev/shm python3 tools/db_crashtest.py blackbox --simple --interval=10 --max_key=1000000 --write_buffer_size=1048576 --target_file_size_base=1048576 --max_bytes_for_level_base=4194304 --value_size_mult=33` Reviewed By: anand1976 Differential Revision: D33897793 Pulled By: ajkr fbshipit-source-id: 0d7b2c93e1e2a9f8a878e87632c2455406313087 --- db_stress_tool/db_stress_common.h | 38 ++++++++++++++----------- db_stress_tool/db_stress_shared_state.h | 19 ++++++------- db_stress_tool/expected_state.cc | 3 +- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 06e69977e..a3b6e80eb 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -407,17 +407,15 @@ inline bool GetNextPrefix(const ROCKSDB_NAMESPACE::Slice& src, std::string* v) { #pragma warning(pop) #endif -// convert long to a big-endian slice key -extern inline std::string GetStringFromInt(int64_t val) { - std::string little_endian_key; - std::string big_endian_key; - PutFixed64(&little_endian_key, val); - assert(little_endian_key.size() == sizeof(val)); - big_endian_key.resize(sizeof(val)); - for (size_t i = 0; i < sizeof(val); ++i) { - big_endian_key[i] = little_endian_key[sizeof(val) - 1 - i]; +// Append `val` to `*key` in fixed-width big-endian format +extern inline void AppendIntToString(uint64_t val, std::string* key) { + // PutFixed64 uses little endian + PutFixed64(key, val); + // Reverse to get big endian + char* int_data = &((*key)[key->size() - sizeof(uint64_t)]); + for (size_t i = 0; i < sizeof(uint64_t) / 2; ++i) { + std::swap(int_data[i], int_data[sizeof(uint64_t) - 1 - i]); } - return big_endian_key; } // A struct for maintaining the parameters for generating variable length keys @@ -443,13 +441,22 @@ extern inline std::string Key(int64_t val) { uint64_t window = key_gen_ctx.window; size_t levels = key_gen_ctx.weights.size(); std::string key; + // Over-reserve and for now do not bother `shrink_to_fit()` since the key + // strings are transient. + key.reserve(FLAGS_max_key_len * 8); + uint64_t window_idx = static_cast(val) / window; + uint64_t offset = static_cast(val) % window; for (size_t level = 0; level < levels; ++level) { uint64_t weight = key_gen_ctx.weights[level]; - uint64_t offset = static_cast(val) % window; - uint64_t mult = static_cast(val) / window; - uint64_t pfx = mult * weight + (offset >= weight ? weight - 1 : offset); - key.append(GetStringFromInt(pfx)); + uint64_t pfx; + if (level == 0) { + pfx = window_idx * weight; + } else { + pfx = 0; + } + pfx += offset >= weight ? weight - 1 : offset; + AppendIntToString(pfx, &key); if (offset < weight) { // Use the bottom 3 bits of offset as the number of trailing 'x's in the // key. If the next key is going to be of the next level, then skip the @@ -461,8 +468,7 @@ extern inline std::string Key(int64_t val) { } break; } - val = offset - weight; - window -= weight; + offset -= weight; } return key; diff --git a/db_stress_tool/db_stress_shared_state.h b/db_stress_tool/db_stress_shared_state.h index ddfb67261..e7eb4aade 100644 --- a/db_stress_tool/db_stress_shared_state.h +++ b/db_stress_tool/db_stress_shared_state.h @@ -158,10 +158,7 @@ class SharedState { key_locks_.resize(FLAGS_column_families); for (int i = 0; i < FLAGS_column_families; ++i) { - key_locks_[i].resize(num_locks); - for (auto& ptr : key_locks_[i]) { - ptr.reset(new port::Mutex); - } + key_locks_[i].reset(new port::Mutex[num_locks]); } #ifndef NDEBUG if (FLAGS_read_fault_one_in) { @@ -227,20 +224,20 @@ class SharedState { // Returns a lock covering `key` in `cf`. port::Mutex* GetMutexForKey(int cf, int64_t key) { - return key_locks_[cf][key >> log2_keys_per_lock_].get(); + return &key_locks_[cf][key >> log2_keys_per_lock_]; } // Acquires locks for all keys in `cf`. void LockColumnFamily(int cf) { - for (auto& mutex : key_locks_[cf]) { - mutex->Lock(); + for (int i = 0; i < max_key_ >> log2_keys_per_lock_; ++i) { + key_locks_[cf][i].Lock(); } } // Releases locks for all keys in `cf`. void UnlockColumnFamily(int cf) { - for (auto& mutex : key_locks_[cf]) { - mutex->Unlock(); + for (int i = 0; i < max_key_ >> log2_keys_per_lock_; ++i) { + key_locks_[cf][i].Unlock(); } } @@ -361,9 +358,9 @@ class SharedState { std::unordered_set no_overwrite_ids_; std::unique_ptr expected_state_manager_; - // Has to make it owned by a smart ptr as port::Mutex is not copyable + // Cannot store `port::Mutex` directly in vector since it is not copyable // and storing it in the container may require copying depending on the impl. - std::vector>> key_locks_; + std::vector> key_locks_; std::atomic printing_verification_results_; }; diff --git a/db_stress_tool/expected_state.cc b/db_stress_tool/expected_state.cc index c787ff757..487ad193f 100644 --- a/db_stress_tool/expected_state.cc +++ b/db_stress_tool/expected_state.cc @@ -78,7 +78,8 @@ bool ExpectedState::Exists(int cf, int64_t key) { void ExpectedState::Reset() { for (size_t i = 0; i < num_column_families_; ++i) { for (size_t j = 0; j < max_key_; ++j) { - Delete(static_cast(i), j, false /* pending */); + Value(static_cast(i), j) + .store(SharedState::DELETION_SENTINEL, std::memory_order_relaxed); } } }