From d140fbfd7dcdb52a66e94b0cf6a14516e409e1fb Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Wed, 24 Aug 2022 14:59:50 -0700 Subject: [PATCH] Add Iterator test against expected state to stress test (#10538) Summary: As mentioned in https://github.com/facebook/rocksdb/pull/5506#issuecomment-506021913, `db_stress` does not have much verification for iterator correctness. It has a `TestIterate()` function, but that is mainly for comparing results between two iterators, one with `total_order_seek` and the other optionally sets auto_prefix, upper/lower bounds. Commit 49a0581ad2462e31aa3f768afa769e0d33390f33 added a new `TestIterateAgainstExpected()` function that compares iterator against expected state. It locks a range of keys, creates an iterator, does a random sequence of `Next/Prev` and compares against expected state. This PR is based on that commit, the main changes include some logs (for easier debugging if a test fails), a forward and backward scan to cover the entire locked key range, and a flag for optionally turning on this version of Iterator testing. Added constraint that the checks against expected state in `TestIterateAgainstExpected()` and in `TestGet()` are only turned on when `--skip_verifydb` flag is not set. Remove the change log introduced in https://github.com/facebook/rocksdb/issues/10553. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10538 Test Plan: Run `db_stress` with `--verify_iterator_with_expected_state_one_in=1`, and a large `--iterpercent` and `--num_iterations`. Checked `op_logs` manually to ensure expected coverage. Tweaked part of the code in https://github.com/facebook/rocksdb/issues/10449 and stress test was able to catch it. - internally run various flavor of crash test Reviewed By: ajkr Differential Revision: D38847269 Pulled By: cbi42 fbshipit-source-id: 8b4402a9bba9f6cfa08051943cd672579d489599 --- HISTORY.md | 2 - db_stress_tool/db_stress_common.h | 2 + db_stress_tool/db_stress_gflags.cc | 10 +- db_stress_tool/db_stress_test_base.cc | 19 +- db_stress_tool/db_stress_test_base.h | 8 + db_stress_tool/no_batched_ops_stress.cc | 224 +++++++++++++++++++++++- tools/db_crashtest.py | 1 + 7 files changed, 255 insertions(+), 11 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 20e3c11ca..80ffc7b23 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,7 +1,5 @@ # Rocksdb Change Log ## Unreleased -### Behavior Change -* Updated `TestGet()` in `no_batched_op_stress` (default stress test) to check the result of Get() operations against expected state. ### Public API changes * Add `rocksdb_column_family_handle_get_id`, `rocksdb_column_family_handle_get_name` to get name, id of column family in C API diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index c3316ee0f..a2e49429c 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -312,6 +312,8 @@ DECLARE_bool(allow_data_in_errors); DECLARE_bool(enable_tiered_storage); // set last_level_temperature DECLARE_int64(preclude_last_level_data_seconds); +DECLARE_int32(verify_iterator_with_expected_state_one_in); + constexpr long KB = 1024; constexpr int kRandomValueMaxFactor = 3; constexpr int kValueMaxLen = 100; diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 90bc9282e..9af506fb9 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -922,7 +922,9 @@ DEFINE_bool(sync_fault_injection, false, DEFINE_bool(best_efforts_recovery, false, "If true, use best efforts recovery."); -DEFINE_bool(skip_verifydb, false, "If true, skip VerifyDb() calls."); +DEFINE_bool(skip_verifydb, false, + "If true, skip VerifyDb() calls and Get()/Iterator verifications" + "against expected state."); DEFINE_bool(enable_compaction_filter, false, "If true, configures a compaction filter that returns a kRemove " @@ -1027,4 +1029,10 @@ DEFINE_bool(allow_data_in_errors, ROCKSDB_NAMESPACE::Options().allow_data_in_errors, "If true, allow logging data, e.g. key, value in LOG files."); +DEFINE_int32(verify_iterator_with_expected_state_one_in, 0, + "If non-zero, when TestIterate() is to be called, there is a " + "1/verify_iterator_with_expected_state_one_in " + "chance that the iterator is verified against the expected state " + "file, instead of comparing keys between two iterators."); + #endif // GFLAGS diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 1f6b3794a..05d688400 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -922,12 +922,19 @@ void StressTest::OperateDb(ThreadState* thread) { } else if (prob_op < iterate_bound) { assert(delrange_bound <= prob_op); // OPERATION iterate - int num_seeks = static_cast( - std::min(static_cast(thread->rand.Uniform(4)), - FLAGS_ops_per_thread - i - 1)); - rand_keys = GenerateNKeys(thread, num_seeks, i); - i += num_seeks - 1; - TestIterate(thread, read_opts, rand_column_families, rand_keys); + if (!FLAGS_skip_verifydb && + thread->rand.OneInOpt( + FLAGS_verify_iterator_with_expected_state_one_in)) { + TestIterateAgainstExpected(thread, read_opts, rand_column_families, + rand_keys, lock); + } else { + int num_seeks = static_cast( + std::min(static_cast(thread->rand.Uniform(4)), + FLAGS_ops_per_thread - i - 1)); + rand_keys = GenerateNKeys(thread, num_seeks, i); + i += num_seeks - 1; + TestIterate(thread, read_opts, rand_column_families, rand_keys); + } } else { assert(iterate_bound <= prob_op); TestCustomOperations(thread, rand_column_families); diff --git a/db_stress_tool/db_stress_test_base.h b/db_stress_tool/db_stress_test_base.h index a7c7c68ec..3a4b8aa57 100644 --- a/db_stress_tool/db_stress_test_base.h +++ b/db_stress_tool/db_stress_test_base.h @@ -152,6 +152,14 @@ class StressTest { const std::vector& rand_column_families, const std::vector& rand_keys); + virtual Status TestIterateAgainstExpected( + ThreadState* /* thread */, const ReadOptions& /* read_opts */, + const std::vector& /* rand_column_families */, + const std::vector& /* rand_keys */, + std::unique_ptr& /* lock */) { + return Status::NotSupported(); + } + // Enum used by VerifyIterator() to identify the mode to validate. enum LastIterateOp { kLastOpSeek, diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index de4b69642..15bd341f8 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -361,7 +361,7 @@ class NonBatchedOpsStressTest : public StressTest { // found case thread->stats.AddGets(1, 1); // we only have the latest expected state - if (!read_opts_copy.timestamp && + if (!FLAGS_skip_verifydb && !read_opts_copy.timestamp && thread->shared->Get(rand_column_families[0], rand_keys[0]) == SharedState::DELETION_SENTINEL) { thread->shared->SetVerificationFailure(); @@ -373,7 +373,7 @@ class NonBatchedOpsStressTest : public StressTest { } else if (s.IsNotFound()) { // not found case thread->stats.AddGets(1, 0); - if (!read_opts_copy.timestamp) { + if (!FLAGS_skip_verifydb && !read_opts_copy.timestamp) { auto expected = thread->shared->Get(rand_column_families[0], rand_keys[0]); if (expected != SharedState::DELETION_SENTINEL && @@ -1000,6 +1000,226 @@ class NonBatchedOpsStressTest : public StressTest { } #endif // ROCKSDB_LITE + // Given a key K, this creates an iterator which scans the range + // [K, K + FLAGS_num_iterations) forward and backward. + // Then does a random sequence of Next/Prev operations. + Status TestIterateAgainstExpected( + ThreadState* thread, const ReadOptions& read_opts, + const std::vector& rand_column_families, + const std::vector& rand_keys, + std::unique_ptr& lock) override { + // Lock the whole range over which we might iterate to ensure it doesn't + // change under us. + std::vector> range_locks; + int64_t lb = rand_keys[0]; + int rand_column_family = rand_column_families[0]; + auto shared = thread->shared; + int64_t max_key = shared->GetMaxKey(); + if (static_cast(lb) > max_key - FLAGS_num_iterations) { + lock.reset(); + lb = thread->rand.Next() % (max_key - FLAGS_num_iterations + 1); + range_locks.emplace_back( + new MutexLock(shared->GetMutexForKey(rand_column_family, lb))); + } else { + range_locks.emplace_back(std::move(lock)); + } + for (int j = 1; j < static_cast(FLAGS_num_iterations); ++j) { + if (((lb + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) { + range_locks.emplace_back( + new MutexLock(shared->GetMutexForKey(rand_column_family, lb + j))); + } + } + int64_t ub = lb + FLAGS_num_iterations; + // Locks acquired for [lb, ub) + ReadOptions readoptscopy(read_opts); + readoptscopy.total_order_seek = true; + auto cfh = column_families_[rand_column_family]; + std::string op_logs; + std::unique_ptr iter(db_->NewIterator(readoptscopy, cfh)); + + auto check_no_key_in_range = [&](int64_t start, int64_t end) { + for (auto j = std::max(start, lb); j < std::min(end, ub); ++j) { + auto expected_value = + shared->Get(rand_column_family, static_cast(j)); + if (expected_value != shared->DELETION_SENTINEL && + expected_value != shared->UNKNOWN_SENTINEL) { + // Fail fast to preserve the DB state. + thread->shared->SetVerificationFailure(); + if (iter->Valid()) { + fprintf(stderr, + "Expected state has key %s, iterator is at key %s\n", + Slice(Key(j)).ToString(true).c_str(), + iter->key().ToString(true).c_str()); + } else { + fprintf(stderr, "Expected state has key %s, iterator is invalid\n", + Slice(Key(j)).ToString(true).c_str()); + } + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + thread->stats.AddErrors(1); + return false; + } + } + return true; + }; + + // Forward and backward scan to ensure we cover the entire range [lb, ub). + // The random sequence Next and Prev test below tends to be very short + // ranged. + int64_t last_key = lb - 1; + std::string key_str = Key(lb); + iter->Seek(Slice(key_str)); + op_logs += "S " + Slice(key_str).ToString(true) + " "; + uint64_t curr; + while (true) { + if (!iter->Valid()) { + if (!iter->status().ok()) { + thread->shared->SetVerificationFailure(); + fprintf(stderr, "TestIterate against expected state error: %s\n", + iter->status().ToString().c_str()); + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + thread->stats.AddErrors(1); + return iter->status(); + } + if (!check_no_key_in_range(last_key + 1, static_cast(ub))) { + // error reported in check_no_key_in_range() + return Status::OK(); + } + break; + } + // iter is valid, the range (last_key, current key) was skipped + GetIntVal(iter->key().ToString(), &curr); + if (!check_no_key_in_range(last_key + 1, static_cast(curr))) { + return Status::OK(); + } + last_key = static_cast(curr); + if (last_key >= ub - 1) { + break; + } + iter->Next(); + op_logs += "N"; + } + + // backward scan + key_str = Key(ub - 1); + iter->SeekForPrev(Slice(key_str)); + op_logs += " SFP " + Slice(key_str).ToString(true) + " "; + last_key = ub; + while (true) { + if (!iter->Valid()) { + if (!iter->status().ok()) { + thread->shared->SetVerificationFailure(); + fprintf(stderr, "TestIterate against expected state error: %s\n", + iter->status().ToString().c_str()); + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + thread->stats.AddErrors(1); + return iter->status(); + } + if (!check_no_key_in_range(lb, last_key)) { + return Status::OK(); + } + break; + } + // the range (current key, last key) was skipped + GetIntVal(iter->key().ToString(), &curr); + if (!check_no_key_in_range(static_cast(curr + 1), last_key)) { + return Status::OK(); + } + last_key = static_cast(curr); + if (last_key <= lb) { + break; + } + iter->Prev(); + op_logs += "P"; + } + + // start from middle of [lb, ub) otherwise it is easy to iterate out of + // locked range + int64_t mid = lb + static_cast(FLAGS_num_iterations / 2); + key_str = Key(mid); + Slice key = key_str; + if (thread->rand.OneIn(2)) { + iter->Seek(key); + op_logs += " S " + key.ToString(true) + " "; + if (!iter->Valid() && iter->status().ok()) { + if (!check_no_key_in_range(mid, ub)) { + return Status::OK(); + } + } + } else { + iter->SeekForPrev(key); + op_logs += " SFP " + key.ToString(true) + " "; + if (!iter->Valid() && iter->status().ok()) { + // iterator says nothing <= mid + if (!check_no_key_in_range(lb, mid + 1)) { + return Status::OK(); + } + } + } + + for (uint64_t i = 0; i < FLAGS_num_iterations && iter->Valid(); i++) { + GetIntVal(iter->key().ToString(), &curr); + if (curr < static_cast(lb)) { + iter->Next(); + op_logs += "N"; + } else if (curr >= static_cast(ub)) { + iter->Prev(); + op_logs += "P"; + } else { + uint32_t expected_value = + shared->Get(rand_column_family, static_cast(curr)); + if (expected_value == shared->DELETION_SENTINEL) { + // Fail fast to preserve the DB state. + thread->shared->SetVerificationFailure(); + fprintf(stderr, "Iterator has key %s, but expected state does not.\n", + iter->key().ToString(true).c_str()); + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + thread->stats.AddErrors(1); + break; + } + if (thread->rand.OneIn(2)) { + iter->Next(); + op_logs += "N"; + if (!iter->Valid()) { + break; + } + uint64_t next; + GetIntVal(iter->key().ToString(), &next); + if (!check_no_key_in_range(static_cast(curr + 1), + static_cast(next))) { + return Status::OK(); + } + } else { + iter->Prev(); + op_logs += "P"; + if (!iter->Valid()) { + break; + } + uint64_t prev; + GetIntVal(iter->key().ToString(), &prev); + if (!check_no_key_in_range(static_cast(prev + 1), + static_cast(curr))) { + return Status::OK(); + } + } + } + } + if (!iter->status().ok()) { + thread->shared->SetVerificationFailure(); + fprintf(stderr, "TestIterate against expected state error: %s\n", + iter->status().ToString().c_str()); + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + thread->stats.AddErrors(1); + return iter->status(); + } + thread->stats.AddIterations(1); + return Status::OK(); + } + bool VerifyOrSyncValue(int cf, int64_t key, const ReadOptions& /*opts*/, SharedState* shared, const std::string& value_from_db, const Status& s, bool strict = false) const { diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index e00ad2fed..b578111a4 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -288,6 +288,7 @@ simple_default_params = { "write_buffer_size": 32 * 1024 * 1024, "level_compaction_dynamic_level_bytes": False, "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]), + "verify_iterator_with_expected_state_one_in": 5 # this locks a range of keys } blackbox_simple_default_params = {