Add Iterator test against expected state to stress test (#10538)

Summary:
As mentioned in https://github.com/facebook/rocksdb/pull/5506#issuecomment-506021913,
`db_stress` does not have much verification for iterator correctness.
It has a `TestIterate()` function, but that is mainly for comparing results
between two iterators, one with `total_order_seek` and the other optionally
sets auto_prefix, upper/lower bounds. Commit 49a0581ad2462e31aa3f768afa769e0d33390f33
added a new `TestIterateAgainstExpected()` function that compares iterator against
expected state. It locks a range of keys, creates an iterator, does
a random sequence of `Next/Prev` and compares against expected state.
This PR is based on that commit, the main changes include some logs
(for easier debugging if a test fails), a forward and backward scan to
cover the entire locked key range, and a flag for optionally turning on
this version of Iterator testing.

Added constraint that the checks against expected state in
`TestIterateAgainstExpected()` and in `TestGet()` are only turned on
when `--skip_verifydb` flag is not set.
Remove the change log introduced in https://github.com/facebook/rocksdb/issues/10553.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10538

Test Plan:
Run `db_stress` with `--verify_iterator_with_expected_state_one_in=1`,
and a large `--iterpercent` and `--num_iterations`. Checked `op_logs`
manually to ensure expected coverage. Tweaked part of the code in
https://github.com/facebook/rocksdb/issues/10449 and stress test was able to catch it.
- internally run various flavor of crash test

Reviewed By: ajkr

Differential Revision: D38847269

Pulled By: cbi42

fbshipit-source-id: 8b4402a9bba9f6cfa08051943cd672579d489599
main
Changyu Bi 2 years ago committed by Facebook GitHub Bot
parent 79ed4be80f
commit d140fbfd7d
  1. 2
      HISTORY.md
  2. 2
      db_stress_tool/db_stress_common.h
  3. 10
      db_stress_tool/db_stress_gflags.cc
  4. 19
      db_stress_tool/db_stress_test_base.cc
  5. 8
      db_stress_tool/db_stress_test_base.h
  6. 224
      db_stress_tool/no_batched_ops_stress.cc
  7. 1
      tools/db_crashtest.py

@ -1,7 +1,5 @@
# Rocksdb Change Log
## Unreleased
### Behavior Change
* Updated `TestGet()` in `no_batched_op_stress` (default stress test) to check the result of Get() operations against expected state.
### Public API changes
* Add `rocksdb_column_family_handle_get_id`, `rocksdb_column_family_handle_get_name` to get name, id of column family in C API

@ -312,6 +312,8 @@ DECLARE_bool(allow_data_in_errors);
DECLARE_bool(enable_tiered_storage); // set last_level_temperature
DECLARE_int64(preclude_last_level_data_seconds);
DECLARE_int32(verify_iterator_with_expected_state_one_in);
constexpr long KB = 1024;
constexpr int kRandomValueMaxFactor = 3;
constexpr int kValueMaxLen = 100;

@ -922,7 +922,9 @@ DEFINE_bool(sync_fault_injection, false,
DEFINE_bool(best_efforts_recovery, false,
"If true, use best efforts recovery.");
DEFINE_bool(skip_verifydb, false, "If true, skip VerifyDb() calls.");
DEFINE_bool(skip_verifydb, false,
"If true, skip VerifyDb() calls and Get()/Iterator verifications"
"against expected state.");
DEFINE_bool(enable_compaction_filter, false,
"If true, configures a compaction filter that returns a kRemove "
@ -1027,4 +1029,10 @@ DEFINE_bool(allow_data_in_errors,
ROCKSDB_NAMESPACE::Options().allow_data_in_errors,
"If true, allow logging data, e.g. key, value in LOG files.");
DEFINE_int32(verify_iterator_with_expected_state_one_in, 0,
"If non-zero, when TestIterate() is to be called, there is a "
"1/verify_iterator_with_expected_state_one_in "
"chance that the iterator is verified against the expected state "
"file, instead of comparing keys between two iterators.");
#endif // GFLAGS

@ -922,12 +922,19 @@ void StressTest::OperateDb(ThreadState* thread) {
} else if (prob_op < iterate_bound) {
assert(delrange_bound <= prob_op);
// OPERATION iterate
int num_seeks = static_cast<int>(
std::min(static_cast<uint64_t>(thread->rand.Uniform(4)),
FLAGS_ops_per_thread - i - 1));
rand_keys = GenerateNKeys(thread, num_seeks, i);
i += num_seeks - 1;
TestIterate(thread, read_opts, rand_column_families, rand_keys);
if (!FLAGS_skip_verifydb &&
thread->rand.OneInOpt(
FLAGS_verify_iterator_with_expected_state_one_in)) {
TestIterateAgainstExpected(thread, read_opts, rand_column_families,
rand_keys, lock);
} else {
int num_seeks = static_cast<int>(
std::min(static_cast<uint64_t>(thread->rand.Uniform(4)),
FLAGS_ops_per_thread - i - 1));
rand_keys = GenerateNKeys(thread, num_seeks, i);
i += num_seeks - 1;
TestIterate(thread, read_opts, rand_column_families, rand_keys);
}
} else {
assert(iterate_bound <= prob_op);
TestCustomOperations(thread, rand_column_families);

@ -152,6 +152,14 @@ class StressTest {
const std::vector<int>& rand_column_families,
const std::vector<int64_t>& rand_keys);
virtual Status TestIterateAgainstExpected(
ThreadState* /* thread */, const ReadOptions& /* read_opts */,
const std::vector<int>& /* rand_column_families */,
const std::vector<int64_t>& /* rand_keys */,
std::unique_ptr<MutexLock>& /* lock */) {
return Status::NotSupported();
}
// Enum used by VerifyIterator() to identify the mode to validate.
enum LastIterateOp {
kLastOpSeek,

@ -361,7 +361,7 @@ class NonBatchedOpsStressTest : public StressTest {
// found case
thread->stats.AddGets(1, 1);
// we only have the latest expected state
if (!read_opts_copy.timestamp &&
if (!FLAGS_skip_verifydb && !read_opts_copy.timestamp &&
thread->shared->Get(rand_column_families[0], rand_keys[0]) ==
SharedState::DELETION_SENTINEL) {
thread->shared->SetVerificationFailure();
@ -373,7 +373,7 @@ class NonBatchedOpsStressTest : public StressTest {
} else if (s.IsNotFound()) {
// not found case
thread->stats.AddGets(1, 0);
if (!read_opts_copy.timestamp) {
if (!FLAGS_skip_verifydb && !read_opts_copy.timestamp) {
auto expected =
thread->shared->Get(rand_column_families[0], rand_keys[0]);
if (expected != SharedState::DELETION_SENTINEL &&
@ -1000,6 +1000,226 @@ class NonBatchedOpsStressTest : public StressTest {
}
#endif // ROCKSDB_LITE
// Given a key K, this creates an iterator which scans the range
// [K, K + FLAGS_num_iterations) forward and backward.
// Then does a random sequence of Next/Prev operations.
Status TestIterateAgainstExpected(
ThreadState* thread, const ReadOptions& read_opts,
const std::vector<int>& rand_column_families,
const std::vector<int64_t>& rand_keys,
std::unique_ptr<MutexLock>& lock) override {
// Lock the whole range over which we might iterate to ensure it doesn't
// change under us.
std::vector<std::unique_ptr<MutexLock>> range_locks;
int64_t lb = rand_keys[0];
int rand_column_family = rand_column_families[0];
auto shared = thread->shared;
int64_t max_key = shared->GetMaxKey();
if (static_cast<uint64_t>(lb) > max_key - FLAGS_num_iterations) {
lock.reset();
lb = thread->rand.Next() % (max_key - FLAGS_num_iterations + 1);
range_locks.emplace_back(
new MutexLock(shared->GetMutexForKey(rand_column_family, lb)));
} else {
range_locks.emplace_back(std::move(lock));
}
for (int j = 1; j < static_cast<int>(FLAGS_num_iterations); ++j) {
if (((lb + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) {
range_locks.emplace_back(
new MutexLock(shared->GetMutexForKey(rand_column_family, lb + j)));
}
}
int64_t ub = lb + FLAGS_num_iterations;
// Locks acquired for [lb, ub)
ReadOptions readoptscopy(read_opts);
readoptscopy.total_order_seek = true;
auto cfh = column_families_[rand_column_family];
std::string op_logs;
std::unique_ptr<Iterator> iter(db_->NewIterator(readoptscopy, cfh));
auto check_no_key_in_range = [&](int64_t start, int64_t end) {
for (auto j = std::max(start, lb); j < std::min(end, ub); ++j) {
auto expected_value =
shared->Get(rand_column_family, static_cast<int64_t>(j));
if (expected_value != shared->DELETION_SENTINEL &&
expected_value != shared->UNKNOWN_SENTINEL) {
// Fail fast to preserve the DB state.
thread->shared->SetVerificationFailure();
if (iter->Valid()) {
fprintf(stderr,
"Expected state has key %s, iterator is at key %s\n",
Slice(Key(j)).ToString(true).c_str(),
iter->key().ToString(true).c_str());
} else {
fprintf(stderr, "Expected state has key %s, iterator is invalid\n",
Slice(Key(j)).ToString(true).c_str());
}
fprintf(stderr, "Column family: %s, op_logs: %s\n",
cfh->GetName().c_str(), op_logs.c_str());
thread->stats.AddErrors(1);
return false;
}
}
return true;
};
// Forward and backward scan to ensure we cover the entire range [lb, ub).
// The random sequence Next and Prev test below tends to be very short
// ranged.
int64_t last_key = lb - 1;
std::string key_str = Key(lb);
iter->Seek(Slice(key_str));
op_logs += "S " + Slice(key_str).ToString(true) + " ";
uint64_t curr;
while (true) {
if (!iter->Valid()) {
if (!iter->status().ok()) {
thread->shared->SetVerificationFailure();
fprintf(stderr, "TestIterate against expected state error: %s\n",
iter->status().ToString().c_str());
fprintf(stderr, "Column family: %s, op_logs: %s\n",
cfh->GetName().c_str(), op_logs.c_str());
thread->stats.AddErrors(1);
return iter->status();
}
if (!check_no_key_in_range(last_key + 1, static_cast<int64_t>(ub))) {
// error reported in check_no_key_in_range()
return Status::OK();
}
break;
}
// iter is valid, the range (last_key, current key) was skipped
GetIntVal(iter->key().ToString(), &curr);
if (!check_no_key_in_range(last_key + 1, static_cast<int64_t>(curr))) {
return Status::OK();
}
last_key = static_cast<int64_t>(curr);
if (last_key >= ub - 1) {
break;
}
iter->Next();
op_logs += "N";
}
// backward scan
key_str = Key(ub - 1);
iter->SeekForPrev(Slice(key_str));
op_logs += " SFP " + Slice(key_str).ToString(true) + " ";
last_key = ub;
while (true) {
if (!iter->Valid()) {
if (!iter->status().ok()) {
thread->shared->SetVerificationFailure();
fprintf(stderr, "TestIterate against expected state error: %s\n",
iter->status().ToString().c_str());
fprintf(stderr, "Column family: %s, op_logs: %s\n",
cfh->GetName().c_str(), op_logs.c_str());
thread->stats.AddErrors(1);
return iter->status();
}
if (!check_no_key_in_range(lb, last_key)) {
return Status::OK();
}
break;
}
// the range (current key, last key) was skipped
GetIntVal(iter->key().ToString(), &curr);
if (!check_no_key_in_range(static_cast<int64_t>(curr + 1), last_key)) {
return Status::OK();
}
last_key = static_cast<int64_t>(curr);
if (last_key <= lb) {
break;
}
iter->Prev();
op_logs += "P";
}
// start from middle of [lb, ub) otherwise it is easy to iterate out of
// locked range
int64_t mid = lb + static_cast<int64_t>(FLAGS_num_iterations / 2);
key_str = Key(mid);
Slice key = key_str;
if (thread->rand.OneIn(2)) {
iter->Seek(key);
op_logs += " S " + key.ToString(true) + " ";
if (!iter->Valid() && iter->status().ok()) {
if (!check_no_key_in_range(mid, ub)) {
return Status::OK();
}
}
} else {
iter->SeekForPrev(key);
op_logs += " SFP " + key.ToString(true) + " ";
if (!iter->Valid() && iter->status().ok()) {
// iterator says nothing <= mid
if (!check_no_key_in_range(lb, mid + 1)) {
return Status::OK();
}
}
}
for (uint64_t i = 0; i < FLAGS_num_iterations && iter->Valid(); i++) {
GetIntVal(iter->key().ToString(), &curr);
if (curr < static_cast<uint64_t>(lb)) {
iter->Next();
op_logs += "N";
} else if (curr >= static_cast<uint64_t>(ub)) {
iter->Prev();
op_logs += "P";
} else {
uint32_t expected_value =
shared->Get(rand_column_family, static_cast<int64_t>(curr));
if (expected_value == shared->DELETION_SENTINEL) {
// Fail fast to preserve the DB state.
thread->shared->SetVerificationFailure();
fprintf(stderr, "Iterator has key %s, but expected state does not.\n",
iter->key().ToString(true).c_str());
fprintf(stderr, "Column family: %s, op_logs: %s\n",
cfh->GetName().c_str(), op_logs.c_str());
thread->stats.AddErrors(1);
break;
}
if (thread->rand.OneIn(2)) {
iter->Next();
op_logs += "N";
if (!iter->Valid()) {
break;
}
uint64_t next;
GetIntVal(iter->key().ToString(), &next);
if (!check_no_key_in_range(static_cast<int64_t>(curr + 1),
static_cast<int64_t>(next))) {
return Status::OK();
}
} else {
iter->Prev();
op_logs += "P";
if (!iter->Valid()) {
break;
}
uint64_t prev;
GetIntVal(iter->key().ToString(), &prev);
if (!check_no_key_in_range(static_cast<int64_t>(prev + 1),
static_cast<int64_t>(curr))) {
return Status::OK();
}
}
}
}
if (!iter->status().ok()) {
thread->shared->SetVerificationFailure();
fprintf(stderr, "TestIterate against expected state error: %s\n",
iter->status().ToString().c_str());
fprintf(stderr, "Column family: %s, op_logs: %s\n",
cfh->GetName().c_str(), op_logs.c_str());
thread->stats.AddErrors(1);
return iter->status();
}
thread->stats.AddIterations(1);
return Status::OK();
}
bool VerifyOrSyncValue(int cf, int64_t key, const ReadOptions& /*opts*/,
SharedState* shared, const std::string& value_from_db,
const Status& s, bool strict = false) const {

@ -288,6 +288,7 @@ simple_default_params = {
"write_buffer_size": 32 * 1024 * 1024,
"level_compaction_dynamic_level_bytes": False,
"paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
"verify_iterator_with_expected_state_one_in": 5 # this locks a range of keys
}
blackbox_simple_default_params = {

Loading…
Cancel
Save