From f786b4a5b4f1f162a7e7452b33e2e5cf0d755b9b Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Tue, 9 Jul 2019 12:57:02 -0700 Subject: [PATCH] Improve result print on atomic flush stress test failure (#5549) Summary: When atomic flush stress test fails, we print internal keys within the range with mismatched key/values for all column families. Test plan (on devserver) Manually hack the code to randomly insert wrong data. Run the test. ``` $make clean && COMPILE_WITH_TSAN=1 make -j32 db_stress $./db_stress -test_atomic_flush=true -ops_per_thread=10000 ``` Check that proper error messages are printed, as follows: ``` 2019/07/08-17:40:14 Starting verification Verification failed Latest Sequence Number: 190903 [default] 000000000000050B => 56290000525350515E5F5C5D5A5B5859 [3] 0000000000000533 => EE100000EAEBE8E9E6E7E4E5E2E3E0E1FEFFFCFDFAFBF8F9 Internal keys in CF 'default', [000000000000050B, 0000000000000533] (max 8) key 000000000000050B seq 139920 type 1 key 0000000000000533 seq 0 type 1 Internal keys in CF '3', [000000000000050B, 0000000000000533] (max 8) key 0000000000000533 seq 0 type 1 ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/5549 Differential Revision: D16158709 Pulled By: riversand963 fbshipit-source-id: f07fa87763f87b3bd908da03c956709c6456bcab --- tools/db_stress.cc | 77 +++++++++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 22 deletions(-) diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 66a10d4f3..3f767a9e7 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -923,7 +923,8 @@ class SharedState { stress_test_(stress_test), verification_failure_(false), no_overwrite_ids_(FLAGS_column_families), - values_(nullptr) { + values_(nullptr), + printing_verification_results_(false) { // Pick random keys in each column family that will not experience // overwrite @@ -1204,6 +1205,16 @@ class SharedState { return expected_mmap_buffer_.get() != nullptr; } + bool PrintingVerificationResults() { + bool tmp = false; + return !printing_verification_results_.compare_exchange_strong( + tmp, true, std::memory_order_relaxed); + } + + void FinishPrintingVerificationResults() { + printing_verification_results_.store(false, std::memory_order_relaxed); + } + private: port::Mutex mu_; port::CondVar cv_; @@ -1231,6 +1242,7 @@ class SharedState { // and storing it in the container may require copying depending on the impl. std::vector > > key_locks_; std::unique_ptr expected_mmap_buffer_; + std::atomic printing_verification_results_; }; const uint32_t SharedState::UNKNOWN_SENTINEL = 0xfffffffe; @@ -4235,6 +4247,7 @@ class AtomicFlushStressTest : public StressTest { } break; } else if (valid_cnt != iters.size()) { + shared->SetVerificationFailure(); for (size_t i = 0; i != num; ++i) { if (!iters[i]->Valid()) { if (statuses[i].ok()) { @@ -4250,13 +4263,19 @@ class AtomicFlushStressTest : public StressTest { column_families_[i]->GetName().c_str()); } } - shared->SetVerificationFailure(); + break; + } + if (shared->HasVerificationFailedYet()) { break; } // If the program reaches here, then all column families' iterators are // still valid. + if (shared->PrintingVerificationResults()) { + continue; + } Slice key; Slice value; + int num_mismatched_cfs = 0; for (size_t i = 0; i != num; ++i) { if (i == 0) { key = iters[i]->key(); @@ -4264,11 +4283,16 @@ class AtomicFlushStressTest : public StressTest { } else { int cmp = key.compare(iters[i]->key()); if (cmp != 0) { - fprintf(stderr, "Verification failed\n"); - fprintf(stderr, "[%s] %s => %s\n", - column_families_[0]->GetName().c_str(), - key.ToString(true /* hex */).c_str(), - value.ToString(true /* hex */).c_str()); + ++num_mismatched_cfs; + if (1 == num_mismatched_cfs) { + fprintf(stderr, "Verification failed\n"); + fprintf(stderr, "Latest Sequence Number: %" PRIu64 "\n", + db_->GetLatestSequenceNumber()); + fprintf(stderr, "[%s] %s => %s\n", + column_families_[0]->GetName().c_str(), + key.ToString(true /* hex */).c_str(), + value.ToString(true /* hex */).c_str()); + } fprintf(stderr, "[%s] %s => %s\n", column_families_[i]->GetName().c_str(), iters[i]->key().ToString(true /* hex */).c_str(), @@ -4283,29 +4307,38 @@ class AtomicFlushStressTest : public StressTest { begin_key = iters[i]->key(); end_key = key; } - // We should print both of CF 0 and i but GetAllKeyVersions() now - // only supports default CF. std::vector versions; const size_t kMaxNumIKeys = 8; - Status s = GetAllKeyVersions(db_, begin_key, end_key, kMaxNumIKeys, - &versions); - fprintf(stderr, - "Internal keys in default CF [%s, %s] (max %" ROCKSDB_PRIszt - ")\n", - begin_key.ToString(true /* hex */).c_str(), - end_key.ToString(true /* hex */).c_str(), kMaxNumIKeys); - for (const KeyVersion& kv : versions) { - fprintf(stderr, " key %s seq %" PRIu64 " type %d\n", - Slice(kv.user_key).ToString(true).c_str(), kv.sequence, - kv.type); + const auto print_key_versions = [&](ColumnFamilyHandle* cfh) { + Status s = GetAllKeyVersions(db_, cfh, begin_key, end_key, + kMaxNumIKeys, &versions); + if (!s.ok()) { + fprintf(stderr, "%s\n", s.ToString().c_str()); + return; + } + assert(nullptr != cfh); + fprintf(stderr, + "Internal keys in CF '%s', [%s, %s] (max %" ROCKSDB_PRIszt + ")\n", + cfh->GetName().c_str(), + begin_key.ToString(true /* hex */).c_str(), + end_key.ToString(true /* hex */).c_str(), kMaxNumIKeys); + for (const KeyVersion& kv : versions) { + fprintf(stderr, " key %s seq %" PRIu64 " type %d\n", + Slice(kv.user_key).ToString(true).c_str(), kv.sequence, + kv.type); + } + }; + if (1 == num_mismatched_cfs) { + print_key_versions(column_families_[0]); } + print_key_versions(column_families_[i]); #endif // ROCKSDB_LITE - fprintf(stderr, "Latest Sequence Number: %" PRIu64 "\n", - db_->GetLatestSequenceNumber()); shared->SetVerificationFailure(); } } } + shared->FinishPrintingVerificationResults(); for (auto& iter : iters) { iter->Next(); }