Improve stress test for MultiOpsTxnsStressTest (#9829)

Summary:
Adds more coverage to `MultiOpsTxnsStressTest` with a focus on write-prepared transactions.

1. Add a hack to manually evict commit cache entries. We currently cannot assign small values to `wp_commit_cache_bits` because it requires a prepared transaction to commit within a certain range of sequence numbers, otherwise it will throw.
2. Add coverage for commit-time-write-batch. If write policy is write-prepared, we need to set `use_only_the_last_commit_time_batch_for_recovery` to true.
3. After each flush/compaction, verify data consistency. This is possible since data size can be small: default numbers of primary/secondary keys are just 1000.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9829

Test Plan:
```
TEST_TMPDIR=/dev/shm/rocksdb_crashtest_blackbox/ make blackbox_crash_test_with_multiops_wp_txn
```

Reviewed By: pdillinger

Differential Revision: D35806678

Pulled By: riversand963

fbshipit-source-id: d7fde7a29fda0fb481a61f553e0ca0c47da93616
main
Yanqin Jin 3 years ago committed by Facebook GitHub Bot
parent d9d456de49
commit 94e245a14d
  1. 2
      db_stress_tool/db_stress_test_base.cc
  2. 7
      db_stress_tool/db_stress_test_base.h
  3. 383
      db_stress_tool/multi_ops_txns_stress.cc
  4. 62
      db_stress_tool/multi_ops_txns_stress.h
  5. 5
      tools/db_crashtest.py
  6. 18
      utilities/transactions/write_prepared_txn_db.cc
  7. 1
      utilities/transactions/write_prepared_txn_db.h

@ -2613,6 +2613,7 @@ void StressTest::Open() {
options_.listeners.emplace_back(new DbStressListener( options_.listeners.emplace_back(new DbStressListener(
FLAGS_db, options_.db_paths, cf_descriptors, db_stress_listener_env)); FLAGS_db, options_.db_paths, cf_descriptors, db_stress_listener_env));
#endif // !ROCKSDB_LITE #endif // !ROCKSDB_LITE
RegisterAdditionalListeners();
options_.create_missing_column_families = true; options_.create_missing_column_families = true;
if (!FLAGS_use_txn) { if (!FLAGS_use_txn) {
#ifndef NDEBUG #ifndef NDEBUG
@ -2751,6 +2752,7 @@ void StressTest::Open() {
static_cast<size_t>(FLAGS_wp_snapshot_cache_bits); static_cast<size_t>(FLAGS_wp_snapshot_cache_bits);
txn_db_options.wp_commit_cache_bits = txn_db_options.wp_commit_cache_bits =
static_cast<size_t>(FLAGS_wp_commit_cache_bits); static_cast<size_t>(FLAGS_wp_commit_cache_bits);
PrepareTxnDbOptions(txn_db_options);
s = TransactionDB::Open(options_, txn_db_options, FLAGS_db, s = TransactionDB::Open(options_, txn_db_options, FLAGS_db,
cf_descriptors, &column_families_, &txn_db_); cf_descriptors, &column_families_, &txn_db_);
if (!s.ok()) { if (!s.ok()) {

@ -16,6 +16,7 @@ namespace ROCKSDB_NAMESPACE {
class SystemClock; class SystemClock;
class Transaction; class Transaction;
class TransactionDB; class TransactionDB;
struct TransactionDBOptions;
class StressTest { class StressTest {
public: public:
@ -224,6 +225,12 @@ class StressTest {
void CheckAndSetOptionsForUserTimestamp(); void CheckAndSetOptionsForUserTimestamp();
virtual void RegisterAdditionalListeners() {}
#ifndef ROCKSDB_LITE
virtual void PrepareTxnDbOptions(TransactionDBOptions& /*txn_db_opts*/) {}
#endif
std::shared_ptr<Cache> cache_; std::shared_ptr<Cache> cache_;
std::shared_ptr<Cache> compressed_cache_; std::shared_ptr<Cache> compressed_cache_;
std::shared_ptr<const FilterPolicy> filter_policy_; std::shared_ptr<const FilterPolicy> filter_policy_;

@ -15,6 +15,7 @@
#ifndef NDEBUG #ifndef NDEBUG
#include "utilities/fault_injection_fs.h" #include "utilities/fault_injection_fs.h"
#endif // NDEBUG #endif // NDEBUG
#include "utilities/transactions/write_prepared_txn_db.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -31,6 +32,21 @@ DEFINE_int32(delay_snapshot_read_one_in, 0,
"With a chance of 1/N, inject a random delay between taking " "With a chance of 1/N, inject a random delay between taking "
"snapshot and read."); "snapshot and read.");
DEFINE_int32(rollback_one_in, 0,
"If non-zero, rollback non-read-only transactions with a "
"probability of 1/N.");
DEFINE_int32(clear_wp_commit_cache_one_in, 0,
"If non-zero, evict all commit entries from commit cache with a "
"probability of 1/N. This options applies to write-prepared and "
"write-unprepared transactions.");
extern "C" bool rocksdb_write_prepared_TEST_ShouldClearCommitCache(void) {
static Random rand(static_cast<uint32_t>(db_stress_env->NowMicros()));
return FLAGS_clear_wp_commit_cache_one_in > 0 &&
rand.OneIn(FLAGS_clear_wp_commit_cache_one_in);
}
// MultiOpsTxnsStressTest can either operate on a database with pre-populated // MultiOpsTxnsStressTest can either operate on a database with pre-populated
// data (possibly from previous ones), or create a new db and preload it with // data (possibly from previous ones), or create a new db and preload it with
// data specified via `-lb_a`, `-ub_a`, `-lb_c`, `-ub_c`, etc. Among these, we // data specified via `-lb_a`, `-ub_a`, `-lb_c`, `-ub_c`, etc. Among these, we
@ -75,8 +91,9 @@ void MultiOpsTxnsStressTest::KeyGenerator::FinishInit() {
"Cannot allocate key in [%u, %u)\nStart with a new DB or try change " "Cannot allocate key in [%u, %u)\nStart with a new DB or try change "
"the number of threads for testing via -threads=<#threads>\n", "the number of threads for testing via -threads=<#threads>\n",
static_cast<unsigned int>(low_), static_cast<unsigned int>(high_)); static_cast<unsigned int>(low_), static_cast<unsigned int>(high_));
fflush(stdout);
fflush(stderr); fflush(stderr);
std::terminate(); assert(false);
} }
initialized_ = true; initialized_ = true;
} }
@ -131,33 +148,43 @@ void MultiOpsTxnsStressTest::KeyGenerator::UndoAllocation(uint32_t new_val) {
} }
std::string MultiOpsTxnsStressTest::Record::EncodePrimaryKey(uint32_t a) { std::string MultiOpsTxnsStressTest::Record::EncodePrimaryKey(uint32_t a) {
char buf[8]; std::string ret;
EncodeFixed32(buf, kPrimaryIndexId); PutFixed32(&ret, kPrimaryIndexId);
std::reverse(buf, buf + 4); PutFixed32(&ret, a);
EncodeFixed32(buf + 4, a);
std::reverse(buf + 4, buf + 8); char* const buf = &ret[0];
return std::string(buf, sizeof(buf)); std::reverse(buf, buf + sizeof(kPrimaryIndexId));
std::reverse(buf + sizeof(kPrimaryIndexId),
buf + sizeof(kPrimaryIndexId) + sizeof(a));
return ret;
} }
std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey(uint32_t c) { std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey(uint32_t c) {
char buf[8]; std::string ret;
EncodeFixed32(buf, kSecondaryIndexId); PutFixed32(&ret, kSecondaryIndexId);
std::reverse(buf, buf + 4); PutFixed32(&ret, c);
EncodeFixed32(buf + 4, c);
std::reverse(buf + 4, buf + 8); char* const buf = &ret[0];
return std::string(buf, sizeof(buf)); std::reverse(buf, buf + sizeof(kSecondaryIndexId));
std::reverse(buf + sizeof(kSecondaryIndexId),
buf + sizeof(kSecondaryIndexId) + sizeof(c));
return ret;
} }
std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey(uint32_t c, std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey(uint32_t c,
uint32_t a) { uint32_t a) {
char buf[12]; std::string ret;
EncodeFixed32(buf, kSecondaryIndexId); PutFixed32(&ret, kSecondaryIndexId);
std::reverse(buf, buf + 4); PutFixed32(&ret, c);
EncodeFixed32(buf + 4, c); PutFixed32(&ret, a);
EncodeFixed32(buf + 8, a);
std::reverse(buf + 4, buf + 8); char* const buf = &ret[0];
std::reverse(buf + 8, buf + 12); std::reverse(buf, buf + sizeof(kSecondaryIndexId));
return std::string(buf, sizeof(buf)); std::reverse(buf + sizeof(kSecondaryIndexId),
buf + sizeof(kSecondaryIndexId) + sizeof(c));
std::reverse(buf + sizeof(kSecondaryIndexId) + sizeof(c),
buf + sizeof(kSecondaryIndexId) + sizeof(c) + sizeof(a));
return ret;
} }
std::tuple<Status, uint32_t, uint32_t> std::tuple<Status, uint32_t, uint32_t>
@ -201,40 +228,26 @@ std::string MultiOpsTxnsStressTest::Record::EncodePrimaryKey() const {
} }
std::string MultiOpsTxnsStressTest::Record::EncodePrimaryIndexValue() const { std::string MultiOpsTxnsStressTest::Record::EncodePrimaryIndexValue() const {
char buf[8]; std::string ret;
EncodeFixed32(buf, b_); PutFixed32(&ret, b_);
EncodeFixed32(buf + 4, c_); PutFixed32(&ret, c_);
return std::string(buf, sizeof(buf)); return ret;
} }
std::pair<std::string, std::string> std::pair<std::string, std::string>
MultiOpsTxnsStressTest::Record::EncodeSecondaryIndexEntry() const { MultiOpsTxnsStressTest::Record::EncodeSecondaryIndexEntry() const {
std::string secondary_index_key; std::string secondary_index_key = EncodeSecondaryKey(c_, a_);
char buf[12];
EncodeFixed32(buf, kSecondaryIndexId);
std::reverse(buf, buf + 4);
EncodeFixed32(buf + 4, c_);
EncodeFixed32(buf + 8, a_);
std::reverse(buf + 4, buf + 8);
std::reverse(buf + 8, buf + 12);
secondary_index_key.assign(buf, sizeof(buf));
// Secondary index value is always 4-byte crc32 of the secondary key // Secondary index value is always 4-byte crc32 of the secondary key
std::string secondary_index_value; std::string secondary_index_value;
uint32_t crc = crc32c::Value(buf, sizeof(buf)); uint32_t crc =
crc32c::Value(secondary_index_key.data(), secondary_index_key.size());
PutFixed32(&secondary_index_value, crc); PutFixed32(&secondary_index_value, crc);
return std::make_pair(secondary_index_key, secondary_index_value); return std::make_pair(std::move(secondary_index_key), secondary_index_value);
} }
std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey() const { std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey() const {
char buf[12]; return EncodeSecondaryKey(c_, a_);
EncodeFixed32(buf, kSecondaryIndexId);
std::reverse(buf, buf + 4);
EncodeFixed32(buf + 4, c_);
EncodeFixed32(buf + 8, a_);
std::reverse(buf + 4, buf + 8);
std::reverse(buf + 8, buf + 12);
return std::string(buf, sizeof(buf));
} }
Status MultiOpsTxnsStressTest::Record::DecodePrimaryIndexEntry( Status MultiOpsTxnsStressTest::Record::DecodePrimaryIndexEntry(
@ -244,27 +257,22 @@ Status MultiOpsTxnsStressTest::Record::DecodePrimaryIndexEntry(
return Status::Corruption("Primary index key length is not 8"); return Status::Corruption("Primary index key length is not 8");
} }
const char* const index_id_buf = primary_index_key.data(); uint32_t index_id = 0;
uint32_t index_id =
static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[0])) << 24; [[maybe_unused]] bool res = GetFixed32(&primary_index_key, &index_id);
index_id += static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[1])) assert(res);
<< 16; index_id = EndianSwapValue(index_id);
index_id += static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[2]))
<< 8;
index_id +=
static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[3]));
primary_index_key.remove_prefix(sizeof(uint32_t));
if (index_id != kPrimaryIndexId) { if (index_id != kPrimaryIndexId) {
std::ostringstream oss; std::ostringstream oss;
oss << "Unexpected primary index id: " << index_id; oss << "Unexpected primary index id: " << index_id;
return Status::Corruption(oss.str()); return Status::Corruption(oss.str());
} }
const char* const buf = primary_index_key.data(); res = GetFixed32(&primary_index_key, &a_);
a_ = static_cast<uint32_t>(static_cast<unsigned char>(buf[0])) << 24; assert(res);
a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[1])) << 16; a_ = EndianSwapValue(a_);
a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[2])) << 8; assert(primary_index_key.empty());
a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[3]));
if (primary_index_value.size() != 8) { if (primary_index_value.size() != 8) {
return Status::Corruption("Primary index value length is not 8"); return Status::Corruption("Primary index value length is not 8");
@ -282,33 +290,28 @@ Status MultiOpsTxnsStressTest::Record::DecodeSecondaryIndexEntry(
uint32_t crc = uint32_t crc =
crc32c::Value(secondary_index_key.data(), secondary_index_key.size()); crc32c::Value(secondary_index_key.data(), secondary_index_key.size());
const char* const index_id_buf = secondary_index_key.data(); uint32_t index_id = 0;
uint32_t index_id =
static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[0])) << 24; [[maybe_unused]] bool res = GetFixed32(&secondary_index_key, &index_id);
index_id += static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[1])) assert(res);
<< 16; index_id = EndianSwapValue(index_id);
index_id += static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[2]))
<< 8;
index_id +=
static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[3]));
secondary_index_key.remove_prefix(sizeof(uint32_t));
if (index_id != kSecondaryIndexId) { if (index_id != kSecondaryIndexId) {
std::ostringstream oss; std::ostringstream oss;
oss << "Unexpected secondary index id: " << index_id; oss << "Unexpected secondary index id: " << index_id;
return Status::Corruption(oss.str()); return Status::Corruption(oss.str());
} }
const char* const buf = secondary_index_key.data();
assert(secondary_index_key.size() == 8); assert(secondary_index_key.size() == 8);
c_ = static_cast<uint32_t>(static_cast<unsigned char>(buf[0])) << 24; res = GetFixed32(&secondary_index_key, &c_);
c_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[1])) << 16; assert(res);
c_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[2])) << 8; c_ = EndianSwapValue(c_);
c_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[3]));
a_ = static_cast<uint32_t>(static_cast<unsigned char>(buf[4])) << 24; assert(secondary_index_key.size() == 4);
a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[5])) << 16; res = GetFixed32(&secondary_index_key, &a_);
a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[6])) << 8; assert(res);
a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[7])); a_ = EndianSwapValue(a_);
assert(secondary_index_key.empty());
if (secondary_index_value.size() != 4) { if (secondary_index_value.size() != 4) {
return Status::Corruption("Secondary index value length is not 4"); return Status::Corruption("Secondary index value length is not 4");
@ -520,9 +523,35 @@ Status MultiOpsTxnsStressTest::TestCustomOperations(
// Should never reach here. // Should never reach here.
assert(false); assert(false);
} }
return s; return s;
} }
void MultiOpsTxnsStressTest::RegisterAdditionalListeners() {
options_.listeners.emplace_back(new MultiOpsTxnsStressListener(this));
}
#ifndef ROCKSDB_LITE
void MultiOpsTxnsStressTest::PrepareTxnDbOptions(
TransactionDBOptions& txn_db_opts) {
// MultiOpsTxnStressTest uses SingleDelete to delete secondary keys, thus we
// register this callback to let TxnDb know that when rolling back
// a transaction, use only SingleDelete to cancel prior Put from the same
// transaction if applicable.
txn_db_opts.rollback_deletion_type_callback =
[](TransactionDB* /*db*/, ColumnFamilyHandle* /*column_family*/,
const Slice& key) {
Slice ks = key;
uint32_t index_id = 0;
[[maybe_unused]] bool res = GetFixed32(&ks, &index_id);
assert(res);
index_id = EndianSwapValue(index_id);
assert(index_id <= Record::kSecondaryIndexId);
return index_id == Record::kSecondaryIndexId;
};
}
#endif // !ROCKSDB_LITE
Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread, Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread,
uint32_t old_a, uint32_t old_a,
uint32_t old_a_pos, uint32_t old_a_pos,
@ -561,8 +590,10 @@ Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread,
} }
if (s.IsNotFound()) { if (s.IsNotFound()) {
thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/0); thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/0);
} else if (s.IsBusy()) { } else if (s.IsBusy() || s.IsIncomplete()) {
// ignore. // ignore.
// Incomplete also means rollback by application. See the transaction
// implementations.
} else { } else {
thread->stats.AddErrors(1); thread->stats.AddErrors(1);
} }
@ -631,6 +662,16 @@ Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread,
return s; return s;
} }
if (FLAGS_rollback_one_in > 0 && thread->rand.OneIn(FLAGS_rollback_one_in)) {
s = Status::Incomplete();
return s;
}
s = WriteToCommitTimeWriteBatch(*txn);
if (!s.ok()) {
return s;
}
s = txn->Commit(); s = txn->Commit();
auto& key_gen = key_gen_for_a_.at(thread->tid); auto& key_gen = key_gen_for_a_.at(thread->tid);
@ -677,11 +718,12 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread,
Record::kPrimaryIndexEntrySize + Record::kSecondaryIndexEntrySize); Record::kPrimaryIndexEntrySize + Record::kSecondaryIndexEntrySize);
return; return;
} else if (s.IsBusy() || s.IsTimedOut() || s.IsTryAgain() || } else if (s.IsBusy() || s.IsTimedOut() || s.IsTryAgain() ||
s.IsMergeInProgress()) { s.IsMergeInProgress() || s.IsIncomplete()) {
// ww-conflict detected, or // ww-conflict detected, or
// lock cannot be acquired, or // lock cannot be acquired, or
// memtable history is not large enough for conflict checking, or // memtable history is not large enough for conflict checking, or
// Merge operation cannot be resolved. // Merge operation cannot be resolved, or
// application rollback.
// TODO (yanqin) add stats for other cases? // TODO (yanqin) add stats for other cases?
} else if (s.IsNotFound()) { } else if (s.IsNotFound()) {
// ignore. // ignore.
@ -727,8 +769,9 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread,
Record record; Record record;
s = record.DecodeSecondaryIndexEntry(it->key(), it->value()); s = record.DecodeSecondaryIndexEntry(it->key(), it->value());
if (!s.ok()) { if (!s.ok()) {
fprintf(stderr, "Cannot decode secondary key: %s\n", fprintf(stderr, "Cannot decode secondary key (%s => %s): %s\n",
s.ToString().c_str()); it->key().ToString(true).c_str(),
it->value().ToString(true).c_str(), s.ToString().c_str());
assert(false); assert(false);
break; break;
} }
@ -749,21 +792,31 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread,
} else if (s.IsNotFound()) { } else if (s.IsNotFound()) {
// We can also fail verification here. // We can also fail verification here.
std::ostringstream oss; std::ostringstream oss;
oss << "pk should exist: " << Slice(pk).ToString(true); auto* dbimpl = static_cast_with_check<DBImpl>(db_->GetRootDB());
assert(dbimpl);
oss << "snap " << read_opts.snapshot->GetSequenceNumber()
<< " (published " << dbimpl->GetLastPublishedSequence()
<< "), pk should exist: " << Slice(pk).ToString(true);
fprintf(stderr, "%s\n", oss.str().c_str()); fprintf(stderr, "%s\n", oss.str().c_str());
assert(false); assert(false);
break; break;
} }
if (!s.ok()) { if (!s.ok()) {
fprintf(stderr, "%s\n", s.ToString().c_str()); std::ostringstream oss;
auto* dbimpl = static_cast_with_check<DBImpl>(db_->GetRootDB());
assert(dbimpl);
oss << "snap " << read_opts.snapshot->GetSequenceNumber()
<< " (published " << dbimpl->GetLastPublishedSequence() << "), "
<< s.ToString();
fprintf(stderr, "%s\n", oss.str().c_str());
assert(false); assert(false);
break; break;
} }
auto result = Record::DecodePrimaryIndexValue(value); auto result = Record::DecodePrimaryIndexValue(value);
s = std::get<0>(result); s = std::get<0>(result);
if (!s.ok()) { if (!s.ok()) {
fprintf(stderr, "Cannot decode primary index value: %s\n", fprintf(stderr, "Cannot decode primary index value %s: %s\n",
s.ToString().c_str()); Slice(value).ToString(true).c_str(), s.ToString().c_str());
assert(false); assert(false);
break; break;
} }
@ -771,8 +824,12 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread,
uint32_t c = std::get<2>(result); uint32_t c = std::get<2>(result);
if (c != old_c) { if (c != old_c) {
std::ostringstream oss; std::ostringstream oss;
oss << "c in primary index does not match secondary index: " << c auto* dbimpl = static_cast_with_check<DBImpl>(db_->GetRootDB());
<< " != " << old_c; assert(dbimpl);
oss << "snap " << read_opts.snapshot->GetSequenceNumber()
<< " (published " << dbimpl->GetLastPublishedSequence()
<< "), pk/sk mismatch. pk: (a=" << record.a_value() << ", "
<< "c=" << c << "), sk: (c=" << old_c << ")";
s = Status::Corruption(); s = Status::Corruption();
fprintf(stderr, "%s\n", oss.str().c_str()); fprintf(stderr, "%s\n", oss.str().c_str());
assert(false); assert(false);
@ -811,6 +868,16 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread,
return s; return s;
} }
if (FLAGS_rollback_one_in > 0 && thread->rand.OneIn(FLAGS_rollback_one_in)) {
s = Status::Incomplete();
return s;
}
s = WriteToCommitTimeWriteBatch(*txn);
if (!s.ok()) {
return s;
}
s = txn->Commit(); s = txn->Commit();
if (s.ok()) { if (s.ok()) {
@ -856,7 +923,7 @@ Status MultiOpsTxnsStressTest::UpdatePrimaryIndexValueTxn(ThreadState* thread,
} else if (s.IsInvalidArgument()) { } else if (s.IsInvalidArgument()) {
// ignored. // ignored.
} else if (s.IsBusy() || s.IsTimedOut() || s.IsTryAgain() || } else if (s.IsBusy() || s.IsTimedOut() || s.IsTryAgain() ||
s.IsMergeInProgress()) { s.IsMergeInProgress() || s.IsIncomplete()) {
// ignored. // ignored.
} else { } else {
thread->stats.AddErrors(1); thread->stats.AddErrors(1);
@ -874,8 +941,8 @@ Status MultiOpsTxnsStressTest::UpdatePrimaryIndexValueTxn(ThreadState* thread,
auto result = Record::DecodePrimaryIndexValue(value); auto result = Record::DecodePrimaryIndexValue(value);
if (!std::get<0>(result).ok()) { if (!std::get<0>(result).ok()) {
s = std::get<0>(result); s = std::get<0>(result);
fprintf(stderr, "Cannot decode primary index value: %s\n", fprintf(stderr, "Cannot decode primary index value %s: %s\n",
s.ToString().c_str()); Slice(value).ToString(true).c_str(), s.ToString().c_str());
assert(false); assert(false);
return s; return s;
} }
@ -892,6 +959,17 @@ Status MultiOpsTxnsStressTest::UpdatePrimaryIndexValueTxn(ThreadState* thread,
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
if (FLAGS_rollback_one_in > 0 && thread->rand.OneIn(FLAGS_rollback_one_in)) {
s = Status::Incomplete();
return s;
}
s = WriteToCommitTimeWriteBatch(*txn);
if (!s.ok()) {
return s;
}
s = txn->Commit(); s = txn->Commit();
if (s.ok()) { if (s.ok()) {
delete txn; delete txn;
@ -1050,12 +1128,15 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const {
// First, iterate primary index. // First, iterate primary index.
size_t primary_index_entries_count = 0; size_t primary_index_entries_count = 0;
{ {
char buf[4]; std::string iter_ub_str;
EncodeFixed32(buf, Record::kPrimaryIndexId + 1); PutFixed32(&iter_ub_str, Record::kPrimaryIndexId + 1);
std::reverse(buf, buf + sizeof(buf)); std::reverse(iter_ub_str.begin(), iter_ub_str.end());
std::string iter_ub_str(buf, sizeof(buf));
Slice iter_ub = iter_ub_str; Slice iter_ub = iter_ub_str;
std::string start_key;
PutFixed32(&start_key, Record::kPrimaryIndexId);
std::reverse(start_key.begin(), start_key.end());
// This `ReadOptions` is for validation purposes. Ignore // This `ReadOptions` is for validation purposes. Ignore
// `FLAGS_rate_limit_user_ops` to avoid slowing any validation. // `FLAGS_rate_limit_user_ops` to avoid slowing any validation.
ReadOptions ropts; ReadOptions ropts;
@ -1064,7 +1145,7 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const {
ropts.iterate_upper_bound = &iter_ub; ropts.iterate_upper_bound = &iter_ub;
std::unique_ptr<Iterator> it(db_->NewIterator(ropts)); std::unique_ptr<Iterator> it(db_->NewIterator(ropts));
for (it->SeekToFirst(); it->Valid(); it->Next()) { for (it->Seek(start_key); it->Valid(); it->Next()) {
Record record; Record record;
Status s = record.DecodePrimaryIndexEntry(it->key(), it->value()); Status s = record.DecodePrimaryIndexEntry(it->key(), it->value());
if (!s.ok()) { if (!s.ok()) {
@ -1101,10 +1182,9 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const {
// Second, iterate secondary index. // Second, iterate secondary index.
size_t secondary_index_entries_count = 0; size_t secondary_index_entries_count = 0;
{ {
char buf[4]; std::string start_key;
EncodeFixed32(buf, Record::kSecondaryIndexId); PutFixed32(&start_key, Record::kSecondaryIndexId);
std::reverse(buf, buf + sizeof(buf)); std::reverse(start_key.begin(), start_key.end());
const std::string start_key(buf, sizeof(buf));
// This `ReadOptions` is for validation purposes. Ignore // This `ReadOptions` is for validation purposes. Ignore
// `FLAGS_rate_limit_user_ops` to avoid slowing any validation. // `FLAGS_rate_limit_user_ops` to avoid slowing any validation.
@ -1118,7 +1198,8 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const {
Record record; Record record;
Status s = record.DecodeSecondaryIndexEntry(it->key(), it->value()); Status s = record.DecodeSecondaryIndexEntry(it->key(), it->value());
if (!s.ok()) { if (!s.ok()) {
oss << "Cannot decode secondary index entry"; oss << "Cannot decode secondary index entry "
<< it->key().ToString(true) << "=>" << it->value().ToString(true);
VerificationAbort(thread->shared, oss.str(), s); VerificationAbort(thread->shared, oss.str(), s);
assert(false); assert(false);
return; return;
@ -1132,7 +1213,7 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const {
s = db_->Get(ropts, pk, &value); s = db_->Get(ropts, pk, &value);
if (!s.ok()) { if (!s.ok()) {
oss << "Error searching pk " << Slice(pk).ToString(true) << ". " oss << "Error searching pk " << Slice(pk).ToString(true) << ". "
<< s.ToString(); << s.ToString() << ". sk " << it->key().ToString(true);
VerificationAbort(thread->shared, oss.str(), s); VerificationAbort(thread->shared, oss.str(), s);
assert(false); assert(false);
return; return;
@ -1148,8 +1229,10 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const {
} }
uint32_t c_in_primary = std::get<2>(result); uint32_t c_in_primary = std::get<2>(result);
if (c_in_primary != record.c_value()) { if (c_in_primary != record.c_value()) {
oss << "Pk/sk mismatch. pk: (c=" << c_in_primary oss << "Pk/sk mismatch. pk: " << Slice(pk).ToString(true) << "=>"
<< "), sk: (c=" << record.c_value() << ")"; << Slice(value).ToString(true) << " (a=" << record.a_value()
<< ", c=" << c_in_primary << "), sk: " << it->key().ToString(true)
<< " (c=" << record.c_value() << ")";
VerificationAbort(thread->shared, oss.str(), s); VerificationAbort(thread->shared, oss.str(), s);
assert(false); assert(false);
return; return;
@ -1167,6 +1250,75 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const {
} }
} }
void MultiOpsTxnsStressTest::VerifyPkSkFast(int job_id) {
const Snapshot* const snapshot = db_->GetSnapshot();
assert(snapshot);
ManagedSnapshot snapshot_guard(db_, snapshot);
std::ostringstream oss;
auto* dbimpl = static_cast_with_check<DBImpl>(db_->GetRootDB());
assert(dbimpl);
oss << "Job " << job_id << ": [" << snapshot->GetSequenceNumber() << ","
<< dbimpl->GetLastPublishedSequence() << "] ";
std::string start_key;
PutFixed32(&start_key, Record::kSecondaryIndexId);
std::reverse(start_key.begin(), start_key.end());
// This `ReadOptions` is for validation purposes. Ignore
// `FLAGS_rate_limit_user_ops` to avoid slowing any validation.
ReadOptions ropts;
ropts.snapshot = snapshot;
ropts.total_order_seek = true;
std::unique_ptr<Iterator> it(db_->NewIterator(ropts));
for (it->Seek(start_key); it->Valid(); it->Next()) {
Record record;
Status s = record.DecodeSecondaryIndexEntry(it->key(), it->value());
if (!s.ok()) {
oss << "Cannot decode secondary index entry " << it->key().ToString(true)
<< "=>" << it->value().ToString(true);
fprintf(stderr, "%s\n", oss.str().c_str());
fflush(stderr);
assert(false);
}
// After decoding secondary index entry, we know a and c. Crc is verified
// in decoding phase.
//
// Form a primary key and search in the primary index.
std::string pk = Record::EncodePrimaryKey(record.a_value());
std::string value;
s = db_->Get(ropts, pk, &value);
if (!s.ok()) {
oss << "Error searching pk " << Slice(pk).ToString(true) << ". "
<< s.ToString() << ". sk " << it->key().ToString(true);
fprintf(stderr, "%s\n", oss.str().c_str());
fflush(stderr);
assert(false);
}
auto result = Record::DecodePrimaryIndexValue(value);
s = std::get<0>(result);
if (!s.ok()) {
oss << "Error decoding primary index value "
<< Slice(value).ToString(true) << ". " << s.ToString();
fprintf(stderr, "%s\n", oss.str().c_str());
fflush(stderr);
assert(false);
}
uint32_t c_in_primary = std::get<2>(result);
if (c_in_primary != record.c_value()) {
oss << "Pk/sk mismatch. pk: " << Slice(pk).ToString(true) << "=>"
<< Slice(value).ToString(true) << " (a=" << record.a_value()
<< ", c=" << c_in_primary << "), sk: " << it->key().ToString(true)
<< " (c=" << record.c_value() << ")";
fprintf(stderr, "%s\n", oss.str().c_str());
fflush(stderr);
assert(false);
}
}
}
std::pair<uint32_t, uint32_t> MultiOpsTxnsStressTest::ChooseExistingA( std::pair<uint32_t, uint32_t> MultiOpsTxnsStressTest::ChooseExistingA(
ThreadState* thread) { ThreadState* thread) {
uint32_t tid = thread->tid; uint32_t tid = thread->tid;
@ -1193,6 +1345,22 @@ uint32_t MultiOpsTxnsStressTest::GenerateNextC(ThreadState* thread) {
return key_gen->Allocate(); return key_gen->Allocate();
} }
#ifndef ROCKSDB_LITE
Status MultiOpsTxnsStressTest::WriteToCommitTimeWriteBatch(Transaction& txn) {
WriteBatch* ctwb = txn.GetCommitTimeWriteBatch();
assert(ctwb);
// Do not change the content in key_buf.
static constexpr char key_buf[sizeof(Record::kMetadataPrefix) + 4] = {
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\xff'};
uint64_t counter_val = counter_.Next();
char val_buf[sizeof(counter_val)];
EncodeFixed64(val_buf, counter_val);
return ctwb->Put(Slice(key_buf, sizeof(key_buf)),
Slice(val_buf, sizeof(val_buf)));
}
#endif // !ROCKSDB_LITE
std::string MultiOpsTxnsStressTest::KeySpaces::EncodeTo() const { std::string MultiOpsTxnsStressTest::KeySpaces::EncodeTo() const {
std::string result; std::string result;
PutFixed32(&result, lb_a); PutFixed32(&result, lb_a);
@ -1428,8 +1596,9 @@ void MultiOpsTxnsStressTest::ScanExistingDb(SharedState* shared, int threads) {
Record record; Record record;
Status s = record.DecodePrimaryIndexEntry(it->key(), it->value()); Status s = record.DecodePrimaryIndexEntry(it->key(), it->value());
if (!s.ok()) { if (!s.ok()) {
fprintf(stderr, "Cannot decode primary index entry: %s\n", fprintf(stderr, "Cannot decode primary index entry (%s => %s): %s\n",
s.ToString().c_str()); it->key().ToString(true).c_str(),
it->value().ToString(true).c_str(), s.ToString().c_str());
assert(false); assert(false);
} }
uint32_t a = record.a_value(); uint32_t a = record.a_value();

@ -111,6 +111,7 @@ class MultiOpsTxnsStressTest : public StressTest {
public: public:
class Record { class Record {
public: public:
static constexpr uint32_t kMetadataPrefix = 0;
static constexpr uint32_t kPrimaryIndexId = 1; static constexpr uint32_t kPrimaryIndexId = 1;
static constexpr uint32_t kSecondaryIndexId = 2; static constexpr uint32_t kSecondaryIndexId = 2;
@ -261,6 +262,12 @@ class MultiOpsTxnsStressTest : public StressTest {
ThreadState* thread, ThreadState* thread,
const std::vector<int>& rand_column_families) override; const std::vector<int>& rand_column_families) override;
void RegisterAdditionalListeners() override;
#ifndef ROCKSDB_LITE
void PrepareTxnDbOptions(TransactionDBOptions& txn_db_opts) override;
#endif // !ROCKSDB_LITE
Status PrimaryKeyUpdateTxn(ThreadState* thread, uint32_t old_a, Status PrimaryKeyUpdateTxn(ThreadState* thread, uint32_t old_a,
uint32_t old_a_pos, uint32_t new_a); uint32_t old_a_pos, uint32_t new_a);
@ -280,7 +287,17 @@ class MultiOpsTxnsStressTest : public StressTest {
VerifyDb(thread); VerifyDb(thread);
} }
void VerifyPkSkFast(int job_id);
protected: protected:
class Counter {
public:
uint64_t Next() { return value_.fetch_add(1); }
private:
std::atomic<uint64_t> value_ = Env::Default()->NowNanos();
};
using KeySet = std::set<uint32_t>; using KeySet = std::set<uint32_t>;
class KeyGenerator { class KeyGenerator {
public: public:
@ -330,9 +347,21 @@ class MultiOpsTxnsStressTest : public StressTest {
uint32_t GenerateNextC(ThreadState* thread); uint32_t GenerateNextC(ThreadState* thread);
#ifndef ROCKSDB_LITE
// Some applications, e.g. MyRocks writes a KV pair to the database via
// commit-time-write-batch (ctwb) in additional to the transaction's regular
// write batch. The key is usually constant representing some system
// metadata, while the value is monoticailly increasing which represents the
// actual value of the metadata. Method WriteToCommitTimeWriteBatch()
// emulates this scenario.
Status WriteToCommitTimeWriteBatch(Transaction& txn);
#endif //! ROCKSDB_LITE
std::vector<std::unique_ptr<KeyGenerator>> key_gen_for_a_; std::vector<std::unique_ptr<KeyGenerator>> key_gen_for_a_;
std::vector<std::unique_ptr<KeyGenerator>> key_gen_for_c_; std::vector<std::unique_ptr<KeyGenerator>> key_gen_for_c_;
Counter counter_{};
private: private:
struct KeySpaces { struct KeySpaces {
uint32_t lb_a = 0; uint32_t lb_a = 0;
@ -370,5 +399,38 @@ class InvariantChecker {
"MultiOpsTxnsStressTest::Record::c_ must be 4 bytes"); "MultiOpsTxnsStressTest::Record::c_ must be 4 bytes");
}; };
class MultiOpsTxnsStressListener : public EventListener {
public:
explicit MultiOpsTxnsStressListener(MultiOpsTxnsStressTest* stress_test)
: stress_test_(stress_test) {
assert(stress_test_);
}
#ifndef ROCKSDB_LITE
~MultiOpsTxnsStressListener() override {}
void OnFlushCompleted(DB* db, const FlushJobInfo& info) override {
assert(db);
#ifdef NDEBUG
(void)db;
#endif
assert(info.cf_id == 0);
stress_test_->VerifyPkSkFast(info.job_id);
}
void OnCompactionCompleted(DB* db, const CompactionJobInfo& info) override {
assert(db);
#ifdef NDEBUG
(void)db;
#endif
assert(info.cf_id == 0);
stress_test_->VerifyPkSkFast(info.job_id);
}
#endif //! ROCKSDB_LITE
private:
MultiOpsTxnsStressTest* const stress_test_ = nullptr;
};
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
#endif // GFLAGS #endif // GFLAGS

@ -383,6 +383,7 @@ multiops_txn_default_params = {
# compactions. # compactions.
"flush_one_in": 1000, "flush_one_in": 1000,
"key_spaces_path": setup_multiops_txn_key_spaces_file(), "key_spaces_path": setup_multiops_txn_key_spaces_file(),
"rollback_one_in": 4,
} }
multiops_wc_txn_params = { multiops_wc_txn_params = {
@ -401,6 +402,10 @@ multiops_wp_txn_params = {
"enable_pipelined_write": 0, "enable_pipelined_write": 0,
# OpenReadOnly after checkpoint is not currnetly compatible with WritePrepared txns # OpenReadOnly after checkpoint is not currnetly compatible with WritePrepared txns
"checkpoint_one_in": 0, "checkpoint_one_in": 0,
# Required to be 1 in order to use commit-time-batch
"use_only_the_last_commit_time_batch_for_recovery": 1,
"recycle_log_file_num": 0,
"clear_wp_commit_cache_one_in": 10,
} }
def finalize_and_sanitize(src_params): def finalize_and_sanitize(src_params):

@ -26,6 +26,18 @@
#include "utilities/transactions/pessimistic_transaction.h" #include "utilities/transactions/pessimistic_transaction.h"
#include "utilities/transactions/transaction_db_mutex_impl.h" #include "utilities/transactions/transaction_db_mutex_impl.h"
// This function is for testing only. If it returns true, then all entries in
// the commit cache will be evicted. Unit and/or stress tests (db_stress)
// can implement this function and customize how frequently commit cache
// eviction occurs.
// TODO: remove this function once we can configure commit cache to be very
// small so that eviction occurs very frequently. This requires the commit
// cache entry to be able to encode prepare and commit sequence numbers so that
// the commit sequence number does not have to be within a certain range of
// prepare sequence number.
extern "C" bool rocksdb_write_prepared_TEST_ShouldClearCommitCache(void)
__attribute__((__weak__));
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
Status WritePreparedTxnDB::Initialize( Status WritePreparedTxnDB::Initialize(
@ -505,6 +517,12 @@ void WritePreparedTxnDB::AddCommitted(uint64_t prepare_seq, uint64_t commit_seq,
// legit when a commit entry in a write batch overwrite the previous one // legit when a commit entry in a write batch overwrite the previous one
max_evicted_seq = evicted.commit_seq; max_evicted_seq = evicted.commit_seq;
} }
#ifdef OS_LINUX
if (rocksdb_write_prepared_TEST_ShouldClearCommitCache &&
rocksdb_write_prepared_TEST_ShouldClearCommitCache()) {
max_evicted_seq = last;
}
#endif // OS_LINUX
ROCKS_LOG_DETAILS(info_log_, ROCKS_LOG_DETAILS(info_log_,
"%lu Evicting %" PRIu64 ",%" PRIu64 " with max %" PRIu64 "%lu Evicting %" PRIu64 ",%" PRIu64 " with max %" PRIu64
" => %lu", " => %lu",

@ -513,6 +513,7 @@ class WritePreparedTxnDB : public PessimisticTransactionDB {
friend class WriteUnpreparedTxn; friend class WriteUnpreparedTxn;
friend class WriteUnpreparedTxnDB; friend class WriteUnpreparedTxnDB;
friend class WriteUnpreparedTransactionTest_RecoveryTest_Test; friend class WriteUnpreparedTransactionTest_RecoveryTest_Test;
friend class MultiOpsTxnsStressTest;
void Init(const TransactionDBOptions& txn_db_opts); void Init(const TransactionDBOptions& txn_db_opts);

Loading…
Cancel
Save