fork of https://github.com/oxigraph/rocksdb and https://github.com/facebook/rocksdb for nextgraph and oxigraph
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
548 lines
18 KiB
548 lines
18 KiB
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
#include "utilities/transactions/transaction_test.h"
|
|
#include "utilities/transactions/write_unprepared_txn.h"
|
|
#include "utilities/transactions/write_unprepared_txn_db.h"
|
|
|
|
namespace rocksdb {
|
|
|
|
class WriteUnpreparedTransactionTestBase : public TransactionTestBase {
|
|
public:
|
|
WriteUnpreparedTransactionTestBase(bool use_stackable_db,
|
|
bool two_write_queue,
|
|
TxnDBWritePolicy write_policy)
|
|
: TransactionTestBase(use_stackable_db, two_write_queue, write_policy,
|
|
kOrderedWrite) {}
|
|
};
|
|
|
|
class WriteUnpreparedTransactionTest
|
|
: public WriteUnpreparedTransactionTestBase,
|
|
virtual public ::testing::WithParamInterface<
|
|
std::tuple<bool, bool, TxnDBWritePolicy>> {
|
|
public:
|
|
WriteUnpreparedTransactionTest()
|
|
: WriteUnpreparedTransactionTestBase(std::get<0>(GetParam()),
|
|
std::get<1>(GetParam()),
|
|
std::get<2>(GetParam())){}
|
|
};
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
WriteUnpreparedTransactionTest, WriteUnpreparedTransactionTest,
|
|
::testing::Values(std::make_tuple(false, false, WRITE_UNPREPARED),
|
|
std::make_tuple(false, true, WRITE_UNPREPARED)));
|
|
|
|
TEST_P(WriteUnpreparedTransactionTest, ReadYourOwnWrite) {
|
|
// The following tests checks whether reading your own write for
|
|
// a transaction works for write unprepared, when there are uncommitted
|
|
// values written into DB.
|
|
auto verify_state = [](Iterator* iter, const std::string& key,
|
|
const std::string& value) {
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_OK(iter->status());
|
|
ASSERT_EQ(key, iter->key().ToString());
|
|
ASSERT_EQ(value, iter->value().ToString());
|
|
};
|
|
|
|
// Test always reseeking vs never reseeking.
|
|
for (uint64_t max_skip : {0, std::numeric_limits<int>::max()}) {
|
|
options.max_sequential_skip_in_iterations = max_skip;
|
|
options.disable_auto_compactions = true;
|
|
ReOpen();
|
|
|
|
TransactionOptions txn_options;
|
|
WriteOptions woptions;
|
|
ReadOptions roptions;
|
|
|
|
ASSERT_OK(db->Put(woptions, "a", ""));
|
|
ASSERT_OK(db->Put(woptions, "b", ""));
|
|
|
|
Transaction* txn = db->BeginTransaction(woptions, txn_options);
|
|
WriteUnpreparedTxn* wup_txn = dynamic_cast<WriteUnpreparedTxn*>(txn);
|
|
txn->SetSnapshot();
|
|
|
|
for (int i = 0; i < 5; i++) {
|
|
std::string stored_value = "v" + ToString(i);
|
|
ASSERT_OK(txn->Put("a", stored_value));
|
|
ASSERT_OK(txn->Put("b", stored_value));
|
|
wup_txn->FlushWriteBatchToDB(false);
|
|
|
|
// Test Get()
|
|
std::string value;
|
|
ASSERT_OK(txn->Get(roptions, "a", &value));
|
|
ASSERT_EQ(value, stored_value);
|
|
ASSERT_OK(txn->Get(roptions, "b", &value));
|
|
ASSERT_EQ(value, stored_value);
|
|
|
|
// Test Next()
|
|
auto iter = txn->GetIterator(roptions);
|
|
iter->Seek("a");
|
|
verify_state(iter, "a", stored_value);
|
|
|
|
iter->Next();
|
|
verify_state(iter, "b", stored_value);
|
|
|
|
iter->SeekToFirst();
|
|
verify_state(iter, "a", stored_value);
|
|
|
|
iter->Next();
|
|
verify_state(iter, "b", stored_value);
|
|
|
|
delete iter;
|
|
|
|
// Test Prev()
|
|
iter = txn->GetIterator(roptions);
|
|
iter->SeekForPrev("b");
|
|
verify_state(iter, "b", stored_value);
|
|
|
|
iter->Prev();
|
|
verify_state(iter, "a", stored_value);
|
|
|
|
iter->SeekToLast();
|
|
verify_state(iter, "b", stored_value);
|
|
|
|
iter->Prev();
|
|
verify_state(iter, "a", stored_value);
|
|
|
|
delete iter;
|
|
}
|
|
|
|
delete txn;
|
|
}
|
|
}
|
|
|
|
#ifndef ROCKSDB_VALGRIND_RUN
|
|
TEST_P(WriteUnpreparedTransactionTest, ReadYourOwnWriteStress) {
|
|
// This is a stress test where different threads are writing random keys, and
|
|
// then before committing or aborting the transaction, it validates to see
|
|
// that it can read the keys it wrote, and the keys it did not write respect
|
|
// the snapshot. To avoid row lock contention (and simply stressing the
|
|
// locking system), each thread is mostly only writing to its own set of keys.
|
|
const uint32_t kNumIter = 1000;
|
|
const uint32_t kNumThreads = 10;
|
|
const uint32_t kNumKeys = 5;
|
|
|
|
std::default_random_engine rand(static_cast<uint32_t>(
|
|
std::hash<std::thread::id>()(std::this_thread::get_id())));
|
|
|
|
enum Action { NO_SNAPSHOT, RO_SNAPSHOT, REFRESH_SNAPSHOT };
|
|
// Test with
|
|
// 1. no snapshots set
|
|
// 2. snapshot set on ReadOptions
|
|
// 3. snapshot set, and refreshing after every write.
|
|
for (Action a : {NO_SNAPSHOT, RO_SNAPSHOT, REFRESH_SNAPSHOT}) {
|
|
WriteOptions write_options;
|
|
txn_db_options.transaction_lock_timeout = -1;
|
|
options.disable_auto_compactions = true;
|
|
ReOpen();
|
|
|
|
std::vector<std::string> keys;
|
|
for (uint32_t k = 0; k < kNumKeys * kNumThreads; k++) {
|
|
keys.push_back("k" + ToString(k));
|
|
}
|
|
std::shuffle(keys.begin(), keys.end(), rand);
|
|
|
|
// This counter will act as a "sequence number" to help us validate
|
|
// visibility logic with snapshots. If we had direct access to the seqno of
|
|
// snapshots and key/values, then we should directly compare those instead.
|
|
std::atomic<int64_t> counter(0);
|
|
|
|
std::function<void(uint32_t)> stress_thread = [&](int id) {
|
|
size_t tid = std::hash<std::thread::id>()(std::this_thread::get_id());
|
|
Random64 rnd(static_cast<uint32_t>(tid));
|
|
|
|
Transaction* txn;
|
|
TransactionOptions txn_options;
|
|
// batch_size of 1 causes writes to DB for every marker.
|
|
txn_options.write_batch_flush_threshold = 1;
|
|
ReadOptions read_options;
|
|
|
|
for (uint32_t i = 0; i < kNumIter; i++) {
|
|
std::set<std::string> owned_keys(&keys[id * kNumKeys],
|
|
&keys[(id + 1) * kNumKeys]);
|
|
// Add unowned keys to make the workload more interesting, but this
|
|
// increases row lock contention, so just do it sometimes.
|
|
if (rnd.OneIn(2)) {
|
|
owned_keys.insert(keys[rnd.Uniform(kNumKeys * kNumThreads)]);
|
|
}
|
|
|
|
txn = db->BeginTransaction(write_options, txn_options);
|
|
txn->SetName(ToString(id));
|
|
txn->SetSnapshot();
|
|
if (a >= RO_SNAPSHOT) {
|
|
read_options.snapshot = txn->GetSnapshot();
|
|
ASSERT_TRUE(read_options.snapshot != nullptr);
|
|
}
|
|
|
|
uint64_t buf[2];
|
|
buf[0] = id;
|
|
|
|
// When scanning through the database, make sure that all unprepared
|
|
// keys have value >= snapshot and all other keys have value < snapshot.
|
|
int64_t snapshot_num = counter.fetch_add(1);
|
|
|
|
Status s;
|
|
for (const auto& key : owned_keys) {
|
|
buf[1] = counter.fetch_add(1);
|
|
s = txn->Put(key, Slice((const char*)buf, sizeof(buf)));
|
|
if (!s.ok()) {
|
|
break;
|
|
}
|
|
if (a == REFRESH_SNAPSHOT) {
|
|
txn->SetSnapshot();
|
|
read_options.snapshot = txn->GetSnapshot();
|
|
snapshot_num = counter.fetch_add(1);
|
|
}
|
|
}
|
|
|
|
// Failure is possible due to snapshot validation. In this case,
|
|
// rollback and move onto next iteration.
|
|
if (!s.ok()) {
|
|
ASSERT_TRUE(s.IsBusy());
|
|
ASSERT_OK(txn->Rollback());
|
|
delete txn;
|
|
continue;
|
|
}
|
|
|
|
auto verify_key = [&owned_keys, &a, &id, &snapshot_num](
|
|
const std::string& key,
|
|
const std::string& value) {
|
|
if (owned_keys.count(key) > 0) {
|
|
ASSERT_EQ(value.size(), 16);
|
|
|
|
// Since this key is part of owned_keys, then this key must be
|
|
// unprepared by this transaction identified by 'id'
|
|
ASSERT_EQ(((int64_t*)value.c_str())[0], id);
|
|
if (a == REFRESH_SNAPSHOT) {
|
|
// If refresh snapshot is true, then the snapshot is refreshed
|
|
// after every Put(), meaning that the current snapshot in
|
|
// snapshot_num must be greater than the "seqno" of any keys
|
|
// written by the current transaction.
|
|
ASSERT_LT(((int64_t*)value.c_str())[1], snapshot_num);
|
|
} else {
|
|
// If refresh snapshot is not on, then the snapshot was taken at
|
|
// the beginning of the transaction, meaning all writes must come
|
|
// after snapshot_num
|
|
ASSERT_GT(((int64_t*)value.c_str())[1], snapshot_num);
|
|
}
|
|
} else if (a >= RO_SNAPSHOT) {
|
|
// If this is not an unprepared key, just assert that the key
|
|
// "seqno" is smaller than the snapshot seqno.
|
|
ASSERT_EQ(value.size(), 16);
|
|
ASSERT_LT(((int64_t*)value.c_str())[1], snapshot_num);
|
|
}
|
|
};
|
|
|
|
// Validate Get()/Next()/Prev(). Do only one of them to save time, and
|
|
// reduce lock contention.
|
|
switch (rnd.Uniform(3)) {
|
|
case 0: // Validate Get()
|
|
{
|
|
for (const auto& key : keys) {
|
|
std::string value;
|
|
s = txn->Get(read_options, Slice(key), &value);
|
|
if (!s.ok()) {
|
|
ASSERT_TRUE(s.IsNotFound());
|
|
ASSERT_EQ(owned_keys.count(key), 0);
|
|
} else {
|
|
verify_key(key, value);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case 1: // Validate Next()
|
|
{
|
|
Iterator* iter = txn->GetIterator(read_options);
|
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
verify_key(iter->key().ToString(), iter->value().ToString());
|
|
}
|
|
delete iter;
|
|
break;
|
|
}
|
|
case 2: // Validate Prev()
|
|
{
|
|
Iterator* iter = txn->GetIterator(read_options);
|
|
for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
|
|
verify_key(iter->key().ToString(), iter->value().ToString());
|
|
}
|
|
delete iter;
|
|
break;
|
|
}
|
|
default:
|
|
ASSERT_TRUE(false);
|
|
}
|
|
|
|
if (rnd.OneIn(2)) {
|
|
ASSERT_OK(txn->Commit());
|
|
} else {
|
|
ASSERT_OK(txn->Rollback());
|
|
}
|
|
delete txn;
|
|
}
|
|
};
|
|
|
|
std::vector<port::Thread> threads;
|
|
for (uint32_t i = 0; i < kNumThreads; i++) {
|
|
threads.emplace_back(stress_thread, i);
|
|
}
|
|
|
|
for (auto& t : threads) {
|
|
t.join();
|
|
}
|
|
}
|
|
}
|
|
#endif // ROCKSDB_VALGRIND_RUN
|
|
|
|
// This tests how write unprepared behaves during recovery when the DB crashes
|
|
// after a transaction has either been unprepared or prepared, and tests if
|
|
// the changes are correctly applied for prepared transactions if we decide to
|
|
// rollback/commit.
|
|
TEST_P(WriteUnpreparedTransactionTest, RecoveryTest) {
|
|
WriteOptions write_options;
|
|
write_options.disableWAL = false;
|
|
TransactionOptions txn_options;
|
|
std::vector<Transaction*> prepared_trans;
|
|
WriteUnpreparedTxnDB* wup_db;
|
|
options.disable_auto_compactions = true;
|
|
|
|
enum Action { UNPREPARED, ROLLBACK, COMMIT };
|
|
|
|
// batch_size of 1 causes writes to DB for every marker.
|
|
for (size_t batch_size : {1, 1000000}) {
|
|
txn_options.write_batch_flush_threshold = batch_size;
|
|
for (bool empty : {true, false}) {
|
|
for (Action a : {UNPREPARED, ROLLBACK, COMMIT}) {
|
|
for (int num_batches = 1; num_batches < 10; num_batches++) {
|
|
// Reset database.
|
|
prepared_trans.clear();
|
|
ReOpen();
|
|
wup_db = dynamic_cast<WriteUnpreparedTxnDB*>(db);
|
|
if (!empty) {
|
|
for (int i = 0; i < num_batches; i++) {
|
|
ASSERT_OK(db->Put(WriteOptions(), "k" + ToString(i),
|
|
"before value" + ToString(i)));
|
|
}
|
|
}
|
|
|
|
// Write num_batches unprepared batches.
|
|
Transaction* txn = db->BeginTransaction(write_options, txn_options);
|
|
WriteUnpreparedTxn* wup_txn = dynamic_cast<WriteUnpreparedTxn*>(txn);
|
|
txn->SetName("xid");
|
|
for (int i = 0; i < num_batches; i++) {
|
|
ASSERT_OK(txn->Put("k" + ToString(i), "value" + ToString(i)));
|
|
if (txn_options.write_batch_flush_threshold == 1) {
|
|
ASSERT_EQ(wup_txn->GetUnpreparedSequenceNumbers().size(), i + 1);
|
|
} else {
|
|
ASSERT_EQ(wup_txn->GetUnpreparedSequenceNumbers().size(), 0);
|
|
}
|
|
}
|
|
if (a == UNPREPARED) {
|
|
// This is done to prevent the destructor from rolling back the
|
|
// transaction for us, since we want to pretend we crashed and
|
|
// test that recovery does the rollback.
|
|
wup_txn->unprep_seqs_.clear();
|
|
} else {
|
|
txn->Prepare();
|
|
}
|
|
delete txn;
|
|
|
|
// Crash and run recovery code paths.
|
|
wup_db->db_impl_->FlushWAL(true);
|
|
wup_db->TEST_Crash();
|
|
ReOpenNoDelete();
|
|
assert(db != nullptr);
|
|
|
|
db->GetAllPreparedTransactions(&prepared_trans);
|
|
ASSERT_EQ(prepared_trans.size(), a == UNPREPARED ? 0 : 1);
|
|
if (a == ROLLBACK) {
|
|
ASSERT_OK(prepared_trans[0]->Rollback());
|
|
delete prepared_trans[0];
|
|
} else if (a == COMMIT) {
|
|
ASSERT_OK(prepared_trans[0]->Commit());
|
|
delete prepared_trans[0];
|
|
}
|
|
|
|
Iterator* iter = db->NewIterator(ReadOptions());
|
|
iter->SeekToFirst();
|
|
// Check that DB has before values.
|
|
if (!empty || a == COMMIT) {
|
|
for (int i = 0; i < num_batches; i++) {
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "k" + ToString(i));
|
|
if (a == COMMIT) {
|
|
ASSERT_EQ(iter->value().ToString(), "value" + ToString(i));
|
|
} else {
|
|
ASSERT_EQ(iter->value().ToString(),
|
|
"before value" + ToString(i));
|
|
}
|
|
iter->Next();
|
|
}
|
|
}
|
|
ASSERT_FALSE(iter->Valid());
|
|
delete iter;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Basic test to see that unprepared batch gets written to DB when batch size
|
|
// is exceeded. It also does some basic checks to see if commit/rollback works
|
|
// as expected for write unprepared.
|
|
TEST_P(WriteUnpreparedTransactionTest, UnpreparedBatch) {
|
|
WriteOptions write_options;
|
|
TransactionOptions txn_options;
|
|
const int kNumKeys = 10;
|
|
|
|
// batch_size of 1 causes writes to DB for every marker.
|
|
for (size_t batch_size : {1, 1000000}) {
|
|
txn_options.write_batch_flush_threshold = batch_size;
|
|
for (bool prepare : {false, true}) {
|
|
for (bool commit : {false, true}) {
|
|
ReOpen();
|
|
Transaction* txn = db->BeginTransaction(write_options, txn_options);
|
|
WriteUnpreparedTxn* wup_txn = dynamic_cast<WriteUnpreparedTxn*>(txn);
|
|
txn->SetName("xid");
|
|
|
|
for (int i = 0; i < kNumKeys; i++) {
|
|
txn->Put("k" + ToString(i), "v" + ToString(i));
|
|
if (txn_options.write_batch_flush_threshold == 1) {
|
|
ASSERT_EQ(wup_txn->GetUnpreparedSequenceNumbers().size(), i + 1);
|
|
} else {
|
|
ASSERT_EQ(wup_txn->GetUnpreparedSequenceNumbers().size(), 0);
|
|
}
|
|
}
|
|
|
|
if (prepare) {
|
|
ASSERT_OK(txn->Prepare());
|
|
}
|
|
|
|
Iterator* iter = db->NewIterator(ReadOptions());
|
|
iter->SeekToFirst();
|
|
assert(!iter->Valid());
|
|
ASSERT_FALSE(iter->Valid());
|
|
delete iter;
|
|
|
|
if (commit) {
|
|
ASSERT_OK(txn->Commit());
|
|
} else {
|
|
ASSERT_OK(txn->Rollback());
|
|
}
|
|
delete txn;
|
|
|
|
iter = db->NewIterator(ReadOptions());
|
|
iter->SeekToFirst();
|
|
|
|
for (int i = 0; i < (commit ? kNumKeys : 0); i++) {
|
|
ASSERT_TRUE(iter->Valid());
|
|
ASSERT_EQ(iter->key().ToString(), "k" + ToString(i));
|
|
ASSERT_EQ(iter->value().ToString(), "v" + ToString(i));
|
|
iter->Next();
|
|
}
|
|
ASSERT_FALSE(iter->Valid());
|
|
delete iter;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Test whether logs containing unprepared/prepared batches are kept even
|
|
// after memtable finishes flushing, and whether they are removed when
|
|
// transaction commits/aborts.
|
|
//
|
|
// TODO(lth): Merge with TransactionTest/TwoPhaseLogRollingTest tests.
|
|
TEST_P(WriteUnpreparedTransactionTest, MarkLogWithPrepSection) {
|
|
WriteOptions write_options;
|
|
TransactionOptions txn_options;
|
|
// batch_size of 1 causes writes to DB for every marker.
|
|
txn_options.write_batch_flush_threshold = 1;
|
|
const int kNumKeys = 10;
|
|
|
|
WriteOptions wopts;
|
|
wopts.sync = true;
|
|
|
|
for (bool prepare : {false, true}) {
|
|
for (bool commit : {false, true}) {
|
|
ReOpen();
|
|
auto wup_db = dynamic_cast<WriteUnpreparedTxnDB*>(db);
|
|
auto db_impl = wup_db->db_impl_;
|
|
|
|
Transaction* txn1 = db->BeginTransaction(write_options, txn_options);
|
|
ASSERT_OK(txn1->SetName("xid1"));
|
|
|
|
Transaction* txn2 = db->BeginTransaction(write_options, txn_options);
|
|
ASSERT_OK(txn2->SetName("xid2"));
|
|
|
|
// Spread this transaction across multiple log files.
|
|
for (int i = 0; i < kNumKeys; i++) {
|
|
ASSERT_OK(txn1->Put("k1" + ToString(i), "v" + ToString(i)));
|
|
if (i >= kNumKeys / 2) {
|
|
ASSERT_OK(txn2->Put("k2" + ToString(i), "v" + ToString(i)));
|
|
}
|
|
|
|
if (i > 0) {
|
|
db_impl->TEST_SwitchWAL();
|
|
}
|
|
}
|
|
|
|
ASSERT_GT(txn1->GetLogNumber(), 0);
|
|
ASSERT_GT(txn2->GetLogNumber(), 0);
|
|
|
|
ASSERT_EQ(db_impl->TEST_FindMinLogContainingOutstandingPrep(),
|
|
txn1->GetLogNumber());
|
|
ASSERT_GT(db_impl->TEST_LogfileNumber(), txn1->GetLogNumber());
|
|
|
|
if (prepare) {
|
|
ASSERT_OK(txn1->Prepare());
|
|
ASSERT_OK(txn2->Prepare());
|
|
}
|
|
|
|
ASSERT_GE(db_impl->TEST_LogfileNumber(), txn1->GetLogNumber());
|
|
ASSERT_GE(db_impl->TEST_LogfileNumber(), txn2->GetLogNumber());
|
|
|
|
ASSERT_EQ(db_impl->TEST_FindMinLogContainingOutstandingPrep(),
|
|
txn1->GetLogNumber());
|
|
if (commit) {
|
|
ASSERT_OK(txn1->Commit());
|
|
} else {
|
|
ASSERT_OK(txn1->Rollback());
|
|
}
|
|
|
|
ASSERT_EQ(db_impl->TEST_FindMinLogContainingOutstandingPrep(),
|
|
txn2->GetLogNumber());
|
|
|
|
if (commit) {
|
|
ASSERT_OK(txn2->Commit());
|
|
} else {
|
|
ASSERT_OK(txn2->Rollback());
|
|
}
|
|
|
|
ASSERT_EQ(db_impl->TEST_FindMinLogContainingOutstandingPrep(), 0);
|
|
|
|
delete txn1;
|
|
delete txn2;
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace rocksdb
|
|
|
|
int main(int argc, char** argv) {
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|
|
|
|
#else
|
|
#include <stdio.h>
|
|
|
|
int main(int /*argc*/, char** /*argv*/) {
|
|
fprintf(stderr,
|
|
"SKIPPED as Transactions are not supported in ROCKSDB_LITE\n");
|
|
return 0;
|
|
}
|
|
|
|
#endif // ROCKSDB_LITE
|
|
|