@ -37,6 +37,9 @@ INSTANTIATE_TEST_CASE_P(
std : : make_tuple ( false , true , WRITE_UNPREPARED ) ) ) ;
TEST_P ( WriteUnpreparedTransactionTest , ReadYourOwnWrite ) {
// The following tests checks whether reading your own write for
// a transaction works for write unprepared, when there are uncommitted
// values written into DB.
auto verify_state = [ ] ( Iterator * iter , const std : : string & key ,
const std : : string & value ) {
ASSERT_TRUE ( iter - > Valid ( ) ) ;
@ -45,155 +48,251 @@ TEST_P(WriteUnpreparedTransactionTest, ReadYourOwnWrite) {
ASSERT_EQ ( value , iter - > value ( ) . ToString ( ) ) ;
} ;
options . disable_auto_compactions = true ;
ReOpen ( ) ;
// The following tests checks whether reading your own write for
// a transaction works for write unprepared, when there are uncommitted
// values written into DB.
//
// Although the values written by DB::Put are technically committed, we add
// their seq num to unprep_seqs_ to pretend that they were written into DB
// as part of an unprepared batch, and then check if they are visible to the
// transaction.
auto snapshot0 = db - > GetSnapshot ( ) ;
ASSERT_OK ( db - > Put ( WriteOptions ( ) , " a " , " v1 " ) ) ;
ASSERT_OK ( db - > Put ( WriteOptions ( ) , " b " , " v2 " ) ) ;
auto snapshot2 = db - > GetSnapshot ( ) ;
ASSERT_OK ( db - > Put ( WriteOptions ( ) , " a " , " v3 " ) ) ;
ASSERT_OK ( db - > Put ( WriteOptions ( ) , " b " , " v4 " ) ) ;
auto snapshot4 = db - > GetSnapshot ( ) ;
ASSERT_OK ( db - > Put ( WriteOptions ( ) , " a " , " v5 " ) ) ;
ASSERT_OK ( db - > Put ( WriteOptions ( ) , " b " , " v6 " ) ) ;
auto snapshot6 = db - > GetSnapshot ( ) ;
ASSERT_OK ( db - > Put ( WriteOptions ( ) , " a " , " v7 " ) ) ;
ASSERT_OK ( db - > Put ( WriteOptions ( ) , " b " , " v8 " ) ) ;
auto snapshot8 = db - > GetSnapshot ( ) ;
TransactionOptions txn_options ;
WriteOptions write_options ;
Transaction * txn = db - > BeginTransaction ( write_options , txn_options ) ;
WriteUnpreparedTxn * wup_txn = dynamic_cast < WriteUnpreparedTxn * > ( txn ) ;
ReadOptions roptions ;
roptions . snapshot = snapshot0 ;
wup_txn - > unprep_seqs_ [ snapshot2 - > GetSequenceNumber ( ) + 1 ] =
snapshot4 - > GetSequenceNumber ( ) - snapshot2 - > GetSequenceNumber ( ) ;
auto iter = txn - > GetIterator ( roptions ) ;
// Test always reseeking vs never reseeking.
for ( uint64_t max_skip : { 0 , std : : numeric_limits < int > : : max ( ) } ) {
options . max_sequential_skip_in_iterations = max_skip ;
options . disable_auto_compactions = true ;
ReOpen ( ) ;
// Test Get().
std : : string value ;
TransactionOptions txn_options ;
WriteOptions woptions ;
ReadOptions roptions ;
ASSERT_OK ( txn - > Get ( r options, Slice ( " a " ) , & value ) ) ;
ASSERT_EQ ( value , " v3 " ) ;
ASSERT_OK ( db - > Put ( woptions , " a " , " " ) ) ;
ASSERT_OK ( db - > Put ( woptions , " b " , " " ) ) ;
ASSERT_OK ( txn - > Get ( roptions , Slice ( " b " ) , & value ) ) ;
ASSERT_EQ ( value , " v4 " ) ;
Transaction * txn = db - > BeginTransaction ( woptions , txn_options ) ;
WriteUnpreparedTxn * wup_txn = dynamic_cast < WriteUnpreparedTxn * > ( txn ) ;
txn - > SetSnapshot ( ) ;
wup_txn - > unprep_seqs_ [ snapshot6 - > GetSequenceNumber ( ) + 1 ] =
snapshot8 - > GetSequenceNumber ( ) - snapshot6 - > GetSequenceNumber ( ) ;
delete iter ;
iter = txn - > GetIterator ( roptions ) ;
for ( int i = 0 ; i < 5 ; i + + ) {
std : : string stored_value = " v " + ToString ( i ) ;
ASSERT_OK ( txn - > Put ( " a " , stored_value ) ) ;
ASSERT_OK ( txn - > Put ( " b " , stored_value ) ) ;
wup_txn - > FlushWriteBatchToDB ( false ) ;
ASSERT_OK ( txn - > Get ( roptions , Slice ( " a " ) , & value ) ) ;
ASSERT_EQ ( value , " v7 " ) ;
// Test Get()
std : : string value ;
ASSERT_OK ( txn - > Get ( roptions , " a " , & value ) ) ;
ASSERT_EQ ( value , stored_value ) ;
ASSERT_OK ( txn - > Get ( roptions , " b " , & value ) ) ;
ASSERT_EQ ( value , stored_value ) ;
ASSERT_OK ( txn - > Get ( roptions , Slice ( " b " ) , & value ) ) ;
ASSERT_EQ ( value , " v8 " ) ;
// Test Next()
auto iter = txn - > GetIterator ( roptions ) ;
iter - > Seek ( " a " ) ;
verify_state ( iter , " a " , stored_value ) ;
wup_txn - > unprep_seqs_ . clear ( ) ;
iter - > Next ( ) ;
verify_state ( iter , " b " , stored_value ) ;
// Test Next().
wup_txn - > unprep_seqs_ [ snapshot2 - > GetSequenceNumber ( ) + 1 ] =
snapshot4 - > GetSequenceNumber ( ) - snapshot2 - > GetSequenceNumber ( ) ;
delete iter ;
iter = txn - > GetIterator ( roptions ) ;
iter - > SeekToFirst ( ) ;
verify_state ( iter , " a " , stored_value ) ;
iter - > Seek ( " a " ) ;
verify_state ( iter , " a " , " v3 " ) ;
iter - > Next ( ) ;
verify_state ( iter , " b " , stored_value ) ;
iter - > Next ( ) ;
verify_state ( iter , " b " , " v4 " ) ;
delete iter ;
iter - > SeekToFirst ( ) ;
verify_state ( iter , " a " , " v3 " ) ;
// Test Prev()
iter = txn - > GetIterator ( roptions ) ;
iter - > SeekForPrev ( " b " ) ;
verify_state ( iter , " b " , stored_value ) ;
iter - > Next ( ) ;
verify_state ( iter , " b " , " v4 " ) ;
iter - > Prev ( ) ;
verify_state ( iter , " a " , stored_value ) ;
wup_txn - > unprep_seqs_ [ snapshot6 - > GetSequenceNumber ( ) + 1 ] =
snapshot8 - > GetSequenceNumber ( ) - snapshot6 - > GetSequenceNumber ( ) ;
delete iter ;
iter = txn - > GetIterator ( roptions ) ;
iter - > SeekToLast ( ) ;
verify_state ( iter , " b " , stored_value ) ;
iter - > Seek ( " a " ) ;
verify_state ( iter , " a " , " v7 " ) ;
iter - > Prev ( ) ;
verify_state ( iter , " a " , stored_value ) ;
iter - > Next ( ) ;
verify_state ( iter , " b " , " v8 " ) ;
iter - > SeekToFirst ( ) ;
verify_state ( iter , " a " , " v7 " ) ;
iter - > Next ( ) ;
verify_state ( iter , " b " , " v8 " ) ;
wup_txn - > unprep_seqs_ . clear ( ) ;
// Test Prev(). For Prev(), we need to adjust the snapshot to match what is
// possible in WriteUnpreparedTxn.
//
// Because of row locks and ValidateSnapshot, there cannot be any committed
// entries after snapshot, but before the first prepared key.
roptions . snapshot = snapshot2 ;
wup_txn - > unprep_seqs_ [ snapshot2 - > GetSequenceNumber ( ) + 1 ] =
snapshot4 - > GetSequenceNumber ( ) - snapshot2 - > GetSequenceNumber ( ) ;
delete iter ;
iter = txn - > GetIterator ( roptions ) ;
delete iter ;
}
iter - > SeekForPrev ( " b " ) ;
verify_state ( iter , " b " , " v4 " ) ;
delete txn ;
}
}
iter - > Prev ( ) ;
verify_state ( iter , " a " , " v3 " ) ;
TEST_P ( WriteUnpreparedTransactionTest , ReadYourOwnWriteStress ) {
// This is a stress test where different threads are writing random keys, and
// then before committing or aborting the transaction, it validates to see
// that it can read the keys it wrote, and the keys it did not write respect
// the snapshot. To avoid row lock contention (and simply stressing the
// locking system), each thread is mostly only writing to its own set of keys.
const uint32_t kNumIter = 1000 ;
const uint32_t kNumThreads = 10 ;
const uint32_t kNumKeys = 5 ;
std : : default_random_engine rand ( static_cast < uint32_t > (
std : : hash < std : : thread : : id > ( ) ( std : : this_thread : : get_id ( ) ) ) ) ;
enum Action { NO_SNAPSHOT , RO_SNAPSHOT , REFRESH_SNAPSHOT } ;
// Test with
// 1. no snapshots set
// 2. snapshot set on ReadOptions
// 3. snapshot set, and refreshing after every write.
for ( Action a : { NO_SNAPSHOT , RO_SNAPSHOT , REFRESH_SNAPSHOT } ) {
WriteOptions write_options ;
txn_db_options . transaction_lock_timeout = - 1 ;
options . disable_auto_compactions = true ;
ReOpen ( ) ;
std : : vector < std : : string > keys ;
for ( uint32_t k = 0 ; k < kNumKeys * kNumThreads ; k + + ) {
keys . push_back ( " k " + ToString ( k ) ) ;
}
std : : shuffle ( keys . begin ( ) , keys . end ( ) , rand ) ;
// This counter will act as a "sequence number" to help us validate
// visibility logic with snapshots. If we had direct access to the seqno of
// snapshots and key/values, then we should directly compare those instead.
std : : atomic < int64_t > counter ( 0 ) ;
std : : function < void ( uint32_t ) > stress_thread = [ & ] ( int id ) {
size_t tid = std : : hash < std : : thread : : id > ( ) ( std : : this_thread : : get_id ( ) ) ;
Random64 rnd ( static_cast < uint32_t > ( tid ) ) ;
Transaction * txn ;
TransactionOptions txn_options ;
// batch_size of 1 causes writes to DB for every marker.
txn_options . max_write_batch_size = 1 ;
ReadOptions read_options ;
for ( uint32_t i = 0 ; i < kNumIter ; i + + ) {
std : : set < std : : string > owned_keys ( & keys [ id * kNumKeys ] ,
& keys [ ( id + 1 ) * kNumKeys ] ) ;
// Add unowned keys to make the workload more interesting, but this
// increases row lock contention, so just do it sometimes.
if ( rnd . OneIn ( 2 ) ) {
owned_keys . insert ( keys [ rnd . Uniform ( kNumKeys * kNumThreads ) ] ) ;
}
iter - > SeekToLast ( ) ;
verify_state ( iter , " b " , " v4 " ) ;
txn = db - > BeginTransaction ( write_options , txn_options ) ;
txn - > SetName ( ToString ( id ) ) ;
txn - > SetSnapshot ( ) ;
if ( a > = RO_SNAPSHOT ) {
read_options . snapshot = txn - > GetSnapshot ( ) ;
ASSERT_TRUE ( read_options . snapshot ! = nullptr ) ;
}
iter - > Prev ( ) ;
verify_state ( iter , " a " , " v3 " ) ;
uint64_t buf [ 2 ] ;
buf [ 0 ] = id ;
roptions . snapshot = snapshot6 ;
wup_txn - > unprep_seqs_ [ snapshot6 - > GetSequenceNumber ( ) + 1 ] =
snapshot8 - > GetSequenceNumber ( ) - snapshot6 - > GetSequenceNumber ( ) ;
delete iter ;
iter = txn - > GetIterator ( roptions ) ;
// When scanning through the database, make sure that all unprepared
// keys have value >= snapshot and all other keys have value < snapshot.
int64_t snapshot_num = counter . fetch_add ( 1 ) ;
iter - > SeekForPrev ( " b " ) ;
verify_state ( iter , " b " , " v8 " ) ;
Status s ;
for ( const auto & key : owned_keys ) {
buf [ 1 ] = counter . fetch_add ( 1 ) ;
s = txn - > Put ( key , Slice ( ( const char * ) buf , sizeof ( buf ) ) ) ;
if ( ! s . ok ( ) ) {
break ;
}
if ( a = = REFRESH_SNAPSHOT ) {
txn - > SetSnapshot ( ) ;
read_options . snapshot = txn - > GetSnapshot ( ) ;
snapshot_num = counter . fetch_add ( 1 ) ;
}
}
iter - > Prev ( ) ;
verify_state ( iter , " a " , " v7 " ) ;
// Failure is possible due to snapshot validation. In this case,
// rollback and move onto next iteration.
if ( ! s . ok ( ) ) {
ASSERT_TRUE ( s . IsBusy ( ) ) ;
ASSERT_OK ( txn - > Rollback ( ) ) ;
delete txn ;
continue ;
}
iter - > SeekToLast ( ) ;
verify_state ( iter , " b " , " v8 " ) ;
auto verify_key = [ & owned_keys , & a , & id , & snapshot_num ] (
const std : : string & key ,
const std : : string & value ) {
if ( owned_keys . count ( key ) > 0 ) {
ASSERT_EQ ( value . size ( ) , 16 ) ;
// Since this key is part of owned_keys, then this key must be
// unprepared by this transaction identified by 'id'
ASSERT_EQ ( ( ( int64_t * ) value . c_str ( ) ) [ 0 ] , id ) ;
if ( a = = REFRESH_SNAPSHOT ) {
// If refresh snapshot is true, then the snapshot is refreshed
// after every Put(), meaning that the current snapshot in
// snapshot_num must be greater than the "seqno" of any keys
// written by the current transaction.
ASSERT_LT ( ( ( int64_t * ) value . c_str ( ) ) [ 1 ] , snapshot_num ) ;
} else {
// If refresh snapshot is not on, then the snapshot was taken at
// the beginning of the transaction, meaning all writes must come
// after snapshot_num
ASSERT_GT ( ( ( int64_t * ) value . c_str ( ) ) [ 1 ] , snapshot_num ) ;
}
} else if ( a > = RO_SNAPSHOT ) {
// If this is not an unprepared key, just assert that the key
// "seqno" is smaller than the snapshot seqno.
ASSERT_EQ ( value . size ( ) , 16 ) ;
ASSERT_LT ( ( ( int64_t * ) value . c_str ( ) ) [ 1 ] , snapshot_num ) ;
}
} ;
// Validate Get()/Next()/Prev(). Do only one of them to save time, and
// reduce lock contention.
switch ( rnd . Uniform ( 3 ) ) {
case 0 : // Validate Get()
{
for ( const auto & key : keys ) {
std : : string value ;
s = txn - > Get ( read_options , Slice ( key ) , & value ) ;
if ( ! s . ok ( ) ) {
ASSERT_TRUE ( s . IsNotFound ( ) ) ;
ASSERT_EQ ( owned_keys . count ( key ) , 0 ) ;
} else {
verify_key ( key , value ) ;
}
}
break ;
}
case 1 : // Validate Next()
{
Iterator * iter = txn - > GetIterator ( read_options ) ;
for ( iter - > SeekToFirst ( ) ; iter - > Valid ( ) ; iter - > Next ( ) ) {
verify_key ( iter - > key ( ) . ToString ( ) , iter - > value ( ) . ToString ( ) ) ;
}
delete iter ;
break ;
}
case 2 : // Validate Prev()
{
Iterator * iter = txn - > GetIterator ( read_options ) ;
for ( iter - > SeekToLast ( ) ; iter - > Valid ( ) ; iter - > Prev ( ) ) {
verify_key ( iter - > key ( ) . ToString ( ) , iter - > value ( ) . ToString ( ) ) ;
}
delete iter ;
break ;
}
default :
ASSERT_TRUE ( false ) ;
}
iter - > Prev ( ) ;
verify_state ( iter , " a " , " v7 " ) ;
if ( rnd . OneIn ( 2 ) ) {
ASSERT_OK ( txn - > Commit ( ) ) ;
} else {
ASSERT_OK ( txn - > Rollback ( ) ) ;
}
delete txn ;
}
} ;
// Since the unprep_seqs_ data were faked for testing, we do not want the
// destructor for the transaction to be rolling back data that did not
// exist.
wup_txn - > unprep_seqs_ . clear ( ) ;
std : : vector < port : : Thread > threads ;
for ( uint32_t i = 0 ; i < kNumThreads ; i + + ) {
threads . emplace_back ( stress_thread , i ) ;
}
db - > ReleaseSnapshot ( snapshot0 ) ;
db - > ReleaseSnapshot ( snapshot2 ) ;
db - > ReleaseSnapshot ( snapshot4 ) ;
db - > ReleaseSnapshot ( snapshot6 ) ;
db - > ReleaseSnapshot ( snapshot8 ) ;
delete iter ;
delete txn ;
for ( auto & t : threads ) {
t . join ( ) ;
}
}
}
// This tests how write unprepared behaves during recovery when the DB crashes