@ -22,7 +22,6 @@
# include "rocksdb/slice.h"
# include "rocksdb/utilities/transaction_db_mutex.h"
# include "util/autovector.h"
# include "util/murmurhash.h"
# include "util/sync_point.h"
# include "util/thread_local.h"
@ -31,15 +30,20 @@
namespace rocksdb {
struct LockInfo {
TransactionID txn_id ;
bool exclusive ;
autovector < TransactionID > txn_ids ;
// Transaction locks are not valid after this time in us
uint64_t expiration_time ;
LockInfo ( TransactionID id , uint64_t time )
: txn_id ( id ) , expiration_time ( time ) { }
LockInfo ( TransactionID id , uint64_t time , bool ex )
: exclusive ( ex ) , expiration_time ( time ) {
txn_ids . push_back ( id ) ;
}
LockInfo ( const LockInfo & lock_info )
: txn_id ( lock_info . txn_id ) , expiration_time ( lock_info . expiration_time ) { }
: exclusive ( lock_info . exclusive ) ,
txn_ids ( lock_info . txn_ids ) ,
expiration_time ( lock_info . expiration_time ) { }
} ;
struct LockMapStripe {
@ -192,7 +196,8 @@ std::shared_ptr<LockMap> TransactionLockMgr::GetLockMap(
// transaction.
// If false, sets *expire_time to the expiration time of the lock according
// to Env->GetMicros() or 0 if no expiration.
bool TransactionLockMgr : : IsLockExpired ( const LockInfo & lock_info , Env * env ,
bool TransactionLockMgr : : IsLockExpired ( TransactionID txn_id ,
const LockInfo & lock_info , Env * env ,
uint64_t * expire_time ) {
auto now = env - > NowMicros ( ) ;
@ -203,20 +208,27 @@ bool TransactionLockMgr::IsLockExpired(const LockInfo& lock_info, Env* env,
// return how many microseconds until lock will be expired
* expire_time = lock_info . expiration_time ;
} else {
bool success =
txn_db_impl_ - > TryStealingExpiredTransactionLocks ( lock_info . txn_id ) ;
for ( auto id : lock_info . txn_ids ) {
if ( txn_id = = id ) {
continue ;
}
bool success = txn_db_impl_ - > TryStealingExpiredTransactionLocks ( id ) ;
if ( ! success ) {
expired = false ;
break ;
}
* expire_time = 0 ;
}
}
return expired ;
}
Status TransactionLockMgr : : TryLock ( TransactionImpl * txn ,
uint32_t column_family_id ,
const std : : string & key , Env * env ) {
const std : : string & key , Env * env ,
bool exclusive ) {
// Lookup lock map for this column family id
std : : shared_ptr < LockMap > lock_map_ptr = GetLockMap ( column_family_id ) ;
LockMap * lock_map = lock_map_ptr . get ( ) ;
@ -233,7 +245,7 @@ Status TransactionLockMgr::TryLock(TransactionImpl* txn,
assert ( lock_map - > lock_map_stripes_ . size ( ) > stripe_num ) ;
LockMapStripe * stripe = lock_map - > lock_map_stripes_ . at ( stripe_num ) ;
LockInfo lock_info ( txn - > GetID ( ) , txn - > GetExpirationTime ( ) ) ;
LockInfo lock_info ( txn - > GetID ( ) , txn - > GetExpirationTime ( ) , exclusive ) ;
int64_t timeout = txn - > GetLockTimeout ( ) ;
return AcquireWithTimeout ( txn , lock_map , stripe , column_family_id , key , env ,
@ -268,9 +280,9 @@ Status TransactionLockMgr::AcquireWithTimeout(
// Acquire lock if we are able to
uint64_t expire_time_hint = 0 ;
TransactionID wait_id = 0 ;
autovector < TransactionID > wait_ids ;
result = AcquireLocked ( lock_map , stripe , key , env , lock_info ,
& expire_time_hint , & wait_id ) ;
& expire_time_hint , & wait_ids ) ;
if ( ! result . ok ( ) & & timeout ! = 0 ) {
// If we weren't able to acquire the lock, we will keep retrying as long
@ -289,19 +301,19 @@ Status TransactionLockMgr::AcquireWithTimeout(
cv_end_time = end_time ;
}
assert ( result . IsBusy ( ) | | wait_id ! = 0 ) ;
assert ( result . IsBusy ( ) | | wait_ids . size ( ) ! = 0 ) ;
// We are dependent on a transaction to finish, so perform deadlock
// detection.
if ( wait_id ! = 0 ) {
if ( wait_ids . size ( ) ! = 0 ) {
if ( txn - > IsDeadlockDetect ( ) ) {
if ( IncrementWaiters ( txn , wait_id ) ) {
if ( IncrementWaiters ( txn , wait_ids ) ) {
result = Status : : Busy ( Status : : SubCode : : kDeadlock ) ;
stripe - > stripe_mutex - > UnLock ( ) ;
return result ;
}
}
txn - > SetWaitingTxn ( wait_id , column_family_id , & key ) ;
txn - > SetWaitingTxn ( wait_ids , column_family_id , & key ) ;
}
TEST_SYNC_POINT ( " TransactionLockMgr::AcquireWithTimeout:WaitingTxn " ) ;
@ -316,10 +328,10 @@ Status TransactionLockMgr::AcquireWithTimeout(
}
}
if ( wait_id ! = 0 ) {
txn - > SetWaitingTxn ( 0 , 0 , nullptr ) ;
if ( wait_ids . size ( ) ! = 0 ) {
txn - > ClearWaitingTxn ( ) ;
if ( txn - > IsDeadlockDetect ( ) ) {
DecrementWaiters ( txn , wait_id ) ;
DecrementWaiters ( txn , wait_ids ) ;
}
}
@ -332,7 +344,7 @@ Status TransactionLockMgr::AcquireWithTimeout(
if ( result . ok ( ) | | result . IsTimedOut ( ) ) {
result = AcquireLocked ( lock_map , stripe , key , env , lock_info ,
& expire_time_hint , & wait_id ) ;
& expire_time_hint , & wait_ids ) ;
}
} while ( ! result . ok ( ) & & ! timed_out ) ;
}
@ -342,56 +354,77 @@ Status TransactionLockMgr::AcquireWithTimeout(
return result ;
}
void TransactionLockMgr : : DecrementWaiters ( const TransactionImpl * txn ,
TransactionID wait_id ) {
void TransactionLockMgr : : DecrementWaiters (
const TransactionImpl * txn , const autovector < TransactionID > & wait_ids ) {
std : : lock_guard < std : : mutex > lock ( wait_txn_map_mutex_ ) ;
DecrementWaitersImpl ( txn , wait_id ) ;
DecrementWaitersImpl ( txn , wait_ids ) ;
}
void TransactionLockMgr : : DecrementWaitersImpl ( const TransactionImpl * txn ,
TransactionID wait_id ) {
void TransactionLockMgr : : DecrementWaitersImpl (
const TransactionImpl * txn , const autovector < TransactionID > & wait_ids ) {
auto id = txn - > GetID ( ) ;
assert ( wait_txn_map_ . Contains ( id ) ) ;
wait_txn_map_ . Delete ( id ) ;
for ( auto wait_id : wait_ids ) {
rev_wait_txn_map_ . Get ( wait_id ) - - ;
if ( rev_wait_txn_map_ . Get ( wait_id ) = = 0 ) {
rev_wait_txn_map_ . Delete ( wait_id ) ;
}
}
}
bool TransactionLockMgr : : IncrementWaiters ( const TransactionImpl * txn ,
TransactionID wait_id ) {
bool TransactionLockMgr : : IncrementWaiters (
const TransactionImpl * txn , const autovector < TransactionID > & wait_ids ) {
auto id = txn - > GetID ( ) ;
std : : vector < TransactionID > queue ( txn - > GetDeadlockDetectDepth ( ) ) ;
std : : lock_guard < std : : mutex > lock ( wait_txn_map_mutex_ ) ;
assert ( ! wait_txn_map_ . Contains ( id ) ) ;
wait_txn_map_ . Insert ( id , wait_id ) ;
wait_txn_map_ . Insert ( id , wait_ids ) ;
for ( auto wait_id : wait_ids ) {
if ( rev_wait_txn_map_ . Contains ( wait_id ) ) {
rev_wait_txn_map_ . Get ( wait_id ) + + ;
} else {
rev_wait_txn_map_ . Insert ( wait_id , 1 ) ;
}
}
// No deadlock if nobody is waiting on self.
if ( ! rev_wait_txn_map_ . Contains ( id ) ) {
return false ;
}
TransactionID next = wait_id ;
for ( int i = 0 ; i < txn - > GetDeadlockDetectDepth ( ) ; i + + ) {
const auto * next_ids = & wait_ids ;
for ( int tail = 0 , head = 0 ; head < txn - > GetDeadlockDetectDepth ( ) ; head + + ) {
uint i = 0 ;
if ( next_ids ) {
for ( ; i < next_ids - > size ( ) & & tail + i < txn - > GetDeadlockDetectDepth ( ) ;
i + + ) {
queue [ tail + i ] = ( * next_ids ) [ i ] ;
}
tail + = i ;
}
// No more items in the list, meaning no deadlock.
if ( tail = = head ) {
return false ;
}
auto next = queue [ head ] ;
if ( next = = id ) {
DecrementWaitersImpl ( txn , wait_id ) ;
DecrementWaitersImpl ( txn , wait_ids ) ;
return true ;
} else if ( ! wait_txn_map_ . Contains ( next ) ) {
return false ;
next_ids = nullptr ;
continue ;
} else {
next = wait_txn_map_ . Get ( next ) ;
next_ids = & wait_txn_map_ . Get ( next ) ;
}
}
// Wait cycle too big, just assume deadlock.
DecrementWaitersImpl ( txn , wait_id ) ;
DecrementWaitersImpl ( txn , wait_ids ) ;
return true ;
}
@ -404,25 +437,48 @@ Status TransactionLockMgr::AcquireLocked(LockMap* lock_map,
const std : : string & key , Env * env ,
const LockInfo & txn_lock_info ,
uint64_t * expire_time ,
TransactionID * txn_id ) {
autovector < TransactionID > * txn_ids ) {
assert ( txn_lock_info . txn_ids . size ( ) = = 1 ) ;
Status result ;
// Check if this key is already locked
if ( stripe - > keys . find ( key ) ! = stripe - > keys . end ( ) ) {
// Lock already held
LockInfo & lock_info = stripe - > keys . at ( key ) ;
if ( lock_info . txn_id ! = txn_lock_info . txn_id ) {
// locked by another txn. Check if it's expired
if ( IsLockExpired ( lock_info , env , expire_time ) ) {
assert ( lock_info . txn_ids . size ( ) = = 1 | | ! lock_info . exclusive ) ;
if ( lock_info . exclusive | | txn_lock_info . exclusive ) {
if ( lock_info . txn_ids . size ( ) = = 1 & &
lock_info . txn_ids [ 0 ] = = txn_lock_info . txn_ids [ 0 ] ) {
// The list contains one txn and we're it, so just take it.
lock_info . exclusive = txn_lock_info . exclusive ;
lock_info . expiration_time = txn_lock_info . expiration_time ;
} else {
// Check if it's expired. Skips over txn_lock_info.txn_ids[0] in case
// it's there for a shared lock with multiple holders which was not
// caught in the first case.
if ( IsLockExpired ( txn_lock_info . txn_ids [ 0 ] , lock_info , env ,
expire_time ) ) {
// lock is expired, can steal it
lock_info . txn_id = txn_lock_info . txn_id ;
lock_info . txn_ids = txn_lock_info . txn_ids ;
lock_info . exclusive = txn_lock_info . exclusive ;
lock_info . expiration_time = txn_lock_info . expiration_time ;
// lock_cnt does not change
} else {
result = Status : : TimedOut ( Status : : SubCode : : kLockTimeout ) ;
* txn_id = lock_info . txn_id ;
* txn_ids = lock_info . txn_ids ;
}
}
} else {
// We are requesting shared access to a shared lock, so just grant it.
lock_info . txn_ids . push_back ( txn_lock_info . txn_ids [ 0 ] ) ;
// Using std::max means that expiration time never goes down even when
// a transaction is removed from the list. The correct solution would be
// to track expiry for every transaction, but this would also work for
// now.
lock_info . expiration_time =
std : : max ( lock_info . expiration_time , txn_lock_info . expiration_time ) ;
}
} else { // Lock not held.
// Check lock limit
if ( max_num_locks_ > 0 & &
@ -442,40 +498,58 @@ Status TransactionLockMgr::AcquireLocked(LockMap* lock_map,
return result ;
}
void TransactionLockMgr : : UnLock ( TransactionImpl * txn , uint32_t column_family_id ,
const std : : string & key , Env * env ) {
std : : shared_ptr < LockMap > lock_map_ptr = GetLockMap ( column_family_id ) ;
LockMap * lock_map = lock_map_ptr . get ( ) ;
if ( lock_map = = nullptr ) {
// Column Family must have been dropped.
return ;
}
// Lock the mutex for the stripe that this key hashes to
size_t stripe_num = lock_map - > GetStripe ( key ) ;
assert ( lock_map - > lock_map_stripes_ . size ( ) > stripe_num ) ;
LockMapStripe * stripe = lock_map - > lock_map_stripes_ . at ( stripe_num ) ;
void TransactionLockMgr : : UnLockKey ( const TransactionImpl * txn ,
const std : : string & key ,
LockMapStripe * stripe , LockMap * lock_map ,
Env * env ) {
TransactionID txn_id = txn - > GetID ( ) ;
stripe - > stripe_mutex - > Lock ( ) ;
const auto & iter = stripe - > keys . find ( key ) ;
if ( iter ! = stripe - > keys . end ( ) & & iter - > second . txn_id = = txn_id ) {
auto stripe_iter = stripe - > keys . find ( key ) ;
if ( stripe_iter ! = stripe - > keys . end ( ) ) {
auto & txns = stripe_iter - > second . txn_ids ;
auto txn_it = std : : find ( txns . begin ( ) , txns . end ( ) , txn_id ) ;
// Found the key we locked. unlock it.
stripe - > keys . erase ( iter ) ;
if ( txn_it ! = txns . end ( ) ) {
if ( txns . size ( ) = = 1 ) {
stripe - > keys . erase ( stripe_iter ) ;
} else {
auto last_it = txns . end ( ) - 1 ;
if ( txn_it ! = last_it ) {
* txn_it = * last_it ;
}
txns . pop_back ( ) ;
}
if ( max_num_locks_ > 0 ) {
// Maintain lock count if there is a limit on the number of locks.
assert ( lock_map - > lock_cnt . load ( std : : memory_order_relaxed ) > 0 ) ;
lock_map - > lock_cnt - - ;
}
}
} else {
// This key is either not locked or locked by someone else. This should
// only happen if the unlocking transaction has expired.
assert ( txn - > GetExpirationTime ( ) > 0 & &
txn - > GetExpirationTime ( ) < env - > NowMicros ( ) ) ;
}
}
void TransactionLockMgr : : UnLock ( TransactionImpl * txn , uint32_t column_family_id ,
const std : : string & key , Env * env ) {
std : : shared_ptr < LockMap > lock_map_ptr = GetLockMap ( column_family_id ) ;
LockMap * lock_map = lock_map_ptr . get ( ) ;
if ( lock_map = = nullptr ) {
// Column Family must have been dropped.
return ;
}
// Lock the mutex for the stripe that this key hashes to
size_t stripe_num = lock_map - > GetStripe ( key ) ;
assert ( lock_map - > lock_map_stripes_ . size ( ) > stripe_num ) ;
LockMapStripe * stripe = lock_map - > lock_map_stripes_ . at ( stripe_num ) ;
stripe - > stripe_mutex - > Lock ( ) ;
UnLockKey ( txn , key , stripe , lock_map , env ) ;
stripe - > stripe_mutex - > UnLock ( ) ;
// Signal waiting threads to retry locking
@ -484,8 +558,6 @@ void TransactionLockMgr::UnLock(TransactionImpl* txn, uint32_t column_family_id,
void TransactionLockMgr : : UnLock ( const TransactionImpl * txn ,
const TransactionKeyMap * key_map , Env * env ) {
TransactionID txn_id = txn - > GetID ( ) ;
for ( auto & key_map_iter : * key_map ) {
uint32_t column_family_id = key_map_iter . first ;
auto & keys = key_map_iter . second ;
@ -520,22 +592,7 @@ void TransactionLockMgr::UnLock(const TransactionImpl* txn,
stripe - > stripe_mutex - > Lock ( ) ;
for ( const std : : string * key : stripe_keys ) {
const auto & iter = stripe - > keys . find ( * key ) ;
if ( iter ! = stripe - > keys . end ( ) & & iter - > second . txn_id = = txn_id ) {
// Found the key we locked. unlock it.
stripe - > keys . erase ( iter ) ;
if ( max_num_locks_ > 0 ) {
// Maintain lock count if there is a limit on the number of locks.
assert ( lock_map - > lock_cnt . load ( std : : memory_order_relaxed ) > 0 ) ;
lock_map - > lock_cnt - - ;
}
} else {
// This key is either not locked or locked by someone else. This
// should only
// happen if the unlocking transaction has expired.
assert ( txn - > GetExpirationTime ( ) > 0 & &
txn - > GetExpirationTime ( ) < env - > NowMicros ( ) ) ;
}
UnLockKey ( txn , * key , stripe , lock_map , env ) ;
}
stripe - > stripe_mutex - > UnLock ( ) ;
@ -565,7 +622,13 @@ TransactionLockMgr::LockStatusData TransactionLockMgr::GetLockStatusData() {
for ( const auto & j : stripes ) {
j - > stripe_mutex - > Lock ( ) ;
for ( const auto & it : j - > keys ) {
data . insert ( { i , { it . first , it . second . txn_id } } ) ;
struct KeyLockInfo info ;
info . exclusive = it . second . exclusive ;
info . key = it . first ;
for ( const auto & id : it . second . txn_ids ) {
info . ids . push_back ( id ) ;
}
data . insert ( { i , info } ) ;
}
}
}