|
|
@ -12,6 +12,7 @@ |
|
|
|
#include "utilities/transactions/write_prepared_txn_db.h" |
|
|
|
#include "utilities/transactions/write_prepared_txn_db.h" |
|
|
|
|
|
|
|
|
|
|
|
#include <inttypes.h> |
|
|
|
#include <inttypes.h> |
|
|
|
|
|
|
|
#include <algorithm> |
|
|
|
#include <string> |
|
|
|
#include <string> |
|
|
|
#include <unordered_set> |
|
|
|
#include <unordered_set> |
|
|
|
#include <vector> |
|
|
|
#include <vector> |
|
|
@ -234,8 +235,8 @@ bool WritePreparedTxnDB::IsInSnapshot(uint64_t prep_seq, |
|
|
|
// delayed_prepared_. Also we move evicted entries from commit cache to
|
|
|
|
// delayed_prepared_. Also we move evicted entries from commit cache to
|
|
|
|
// old_commit_map_ if it overlaps with any snapshot. Since prep_seq <=
|
|
|
|
// old_commit_map_ if it overlaps with any snapshot. Since prep_seq <=
|
|
|
|
// max_evicted_seq_, we have three cases: i) in delayed_prepared_, ii) in
|
|
|
|
// max_evicted_seq_, we have three cases: i) in delayed_prepared_, ii) in
|
|
|
|
// old_commit_map_, iii) committed with no conflict with any snapshot (i)
|
|
|
|
// old_commit_map_, iii) committed with no conflict with any snapshot. Case
|
|
|
|
// delayed_prepared_ is checked above
|
|
|
|
// (i) delayed_prepared_ is checked above
|
|
|
|
if (max_evicted_seq < snapshot_seq) { // then (ii) cannot be the case
|
|
|
|
if (max_evicted_seq < snapshot_seq) { // then (ii) cannot be the case
|
|
|
|
// only (iii) is the case: committed
|
|
|
|
// only (iii) is the case: committed
|
|
|
|
// commit_seq <= max_evicted_seq_ < snapshot_seq => commit_seq <
|
|
|
|
// commit_seq <= max_evicted_seq_ < snapshot_seq => commit_seq <
|
|
|
@ -255,12 +256,17 @@ bool WritePreparedTxnDB::IsInSnapshot(uint64_t prep_seq, |
|
|
|
return true; |
|
|
|
return true; |
|
|
|
} |
|
|
|
} |
|
|
|
{ |
|
|
|
{ |
|
|
|
// We should not normally reach here
|
|
|
|
// We should not normally reach here unless snapshot_seq is old. This is a
|
|
|
|
// TODO(myabandeh): check only if snapshot_seq is in the list of snaphots
|
|
|
|
// rare case and it is ok to pay the cost of mutex ReadLock for such old,
|
|
|
|
|
|
|
|
// reading transactions.
|
|
|
|
ReadLock rl(&old_commit_map_mutex_); |
|
|
|
ReadLock rl(&old_commit_map_mutex_); |
|
|
|
auto old_commit_entry = old_commit_map_.find(prep_seq); |
|
|
|
auto prep_set_entry = old_commit_map_.find(snapshot_seq); |
|
|
|
if (old_commit_entry == old_commit_map_.end() || |
|
|
|
bool found = prep_set_entry != old_commit_map_.end(); |
|
|
|
old_commit_entry->second <= snapshot_seq) { |
|
|
|
if (found) { |
|
|
|
|
|
|
|
auto& vec = prep_set_entry->second; |
|
|
|
|
|
|
|
found = std::binary_search(vec.begin(), vec.end(), prep_seq); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (!found) { |
|
|
|
ROCKSDB_LOG_DETAILS( |
|
|
|
ROCKSDB_LOG_DETAILS( |
|
|
|
info_log_, "IsInSnapshot %" PRIu64 " in %" PRIu64 " returns %" PRId32, |
|
|
|
info_log_, "IsInSnapshot %" PRIu64 " in %" PRIu64 " returns %" PRId32, |
|
|
|
prep_seq, snapshot_seq, 1); |
|
|
|
prep_seq, snapshot_seq, 1); |
|
|
@ -322,6 +328,9 @@ void WritePreparedTxnDB::AddCommitted(uint64_t prepare_seq, |
|
|
|
bool to_be_evicted = GetCommitEntry(indexed_seq, &evicted_64b, &evicted); |
|
|
|
bool to_be_evicted = GetCommitEntry(indexed_seq, &evicted_64b, &evicted); |
|
|
|
if (to_be_evicted) { |
|
|
|
if (to_be_evicted) { |
|
|
|
auto prev_max = max_evicted_seq_.load(std::memory_order_acquire); |
|
|
|
auto prev_max = max_evicted_seq_.load(std::memory_order_acquire); |
|
|
|
|
|
|
|
ROCKSDB_LOG_DETAILS(info_log_, |
|
|
|
|
|
|
|
"Evicting %" PRIu64 ",%" PRIu64 " with max %" PRIu64, |
|
|
|
|
|
|
|
evicted.prep_seq, evicted.commit_seq, prev_max); |
|
|
|
if (prev_max < evicted.commit_seq) { |
|
|
|
if (prev_max < evicted.commit_seq) { |
|
|
|
// Inc max in larger steps to avoid frequent updates
|
|
|
|
// Inc max in larger steps to avoid frequent updates
|
|
|
|
auto max_evicted_seq = evicted.commit_seq + INC_STEP_FOR_MAX_EVICTED; |
|
|
|
auto max_evicted_seq = evicted.commit_seq + INC_STEP_FOR_MAX_EVICTED; |
|
|
@ -431,6 +440,36 @@ const std::vector<SequenceNumber> WritePreparedTxnDB::GetSnapshotListFromDB( |
|
|
|
return db_impl_->snapshots().GetAll(nullptr, max); |
|
|
|
return db_impl_->snapshots().GetAll(nullptr, max); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void WritePreparedTxnDB::ReleaseSnapshot(const Snapshot* snapshot) { |
|
|
|
|
|
|
|
auto snap_seq = snapshot->GetSequenceNumber(); |
|
|
|
|
|
|
|
ReleaseSnapshotInternal(snap_seq); |
|
|
|
|
|
|
|
db_impl_->ReleaseSnapshot(snapshot); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void WritePreparedTxnDB::ReleaseSnapshotInternal( |
|
|
|
|
|
|
|
const SequenceNumber snap_seq) { |
|
|
|
|
|
|
|
// relax is enough since max increases monotonically, i.e., if snap_seq <
|
|
|
|
|
|
|
|
// old_max => snap_seq < new_max as well.
|
|
|
|
|
|
|
|
if (snap_seq < max_evicted_seq_.load(std::memory_order_relaxed)) { |
|
|
|
|
|
|
|
// Then this is a rare case that transaction did not finish before max
|
|
|
|
|
|
|
|
// advances. It is expected for a few read-only backup snapshots. For such
|
|
|
|
|
|
|
|
// snapshots we might have kept around a couple of entries in the
|
|
|
|
|
|
|
|
// old_commit_map_. Check and do garbage collection if that is the case.
|
|
|
|
|
|
|
|
bool need_gc = false; |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
ReadLock rl(&old_commit_map_mutex_); |
|
|
|
|
|
|
|
auto prep_set_entry = old_commit_map_.find(snap_seq); |
|
|
|
|
|
|
|
need_gc = prep_set_entry != old_commit_map_.end(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (need_gc) { |
|
|
|
|
|
|
|
WriteLock wl(&old_commit_map_mutex_); |
|
|
|
|
|
|
|
old_commit_map_.erase(snap_seq); |
|
|
|
|
|
|
|
old_commit_map_empty_.store(old_commit_map_.empty(), |
|
|
|
|
|
|
|
std::memory_order_release); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
void WritePreparedTxnDB::UpdateSnapshots( |
|
|
|
void WritePreparedTxnDB::UpdateSnapshots( |
|
|
|
const std::vector<SequenceNumber>& snapshots, |
|
|
|
const std::vector<SequenceNumber>& snapshots, |
|
|
|
const SequenceNumber& version) { |
|
|
|
const SequenceNumber& version) { |
|
|
@ -541,8 +580,8 @@ void WritePreparedTxnDB::CheckAgainstSnapshots(const CommitEntry& evicted) { |
|
|
|
bool WritePreparedTxnDB::MaybeUpdateOldCommitMap( |
|
|
|
bool WritePreparedTxnDB::MaybeUpdateOldCommitMap( |
|
|
|
const uint64_t& prep_seq, const uint64_t& commit_seq, |
|
|
|
const uint64_t& prep_seq, const uint64_t& commit_seq, |
|
|
|
const uint64_t& snapshot_seq, const bool next_is_larger = true) { |
|
|
|
const uint64_t& snapshot_seq, const bool next_is_larger = true) { |
|
|
|
// If we do not store an entry in old_commit_map we assume it is committed in
|
|
|
|
// If we do not store an entry in old_commit_map_ we assume it is committed in
|
|
|
|
// all snapshots. if commit_seq <= snapshot_seq, it is considered already in
|
|
|
|
// all snapshots. If commit_seq <= snapshot_seq, it is considered already in
|
|
|
|
// the snapshot so we need not to keep the entry around for this snapshot.
|
|
|
|
// the snapshot so we need not keep the entry around for this snapshot.
|
|
|
|
if (commit_seq <= snapshot_seq) { |
|
|
|
if (commit_seq <= snapshot_seq) { |
|
|
|
// continue the search if the next snapshot could be smaller than commit_seq
|
|
|
|
// continue the search if the next snapshot could be smaller than commit_seq
|
|
|
@ -552,9 +591,11 @@ bool WritePreparedTxnDB::MaybeUpdateOldCommitMap( |
|
|
|
if (prep_seq <= snapshot_seq) { // overlapping range
|
|
|
|
if (prep_seq <= snapshot_seq) { // overlapping range
|
|
|
|
WriteLock wl(&old_commit_map_mutex_); |
|
|
|
WriteLock wl(&old_commit_map_mutex_); |
|
|
|
old_commit_map_empty_.store(false, std::memory_order_release); |
|
|
|
old_commit_map_empty_.store(false, std::memory_order_release); |
|
|
|
old_commit_map_[prep_seq] = commit_seq; |
|
|
|
auto& vec = old_commit_map_[snapshot_seq]; |
|
|
|
// Storing once is enough. No need to check it for other snapshots.
|
|
|
|
vec.insert(std::upper_bound(vec.begin(), vec.end(), prep_seq), prep_seq); |
|
|
|
return false; |
|
|
|
// We need to store it once for each overlapping snapshot. Returning true to
|
|
|
|
|
|
|
|
// continue the search if there are more overlapping snapshots.
|
|
|
|
|
|
|
|
return true; |
|
|
|
} |
|
|
|
} |
|
|
|
// continue the search if the next snapshot could be larger than prep_seq
|
|
|
|
// continue the search if the next snapshot could be larger than prep_seq
|
|
|
|
return next_is_larger; |
|
|
|
return next_is_larger; |
|
|
|