WritePrepared: release snapshot equal to max (#4944)

Summary:
WritePrepared maintains a list of snapshots that are <= max_evicted_seq_. Based on this list, old_commit_map_ is updated if an evicted commit entry overlaps with such snapshot. Such lists are garbage collected when the release of snapshot is reported to WritePreparedTxnDB, which is the next time max_evicted_seq_ is updated and yet the snapshot is not found is the list returned from DB. This logic was broken since ReleaseSnapshotInternal was using "< max_evicted_seq_" to cleanup old_commit_map_, which would leave a snapshot uncleaned if it "= max_evicted_seq_". The patch fixes that and adds a unit test to check for the bug.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4944

Differential Revision: D13945000

Pulled By: maysamyabandeh

fbshipit-source-id: 0c904294f735911f52348a148bf1f945282fc17c
main
Maysam Yabandeh 5 years ago committed by Facebook Github Bot
parent 30468d8eb4
commit dcb73e7735
  1. 31
      utilities/transactions/write_prepared_transaction_test.cc
  2. 6
      utilities/transactions/write_prepared_txn_db.cc
  3. 1
      utilities/transactions/write_prepared_txn_db.h

@ -1239,6 +1239,37 @@ TEST_P(WritePreparedTransactionTest, MaxCatchupWithNewSnapshot) {
db->ReleaseSnapshot(snap);
}
// Check that old_commit_map_ cleanup works correctly if the snapshot equals
// max_evicted_seq_.
TEST_P(WritePreparedTransactionTest, CleanupSnapshotEqualToMax) {
const size_t snapshot_cache_bits = 7; // same as default
const size_t commit_cache_bits = 0; // only 1 entry => frequent eviction
DestroyAndReopenWithExtraOptions(snapshot_cache_bits, commit_cache_bits);
WriteOptions woptions;
WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
// Insert something to increase seq
ASSERT_OK(db->Put(woptions, "key", "value"));
auto snap = db->GetSnapshot();
auto snap_seq = snap->GetSequenceNumber();
// Another insert should trigger eviction + load snapshot from db
ASSERT_OK(db->Put(woptions, "key", "value"));
// This is the scenario that we check agaisnt
ASSERT_EQ(snap_seq, wp_db->max_evicted_seq_);
// old_commit_map_ now has some data that needs gc
ASSERT_EQ(1, wp_db->snapshots_total_);
ASSERT_EQ(1, wp_db->old_commit_map_.size());
db->ReleaseSnapshot(snap);
// Another insert should trigger eviction + load snapshot from db
ASSERT_OK(db->Put(woptions, "key", "value"));
// the snapshot and related metadata must be properly garbage collected
ASSERT_EQ(0, wp_db->snapshots_total_);
ASSERT_TRUE(wp_db->snapshots_all_.empty());
ASSERT_EQ(0, wp_db->old_commit_map_.size());
}
TEST_P(WritePreparedTransactionTest, AdvanceSeqByOne) {
auto snap = db->GetSnapshot();
auto seq1 = snap->GetSequenceNumber();

@ -710,9 +710,9 @@ const std::vector<SequenceNumber> WritePreparedTxnDB::GetSnapshotListFromDB(
void WritePreparedTxnDB::ReleaseSnapshotInternal(
const SequenceNumber snap_seq) {
// relax is enough since max increases monotonically, i.e., if snap_seq <
// old_max => snap_seq < new_max as well.
if (snap_seq < max_evicted_seq_.load(std::memory_order_relaxed)) {
// TODO(myabandeh): relax should enough since the synchronizatin is already
// done by snapshots_mutex_ under which this function is called.
if (snap_seq <= max_evicted_seq_.load(std::memory_order_acquire)) {
// Then this is a rare case that transaction did not finish before max
// advances. It is expected for a few read-only backup snapshots. For such
// snapshots we might have kept around a couple of entries in the

@ -414,6 +414,7 @@ class WritePreparedTxnDB : public PessimisticTransactionDB {
WritePreparedTransactionTest_AdvanceMaxEvictedSeqWithDuplicatesTest_Test;
friend class WritePreparedTransactionTest_AdvanceSeqByOne_Test;
friend class WritePreparedTransactionTest_BasicRecoveryTest_Test;
friend class WritePreparedTransactionTest_CleanupSnapshotEqualToMax_Test;
friend class WritePreparedTransactionTest_DoubleSnapshot_Test;
friend class WritePreparedTransactionTest_IsInSnapshotEmptyMapTest_Test;
friend class WritePreparedTransactionTest_IsInSnapshotReleased_Test;

Loading…
Cancel
Save