From dcb73e773522244282159f5a7ee7a26101377d8c Mon Sep 17 00:00:00 2001 From: Maysam Yabandeh Date: Mon, 4 Feb 2019 12:53:55 -0800 Subject: [PATCH] WritePrepared: release snapshot equal to max (#4944) Summary: WritePrepared maintains a list of snapshots that are <= max_evicted_seq_. Based on this list, old_commit_map_ is updated if an evicted commit entry overlaps with such snapshot. Such lists are garbage collected when the release of snapshot is reported to WritePreparedTxnDB, which is the next time max_evicted_seq_ is updated and yet the snapshot is not found is the list returned from DB. This logic was broken since ReleaseSnapshotInternal was using "< max_evicted_seq_" to cleanup old_commit_map_, which would leave a snapshot uncleaned if it "= max_evicted_seq_". The patch fixes that and adds a unit test to check for the bug. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4944 Differential Revision: D13945000 Pulled By: maysamyabandeh fbshipit-source-id: 0c904294f735911f52348a148bf1f945282fc17c --- .../write_prepared_transaction_test.cc | 31 +++++++++++++++++++ .../transactions/write_prepared_txn_db.cc | 6 ++-- .../transactions/write_prepared_txn_db.h | 1 + 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/utilities/transactions/write_prepared_transaction_test.cc b/utilities/transactions/write_prepared_transaction_test.cc index e4badf793..6af9ac085 100644 --- a/utilities/transactions/write_prepared_transaction_test.cc +++ b/utilities/transactions/write_prepared_transaction_test.cc @@ -1239,6 +1239,37 @@ TEST_P(WritePreparedTransactionTest, MaxCatchupWithNewSnapshot) { db->ReleaseSnapshot(snap); } +// Check that old_commit_map_ cleanup works correctly if the snapshot equals +// max_evicted_seq_. +TEST_P(WritePreparedTransactionTest, CleanupSnapshotEqualToMax) { + const size_t snapshot_cache_bits = 7; // same as default + const size_t commit_cache_bits = 0; // only 1 entry => frequent eviction + DestroyAndReopenWithExtraOptions(snapshot_cache_bits, commit_cache_bits); + WriteOptions woptions; + WritePreparedTxnDB* wp_db = dynamic_cast(db); + // Insert something to increase seq + ASSERT_OK(db->Put(woptions, "key", "value")); + auto snap = db->GetSnapshot(); + auto snap_seq = snap->GetSequenceNumber(); + // Another insert should trigger eviction + load snapshot from db + ASSERT_OK(db->Put(woptions, "key", "value")); + // This is the scenario that we check agaisnt + ASSERT_EQ(snap_seq, wp_db->max_evicted_seq_); + // old_commit_map_ now has some data that needs gc + ASSERT_EQ(1, wp_db->snapshots_total_); + ASSERT_EQ(1, wp_db->old_commit_map_.size()); + + db->ReleaseSnapshot(snap); + + // Another insert should trigger eviction + load snapshot from db + ASSERT_OK(db->Put(woptions, "key", "value")); + + // the snapshot and related metadata must be properly garbage collected + ASSERT_EQ(0, wp_db->snapshots_total_); + ASSERT_TRUE(wp_db->snapshots_all_.empty()); + ASSERT_EQ(0, wp_db->old_commit_map_.size()); +} + TEST_P(WritePreparedTransactionTest, AdvanceSeqByOne) { auto snap = db->GetSnapshot(); auto seq1 = snap->GetSequenceNumber(); diff --git a/utilities/transactions/write_prepared_txn_db.cc b/utilities/transactions/write_prepared_txn_db.cc index c0cf42550..a86e3b004 100644 --- a/utilities/transactions/write_prepared_txn_db.cc +++ b/utilities/transactions/write_prepared_txn_db.cc @@ -710,9 +710,9 @@ const std::vector WritePreparedTxnDB::GetSnapshotListFromDB( void WritePreparedTxnDB::ReleaseSnapshotInternal( const SequenceNumber snap_seq) { - // relax is enough since max increases monotonically, i.e., if snap_seq < - // old_max => snap_seq < new_max as well. - if (snap_seq < max_evicted_seq_.load(std::memory_order_relaxed)) { + // TODO(myabandeh): relax should enough since the synchronizatin is already + // done by snapshots_mutex_ under which this function is called. + if (snap_seq <= max_evicted_seq_.load(std::memory_order_acquire)) { // Then this is a rare case that transaction did not finish before max // advances. It is expected for a few read-only backup snapshots. For such // snapshots we might have kept around a couple of entries in the diff --git a/utilities/transactions/write_prepared_txn_db.h b/utilities/transactions/write_prepared_txn_db.h index d053b80a8..60fa28f62 100644 --- a/utilities/transactions/write_prepared_txn_db.h +++ b/utilities/transactions/write_prepared_txn_db.h @@ -414,6 +414,7 @@ class WritePreparedTxnDB : public PessimisticTransactionDB { WritePreparedTransactionTest_AdvanceMaxEvictedSeqWithDuplicatesTest_Test; friend class WritePreparedTransactionTest_AdvanceSeqByOne_Test; friend class WritePreparedTransactionTest_BasicRecoveryTest_Test; + friend class WritePreparedTransactionTest_CleanupSnapshotEqualToMax_Test; friend class WritePreparedTransactionTest_DoubleSnapshot_Test; friend class WritePreparedTransactionTest_IsInSnapshotEmptyMapTest_Test; friend class WritePreparedTransactionTest_IsInSnapshotReleased_Test;