From d56ac22b44fdf6ce5e0e6dcad58fa2faa2dd3b93 Mon Sep 17 00:00:00 2001 From: Maysam Yabandeh Date: Wed, 9 Jan 2019 16:09:36 -0800 Subject: [PATCH] Remove duplicates from SnapshotList::GetAll (#4860) Summary: The vector returned by SnapshotList::GetAll could have duplicate entries if two separate snapshots have the same sequence number. However, when this vector is used in compaction the duplicate entires are of no use and could be safely ignored. Moreover not having duplicate entires simplifies reasoning in the compaction_iterator.cc code. For example when searching for the previous_snap we currently use the snapshot before the current one but the way the code uses that it expects it to be also less than the current snapshot, which would be simpler to read if there is no duplicate entry in the snapshot list. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4860 Differential Revision: D13615502 Pulled By: maysamyabandeh fbshipit-source-id: d45bf01213ead5f39db811f951802da6fcc3332b --- db/compaction_iterator.cc | 2 +- db/db_test2.cc | 34 ++++++++++++++++++++++++++++++++++ db/flush_job_test.cc | 8 ++++---- db/snapshot_impl.h | 7 +++++-- 4 files changed, 44 insertions(+), 7 deletions(-) diff --git a/db/compaction_iterator.cc b/db/compaction_iterator.cc index 7e8c8061e..413ecab08 100644 --- a/db/compaction_iterator.cc +++ b/db/compaction_iterator.cc @@ -80,7 +80,7 @@ CompactionIterator::CompactionIterator( #ifndef NDEBUG // findEarliestVisibleSnapshot assumes this ordering. for (size_t i = 1; i < snapshots_->size(); ++i) { - assert(snapshots_->at(i - 1) <= snapshots_->at(i)); + assert(snapshots_->at(i - 1) < snapshots_->at(i)); } #endif if (compaction_filter_ != nullptr) { diff --git a/db/db_test2.cc b/db/db_test2.cc index 6e71d4437..466159e31 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -1346,6 +1346,40 @@ TEST_F(DBTest2, FirstSnapshotTest) { db_->ReleaseSnapshot(s1); } +#ifndef ROCKSDB_LITE +TEST_F(DBTest2, DuplicateSnapshot) { + Options options; + options = CurrentOptions(options); + std::vector snapshots; + DBImpl* dbi = reinterpret_cast(db_); + SequenceNumber oldest_ww_snap, first_ww_snap; + + Put("k", "v"); // inc seq + snapshots.push_back(db_->GetSnapshot()); + snapshots.push_back(db_->GetSnapshot()); + Put("k", "v"); // inc seq + snapshots.push_back(db_->GetSnapshot()); + snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary()); + first_ww_snap = snapshots.back()->GetSequenceNumber(); + Put("k", "v"); // inc seq + snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary()); + snapshots.push_back(db_->GetSnapshot()); + Put("k", "v"); // inc seq + snapshots.push_back(db_->GetSnapshot()); + + { + InstrumentedMutexLock l(dbi->mutex()); + auto seqs = dbi->snapshots().GetAll(&oldest_ww_snap); + ASSERT_EQ(seqs.size(), 4); // duplicates are not counted + ASSERT_EQ(oldest_ww_snap, first_ww_snap); + } + + for (auto s : snapshots) { + db_->ReleaseSnapshot(s); + } +} +#endif // ROCKSDB_LITE + class PinL0IndexAndFilterBlocksTest : public DBTestBase, public testing::WithParamInterface> { diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index 5ac5f2f93..30a52faa8 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -365,17 +365,17 @@ TEST_F(FlushJobTest, Snapshots) { auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber); - std::vector snapshots; std::set snapshots_set; int keys = 10000; int max_inserts_per_keys = 8; Random rnd(301); for (int i = 0; i < keys / 2; ++i) { - snapshots.push_back(rnd.Uniform(keys * (max_inserts_per_keys / 2)) + 1); - snapshots_set.insert(snapshots.back()); + snapshots_set.insert(rnd.Uniform(keys * (max_inserts_per_keys / 2)) + 1); } - std::sort(snapshots.begin(), snapshots.end()); + // set has already removed the duplicate snapshots + std::vector snapshots(snapshots_set.begin(), + snapshots_set.end()); new_mem->Ref(); SequenceNumber current_seqno = 0; diff --git a/db/snapshot_impl.h b/db/snapshot_impl.h index edaabade7..3483a26c6 100644 --- a/db/snapshot_impl.h +++ b/db/snapshot_impl.h @@ -86,7 +86,7 @@ class SnapshotList { } // retrieve all snapshot numbers up until max_seq. They are sorted in - // ascending order. + // ascending order (with no duplicates). std::vector GetAll( SequenceNumber* oldest_write_conflict_snapshot = nullptr, const SequenceNumber& max_seq = kMaxSequenceNumber) const { @@ -104,7 +104,10 @@ class SnapshotList { if (s->next_->number_ > max_seq) { break; } - ret.push_back(s->next_->number_); + // Avoid duplicates + if (ret.empty() || ret.back() != s->next_->number_) { + ret.push_back(s->next_->number_); + } if (oldest_write_conflict_snapshot != nullptr && *oldest_write_conflict_snapshot == kMaxSequenceNumber &&