Remove duplicates from SnapshotList::GetAll (#4860)

Summary:
The vector returned by SnapshotList::GetAll could have duplicate entries if two separate snapshots have the same sequence number. However, when this vector is used in compaction the duplicate entires are of no use and could be safely ignored. Moreover not having duplicate entires simplifies reasoning in the compaction_iterator.cc code. For example when searching for the previous_snap we currently use the snapshot before the current one but the way the code uses that it expects it to be also less than the current snapshot, which would be simpler to read if there is no duplicate entry in the snapshot list.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4860

Differential Revision: D13615502

Pulled By: maysamyabandeh

fbshipit-source-id: d45bf01213ead5f39db811f951802da6fcc3332b
main
Maysam Yabandeh 6 years ago committed by Facebook Github Bot
parent 75714b4c08
commit d56ac22b44
  1. 2
      db/compaction_iterator.cc
  2. 34
      db/db_test2.cc
  3. 8
      db/flush_job_test.cc
  4. 5
      db/snapshot_impl.h

@ -80,7 +80,7 @@ CompactionIterator::CompactionIterator(
#ifndef NDEBUG #ifndef NDEBUG
// findEarliestVisibleSnapshot assumes this ordering. // findEarliestVisibleSnapshot assumes this ordering.
for (size_t i = 1; i < snapshots_->size(); ++i) { for (size_t i = 1; i < snapshots_->size(); ++i) {
assert(snapshots_->at(i - 1) <= snapshots_->at(i)); assert(snapshots_->at(i - 1) < snapshots_->at(i));
} }
#endif #endif
if (compaction_filter_ != nullptr) { if (compaction_filter_ != nullptr) {

@ -1346,6 +1346,40 @@ TEST_F(DBTest2, FirstSnapshotTest) {
db_->ReleaseSnapshot(s1); db_->ReleaseSnapshot(s1);
} }
#ifndef ROCKSDB_LITE
TEST_F(DBTest2, DuplicateSnapshot) {
Options options;
options = CurrentOptions(options);
std::vector<const Snapshot*> snapshots;
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
SequenceNumber oldest_ww_snap, first_ww_snap;
Put("k", "v"); // inc seq
snapshots.push_back(db_->GetSnapshot());
snapshots.push_back(db_->GetSnapshot());
Put("k", "v"); // inc seq
snapshots.push_back(db_->GetSnapshot());
snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary());
first_ww_snap = snapshots.back()->GetSequenceNumber();
Put("k", "v"); // inc seq
snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary());
snapshots.push_back(db_->GetSnapshot());
Put("k", "v"); // inc seq
snapshots.push_back(db_->GetSnapshot());
{
InstrumentedMutexLock l(dbi->mutex());
auto seqs = dbi->snapshots().GetAll(&oldest_ww_snap);
ASSERT_EQ(seqs.size(), 4); // duplicates are not counted
ASSERT_EQ(oldest_ww_snap, first_ww_snap);
}
for (auto s : snapshots) {
db_->ReleaseSnapshot(s);
}
}
#endif // ROCKSDB_LITE
class PinL0IndexAndFilterBlocksTest class PinL0IndexAndFilterBlocksTest
: public DBTestBase, : public DBTestBase,
public testing::WithParamInterface<std::tuple<bool, bool>> { public testing::WithParamInterface<std::tuple<bool, bool>> {

@ -365,17 +365,17 @@ TEST_F(FlushJobTest, Snapshots) {
auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(), auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(),
kMaxSequenceNumber); kMaxSequenceNumber);
std::vector<SequenceNumber> snapshots;
std::set<SequenceNumber> snapshots_set; std::set<SequenceNumber> snapshots_set;
int keys = 10000; int keys = 10000;
int max_inserts_per_keys = 8; int max_inserts_per_keys = 8;
Random rnd(301); Random rnd(301);
for (int i = 0; i < keys / 2; ++i) { for (int i = 0; i < keys / 2; ++i) {
snapshots.push_back(rnd.Uniform(keys * (max_inserts_per_keys / 2)) + 1); snapshots_set.insert(rnd.Uniform(keys * (max_inserts_per_keys / 2)) + 1);
snapshots_set.insert(snapshots.back());
} }
std::sort(snapshots.begin(), snapshots.end()); // set has already removed the duplicate snapshots
std::vector<SequenceNumber> snapshots(snapshots_set.begin(),
snapshots_set.end());
new_mem->Ref(); new_mem->Ref();
SequenceNumber current_seqno = 0; SequenceNumber current_seqno = 0;

@ -86,7 +86,7 @@ class SnapshotList {
} }
// retrieve all snapshot numbers up until max_seq. They are sorted in // retrieve all snapshot numbers up until max_seq. They are sorted in
// ascending order. // ascending order (with no duplicates).
std::vector<SequenceNumber> GetAll( std::vector<SequenceNumber> GetAll(
SequenceNumber* oldest_write_conflict_snapshot = nullptr, SequenceNumber* oldest_write_conflict_snapshot = nullptr,
const SequenceNumber& max_seq = kMaxSequenceNumber) const { const SequenceNumber& max_seq = kMaxSequenceNumber) const {
@ -104,7 +104,10 @@ class SnapshotList {
if (s->next_->number_ > max_seq) { if (s->next_->number_ > max_seq) {
break; break;
} }
// Avoid duplicates
if (ret.empty() || ret.back() != s->next_->number_) {
ret.push_back(s->next_->number_); ret.push_back(s->next_->number_);
}
if (oldest_write_conflict_snapshot != nullptr && if (oldest_write_conflict_snapshot != nullptr &&
*oldest_write_conflict_snapshot == kMaxSequenceNumber && *oldest_write_conflict_snapshot == kMaxSequenceNumber &&

Loading…
Cancel
Save