From 45f213b55836372d1cb8b5e085db939f2745577e Mon Sep 17 00:00:00 2001 From: Abhishek Madan Date: Wed, 17 Oct 2018 11:45:30 -0700 Subject: [PATCH] Lazily initialize RangeDelAggregator stripe map entries (#4497) Summary: When there are no range deletions, flush and compaction perform a binary search on an effectively empty map every time they call ShouldDelete. This PR lazily initializes each stripe map entry so that the binary search can be elided in these cases. After this PR, the total amount of time spent in compactions is 52.541331s, and the total amount of time spent in flush is 5.532608s, the former of which is a significant improvement from the results after #4495. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4497 Differential Revision: D10428610 Pulled By: abhimadan fbshipit-source-id: 6f7e1ce3698fac3ef86d1197955e6b72e0931a0f --- db/db_range_del_test.cc | 32 ++++++++++++++++++++++ db/range_del_aggregator.cc | 56 ++++++++++++++++++++++++-------------- db/range_del_aggregator.h | 10 +++++-- 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index f233c1331..4e97651b7 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -1357,6 +1357,38 @@ TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) { db_->ReleaseSnapshot(snapshot); } +TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) { + const int kFileBytes = 1 << 20; + + Options options = CurrentOptions(); + options.compression = kNoCompression; + options.disable_auto_compactions = true; + options.target_file_size_base = kFileBytes; + Reopen(options); + + ASSERT_OK(Put(Key(0), "a")); + const Snapshot* snapshot = db_->GetSnapshot(); + + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), + Key(10))); + + db_->Flush(FlushOptions()); + + ReadOptions read_opts; + read_opts.snapshot = snapshot; + auto* iter = db_->NewIterator(read_opts); + + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(Key(0), iter->key()); + + iter->Next(); + ASSERT_FALSE(iter->Valid()); + + delete iter; + db_->ReleaseSnapshot(snapshot); +} + #endif // ROCKSDB_LITE } // namespace rocksdb diff --git a/db/range_del_aggregator.cc b/db/range_del_aggregator.cc index 58ab3f5e4..81e1da039 100644 --- a/db/range_del_aggregator.cc +++ b/db/range_del_aggregator.cc @@ -441,11 +441,9 @@ RangeDelAggregator::RangeDelAggregator(const InternalKeyComparator& icmp, void RangeDelAggregator::InitRep(const std::vector& snapshots) { assert(rep_ == nullptr); rep_.reset(new Rep()); - for (auto snapshot : snapshots) { - rep_->stripe_map_.emplace(snapshot, NewRangeDelMap()); - } + rep_->snapshots_ = snapshots; // Data newer than any snapshot falls in this catch-all stripe - rep_->stripe_map_.emplace(kMaxSequenceNumber, NewRangeDelMap()); + rep_->snapshots_.emplace_back(kMaxSequenceNumber); rep_->pinned_iters_mgr_.StartPinning(); } @@ -474,11 +472,11 @@ bool RangeDelAggregator::ShouldDeleteImpl(const ParsedInternalKey& parsed, RangeDelPositioningMode mode) { assert(IsValueType(parsed.type)); assert(rep_ != nullptr); - auto& tombstone_map = GetRangeDelMap(parsed.sequence); - if (tombstone_map.IsEmpty()) { + auto* tombstone_map = GetRangeDelMapIfExists(parsed.sequence); + if (tombstone_map == nullptr || tombstone_map->IsEmpty()) { return false; } - return tombstone_map.ShouldDelete(parsed, mode); + return tombstone_map->ShouldDelete(parsed, mode); } bool RangeDelAggregator::IsRangeOverlapped(const Slice& start, @@ -492,7 +490,7 @@ bool RangeDelAggregator::IsRangeOverlapped(const Slice& start, ParsedInternalKey start_ikey(start, kMaxSequenceNumber, kMaxValue); ParsedInternalKey end_ikey(end, 0, static_cast(0)); for (const auto& stripe : rep_->stripe_map_) { - if (stripe.second->IsRangeOverlapped(start_ikey, end_ikey)) { + if (stripe.second.first->IsRangeOverlapped(start_ikey, end_ikey)) { return true; } } @@ -587,24 +585,42 @@ void RangeDelAggregator::InvalidateRangeDelMapPositions() { return; } for (auto& stripe : rep_->stripe_map_) { - stripe.second->InvalidatePosition(); + stripe.second.first->InvalidatePosition(); } } -RangeDelMap& RangeDelAggregator::GetRangeDelMap(SequenceNumber seq) { +RangeDelMap* RangeDelAggregator::GetRangeDelMapIfExists(SequenceNumber seq) { assert(rep_ != nullptr); // The stripe includes seqnum for the snapshot above and excludes seqnum for // the snapshot below. - StripeMap::iterator iter; - if (seq > 0) { - // upper_bound() checks strict inequality so need to subtract one - iter = rep_->stripe_map_.upper_bound(seq - 1); - } else { - iter = rep_->stripe_map_.begin(); + if (rep_->stripe_map_.empty()) { + return nullptr; } + StripeMap::iterator iter = rep_->stripe_map_.lower_bound(seq); + if (iter == rep_->stripe_map_.end()) { + return nullptr; + } + size_t snapshot_idx = iter->second.second; + if (snapshot_idx > 0 && seq <= rep_->snapshots_[snapshot_idx - 1]) { + return nullptr; + } + return iter->second.first.get(); +} + +RangeDelMap& RangeDelAggregator::GetRangeDelMap(SequenceNumber seq) { + assert(rep_ != nullptr); + // The stripe includes seqnum for the snapshot above and excludes seqnum for + // the snapshot below. + std::vector::iterator iter = + std::lower_bound(rep_->snapshots_.begin(), rep_->snapshots_.end(), seq); // catch-all stripe justifies this assertion in either of above cases - assert(iter != rep_->stripe_map_.end()); - return *iter->second; + assert(iter != rep_->snapshots_.end()); + if (rep_->stripe_map_.find(*iter) == rep_->stripe_map_.end()) { + rep_->stripe_map_.emplace( + *iter, + std::make_pair(NewRangeDelMap(), iter - rep_->snapshots_.begin())); + } + return *rep_->stripe_map_[*iter].first; } bool RangeDelAggregator::IsEmpty() { @@ -612,7 +628,7 @@ bool RangeDelAggregator::IsEmpty() { return true; } for (const auto& stripe : rep_->stripe_map_) { - if (!stripe.second->IsEmpty()) { + if (!stripe.second.first->IsEmpty()) { return false; } } @@ -696,7 +712,7 @@ std::unique_ptr RangeDelAggregator::NewIterator() { new MergingRangeDelIter(icmp_.user_comparator())); if (rep_ != nullptr) { for (const auto& stripe : rep_->stripe_map_) { - iter->AddIterator(stripe.second->NewIterator()); + iter->AddIterator(stripe.second.first->NewIterator()); } } return std::move(iter); diff --git a/db/range_del_aggregator.h b/db/range_del_aggregator.h index 459de866d..8a89ec9f1 100644 --- a/db/range_del_aggregator.h +++ b/db/range_del_aggregator.h @@ -200,10 +200,15 @@ class RangeDelAggregator { private: // Maps snapshot seqnum -> map of tombstones that fall in that stripe, i.e., - // their seqnums are greater than the next smaller snapshot's seqnum. - typedef std::map> StripeMap; + // their seqnums are greater than the next smaller snapshot's seqnum, and the + // corresponding index into the list of snapshots. Each entry is lazily + // initialized. + typedef std::map, size_t>> + StripeMap; struct Rep { + std::vector snapshots_; StripeMap stripe_map_; PinnedIteratorsManager pinned_iters_mgr_; std::list pinned_slices_; @@ -215,6 +220,7 @@ class RangeDelAggregator { void InitRep(const std::vector& snapshots); std::unique_ptr NewRangeDelMap(); + RangeDelMap* GetRangeDelMapIfExists(SequenceNumber seq); RangeDelMap& GetRangeDelMap(SequenceNumber seq); SequenceNumber upper_bound_;