From cad248f5c6859443790d2cf40bd57131d496f830 Mon Sep 17 00:00:00 2001 From: Abhishek Madan Date: Tue, 11 Dec 2018 11:44:24 -0800 Subject: [PATCH] Prepare FragmentedRangeTombstoneIterator for use in compaction (#4740) Summary: To support the flush/compaction use cases of RangeDelAggregator in v2, FragmentedRangeTombstoneIterator now supports dropping tombstones that cannot be read in the compaction output file. Furthermore, FragmentedRangeTombstoneIterator supports the "snapshot striping" use case by allowing an iterator to be split by a list of snapshots. RangeDelAggregatorV2 will use these changes in a follow-up change. In the process of making these changes, other miscellaneous cleanups were also done in these files. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4740 Differential Revision: D13287382 Pulled By: abhimadan fbshipit-source-id: f5aeb03e1b3058049b80c02a558ee48f723fa48c --- db/memtable.cc | 2 +- db/range_del_aggregator_bench.cc | 4 +- db/range_del_aggregator_v2_test.cc | 34 ++-- db/range_tombstone_fragmenter.cc | 122 ++++++++++--- db/range_tombstone_fragmenter.h | 48 +++-- db/range_tombstone_fragmenter_test.cc | 247 ++++++++++++++++++++------ table/block_based_table_reader.cc | 2 +- 7 files changed, 341 insertions(+), 118 deletions(-) diff --git a/db/memtable.cc b/db/memtable.cc index c0166bb40..51b54d636 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -428,7 +428,7 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIterator( comparator_.comparator); auto* fragmented_iter = new FragmentedRangeTombstoneIterator( - fragmented_tombstone_list, read_seq, comparator_.comparator); + fragmented_tombstone_list, comparator_.comparator, read_seq); return fragmented_iter; } diff --git a/db/range_del_aggregator_bench.cc b/db/range_del_aggregator_bench.cc index 01974702b..9fdcefc39 100644 --- a/db/range_del_aggregator_bench.cc +++ b/db/range_del_aggregator_bench.cc @@ -220,8 +220,8 @@ int main(int argc, char** argv) { 
std::unique_ptr fragmented_range_del_iter( new rocksdb::FragmentedRangeTombstoneIterator( - fragmented_range_tombstone_lists.back().get(), - rocksdb::kMaxSequenceNumber, icmp)); + fragmented_range_tombstone_lists.back().get(), icmp, + rocksdb::kMaxSequenceNumber)); if (FLAGS_use_v2_aggregator) { rocksdb::StopWatchNano stop_watch_add_tombstones( diff --git a/db/range_del_aggregator_v2_test.cc b/db/range_del_aggregator_v2_test.cc index 576d3339e..79cb548b1 100644 --- a/db/range_del_aggregator_v2_test.cc +++ b/db/range_del_aggregator_v2_test.cc @@ -173,8 +173,8 @@ TEST_F(RangeDelAggregatorV2Test, EmptyTruncatedIter) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber, - bytewise_icmp)); + new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, + kMaxSequenceNumber)); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, nullptr); @@ -192,8 +192,8 @@ TEST_F(RangeDelAggregatorV2Test, UntruncatedIter) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber, - bytewise_icmp)); + new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, + kMaxSequenceNumber)); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, nullptr); @@ -226,8 +226,8 @@ TEST_F(RangeDelAggregatorV2Test, UntruncatedIterWithSnapshot) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, 9 /* snapshot */, - bytewise_icmp)); + new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, + 9 /* snapshot */)); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, nullptr); @@ -259,8 +259,8 @@ 
TEST_F(RangeDelAggregatorV2Test, TruncatedIter) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber, - bytewise_icmp)); + new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, + kMaxSequenceNumber)); InternalKey smallest("d", 7, kTypeValue); InternalKey largest("m", 9, kTypeValue); @@ -294,8 +294,8 @@ TEST_F(RangeDelAggregatorV2Test, SingleIterInAggregator) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber, - bytewise_icmp)); + new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, + kMaxSequenceNumber)); RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, kMaxSequenceNumber); range_del_agg.AddTombstones(std::move(input_iter)); @@ -321,8 +321,8 @@ TEST_F(RangeDelAggregatorV2Test, MultipleItersInAggregator) { RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, kMaxSequenceNumber); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator( - fragment_list.get(), kMaxSequenceNumber, bytewise_icmp)); + new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, + kMaxSequenceNumber)); range_del_agg.AddTombstones(std::move(input_iter)); } @@ -353,8 +353,8 @@ TEST_F(RangeDelAggregatorV2Test, MultipleItersInAggregatorWithUpperBound) { RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, 19); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), - 19 /* snapshot */, bytewise_icmp)); + new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, + 19 /* snapshot */)); range_del_agg.AddTombstones(std::move(input_iter)); } @@ -392,8 +392,8 @@ TEST_F(RangeDelAggregatorV2Test, MultipleTruncatedItersInAggregator) { 
const auto& fragment_list = fragment_lists[i]; const auto& bounds = iter_bounds[i]; std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), - 19 /* snapshot */, bytewise_icmp)); + new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, + 19 /* snapshot */)); range_del_agg.AddTombstones(std::move(input_iter), &bounds.first, &bounds.second); } @@ -432,7 +432,7 @@ TEST_F(RangeDelAggregatorV2Test, MultipleTruncatedItersInAggregatorSameLevel) { auto add_iter_to_agg = [&](size_t i) { std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_lists[i].get(), - 19 /* snapshot */, bytewise_icmp)); + bytewise_icmp, 19 /* snapshot */)); range_del_agg.AddTombstones(std::move(input_iter), &iter_bounds[i].first, &iter_bounds[i].second); }; diff --git a/db/range_tombstone_fragmenter.cc b/db/range_tombstone_fragmenter.cc index 4137f25cf..1748c5430 100644 --- a/db/range_tombstone_fragmenter.cc +++ b/db/range_tombstone_fragmenter.cc @@ -20,7 +20,8 @@ namespace rocksdb { FragmentedRangeTombstoneList::FragmentedRangeTombstoneList( std::unique_ptr unfragmented_tombstones, - const InternalKeyComparator& icmp) { + const InternalKeyComparator& icmp, bool for_compaction, + const std::vector& snapshots) { if (unfragmented_tombstones == nullptr) { return; } @@ -43,7 +44,8 @@ FragmentedRangeTombstoneList::FragmentedRangeTombstoneList( } } if (is_sorted) { - FragmentTombstones(std::move(unfragmented_tombstones), icmp); + FragmentTombstones(std::move(unfragmented_tombstones), icmp, for_compaction, + snapshots); return; } @@ -61,12 +63,13 @@ FragmentedRangeTombstoneList::FragmentedRangeTombstoneList( // VectorIterator implicitly sorts by key during construction. 
auto iter = std::unique_ptr( new VectorIterator(std::move(keys), std::move(values), &icmp)); - FragmentTombstones(std::move(iter), icmp); + FragmentTombstones(std::move(iter), icmp, for_compaction, snapshots); } void FragmentedRangeTombstoneList::FragmentTombstones( std::unique_ptr unfragmented_tombstones, - const InternalKeyComparator& icmp) { + const InternalKeyComparator& icmp, bool for_compaction, + const std::vector& snapshots) { Slice cur_start_key(nullptr, 0); auto cmp = ParsedInternalKeyComparator(&icmp); @@ -117,10 +120,38 @@ void FragmentedRangeTombstoneList::FragmentTombstones( } std::sort(seqnums_to_flush.begin(), seqnums_to_flush.end(), std::greater()); + size_t start_idx = tombstone_seqs_.size(); size_t end_idx = start_idx + seqnums_to_flush.size(); - tombstone_seqs_.insert(tombstone_seqs_.end(), seqnums_to_flush.begin(), - seqnums_to_flush.end()); + + if (for_compaction) { + // Drop all tombstone seqnums that are not preserved by a snapshot. + SequenceNumber next_snapshot = kMaxSequenceNumber; + for (auto seq : seqnums_to_flush) { + if (seq <= next_snapshot) { + // This seqnum is visible by a lower snapshot. + tombstone_seqs_.push_back(seq); + seq_set_.insert(seq); + auto upper_bound_it = + std::lower_bound(snapshots.begin(), snapshots.end(), seq); + if (upper_bound_it == snapshots.begin()) { + // This seqnum is the topmost one visible by the earliest + // snapshot. None of the seqnums below it will be visible, so we + // can skip them. + break; + } + next_snapshot = *std::prev(upper_bound_it); + } + } + end_idx = tombstone_seqs_.size(); + } else { + // The fragmentation is being done for reads, so preserve all seqnums. 
+ tombstone_seqs_.insert(tombstone_seqs_.end(), seqnums_to_flush.begin(), + seqnums_to_flush.end()); + seq_set_.insert(seqnums_to_flush.begin(), seqnums_to_flush.end()); + } + + assert(start_idx < end_idx); tombstones_.emplace_back(cur_start_key, cur_end_key, start_idx, end_idx); cur_start_key = cur_end_key; @@ -178,33 +209,41 @@ void FragmentedRangeTombstoneList::FragmentTombstones( } } +bool FragmentedRangeTombstoneList::ContainsRange(SequenceNumber lower, + SequenceNumber upper) const { + auto seq_it = seq_set_.lower_bound(lower); + return seq_it != seq_set_.end() && *seq_it <= upper; +} + FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( - const FragmentedRangeTombstoneList* tombstones, SequenceNumber snapshot, - const InternalKeyComparator& icmp) + const FragmentedRangeTombstoneList* tombstones, + const InternalKeyComparator& icmp, SequenceNumber _upper_bound, + SequenceNumber _lower_bound) : tombstone_start_cmp_(icmp.user_comparator()), tombstone_end_cmp_(icmp.user_comparator()), + icmp_(&icmp), ucmp_(icmp.user_comparator()), tombstones_(tombstones), - snapshot_(snapshot) { + upper_bound_(_upper_bound), + lower_bound_(_lower_bound) { assert(tombstones_ != nullptr); - pos_ = tombstones_->end(); - pinned_pos_ = tombstones_->end(); + Invalidate(); } FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( const std::shared_ptr& tombstones, - SequenceNumber snapshot, const InternalKeyComparator& icmp) + const InternalKeyComparator& icmp, SequenceNumber _upper_bound, + SequenceNumber _lower_bound) : tombstone_start_cmp_(icmp.user_comparator()), tombstone_end_cmp_(icmp.user_comparator()), + icmp_(&icmp), ucmp_(icmp.user_comparator()), tombstones_ref_(tombstones), tombstones_(tombstones_ref_.get()), - snapshot_(snapshot) { + upper_bound_(_upper_bound), + lower_bound_(_lower_bound) { assert(tombstones_ != nullptr); - pos_ = tombstones_->end(); - seq_pos_ = tombstones_->seq_end(); - pinned_pos_ = tombstones_->end(); - pinned_seq_pos_ = 
tombstones_->seq_end(); + Invalidate(); } void FragmentedRangeTombstoneIterator::SeekToFirst() { @@ -220,7 +259,7 @@ void FragmentedRangeTombstoneIterator::SeekToTopFirst() { pos_ = tombstones_->begin(); seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); ScanForwardToVisibleTombstone(); } @@ -237,7 +276,7 @@ void FragmentedRangeTombstoneIterator::SeekToTopLast() { pos_ = std::prev(tombstones_->end()); seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); ScanBackwardToVisibleTombstone(); } @@ -270,7 +309,7 @@ void FragmentedRangeTombstoneIterator::SeekToCoveringTombstone( } seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); } void FragmentedRangeTombstoneIterator::SeekForPrevToCoveringTombstone( @@ -289,25 +328,28 @@ void FragmentedRangeTombstoneIterator::SeekForPrevToCoveringTombstone( --pos_; seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); } void FragmentedRangeTombstoneIterator::ScanForwardToVisibleTombstone() { while (pos_ != tombstones_->end() && - seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx)) { + (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx) || + *seq_pos_ < lower_bound_)) { ++pos_; if (pos_ == tombstones_->end()) { + Invalidate(); return; } seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); } } void FragmentedRangeTombstoneIterator::ScanBackwardToVisibleTombstone() { while (pos_ != tombstones_->end() && - seq_pos_ == 
tombstones_->seq_iter(pos_->seq_end_idx)) { + (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx) || + *seq_pos_ < lower_bound_)) { if (pos_ == tombstones_->begin()) { Invalidate(); return; @@ -315,7 +357,7 @@ void FragmentedRangeTombstoneIterator::ScanBackwardToVisibleTombstone() { --pos_; seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); } } @@ -333,14 +375,13 @@ void FragmentedRangeTombstoneIterator::TopNext() { } seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); ScanForwardToVisibleTombstone(); } void FragmentedRangeTombstoneIterator::Prev() { if (seq_pos_ == tombstones_->seq_begin()) { - pos_ = tombstones_->end(); - seq_pos_ = tombstones_->seq_end(); + Invalidate(); return; } --seq_pos_; @@ -358,7 +399,7 @@ void FragmentedRangeTombstoneIterator::TopPrev() { --pos_; seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); ScanBackwardToVisibleTombstone(); } @@ -372,4 +413,27 @@ SequenceNumber FragmentedRangeTombstoneIterator::MaxCoveringTombstoneSeqnum( return ValidPos() && ucmp_->Compare(start_key(), user_key) <= 0 ? 
seq() : 0; } +std::map> +FragmentedRangeTombstoneIterator::SplitBySnapshot( + const std::vector& snapshots) { + std::map> + splits; + SequenceNumber lower = 0; + SequenceNumber upper; + for (size_t i = 0; i <= snapshots.size(); i++) { + if (i >= snapshots.size()) { + upper = kMaxSequenceNumber; + } else { + upper = snapshots[i]; + } + if (tombstones_->ContainsRange(lower, upper)) { + splits.emplace(upper, std::unique_ptr( + new FragmentedRangeTombstoneIterator( + tombstones_, *icmp_, upper, lower))); + } + lower = upper + 1; + } + return splits; +} + } // namespace rocksdb diff --git a/db/range_tombstone_fragmenter.h b/db/range_tombstone_fragmenter.h index 2ad346af1..306a0347b 100644 --- a/db/range_tombstone_fragmenter.h +++ b/db/range_tombstone_fragmenter.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -38,7 +39,8 @@ struct FragmentedRangeTombstoneList { }; FragmentedRangeTombstoneList( std::unique_ptr unfragmented_tombstones, - const InternalKeyComparator& icmp); + const InternalKeyComparator& icmp, bool for_compaction = false, + const std::vector& snapshots = {}); std::vector::const_iterator begin() const { return tombstones_.begin(); @@ -60,7 +62,11 @@ struct FragmentedRangeTombstoneList { return tombstone_seqs_.end(); } - bool empty() const { return tombstones_.size() == 0; } + bool empty() const { return tombstones_.empty(); } + + // Returns true if the stored tombstones contain one with a sequence + // number in [lower, upper]. + bool ContainsRange(SequenceNumber lower, SequenceNumber upper) const; private: // Given an ordered range tombstone iterator unfragmented_tombstones, @@ -68,10 +74,12 @@ struct FragmentedRangeTombstoneList { // tombstones_ and tombstone_seqs_. 
void FragmentTombstones( std::unique_ptr unfragmented_tombstones, - const InternalKeyComparator& icmp); + const InternalKeyComparator& icmp, bool for_compaction, + const std::vector& snapshots); std::vector tombstones_; std::vector tombstone_seqs_; + std::set seq_set_; std::list pinned_slices_; PinnedIteratorsManager pinned_iters_mgr_; }; @@ -88,11 +96,13 @@ struct FragmentedRangeTombstoneList { class FragmentedRangeTombstoneIterator : public InternalIterator { public: FragmentedRangeTombstoneIterator( - const FragmentedRangeTombstoneList* tombstones, SequenceNumber snapshot, - const InternalKeyComparator& icmp); + const FragmentedRangeTombstoneList* tombstones, + const InternalKeyComparator& icmp, SequenceNumber upper_bound, + SequenceNumber lower_bound = 0); FragmentedRangeTombstoneIterator( const std::shared_ptr& tombstones, - SequenceNumber snapshot, const InternalKeyComparator& icmp); + const InternalKeyComparator& icmp, SequenceNumber upper_bound, + SequenceNumber lower_bound = 0); void SeekToFirst() override; void SeekToLast() override; @@ -136,10 +146,6 @@ class FragmentedRangeTombstoneIterator : public InternalIterator { seq_pos_ = tombstones_->seq_end(); } - // TODO: implement properly - RangeTombstone tombstone() const { - return RangeTombstone(start_key(), end_key(), seq()); - } Slice start_key() const { return pos_->start_key; } Slice end_key() const { return pos_->end_key; } SequenceNumber seq() const { return *seq_pos_; } @@ -151,12 +157,24 @@ class FragmentedRangeTombstoneIterator : public InternalIterator { return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber, kTypeRangeDeletion); } - ParsedInternalKey internal_key() const { - return ParsedInternalKey(pos_->start_key, *seq_pos_, kTypeRangeDeletion); - } SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key); + // Splits the iterator into n+1 iterators (where n is the number of + // snapshots), each providing a view over a "stripe" of sequence numbers. 
The + // iterators are keyed by the upper bound of their ranges (the provided + // snapshots + kMaxSequenceNumber). + // + // NOTE: the iterators in the returned map are no longer valid if their + // parent iterator is deleted, since they do not modify the refcount of the + // underlying tombstone list. Therefore, this map should be deleted before + // the parent iterator. + std::map> + SplitBySnapshot(const std::vector& snapshots); + + SequenceNumber upper_bound() const { return upper_bound_; } + SequenceNumber lower_bound() const { return lower_bound_; } + private: using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack; @@ -217,10 +235,12 @@ class FragmentedRangeTombstoneIterator : public InternalIterator { const RangeTombstoneStackStartComparator tombstone_start_cmp_; const RangeTombstoneStackEndComparator tombstone_end_cmp_; + const InternalKeyComparator* icmp_; const Comparator* ucmp_; std::shared_ptr tombstones_ref_; const FragmentedRangeTombstoneList* tombstones_; - SequenceNumber snapshot_; + SequenceNumber upper_bound_; + SequenceNumber lower_bound_; std::vector::const_iterator pos_; std::vector::const_iterator seq_pos_; mutable std::vector::const_iterator pinned_pos_; diff --git a/db/range_tombstone_fragmenter_test.cc b/db/range_tombstone_fragmenter_test.cc index fc6eddc29..ddd3f7741 100644 --- a/db/range_tombstone_fragmenter_test.cc +++ b/db/range_tombstone_fragmenter_test.cc @@ -29,15 +29,26 @@ std::unique_ptr MakeRangeDelIter( new test::VectorIterator(keys, values)); } +void CheckIterPosition(const RangeTombstone& tombstone, + const FragmentedRangeTombstoneIterator* iter) { + // Test InternalIterator interface. + EXPECT_EQ(tombstone.start_key_, ExtractUserKey(iter->key())); + EXPECT_EQ(tombstone.end_key_, iter->value()); + EXPECT_EQ(tombstone.seq_, iter->seq()); + + // Test FragmentedRangeTombstoneIterator interface. 
+ EXPECT_EQ(tombstone.start_key_, iter->start_key()); + EXPECT_EQ(tombstone.end_key_, iter->end_key()); + EXPECT_EQ(tombstone.seq_, GetInternalKeySeqno(iter->key())); +} + void VerifyFragmentedRangeDels( FragmentedRangeTombstoneIterator* iter, const std::vector& expected_tombstones) { iter->SeekToFirst(); - for (size_t i = 0; i < expected_tombstones.size() && iter->Valid(); - i++, iter->Next()) { - EXPECT_EQ(iter->start_key(), expected_tombstones[i].start_key_); - EXPECT_EQ(iter->value(), expected_tombstones[i].end_key_); - EXPECT_EQ(iter->seq(), expected_tombstones[i].seq_); + for (size_t i = 0; i < expected_tombstones.size(); i++, iter->Next()) { + ASSERT_TRUE(iter->Valid()); + CheckIterPosition(expected_tombstones[i], iter); } EXPECT_FALSE(iter->Valid()); } @@ -46,11 +57,9 @@ void VerifyVisibleTombstones( FragmentedRangeTombstoneIterator* iter, const std::vector& expected_tombstones) { iter->SeekToTopFirst(); - for (size_t i = 0; i < expected_tombstones.size() && iter->Valid(); - i++, iter->TopNext()) { - EXPECT_EQ(iter->start_key(), expected_tombstones[i].start_key_); - EXPECT_EQ(iter->value(), expected_tombstones[i].end_key_); - EXPECT_EQ(iter->seq(), expected_tombstones[i].seq_); + for (size_t i = 0; i < expected_tombstones.size(); i++, iter->TopNext()) { + ASSERT_TRUE(iter->Valid()); + CheckIterPosition(expected_tombstones[i], iter); } EXPECT_FALSE(iter->Valid()); } @@ -69,9 +78,7 @@ void VerifySeek(FragmentedRangeTombstoneIterator* iter, ASSERT_FALSE(iter->Valid()); } else { ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(testcase.expected_position.start_key_, iter->start_key()); - EXPECT_EQ(testcase.expected_position.end_key_, iter->value()); - EXPECT_EQ(testcase.expected_position.seq_, iter->seq()); + CheckIterPosition(testcase.expected_position, iter); } } } @@ -84,9 +91,7 @@ void VerifySeekForPrev(FragmentedRangeTombstoneIterator* iter, ASSERT_FALSE(iter->Valid()); } else { ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(testcase.expected_position.start_key_, 
iter->start_key()); - EXPECT_EQ(testcase.expected_position.end_key_, iter->value()); - EXPECT_EQ(testcase.expected_position.seq_, iter->seq()); + CheckIterPosition(testcase.expected_position, iter); } } } @@ -112,8 +117,10 @@ TEST_F(RangeTombstoneFragmenterTest, NonOverlappingTombstones) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "b", 10}, {"c", "d", 5}}); VerifyMaxCoveringTombstoneSeqnum(&iter, {{"", 0}, {"a", 10}, {"b", 0}, {"c", 5}}); @@ -124,8 +131,10 @@ TEST_F(RangeTombstoneFragmenterTest, OverlappingTombstones) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels( &iter, {{"a", "c", 10}, {"c", "e", 15}, {"c", "e", 10}, {"e", "g", 15}}); VerifyMaxCoveringTombstoneSeqnum(&iter, @@ -138,8 +147,10 @@ TEST_F(RangeTombstoneFragmenterTest, ContiguousTombstones) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels( &iter, {{"a", "c", 10}, {"c", "e", 20}, {"c", "e", 5}, {"e", "g", 15}}); VerifyMaxCoveringTombstoneSeqnum(&iter, @@ -152,8 +163,10 @@ 
TEST_F(RangeTombstoneFragmenterTest, RepeatedStartAndEndKey) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}}); VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"b", 10}, {"c", 0}}); @@ -165,8 +178,10 @@ TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyDifferentEndKeys) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}, @@ -186,8 +201,10 @@ TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyMixedEndKeys) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 30}, {"a", "c", 20}, {"a", "c", 10}, @@ -211,16 +228,16 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); - FragmentedRangeTombstoneIterator iter2(&fragment_list, 9 /* snapshot */, 
- bytewise_icmp); - FragmentedRangeTombstoneIterator iter3(&fragment_list, 7 /* snapshot */, - bytewise_icmp); - FragmentedRangeTombstoneIterator iter4(&fragment_list, 5 /* snapshot */, - bytewise_icmp); - FragmentedRangeTombstoneIterator iter5(&fragment_list, 3 /* snapshot */, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, + 9 /* upper_bound */); + FragmentedRangeTombstoneIterator iter3(&fragment_list, bytewise_icmp, + 7 /* upper_bound */); + FragmentedRangeTombstoneIterator iter4(&fragment_list, bytewise_icmp, + 5 /* upper_bound */); + FragmentedRangeTombstoneIterator iter5(&fragment_list, bytewise_icmp, + 3 /* upper_bound */); for (auto* iter : {&iter1, &iter2, &iter3, &iter4, &iter5}) { VerifyFragmentedRangeDels(iter, {{"a", "c", 10}, {"c", "e", 10}, @@ -234,6 +251,8 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { {"l", "n", 4}}); } + ASSERT_EQ(0, iter1.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter1.upper_bound()); VerifyVisibleTombstones(&iter1, {{"a", "c", 10}, {"c", "e", 10}, {"e", "g", 8}, @@ -243,6 +262,8 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { VerifyMaxCoveringTombstoneSeqnum( &iter1, {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); + ASSERT_EQ(0, iter2.lower_bound()); + ASSERT_EQ(9, iter2.upper_bound()); VerifyVisibleTombstones(&iter2, {{"c", "e", 8}, {"e", "g", 8}, {"g", "i", 6}, @@ -251,6 +272,8 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { VerifyMaxCoveringTombstoneSeqnum( &iter2, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); + ASSERT_EQ(0, iter3.lower_bound()); + ASSERT_EQ(7, iter3.upper_bound()); VerifyVisibleTombstones(&iter3, {{"c", "e", 6}, {"e", "g", 6}, {"g", "i", 6}, @@ -259,10 +282,14 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { VerifyMaxCoveringTombstoneSeqnum( 
&iter3, {{"a", 0}, {"c", 6}, {"e", 6}, {"i", 0}, {"j", 4}, {"m", 4}}); + ASSERT_EQ(0, iter4.lower_bound()); + ASSERT_EQ(5, iter4.upper_bound()); VerifyVisibleTombstones(&iter4, {{"j", "l", 4}, {"l", "n", 4}}); VerifyMaxCoveringTombstoneSeqnum( &iter4, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 4}, {"m", 4}}); + ASSERT_EQ(0, iter5.lower_bound()); + ASSERT_EQ(3, iter5.upper_bound()); VerifyVisibleTombstones(&iter5, {{"j", "l", 2}}); VerifyMaxCoveringTombstoneSeqnum( &iter5, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 2}, {"m", 0}}); @@ -277,8 +304,10 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, 9 /* snapshot */, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + 9 /* upper_bound */); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(9, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"c", "e", 10}, {"c", "e", 8}, @@ -293,6 +322,116 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) { &iter, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); } +TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyForCompaction) { + auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, + {"j", "n", 4}, + {"c", "i", 6}, + {"c", "g", 8}, + {"j", "l", 2}}); + + FragmentedRangeTombstoneList fragment_list( + std::move(range_del_iter), bytewise_icmp, true /* for_compaction */, + {} /* snapshots */); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber /* upper_bound */); + VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, + {"c", "e", 10}, + {"e", "g", 8}, + {"g", "i", 6}, + {"j", "l", 4}, + {"l", "n", 4}}); +} + +TEST_F(RangeTombstoneFragmenterTest, + OverlapAndRepeatedStartKeyForCompactionWithSnapshot) { + auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, + 
{"j", "n", 4}, + {"c", "i", 6}, + {"c", "g", 8}, + {"j", "l", 2}}); + + FragmentedRangeTombstoneList fragment_list( + std::move(range_del_iter), bytewise_icmp, true /* for_compaction */, + {20, 9} /* upper_bounds */); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber /* upper_bound */); + VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, + {"c", "e", 10}, + {"c", "e", 8}, + {"e", "g", 8}, + {"g", "i", 6}, + {"j", "l", 4}, + {"l", "n", 4}}); +} + +TEST_F(RangeTombstoneFragmenterTest, IteratorSplitNoSnapshots) { + auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, + {"j", "n", 4}, + {"c", "i", 6}, + {"c", "g", 8}, + {"j", "l", 2}}); + + FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), + bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber /* upper_bound */); + + auto split_iters = iter.SplitBySnapshot({} /* snapshots */); + ASSERT_EQ(1, split_iters.size()); + + auto* split_iter = split_iters[kMaxSequenceNumber].get(); + ASSERT_EQ(0, split_iter->lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, split_iter->upper_bound()); + VerifyVisibleTombstones(split_iter, {{"a", "c", 10}, + {"c", "e", 10}, + {"e", "g", 8}, + {"g", "i", 6}, + {"j", "l", 4}, + {"l", "n", 4}}); +} + +TEST_F(RangeTombstoneFragmenterTest, IteratorSplitWithSnapshots) { + auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, + {"j", "n", 4}, + {"c", "i", 6}, + {"c", "g", 8}, + {"j", "l", 2}}); + + FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), + bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber /* upper_bound */); + + auto split_iters = iter.SplitBySnapshot({3, 5, 7, 9} /* snapshots */); + ASSERT_EQ(5, split_iters.size()); + + auto* split_iter1 = split_iters[3].get(); + ASSERT_EQ(0, split_iter1->lower_bound()); + ASSERT_EQ(3, split_iter1->upper_bound()); + VerifyVisibleTombstones(split_iter1, {{"j", "l", 
2}}); + + auto* split_iter2 = split_iters[5].get(); + ASSERT_EQ(4, split_iter2->lower_bound()); + ASSERT_EQ(5, split_iter2->upper_bound()); + VerifyVisibleTombstones(split_iter2, {{"j", "l", 4}, {"l", "n", 4}}); + + auto* split_iter3 = split_iters[7].get(); + ASSERT_EQ(6, split_iter3->lower_bound()); + ASSERT_EQ(7, split_iter3->upper_bound()); + VerifyVisibleTombstones(split_iter3, + {{"c", "e", 6}, {"e", "g", 6}, {"g", "i", 6}}); + + auto* split_iter4 = split_iters[9].get(); + ASSERT_EQ(8, split_iter4->lower_bound()); + ASSERT_EQ(9, split_iter4->upper_bound()); + VerifyVisibleTombstones(split_iter4, {{"c", "e", 8}, {"e", "g", 8}}); + + auto* split_iter5 = split_iters[kMaxSequenceNumber].get(); + ASSERT_EQ(10, split_iter5->lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, split_iter5->upper_bound()); + VerifyVisibleTombstones(split_iter5, {{"a", "c", 10}, {"c", "e", 10}}); +} + TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) { // Same tombstones as OverlapAndRepeatedStartKey. auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, @@ -304,8 +443,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); VerifySeek( &iter1, {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); @@ -313,8 +452,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) { &iter1, {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); - FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, + 3 /* upper_bound */); VerifySeek(&iter2, {{"a", {"j", "l", 2}}, {"e", {"j", "l", 2}}, {"l", {}, true /* out of range */}}); @@ -334,8 +473,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekCovered) 
{ FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); VerifySeek( &iter1, {{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}}); @@ -343,8 +482,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekCovered) { &iter1, {{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}}); - FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, + 3 /* upper_bound */); VerifySeek(&iter2, {{"b", {"j", "l", 2}}, {"f", {"j", "l", 2}}, {"m", {}, true /* out of range */}}); @@ -364,8 +503,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekEndKey) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); VerifySeek(&iter1, {{"c", {"c", "e", 10}}, {"g", {"g", "i", 6}}, {"i", {"j", "l", 4}}, @@ -375,8 +514,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekEndKey) { {"i", {"g", "i", 6}}, {"n", {"l", "n", 4}}}); - FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, + 3 /* upper_bound */); VerifySeek(&iter2, {{"c", {"j", "l", 2}}, {"g", {"j", "l", 2}}, {"i", {"j", "l", 2}}, @@ -398,8 +537,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekOutOfBounds) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); 
VerifySeek(&iter, {{"", {"a", "c", 10}}, {"z", {}, true /* out of range */}}); VerifySeekForPrev(&iter, {{"", {}, true /* out of range */}, {"z", {"l", "n", 4}}}); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 974ba4b24..959044a09 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -2412,7 +2412,7 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator( snapshot = read_options.snapshot->GetSequenceNumber(); } return new FragmentedRangeTombstoneIterator( - rep_->fragmented_range_dels, snapshot, rep_->internal_comparator); + rep_->fragmented_range_dels, rep_->internal_comparator, snapshot); } InternalIterator* BlockBasedTable::NewUnfragmentedRangeTombstoneIterator(