From 9fad3e21eb90d215b6719097baba417bc1eeca3c Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Thu, 9 May 2019 14:15:12 -0700 Subject: [PATCH] Merging iterator to avoid child iterator reseek for some cases (#5286) Summary: When reseek happens in merging iterator, reseeking a child iterator can be avoided if: (1) the iterator represents imutable data (2) reseek() to a larger key than the current key (3) the current key of the child iterator is larger than the seek key because it is guaranteed that the result will fall into the same position. This optimization will be useful for use cases where users keep seeking to keys nearby in ascending order. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5286 Differential Revision: D15283635 Pulled By: siying fbshipit-source-id: 35f79ffd5ce3609146faa8cd55f2bfd733502f83 --- HISTORY.md | 1 + db/db_iterator_test.cc | 69 ++++++++++++++++++++++++++++++++ db/version_set.cc | 3 +- table/block_based_table_reader.h | 3 +- table/internal_iterator.h | 5 ++- table/iterator_wrapper.h | 7 +++- table/merging_iterator.cc | 19 ++++++++- 7 files changed, 101 insertions(+), 6 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index fb1db417e..99235a33d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,6 +9,7 @@ ### Performance Improvements * Reduce binary search when iterator reseek into the same data block. * DBIter::Next() can skip user key checking if previous entry's seqnum is 0. +* Merging iterator to avoid child iterator reseek for some cases ## 6.2.0 (4/30/2019) ### New Features diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc index 78b387577..cc1af2e0a 100644 --- a/db/db_iterator_test.cc +++ b/db/db_iterator_test.cc @@ -2548,6 +2548,75 @@ TEST_P(DBIteratorTest, AvoidReseekLevelIterator) { SyncPoint::GetInstance()->DisableProcessing(); } +TEST_P(DBIteratorTest, AvoidReseekChildIterator) { + Options options = CurrentOptions(); + options.compression = CompressionType::kNoCompression; + BlockBasedTableOptions table_options; + table_options.block_size = 800; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + Reopen(options); + + Random rnd(301); + std::string random_str = RandomString(&rnd, 180); + + ASSERT_OK(Put("1", random_str)); + ASSERT_OK(Put("2", random_str)); + ASSERT_OK(Put("3", random_str)); + ASSERT_OK(Put("4", random_str)); + ASSERT_OK(Put("8", random_str)); + ASSERT_OK(Put("9", random_str)); + ASSERT_OK(Flush()); + ASSERT_OK(Put("5", random_str)); + ASSERT_OK(Put("6", random_str)); + ASSERT_OK(Put("7", random_str)); + ASSERT_OK(Flush()); + + // These two keys will be kept in memtable. + ASSERT_OK(Put("0", random_str)); + ASSERT_OK(Put("8", random_str)); + + int num_iter_wrapper_seek = 0; + SyncPoint::GetInstance()->SetCallBack( + "IteratorWrapper::Seek:0", + [&](void* /*arg*/) { num_iter_wrapper_seek++; }); + SyncPoint::GetInstance()->EnableProcessing(); + { + std::unique_ptr iter(NewIterator(ReadOptions())); + iter->Seek("1"); + ASSERT_TRUE(iter->Valid()); + // DBIter always wraps internal iterator with IteratorWrapper, + // and in merging iterator each child iterator will be wrapped + // with IteratorWrapper. + ASSERT_EQ(4, num_iter_wrapper_seek); + + // child position: 1 and 5 + num_iter_wrapper_seek = 0; + iter->Seek("2"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(3, num_iter_wrapper_seek); + + // child position: 2 and 5 + num_iter_wrapper_seek = 0; + iter->Seek("6"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(4, num_iter_wrapper_seek); + + // child position: 8 and 6 + num_iter_wrapper_seek = 0; + iter->Seek("7"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(3, num_iter_wrapper_seek); + + // child position: 8 and 7 + num_iter_wrapper_seek = 0; + iter->Seek("5"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(4, num_iter_wrapper_seek); + } + + SyncPoint::GetInstance()->DisableProcessing(); +} + INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest, testing::Values(true, false)); diff --git a/db/version_set.cc b/db/version_set.cc index 8463a5aa7..84302556e 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -896,7 +896,8 @@ class LevelIterator final : public InternalIterator { bool skip_filters, int level, RangeDelAggregator* range_del_agg, const std::vector* compaction_boundaries = nullptr) - : table_cache_(table_cache), + : InternalIterator(false), + table_cache_(table_cache), read_options_(read_options), env_options_(env_options), icomparator_(icomparator), diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 1fcc8cbfa..74d2caeb2 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -590,7 +590,8 @@ class BlockBasedTableIterator : public InternalIteratorBase { bool key_includes_seq = true, bool index_key_is_full = true, bool for_compaction = false) - : table_(table), + : InternalIteratorBase(false), + table_(table), read_options_(read_options), icomp_(icomp), user_comparator_(icomp.user_comparator()), diff --git a/table/internal_iterator.h b/table/internal_iterator.h index 6b713e7b9..8f1cc9dd6 100644 --- a/table/internal_iterator.h +++ b/table/internal_iterator.h @@ -20,7 +20,8 @@ class PinnedIteratorsManager; template class InternalIteratorBase : public Cleanable { public: - InternalIteratorBase() {} + InternalIteratorBase() : is_mutable_(true) {} + InternalIteratorBase(bool _is_mutable) : is_mutable_(_is_mutable) {} virtual ~InternalIteratorBase() {} // An iterator is either positioned at a key/value pair, or @@ -119,6 +120,7 @@ class InternalIteratorBase : public Cleanable { virtual Status GetProperty(std::string /*prop_name*/, std::string* /*prop*/) { return Status::NotSupported(""); } + bool is_mutable() const { return is_mutable_; } protected: void SeekForPrevImpl(const Slice& target, const Comparator* cmp) { @@ -130,6 +132,7 @@ class InternalIteratorBase : public Cleanable { Prev(); } } + bool is_mutable_; private: // No copying allowed diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index fc5eb2613..a570e53c1 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -69,7 +69,12 @@ class IteratorWrapperBase { assert(!valid_ || iter_->status().ok()); } void Prev() { assert(iter_); iter_->Prev(); Update(); } - void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); } + void Seek(const Slice& k) { + TEST_SYNC_POINT("IteratorWrapper::Seek:0"); + assert(iter_); + iter_->Seek(k); + Update(); + } void SeekForPrev(const Slice& k) { assert(iter_); iter_->SeekForPrev(k); diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index bd4a186b3..e5df6bdf6 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -127,14 +127,29 @@ class MergingIterator : public InternalIterator { } void Seek(const Slice& target) override { + bool is_increasing_reseek = false; + if (current_ != nullptr && direction_ == kForward && status_.ok() && + comparator_->Compare(target, key()) >= 0) { + is_increasing_reseek = true; + } ClearHeaps(); status_ = Status::OK(); for (auto& child : children_) { - { + // If upper bound never changes, we can skip Seek() for + // the !Valid() case too, but people do hack the code to change + // upper bound between Seek(), so it's not a good idea to break + // the API. + // If DBIter is used on top of merging iterator, we probably + // can skip mutable child iterators if they are invalid too, + // but it's a less clean API. We can optimize for it later if + // needed. + if (!is_increasing_reseek || !child.Valid() || + comparator_->Compare(target, child.key()) > 0 || + child.iter()->is_mutable()) { PERF_TIMER_GUARD(seek_child_seek_time); child.Seek(target); + PERF_COUNTER_ADD(seek_child_seek_count, 1); } - PERF_COUNTER_ADD(seek_child_seek_count, 1); if (child.Valid()) { assert(child.status().ok());