From 846e05005d78dfd4276cce6753967cb16930aabb Mon Sep 17 00:00:00 2001
From: sdong
Date: Tue, 1 Oct 2019 11:20:50 -0700
Subject: [PATCH] Revert "Merging iterator to avoid child iterator reseek for
 some cases (#5286)" (#5871)

Summary:
This reverts commit 9fad3e21eb90d215b6719097baba417bc1eeca3c.

Iterator verification in stress tests sometimes fails with the assertion:

table/block_based/block_based_table_reader.cc:2973: void rocksdb::BlockBasedTableIterator<TBlockIter, TValue>::FindBlockForward() [with TBlockIter = rocksdb::DataBlockIter; TValue = rocksdb::Slice]: Assertion `!next_block_is_out_of_bound || user_comparator_.Compare(*read_options_.iterate_upper_bound, index_iter_->user_key()) <= 0' failed.

The failure is likely linked to https://github.com/facebook/rocksdb/pull/5286 together with https://github.com/facebook/rocksdb/pull/5468: the former PR skips some child iterators' seeks, so the upper bound condition there fails to be updated. Strictly speaking, the former PR was merged before the latter one, but the latter one feels like the more important improvement, so I choose to revert the former one for now.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/5871

Differential Revision: D17689196

fbshipit-source-id: 4ded5be68f67bee2782d31a29cb72ea68f59dd8c
---
 HISTORY.md                                   |  2 +-
 db/db_iterator_test.cc                       | 69 --------------------
 db/version_set.cc                            |  3 +-
 table/block_based/block_based_table_reader.h |  3 +-
 table/internal_iterator.h                    |  5 +-
 table/iterator_wrapper.h                     |  1 -
 table/merging_iterator.cc                    | 33 ++--
 7 files changed, 12 insertions(+), 104 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index c1fa45ad4..2431688f5 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -6,6 +6,7 @@
 * Fix bloom filter lookups by the MultiGet batching API when BlockBasedTableOptions::whole_key_filtering is false, by checking that a key is in the prefix_extractor domain and extracting the prefix before looking up.
 * Fix a bug in file ingestion caused by incorrect file number allocation when the number of column families involved in the ingestion exceeds 2.
 * Fix a bug when format_version=3, partitioned filters, and prefix search are used in conjunction. The bug could result in Seek::(prefix) returning NotFound for an existing prefix.
+* Revert the feature "Merging iterator to avoid child iterator reseek for some cases (#5286)" since it might cause wrong results when reseek happens with a different iterator upper bound.
 ### New Features
 * Introduced DBOptions::max_write_batch_group_size_bytes to configure maximum limit on number of bytes that are written in a single batch of WAL or memtable write. It is followed when the leader write size is larger than 1/8 of this limit.
 * VerifyChecksum() by default will issue readahead. Allow ReadOptions to be passed in to those functions to override the readahead size. For checksum verification before external SST file ingestion, a new option, IngestExternalFileOptions.verify_checksums_readahead_size, is added for this readahead setting.
@@ -108,7 +109,6 @@
 * Fix a bug caused by secondary not skipping the beginning of new MANIFEST.
 * On DB open, delete WAL trash files left behind in wal_dir
 
-
 ## 6.2.0 (4/30/2019)
 ### New Features
 * Add an option `strict_bytes_per_sync` that causes a file-writing thread to block rather than exceed the limit on bytes pending writeback specified by `bytes_per_sync` or `wal_bytes_per_sync`.
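The failure mode the summary describes can be illustrated with a minimal standalone sketch. This is not RocksDB code: ChildIter, MergingSeekWithFastPath, and every other name below are hypothetical stand-ins. The idea is that a child iterator refreshes bound-dependent state only when it is actually seeked (the way BlockBasedTableIterator caches its out-of-bound decision), so a merging-level fast path that skips the child Seek() leaves that state stale once the caller changes the iterate upper bound between Seek() calls.

// Minimal sketch of the stale-bound scenario; all names are hypothetical.
#include <algorithm>
#include <cassert>
#include <iostream>
#include <string>
#include <vector>

struct ChildIter {
  std::vector<std::string> keys;    // sorted keys in this "table"
  size_t pos = 0;
  bool valid = false;
  std::string upper_bound_at_seek;  // bound captured when Seek() last ran

  void Seek(const std::string& target, const std::string& upper_bound) {
    // Bound-dependent state is refreshed only when a real Seek() happens,
    // loosely mirroring how a table iterator caches out-of-bound decisions.
    upper_bound_at_seek = upper_bound;
    pos = std::lower_bound(keys.begin(), keys.end(), target) - keys.begin();
    valid = pos < keys.size() && keys[pos] < upper_bound;
  }
  const std::string& key() const { return keys[pos]; }
};

// A simplified version of the reverted fast path: skip the child Seek()
// when the child already sits at or past the target.
void MergingSeekWithFastPath(ChildIter& child, const std::string& target,
                             const std::string& upper_bound) {
  bool can_skip = child.valid && child.key() >= target;
  if (!can_skip) {
    child.Seek(target, upper_bound);
  }
  // else: the child silently keeps state derived from an earlier bound.
}

int main() {
  ChildIter child{{"a", "c", "e"}};
  child.Seek("b", /*upper_bound=*/"z");  // lands on "c"; bound "z" captured

  // The caller narrows the bound and reseeks to a non-decreasing target,
  // the pattern produced by changing bounds between Seek() calls.
  MergingSeekWithFastPath(child, "c", /*upper_bound=*/"d");

  // The fast path skipped the Seek(), so the child still reasons with the
  // old bound "z"; this stale state is what the quoted assertion catches.
  assert(child.upper_bound_at_seek == "z");
  std::cout << "child's captured bound: " << child.upper_bound_at_seek
            << ", caller's actual bound: d\n";
  return 0;
}

With the revert in place, every Seek() reaches each child, so a child can never carry bound state over from an earlier call.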
diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc
index ed12a8801..00207461e 100644
--- a/db/db_iterator_test.cc
+++ b/db/db_iterator_test.cc
@@ -2692,75 +2692,6 @@ TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
   SyncPoint::GetInstance()->DisableProcessing();
 }
 
-TEST_P(DBIteratorTest, AvoidReseekChildIterator) {
-  Options options = CurrentOptions();
-  options.compression = CompressionType::kNoCompression;
-  BlockBasedTableOptions table_options;
-  table_options.block_size = 800;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  Reopen(options);
-
-  Random rnd(301);
-  std::string random_str = RandomString(&rnd, 180);
-
-  ASSERT_OK(Put("1", random_str));
-  ASSERT_OK(Put("2", random_str));
-  ASSERT_OK(Put("3", random_str));
-  ASSERT_OK(Put("4", random_str));
-  ASSERT_OK(Put("8", random_str));
-  ASSERT_OK(Put("9", random_str));
-  ASSERT_OK(Flush());
-  ASSERT_OK(Put("5", random_str));
-  ASSERT_OK(Put("6", random_str));
-  ASSERT_OK(Put("7", random_str));
-  ASSERT_OK(Flush());
-
-  // These two keys will be kept in memtable.
-  ASSERT_OK(Put("0", random_str));
-  ASSERT_OK(Put("8", random_str));
-
-  int num_iter_wrapper_seek = 0;
-  SyncPoint::GetInstance()->SetCallBack(
-      "IteratorWrapper::Seek:0",
-      [&](void* /*arg*/) { num_iter_wrapper_seek++; });
-  SyncPoint::GetInstance()->EnableProcessing();
-  {
-    std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
-    iter->Seek("1");
-    ASSERT_TRUE(iter->Valid());
-    // DBIter always wraps internal iterator with IteratorWrapper,
-    // and in merging iterator each child iterator will be wrapped
-    // with IteratorWrapper.
-    ASSERT_EQ(4, num_iter_wrapper_seek);
-
-    // child position: 1 and 5
-    num_iter_wrapper_seek = 0;
-    iter->Seek("2");
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(3, num_iter_wrapper_seek);
-
-    // child position: 2 and 5
-    num_iter_wrapper_seek = 0;
-    iter->Seek("6");
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(4, num_iter_wrapper_seek);
-
-    // child position: 8 and 6
-    num_iter_wrapper_seek = 0;
-    iter->Seek("7");
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(3, num_iter_wrapper_seek);
-
-    // child position: 8 and 7
-    num_iter_wrapper_seek = 0;
-    iter->Seek("5");
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(4, num_iter_wrapper_seek);
-  }
-
-  SyncPoint::GetInstance()->DisableProcessing();
-}
-
 // MyRocks may change iterate bounds before seek. Simply test to make sure such
 // usage doesn't break iterator.
 TEST_P(DBIteratorTest, IterateBoundChangedBeforeSeek) {
diff --git a/db/version_set.cc b/db/version_set.cc
index 61c67b42a..fc9316a3d 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -861,8 +861,7 @@ class LevelIterator final : public InternalIterator {
                 bool skip_filters, int level, RangeDelAggregator* range_del_agg,
                 const std::vector<AtomicCompactionUnitBoundary>*
                     compaction_boundaries = nullptr)
-      : InternalIterator(false),
-        table_cache_(table_cache),
+      : table_cache_(table_cache),
         read_options_(read_options),
         env_options_(env_options),
         icomparator_(icomparator),
diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h
index b18dccd11..dd496d354 100644
--- a/table/block_based/block_based_table_reader.h
+++ b/table/block_based/block_based_table_reader.h
@@ -615,8 +615,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
                           const SliceTransform* prefix_extractor,
                           BlockType block_type, TableReaderCaller caller,
                           size_t compaction_readahead_size = 0)
-      : InternalIteratorBase<TValue>(false),
-        table_(table),
+      : table_(table),
         read_options_(read_options),
         icomp_(icomp),
         user_comparator_(icomp.user_comparator()),
diff --git a/table/internal_iterator.h b/table/internal_iterator.h
index adcccf795..d7940eeff 100644
--- a/table/internal_iterator.h
+++ b/table/internal_iterator.h
@@ -25,8 +25,8 @@ struct IterateResult {
 template <class TValue>
 class InternalIteratorBase : public Cleanable {
  public:
-  InternalIteratorBase() : is_mutable_(true) {}
-  InternalIteratorBase(bool _is_mutable) : is_mutable_(_is_mutable) {}
+  InternalIteratorBase() {}
+
   // No copying allowed
   InternalIteratorBase(const InternalIteratorBase&) = delete;
   InternalIteratorBase& operator=(const InternalIteratorBase&) = delete;
@@ -148,7 +148,6 @@ class InternalIteratorBase : public Cleanable {
   virtual Status GetProperty(std::string /*prop_name*/, std::string* /*prop*/) {
     return Status::NotSupported("");
   }
-  bool is_mutable() const { return is_mutable_; }
 
  protected:
   void SeekForPrevImpl(const Slice& target, const Comparator* cmp) {
diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h
index d6648bc38..f8fdde565 100644
--- a/table/iterator_wrapper.h
+++ b/table/iterator_wrapper.h
@@ -74,7 +74,6 @@ class IteratorWrapperBase {
   }
   void Prev() { assert(iter_); iter_->Prev(); Update(); }
   void Seek(const Slice& k) {
-    TEST_SYNC_POINT("IteratorWrapper::Seek:0");
     assert(iter_);
     iter_->Seek(k);
     Update();
diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc
index 62d5ce1ee..189850887 100644
--- a/table/merging_iterator.cc
+++ b/table/merging_iterator.cc
@@ -109,37 +109,18 @@ class MergingIterator : public InternalIterator {
   }
 
   void Seek(const Slice& target) override {
-    bool is_increasing_reseek = false;
-    if (current_ != nullptr && direction_ == kForward && status_.ok() &&
-        !prefix_seek_mode_ && comparator_->Compare(target, key()) >= 0) {
-      is_increasing_reseek = true;
-    }
     ClearHeaps();
     status_ = Status::OK();
     for (auto& child : children_) {
-      // If upper bound never changes, we can skip Seek() for
-      // the !Valid() case too, but people do hack the code to change
-      // upper bound between Seek(), so it's not a good idea to break
-      // the API.
-      // If DBIter is used on top of merging iterator, we probably
-      // can skip mutable child iterators if they are invalid too,
-      // but it's a less clean API. We can optimize for it later if
-      // needed.
-      if (!is_increasing_reseek || !child.Valid() ||
-          comparator_->Compare(target, child.key()) > 0 ||
-          child.iter()->is_mutable()) {
-        PERF_TIMER_GUARD(seek_child_seek_time);
+      PERF_TIMER_GUARD(seek_child_seek_time);
 
-        child.Seek(target);
+      child.Seek(target);
 
-        PERF_COUNTER_ADD(seek_child_seek_count, 1);
-      }
-      {
-        // Strictly, we timed slightly more than min heap operation,
-        // but these operations are very cheap.
-        PERF_TIMER_GUARD(seek_min_heap_time);
-        AddToMinHeapOrCheckStatus(&child);
-      }
+      PERF_COUNTER_ADD(seek_child_seek_count, 1);
+      // Strictly, we timed slightly more than min heap operation,
+      // but these operations are very cheap.
+      PERF_TIMER_GUARD(seek_min_heap_time);
+      AddToMinHeapOrCheckStatus(&child);
     }
     direction_ = kForward;
     {
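For reference, here is a minimal sketch of the behavior this hunk restores. SimpleChild and SimpleMergingIter are hypothetical stand-ins, not the RocksDB classes, and a linear scan replaces the real min-heap; the point is only that Seek() unconditionally reseeks every child, so no child can carry position or bound state over from an earlier Seek().

// Minimal sketch of the restored merging Seek(); hypothetical names.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// One sorted run of keys, standing in for a memtable or SST file iterator.
struct SimpleChild {
  std::vector<std::string> keys;  // sorted
  size_t pos = 0;
  bool Valid() const { return pos < keys.size(); }
  const std::string& key() const { return keys[pos]; }
  void Seek(const std::string& target) {
    pos = std::lower_bound(keys.begin(), keys.end(), target) - keys.begin();
  }
  void Next() { ++pos; }
};

class SimpleMergingIter {
 public:
  explicit SimpleMergingIter(std::vector<SimpleChild> children)
      : children_(std::move(children)) {}

  // Mirrors the restored behavior: every child is reseeked, no fast path.
  void Seek(const std::string& target) {
    for (auto& child : children_) {
      child.Seek(target);
    }
    PickCurrent();
  }

  bool Valid() const { return current_ != nullptr; }
  const std::string& key() const { return current_->key(); }
  void Next() {
    current_->Next();
    PickCurrent();
  }

 private:
  // A linear scan stands in for the min-heap the real MergingIterator uses.
  void PickCurrent() {
    current_ = nullptr;
    for (auto& child : children_) {
      if (child.Valid() &&
          (current_ == nullptr || child.key() < current_->key())) {
        current_ = &child;
      }
    }
  }

  std::vector<SimpleChild> children_;
  SimpleChild* current_ = nullptr;
};

int main() {
  // Key layout echoing the deleted test: two runs whose ranges interleave.
  SimpleMergingIter iter({SimpleChild{{"1", "2", "3", "4", "8", "9"}},
                          SimpleChild{{"5", "6", "7"}}});
  for (iter.Seek("2"); iter.Valid() && iter.key() <= "6"; iter.Next()) {
    std::cout << iter.key() << " ";  // prints: 2 3 4 5 6
  }
  std::cout << "\n";
  return 0;
}

The real MergingIterator keeps children in min/max heaps rather than scanning linearly; the revert only changes whether a child's Seek() can be skipped, not how the heap is maintained.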