Reduce iterator key comparison for upper/lower bound check (#5111)

Summary:
Previously, if an iterator upper/lower bound was present, `DBIter` would check the bound for every key. This patch turns the check into a per-file or per-data-block check when applicable, by checking against either the file's largest/smallest key or the block index key.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5111

Differential Revision: D15330061

Pulled By: siying

fbshipit-source-id: 8a653fe3cd50d94d81eb2d13b087326c58ee2024
main
yiwu-arbug 5 years ago committed by Facebook Github Bot
parent a13026fb2f
commit f3a7847598
  1. 1
      HISTORY.md
  2. 4
      db/db_iter.cc
  3. 40
      db/version_set.cc
  4. 20
      table/block_based_table_reader.cc
  5. 9
      table/block_based_table_reader.h
  6. 25
      table/internal_iterator.h
  7. 22
      table/iterator_wrapper.h
  8. 24
      table/merging_iterator.cc

@ -11,6 +11,7 @@
* Reduce binary search when iterator reseek into the same data block.
* DBIter::Next() can skip user key checking if previous entry's seqnum is 0.
* Merging iterator to avoid child iterator reseek for some cases
* Reduce iterator key comparison for upper/lower bound check.
### Bug Fixes

@ -467,7 +467,7 @@ inline bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check)
is_key_seqnum_zero_ = (ikey_.sequence == 0);
if (iterate_upper_bound_ != nullptr &&
if (iterate_upper_bound_ != nullptr && iter_.MayBeOutOfUpperBound() &&
user_comparator_.Compare(ikey_.user_key, *iterate_upper_bound_) >= 0) {
break;
}
@ -859,7 +859,7 @@ void DBIter::PrevInternal() {
return;
}
if (iterate_lower_bound_ != nullptr &&
if (iterate_lower_bound_ != nullptr && iter_.MayBeOutOfLowerBound() &&
user_comparator_.Compare(saved_key_.GetUserKey(),
*iterate_lower_bound_) < 0) {
// We've iterated earlier than the user-specified lower bound.

@ -887,7 +887,7 @@ class LevelIterator final : public InternalIterator {
void SeekToFirst() override;
void SeekToLast() override;
void Next() final override;
bool NextAndGetResult(Slice* ret_key) override;
bool NextAndGetResult(IterateResult* result) override;
void Prev() override;
bool Valid() const override { return file_iter_.Valid(); }
@ -895,23 +895,38 @@ class LevelIterator final : public InternalIterator {
assert(Valid());
return file_iter_.key();
}
Slice value() const override {
assert(Valid());
return file_iter_.value();
}
// Reports the status of the wrapped file iterator. When file_iter_ has no
// underlying iterator (file_iter_.iter() is null), OK is returned instead.
Status status() const override {
  if (file_iter_.iter() == nullptr) {
    return Status::OK();
  }
  return file_iter_.status();
}
// Tells the caller whether the current key may be smaller than the iterate
// lower bound. The per-file flag may_be_out_of_lower_bound_ is consulted
// first; only if it is set does the file iterator get asked.
// REQUIRES: Valid()
inline bool MayBeOutOfLowerBound() override {
  assert(Valid());
  if (!may_be_out_of_lower_bound_) {
    return false;
  }
  return file_iter_.MayBeOutOfLowerBound();
}
// Relays the upper-bound hint of the current file iterator.
// REQUIRES: Valid()
inline bool MayBeOutOfUpperBound() override {
  assert(Valid());
  const bool file_hint = file_iter_.MayBeOutOfUpperBound();
  return file_hint;
}
// Remembers the pinned-iterators manager and, if a file iterator is currently
// open, hands the same manager to it.
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
  pinned_iters_mgr_ = pinned_iters_mgr;
  if (!file_iter_.iter()) {
    // No file iterator yet; nothing to forward to.
    return;
  }
  file_iter_.SetPinnedItersMgr(pinned_iters_mgr);
}
// The key is reported as pinned only when a manager is set with pinning
// enabled, a file iterator exists, and that file iterator reports its key
// as pinned.
bool IsKeyPinned() const override {
  if (!pinned_iters_mgr_ || !pinned_iters_mgr_->PinningEnabled()) {
    return false;
  }
  if (!file_iter_.iter()) {
    return false;
  }
  return file_iter_.IsKeyPinned();
}
bool IsValuePinned() const override {
return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
file_iter_.iter() && file_iter_.IsValuePinned();
@ -954,12 +969,16 @@ class LevelIterator final : public InternalIterator {
smallest_compaction_key = (*compaction_boundaries_)[file_index_].smallest;
largest_compaction_key = (*compaction_boundaries_)[file_index_].largest;
}
may_be_out_of_lower_bound_ =
read_options_.iterate_lower_bound != nullptr &&
user_comparator_.Compare(ExtractUserKey(file_smallest_key(file_index_)),
*read_options_.iterate_lower_bound) < 0;
return table_cache_->NewIterator(
read_options_, env_options_, icomparator_, *file_meta.file_metadata,
range_del_agg_, prefix_extractor_,
nullptr /* don't need reference to table */,
file_read_hist_, for_compaction_, nullptr /* arena */, skip_filters_,
level_, smallest_compaction_key, largest_compaction_key);
nullptr /* don't need reference to table */, file_read_hist_,
for_compaction_, nullptr /* arena */, skip_filters_, level_,
smallest_compaction_key, largest_compaction_key);
}
TableCache* table_cache_;
@ -975,6 +994,7 @@ class LevelIterator final : public InternalIterator {
bool should_sample_;
bool for_compaction_;
bool skip_filters_;
bool may_be_out_of_lower_bound_ = true;
size_t file_index_;
int level_;
RangeDelAggregator* range_del_agg_;
@ -1043,11 +1063,12 @@ void LevelIterator::SeekToLast() {
void LevelIterator::Next() { NextImpl(); }
bool LevelIterator::NextAndGetResult(Slice* ret_key) {
bool LevelIterator::NextAndGetResult(IterateResult* result) {
NextImpl();
bool is_valid = Valid();
if (is_valid) {
*ret_key = key();
result->key = key();
result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
}
return is_valid;
}
@ -4278,10 +4299,9 @@ Status VersionSet::Recover(
", last_sequence is %" PRIu64 ", log_number is %" PRIu64
",prev_log_number is %" PRIu64 ",max_column_family is %" PRIu32
",min_log_number_to_keep is %" PRIu64 "\n",
manifest_path.c_str(), manifest_file_number_,
next_file_number_.load(), last_sequence_.load(), log_number,
prev_log_number_, column_family_set_->GetMaxColumnFamily(),
min_log_number_to_keep_2pc());
manifest_path.c_str(), manifest_file_number_, next_file_number_.load(),
last_sequence_.load(), log_number, prev_log_number_,
column_family_set_->GetMaxColumnFamily(), min_log_number_to_keep_2pc());
for (auto cfd : *column_family_set_) {
if (cfd->IsDropped()) {

@ -2446,11 +2446,12 @@ void BlockBasedTableIterator<TBlockIter, TValue>::Next() {
template <class TBlockIter, typename TValue>
bool BlockBasedTableIterator<TBlockIter, TValue>::NextAndGetResult(
Slice* ret_key) {
IterateResult* result) {
Next();
bool is_valid = Valid();
if (is_valid) {
*ret_key = key();
result->key = key();
result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
}
return is_valid;
}
@ -2531,6 +2532,11 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
key_includes_seq_, index_key_is_full_,
/* get_context */ nullptr, s, prefetch_buffer_.get());
block_iter_points_to_real_block_ = true;
if (read_options_.iterate_upper_bound != nullptr) {
data_block_within_upper_bound_ =
(user_comparator_.Compare(*read_options_.iterate_upper_bound,
index_iter_->user_key()) > 0);
}
}
}
@ -2543,13 +2549,9 @@ void BlockBasedTableIterator<TBlockIter, TValue>::FindBlockForward() {
return;
}
// Whether next data block is out of upper bound, if there is one.
bool next_block_is_out_of_bound = false;
if (read_options_.iterate_upper_bound != nullptr &&
block_iter_points_to_real_block_) {
next_block_is_out_of_bound =
(user_comparator_.Compare(*read_options_.iterate_upper_bound,
index_iter_->user_key()) <= 0);
}
bool next_block_is_out_of_bound =
read_options_.iterate_upper_bound != nullptr &&
block_iter_points_to_real_block_ && !data_block_within_upper_bound_;
ResetDataIter();
index_iter_->Next();
if (next_block_is_out_of_bound) {

@ -588,7 +588,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
void SeekToFirst() override;
void SeekToLast() override;
void Next() final override;
bool NextAndGetResult(Slice* ret_key) override;
bool NextAndGetResult(IterateResult* result) override;
void Prev() override;
bool Valid() const override {
return !is_out_of_bound_ && block_iter_points_to_real_block_ &&
@ -619,6 +619,11 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
// Whether iterator invalidated for being out of bound.
bool IsOutOfBound() override { return is_out_of_bound_; }
// A key may be at or past the iterate upper bound unless the current data
// block has been recorded as lying within that bound.
// REQUIRES: Valid()
inline bool MayBeOutOfUpperBound() override {
  assert(Valid());
  if (data_block_within_upper_bound_) {
    return false;
  }
  return true;
}
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
pinned_iters_mgr_ = pinned_iters_mgr;
}
@ -680,6 +685,8 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
TBlockIter block_iter_;
bool block_iter_points_to_real_block_;
bool is_out_of_bound_ = false;
// Whether the current data block is fully within the iterate upper bound.
bool data_block_within_upper_bound_ = false;
bool check_filter_;
// TODO(Zhongyi): pick a better name
bool need_upper_bound_check_;

@ -17,6 +17,11 @@ namespace rocksdb {
class PinnedIteratorsManager;
// Output of InternalIteratorBase::NextAndGetResult(): what the iterator
// reports about the entry it landed on after advancing.
struct IterateResult {
// Key of the entry the iterator is positioned at.
Slice key;
// True if the key may be larger than or equal to iterate_upper_bound, i.e.
// the caller still has to compare the key against the bound itself.
bool may_be_out_of_upper_bound;
};
template <class TValue>
class InternalIteratorBase : public Cleanable {
public:
@ -55,11 +60,20 @@ class InternalIteratorBase : public Cleanable {
// REQUIRES: Valid()
virtual void Next() = 0;
virtual bool NextAndGetResult(Slice* ret_key) {
// Moves to the next entry in the source, and returns the result. Iterator
// implementation should override this method to help methods inline better,
// or when MayBeOutOfUpperBound() is non-trivial.
// REQUIRES: Valid()
virtual bool NextAndGetResult(IterateResult* result) {
Next();
bool is_valid = Valid();
if (is_valid) {
*ret_key = key();
result->key = key();
// Default may_be_out_of_upper_bound to true to avoid unnecessary virtual
// call. If an implementation has non-trivial MayBeOutOfUpperBound(),
// it should also override NextAndGetResult().
result->may_be_out_of_upper_bound = true;
assert(MayBeOutOfUpperBound());
}
return is_valid;
}
@ -94,6 +108,13 @@ class InternalIteratorBase : public Cleanable {
// upper bound
virtual bool IsOutOfBound() { return false; }
// Keys returned from this iterator can be smaller than iterate_lower_bound.
virtual bool MayBeOutOfLowerBound() { return true; }
// Keys returned from this iterator can be larger than or equal to
// iterate_upper_bound.
virtual bool MayBeOutOfUpperBound() { return true; }
// Pass the PinnedIteratorsManager to the Iterator, most Iterators don't
// communicate with PinnedIteratorsManager so default implementation is no-op
// but for Iterators that need to communicate with PinnedIteratorsManager

@ -56,7 +56,10 @@ class IteratorWrapperBase {
// Iterator interface methods
bool Valid() const { return valid_; }
Slice key() const { assert(Valid()); return key_; }
Slice key() const {
assert(Valid());
return result_.key;
}
TValue value() const {
assert(Valid());
return iter_->value();
@ -65,7 +68,7 @@ class IteratorWrapperBase {
Status status() const { assert(iter_); return iter_->status(); }
void Next() {
assert(iter_);
valid_ = iter_->NextAndGetResult(&key_);
valid_ = iter_->NextAndGetResult(&result_);
assert(!valid_ || iter_->status().ok());
}
void Prev() { assert(iter_); iter_->Prev(); Update(); }
@ -83,6 +86,16 @@ class IteratorWrapperBase {
void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); }
void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); }
// Asks the wrapped iterator whether the current key may be below the
// iterate lower bound. The answer is fetched from iter_ on each call.
// REQUIRES: Valid()
bool MayBeOutOfLowerBound() {
  assert(Valid());
  const bool maybe_below = iter_->MayBeOutOfLowerBound();
  return maybe_below;
}
// Returns the upper-bound hint stored in result_, without calling into the
// wrapped iterator.
// REQUIRES: Valid()
bool MayBeOutOfUpperBound() {
  assert(Valid());
  const bool cached_hint = result_.may_be_out_of_upper_bound;
  return cached_hint;
}
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) {
assert(iter_);
iter_->SetPinnedItersMgr(pinned_iters_mgr);
@ -100,14 +113,15 @@ class IteratorWrapperBase {
void Update() {
valid_ = iter_->Valid();
if (valid_) {
key_ = iter_->key();
assert(iter_->status().ok());
result_.key = iter_->key();
result_.may_be_out_of_upper_bound = true;
}
}
InternalIteratorBase<TValue>* iter_;
IterateResult result_;
bool valid_;
Slice key_;
};
using IteratorWrapper = IteratorWrapperBase<Slice>;

@ -227,6 +227,16 @@ class MergingIterator : public InternalIterator {
current_ = CurrentForward();
}
// Advances the merging iterator and, if it is still valid afterwards, fills
// *result with the new key and its upper-bound hint. Returns the validity;
// *result is left untouched when the iterator became invalid.
bool NextAndGetResult(IterateResult* result) override {
  Next();
  if (!Valid()) {
    return false;
  }
  result->key = key();
  result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
  return true;
}
void Prev() override {
assert(Valid());
// Ensure that all children are positioned before key().
@ -296,6 +306,20 @@ class MergingIterator : public InternalIterator {
return current_->value();
}
// Here we simply relay the MayBeOutOfLowerBound/MayBeOutOfUpperBound result
// from the current child iterator. Potentially, as long as one of the child
// iterators reports that being out of bound is not possible, we know the
// current key is within bound.
// Lower-bound hint for the current key, taken from the child iterator that
// current_ points at.
// REQUIRES: Valid()
bool MayBeOutOfLowerBound() override {
  assert(Valid());
  const bool child_hint = current_->MayBeOutOfLowerBound();
  return child_hint;
}
// Upper-bound hint for the current key, taken from the child iterator that
// current_ points at.
// REQUIRES: Valid()
bool MayBeOutOfUpperBound() override {
  assert(Valid());
  const bool child_hint = current_->MayBeOutOfUpperBound();
  return child_hint;
}
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
pinned_iters_mgr_ = pinned_iters_mgr;
for (auto& child : children_) {

Loading…
Cancel
Save