From d719b095dc8d6650c67084fb618a29c142f81437 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Tue, 26 Apr 2016 12:41:07 -0700 Subject: [PATCH] Introduce PinnedIteratorsManager (Reduce PinData() overhead / Refactor PinData) Summary: While trying to reuse PinData() / ReleasePinnedData() .. to optimize away some memcpys I realized that there is a significant overhead for using PinData() / ReleasePinnedData if they were called many times. This diff refactor the pinning logic by introducing PinnedIteratorsManager a centralized component that will be created once and will be notified whenever we need to Pin an Iterator. This implementation have much less overhead than the original implementation Test Plan: make check -j64 COMPILE_WITH_ASAN=1 make check -j64 Reviewers: yhchiang, sdong, andrewkr Reviewed By: andrewkr Subscribers: andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D56493 --- db/db_iter.cc | 61 +++++++------------ db/db_iter.h | 2 - db/memtable.cc | 25 ++++---- db/pinned_iterators_manager.h | 66 +++++++++++++++++++++ include/rocksdb/iterator.h | 6 +- include/rocksdb/table.h | 2 +- table/block.h | 22 ++++--- table/internal_iterator.h | 23 ++++---- table/iterator_wrapper.h | 108 ++++++++-------------------------- table/merger.cc | 58 +++++------------- table/two_level_iterator.cc | 34 +++++++++-- 11 files changed, 195 insertions(+), 212 deletions(-) create mode 100644 db/pinned_iterators_manager.h diff --git a/db/db_iter.cc b/db/db_iter.cc index 4afe610f7..15028c4a2 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -16,6 +16,7 @@ #include "db/dbformat.h" #include "db/filename.h" #include "db/merge_context.h" +#include "db/pinned_iterators_manager.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" @@ -103,7 +104,7 @@ class DBIter: public Iterator { InternalIterator* iter, SequenceNumber s, bool arena_mode, uint64_t max_sequential_skip_in_iterations, uint64_t version_number, const Slice* iterate_upper_bound = nullptr, - bool prefix_same_as_start = false) + bool prefix_same_as_start = false, bool pin_data = false) : arena_mode_(arena_mode), env_(env), logger_(ioptions.info_log), @@ -118,12 +119,21 @@ class DBIter: public Iterator { version_number_(version_number), iterate_upper_bound_(iterate_upper_bound), prefix_same_as_start_(prefix_same_as_start), - iter_pinned_(false) { + pin_thru_lifetime_(pin_data) { RecordTick(statistics_, NO_ITERATORS); prefix_extractor_ = ioptions.prefix_extractor; max_skip_ = max_sequential_skip_in_iterations; + if (pin_thru_lifetime_) { + pinned_iters_mgr_.StartPinning(); + } + if (iter_) { + iter_->SetPinnedItersMgr(&pinned_iters_mgr_); + } } virtual ~DBIter() { + if (pin_thru_lifetime_) { + pinned_iters_mgr_.ReleasePinnedIterators(); + } RecordTick(statistics_, NO_ITERATORS, -1); local_stats_.BumpGlobalStatistics(statistics_); if (!arena_mode_) { @@ -135,9 +145,7 @@ class DBIter: public Iterator { virtual void SetIter(InternalIterator* iter) { assert(iter_ == nullptr); iter_ = iter; - if (iter_ && iter_pinned_) { - iter_->PinData(); - } + iter_->SetPinnedItersMgr(&pinned_iters_mgr_); } virtual bool Valid() const override { return valid_; } virtual Slice key() const override { @@ -156,28 +164,6 @@ class DBIter: public Iterator { return status_; } } - virtual Status PinData() { - Status s; - if (iter_) { - s = iter_->PinData(); - } - if (s.ok()) { - // Even if iter_ is nullptr, we set iter_pinned_ to true so that when - // iter_ is updated using SetIter, we Pin it. - iter_pinned_ = true; - } - return s; - } - virtual Status ReleasePinnedData() { - Status s; - if (iter_) { - s = iter_->ReleasePinnedData(); - } - if (s.ok()) { - iter_pinned_ = false; - } - return s; - } virtual Status GetProperty(std::string prop_name, std::string* prop) override { @@ -192,7 +178,7 @@ class DBIter: public Iterator { return Status::OK(); } else if (prop_name == "rocksdb.iterator.is-key-pinned") { if (valid_) { - *prop = (iter_pinned_ && saved_key_.IsKeyPinned()) ? "1" : "0"; + *prop = (pin_thru_lifetime_ && saved_key_.IsKeyPinned()) ? "1" : "0"; } else { *prop = "Iterator is not valid."; } @@ -250,10 +236,13 @@ class DBIter: public Iterator { const Slice* iterate_upper_bound_; IterKey prefix_start_; bool prefix_same_as_start_; - bool iter_pinned_; + // Means that we will pin all data blocks we read as long the Iterator + // is not deleted, will be true if ReadOptions::pin_data is true + const bool pin_thru_lifetime_; // List of operands for merge operator. MergeContext merge_context_; LocalStatistics local_stats_; + PinnedIteratorsManager pinned_iters_mgr_; // No copying allowed DBIter(const DBIter&); @@ -890,10 +879,7 @@ Iterator* NewDBIterator(Env* env, const ImmutableCFOptions& ioptions, DBIter* db_iter = new DBIter(env, ioptions, user_key_comparator, internal_iter, sequence, false, max_sequential_skip_in_iterations, version_number, - iterate_upper_bound, prefix_same_as_start); - if (pin_data) { - db_iter->PinData(); - } + iterate_upper_bound, prefix_same_as_start, pin_data); return db_iter; } @@ -916,14 +902,10 @@ inline void ArenaWrappedDBIter::Prev() { db_iter_->Prev(); } inline Slice ArenaWrappedDBIter::key() const { return db_iter_->key(); } inline Slice ArenaWrappedDBIter::value() const { return db_iter_->value(); } inline Status ArenaWrappedDBIter::status() const { return db_iter_->status(); } -inline Status ArenaWrappedDBIter::PinData() { return db_iter_->PinData(); } inline Status ArenaWrappedDBIter::GetProperty(std::string prop_name, std::string* prop) { return db_iter_->GetProperty(prop_name, prop); } -inline Status ArenaWrappedDBIter::ReleasePinnedData() { - return db_iter_->ReleasePinnedData(); -} void ArenaWrappedDBIter::RegisterCleanup(CleanupFunction function, void* arg1, void* arg2) { db_iter_->RegisterCleanup(function, arg1, arg2); @@ -941,12 +923,9 @@ ArenaWrappedDBIter* NewArenaWrappedDbIterator( DBIter* db_iter = new (mem) DBIter(env, ioptions, user_key_comparator, nullptr, sequence, true, max_sequential_skip_in_iterations, version_number, - iterate_upper_bound, prefix_same_as_start); + iterate_upper_bound, prefix_same_as_start, pin_data); iter->SetDBIter(db_iter); - if (pin_data) { - iter->PinData(); - } return iter; } diff --git a/db/db_iter.h b/db/db_iter.h index f239d2984..89db7ad38 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -63,8 +63,6 @@ class ArenaWrappedDBIter : public Iterator { virtual Status status() const override; void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); - virtual Status PinData(); - virtual Status ReleasePinnedData(); virtual Status GetProperty(std::string prop_name, std::string* prop) override; private: diff --git a/db/memtable.cc b/db/memtable.cc index f34acb319..32aa8d76c 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -15,6 +15,7 @@ #include "db/dbformat.h" #include "db/merge_context.h" +#include "db/pinned_iterators_manager.h" #include "db/writebuffer.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" @@ -231,6 +232,12 @@ class MemTableIterator : public InternalIterator { } ~MemTableIterator() { +#ifndef NDEBUG + // Assert that the MemTableIterator is never deleted while + // Pinning is Enabled. + assert(!pinned_iters_mgr_ || + (pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled())); +#endif if (arena_mode_) { iter_->~Iterator(); } else { @@ -238,6 +245,14 @@ class MemTableIterator : public InternalIterator { } } +#ifndef NDEBUG + virtual void SetPinnedItersMgr( + PinnedIteratorsManager* pinned_iters_mgr) override { + pinned_iters_mgr_ = pinned_iters_mgr; + } + PinnedIteratorsManager* pinned_iters_mgr_ = nullptr; +#endif + virtual bool Valid() const override { return valid_; } virtual void Seek(const Slice& k) override { PERF_TIMER_GUARD(seek_on_memtable_time); @@ -285,16 +300,6 @@ class MemTableIterator : public InternalIterator { virtual Status status() const override { return Status::OK(); } - virtual Status PinData() override { - // memtable data is always pinned - return Status::OK(); - } - - virtual Status ReleasePinnedData() override { - // memtable data is always pinned - return Status::OK(); - } - virtual bool IsKeyPinned() const override { // memtable data is always pinned return true; diff --git a/db/pinned_iterators_manager.h b/db/pinned_iterators_manager.h new file mode 100644 index 000000000..af58c79f5 --- /dev/null +++ b/db/pinned_iterators_manager.h @@ -0,0 +1,66 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// +#pragma once +#include +#include +#include + +#include "table/internal_iterator.h" + +namespace rocksdb { + +// PinnedIteratorsManager will be notified whenever we need to pin an Iterator +// and it will be responsible for deleting pinned Iterators when they are +// not needed anymore. +class PinnedIteratorsManager { + public: + PinnedIteratorsManager() : pinning_enabled(false), pinned_iters_(nullptr) {} + ~PinnedIteratorsManager() { assert(!pinning_enabled); } + + // Enable Iterators pinning + void StartPinning() { + if (!pinning_enabled) { + pinning_enabled = true; + if (!pinned_iters_) { + pinned_iters_.reset(new std::vector()); + } + } + } + + // Is pinning enabled ? + bool PinningEnabled() { return pinning_enabled; } + + // Take ownership of iter if pinning is enabled and delete it when + // ReleasePinnedIterators() is called + void PinIteratorIfNeeded(InternalIterator* iter) { + if (!pinning_enabled || !iter) { + return; + } + pinned_iters_->push_back(iter); + } + + // Release pinned Iterators + void ReleasePinnedIterators() { + if (pinning_enabled) { + pinning_enabled = false; + + // Remove duplicate pointers + std::sort(pinned_iters_->begin(), pinned_iters_->end()); + std::unique(pinned_iters_->begin(), pinned_iters_->end()); + + for (auto& iter : *pinned_iters_) { + delete iter; + } + pinned_iters_->clear(); + } + } + + private: + bool pinning_enabled; + std::unique_ptr> pinned_iters_; +}; + +} // namespace rocksdb diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index 7da37ec33..c39ce4641 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -98,13 +98,11 @@ class Iterator : public Cleanable { // Property "rocksdb.iterator.is-key-pinned": // If returning "1", this means that the Slice returned by key() is valid - // as long as the iterator is not deleted and ReleasePinnedData() is not - // called. + // as long as the iterator is not deleted. // It is guaranteed to always return "1" if // - Iterator created with ReadOptions::pin_data = true // - DB tables were created with - // BlockBasedTableOptions::use_delta_encoding - // set to false. + // BlockBasedTableOptions::use_delta_encoding = false. // Property "rocksdb.iterator.super-version-number": // LSM version used by the iterator. The same format as DB Property // kCurrentSuperVersionNumber. See its comment for more information. diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 8aba3a153..b066b67d9 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -130,7 +130,7 @@ struct BlockBasedTableOptions { int index_block_restart_interval = 1; // Use delta encoding to compress keys in blocks. - // Iterator::PinData() requires this option to be disabled. + // ReadOptions::pin_data requires this option to be disabled. // // Default: true bool use_delta_encoding = true; diff --git a/table/block.h b/table/block.h index c3a26ef1f..7c0f02d82 100644 --- a/table/block.h +++ b/table/block.h @@ -14,11 +14,12 @@ #include #endif +#include "db/dbformat.h" +#include "db/pinned_iterators_manager.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" -#include "db/dbformat.h" -#include "table/block_prefix_index.h" #include "table/block_hash_index.h" +#include "table/block_prefix_index.h" #include "table/internal_iterator.h" #include "format.h" @@ -151,15 +152,18 @@ class BlockIter : public InternalIterator { virtual void SeekToLast() override; - virtual Status PinData() override { - // block data is always pinned. - return Status::OK(); +#ifndef NDEBUG + ~BlockIter() { + // Assert that the BlockIter is never deleted while Pinning is Enabled. + assert(!pinned_iters_mgr_ || + (pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled())); } - - virtual Status ReleasePinnedData() override { - // block data is always pinned. - return Status::OK(); + virtual void SetPinnedItersMgr( + PinnedIteratorsManager* pinned_iters_mgr) override { + pinned_iters_mgr_ = pinned_iters_mgr; } + PinnedIteratorsManager* pinned_iters_mgr_ = nullptr; +#endif virtual bool IsKeyPinned() const override { return key_.IsKeyPinned(); } diff --git a/table/internal_iterator.h b/table/internal_iterator.h index d487b3124..d7bf73cac 100644 --- a/table/internal_iterator.h +++ b/table/internal_iterator.h @@ -12,6 +12,8 @@ namespace rocksdb { +class PinnedIteratorsManager; + class InternalIterator : public Cleanable { public: InternalIterator() {} @@ -61,20 +63,19 @@ class InternalIterator : public Cleanable { // satisfied without doing some IO, then this returns Status::Incomplete(). virtual Status status() const = 0; - // Make sure that all current and future data blocks used by this iterator - // will be pinned in memory and will not be released except when - // ReleasePinnedData() is called or the iterator is deleted. - virtual Status PinData() { return Status::NotSupported(""); } - - // Release all blocks that were pinned because of PinData() and no future - // blocks will be pinned. - virtual Status ReleasePinnedData() { return Status::NotSupported(""); } + // Pass the PinnedIteratorsManager to the Iterator, most Iterators dont + // communicate with PinnedIteratorsManager so default implementation is no-op + // but for Iterators that need to communicate with PinnedIteratorsManager + // they will implement this function and use the passed pointer to communicate + // with PinnedIteratorsManager. + virtual void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) {} - // If true, this means that the Slice returned by key() is valid as long - // as the iterator is not deleted and ReleasePinnedData() is not called. + // If true, this means that the Slice returned by key() is valid as long as + // PinnedIteratorsManager::ReleasePinnedIterators is not called and the + // Iterator is not deleted. // // IsKeyPinned() is guaranteed to always return true if - // - PinData() is called + // - Iterator is created with ReadOptions::pin_data = true // - DB tables were created with BlockBasedTableOptions::use_delta_encoding // set to false. virtual bool IsKeyPinned() const { return false; } diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index 0a0a819d7..bf5cfa6e8 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -15,95 +15,47 @@ namespace rocksdb { -// A internal wrapper class with an interface similar to Iterator that -// caches the valid() and key() results for an underlying iterator. +// A internal wrapper class with an interface similar to Iterator that caches +// the valid(), key() and IsKeyPinned() results for an underlying iterator. // This can help avoid virtual function calls and also gives better // cache locality. class IteratorWrapper { public: - IteratorWrapper() : iter_(nullptr), iters_pinned_(false), valid_(false) {} - explicit IteratorWrapper(InternalIterator* _iter) - : iter_(nullptr), iters_pinned_(false) { + IteratorWrapper() : iter_(nullptr), valid_(false) {} + explicit IteratorWrapper(InternalIterator* _iter) : iter_(nullptr) { Set(_iter); } ~IteratorWrapper() {} InternalIterator* iter() const { return iter_; } - // Takes the ownership of "_iter" and will delete it when destroyed. - // Next call to Set() will destroy "_iter" except if PinData() was called. - void Set(InternalIterator* _iter) { - if (iters_pinned_ && iter_) { - // keep old iterator until ReleasePinnedData() is called - pinned_iters_.insert(iter_); - } else { - delete iter_; - } + // Set the underlying Iterator to _iter and return + // previous underlying Iterator. + InternalIterator* Set(InternalIterator* _iter) { + InternalIterator* old_iter = iter_; iter_ = _iter; if (iter_ == nullptr) { valid_ = false; } else { Update(); - if (iters_pinned_) { - // Pin new iterator - Status s = iter_->PinData(); - assert(s.ok()); - } - } - } - - Status PinData() { - Status s; - if (iters_pinned_) { - return s; - } - - if (iter_) { - s = iter_->PinData(); } - - if (s.ok()) { - iters_pinned_ = true; - } - - return s; - } - - Status ReleasePinnedData() { - Status s; - if (!iters_pinned_) { - return s; - } - - if (iter_) { - s = iter_->ReleasePinnedData(); - } - - if (s.ok()) { - iters_pinned_ = false; - // No need to call ReleasePinnedData() for pinned_iters_ - // since we will delete them - DeletePinnedIterators(false); - } - - return s; - } - - bool IsKeyPinned() const { - assert(iter_); - return iters_pinned_ && iter_->IsKeyPinned(); + return old_iter; } void DeleteIter(bool is_arena_mode) { - if (iter_ && pinned_iters_.find(iter_) == pinned_iters_.end()) { - DestroyIterator(iter_, is_arena_mode); + if (iter_) { + if (!is_arena_mode) { + delete iter_; + } else { + iter_->~InternalIterator(); + } } - DeletePinnedIterators(is_arena_mode); } // Iterator interface methods bool Valid() const { return valid_; } Slice key() const { assert(Valid()); return key_; } + bool IsKeyPinned() const { assert(Valid()); return is_key_pinned_; } Slice value() const { assert(Valid()); return iter_->value(); } // Methods below require iter() != nullptr Status status() const { assert(iter_); return iter_->status(); } @@ -112,39 +64,25 @@ class IteratorWrapper { void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); } void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); } void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); } + void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) { + assert(iter_); + iter_->SetPinnedItersMgr(pinned_iters_mgr); + Update(); + } private: void Update() { valid_ = iter_->Valid(); if (valid_) { key_ = iter_->key(); - } - } - - void DeletePinnedIterators(bool is_arena_mode) { - for (auto it : pinned_iters_) { - DestroyIterator(it, is_arena_mode); - } - pinned_iters_.clear(); - } - - inline void DestroyIterator(InternalIterator* it, bool is_arena_mode) { - if (!is_arena_mode) { - delete it; - } else { - it->~InternalIterator(); + is_key_pinned_ = iter_->IsKeyPinned(); } } InternalIterator* iter_; - // If set to true, current and future iterators wont be deleted. - bool iters_pinned_; - // List of past iterators that are pinned and wont be deleted as long as - // iters_pinned_ is true. When we are pinning iterators this set will contain - // iterators of previous data blocks to keep them from being deleted. - std::set pinned_iters_; bool valid_; Slice key_; + bool is_key_pinned_; }; class Arena; diff --git a/table/merger.cc b/table/merger.cc index 1cc80ec8d..f28cbcd7b 100644 --- a/table/merger.cc +++ b/table/merger.cc @@ -11,6 +11,7 @@ #include +#include "db/pinned_iterators_manager.h" #include "rocksdb/comparator.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" @@ -18,11 +19,11 @@ #include "table/iter_heap.h" #include "table/iterator_wrapper.h" #include "util/arena.h" +#include "util/autovector.h" #include "util/heap.h" +#include "util/perf_context_imp.h" #include "util/stop_watch.h" #include "util/sync_point.h" -#include "util/perf_context_imp.h" -#include "util/autovector.h" namespace rocksdb { // Without anonymous namespace here, we fail the warning -Wmissing-prototypes @@ -37,12 +38,12 @@ class MergingIterator : public InternalIterator { public: MergingIterator(const Comparator* comparator, InternalIterator** children, int n, bool is_arena_mode) - : data_pinned_(false), - is_arena_mode_(is_arena_mode), + : is_arena_mode_(is_arena_mode), comparator_(comparator), current_(nullptr), direction_(kForward), - minHeap_(comparator_) { + minHeap_(comparator_), + pinned_iters_mgr_(nullptr) { children_.resize(n); for (int i = 0; i < n; i++) { children_[i].Set(children[i]); @@ -58,9 +59,8 @@ class MergingIterator : public InternalIterator { virtual void AddIterator(InternalIterator* iter) { assert(direction_ == kForward); children_.emplace_back(iter); - if (data_pinned_) { - Status s = iter->PinData(); - assert(s.ok()); + if (pinned_iters_mgr_) { + iter->SetPinnedItersMgr(pinned_iters_mgr_); } auto new_wrapper = children_.back(); if (new_wrapper.Valid()) { @@ -243,50 +243,21 @@ class MergingIterator : public InternalIterator { return s; } - virtual Status PinData() override { - Status s; - if (data_pinned_) { - return s; - } - - for (size_t i = 0; i < children_.size(); i++) { - s = children_[i].PinData(); - if (!s.ok()) { - // We failed to pin an iterator, clean up - for (size_t j = 0; j < i; j++) { - children_[j].ReleasePinnedData(); - } - break; - } - } - data_pinned_ = s.ok(); - return s; - } - - virtual Status ReleasePinnedData() override { - Status s; - if (!data_pinned_) { - return s; - } - + virtual void SetPinnedItersMgr( + PinnedIteratorsManager* pinned_iters_mgr) override { + pinned_iters_mgr_ = pinned_iters_mgr; for (auto& child : children_) { - Status release_status = child.ReleasePinnedData(); - if (s.ok() && !release_status.ok()) { - s = release_status; - } + child.SetPinnedItersMgr(pinned_iters_mgr); } - data_pinned_ = false; - - return s; } virtual bool IsKeyPinned() const override { assert(Valid()); - return current_->IsKeyPinned(); + return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && + current_->IsKeyPinned(); } private: - bool data_pinned_; // Clears heaps for both directions, used when changing direction or seeking void ClearHeaps(); // Ensures that maxHeap_ is initialized when starting to go in the reverse @@ -311,6 +282,7 @@ class MergingIterator : public InternalIterator { // Max heap is used for reverse iteration, which is way less common than // forward. Lazily initialize it to save memory. std::unique_ptr maxHeap_; + PinnedIteratorsManager* pinned_iters_mgr_; IteratorWrapper* CurrentForward() const { assert(direction_ == kForward); diff --git a/table/two_level_iterator.cc b/table/two_level_iterator.cc index 2656b317a..0ed01321c 100644 --- a/table/two_level_iterator.cc +++ b/table/two_level_iterator.cc @@ -9,6 +9,7 @@ #include "table/two_level_iterator.h" +#include "db/pinned_iterators_manager.h" #include "rocksdb/options.h" #include "rocksdb/table.h" #include "table/block.h" @@ -26,6 +27,10 @@ class TwoLevelIterator : public InternalIterator { bool need_free_iter_and_state); virtual ~TwoLevelIterator() { + // Assert that the TwoLevelIterator is never deleted while Pinning is + // Enabled. + assert(!pinned_iters_mgr_ || + (pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled())); first_level_iter_.DeleteIter(!need_free_iter_and_state_); second_level_iter_.DeleteIter(false); if (need_free_iter_and_state_) { @@ -61,12 +66,17 @@ class TwoLevelIterator : public InternalIterator { return status_; } } - virtual Status PinData() override { return second_level_iter_.PinData(); } - virtual Status ReleasePinnedData() override { - return second_level_iter_.ReleasePinnedData(); + virtual void SetPinnedItersMgr( + PinnedIteratorsManager* pinned_iters_mgr) override { + pinned_iters_mgr_ = pinned_iters_mgr; + first_level_iter_.SetPinnedItersMgr(pinned_iters_mgr); + if (second_level_iter_.iter()) { + second_level_iter_.SetPinnedItersMgr(pinned_iters_mgr); + } } virtual bool IsKeyPinned() const override { - return second_level_iter_.iter() ? second_level_iter_.IsKeyPinned() : false; + return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && + second_level_iter_.iter() && second_level_iter_.IsKeyPinned(); } private: @@ -82,6 +92,7 @@ class TwoLevelIterator : public InternalIterator { IteratorWrapper first_level_iter_; IteratorWrapper second_level_iter_; // May be nullptr bool need_free_iter_and_state_; + PinnedIteratorsManager* pinned_iters_mgr_; Status status_; // If second_level_iter is non-nullptr, then "data_block_handle_" holds the // "index_value" passed to block_function_ to create the second_level_iter. @@ -93,7 +104,8 @@ TwoLevelIterator::TwoLevelIterator(TwoLevelIteratorState* state, bool need_free_iter_and_state) : state_(state), first_level_iter_(first_level_iter), - need_free_iter_and_state_(need_free_iter_and_state) {} + need_free_iter_and_state_(need_free_iter_and_state), + pinned_iters_mgr_(nullptr) {} void TwoLevelIterator::Seek(const Slice& target) { if (state_->check_prefix_may_match && @@ -179,7 +191,17 @@ void TwoLevelIterator::SetSecondLevelIterator(InternalIterator* iter) { if (second_level_iter_.iter() != nullptr) { SaveError(second_level_iter_.status()); } - second_level_iter_.Set(iter); + + if (pinned_iters_mgr_ && iter) { + iter->SetPinnedItersMgr(pinned_iters_mgr_); + } + + InternalIterator* old_iter = second_level_iter_.Set(iter); + if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) { + pinned_iters_mgr_->PinIteratorIfNeeded(old_iter); + } else { + delete old_iter; + } } void TwoLevelIterator::InitDataBlock() {