diff --git a/db/db_test.cc b/db/db_test.cc index 474ce4668..acccce201 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -6481,7 +6481,7 @@ TEST(DBTest, PrefixScan) { ASSERT_OK(iter->status()); delete iter; ASSERT_EQ(count, 2); - ASSERT_EQ(env_->random_read_counter_.Read(), 11); + ASSERT_EQ(env_->random_read_counter_.Read(), 2); Close(); delete options.filter_policy; } diff --git a/db/table_cache.cc b/db/table_cache.cc index 8cecfdb59..3208b4692 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -193,7 +193,7 @@ Status TableCache::GetTableProperties( bool TableCache::PrefixMayMatch(const ReadOptions& options, const InternalKeyComparator& icomparator, const FileMetaData& file_meta, - const Slice& internal_prefix, bool* table_io) { + const Slice& internal_key, bool* table_io) { bool may_match = true; auto table_reader = file_meta.table_reader; Cache::Handle* table_handle = nullptr; @@ -207,7 +207,7 @@ bool TableCache::PrefixMayMatch(const ReadOptions& options, table_reader = GetTableReaderFromHandle(table_handle); } - may_match = table_reader->PrefixMayMatch(internal_prefix); + may_match = table_reader->PrefixMayMatch(internal_key); if (table_handle != nullptr) { // Need to release handle if it is generated from here. diff --git a/db/version_set.cc b/db/version_set.cc index 280db327b..815021af6 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -217,49 +217,43 @@ class Version::LevelFileNumIterator : public Iterator { mutable EncodedFileMetaData current_value_; }; -static Iterator* GetFileIterator(void* arg, const ReadOptions& options, - const EnvOptions& soptions, - const InternalKeyComparator& icomparator, - const Slice& file_value, bool for_compaction) { - TableCache* cache = reinterpret_cast(arg); - if (file_value.size() != sizeof(EncodedFileMetaData)) { - return NewErrorIterator( - Status::Corruption("FileReader invoked with unexpected value")); - } else { - const EncodedFileMetaData* encoded_meta = - reinterpret_cast(file_value.data()); - FileMetaData meta(encoded_meta->number, encoded_meta->file_size); - meta.table_reader = encoded_meta->table_reader; - return cache->NewIterator(options, soptions, icomparator, meta, - nullptr /* don't need reference to table*/, for_compaction); +class Version::LevelFileIteratorState : public TwoLevelIteratorState { + public: + LevelFileIteratorState(TableCache* table_cache, + const ReadOptions& read_options, const EnvOptions& env_options, + const InternalKeyComparator& icomparator, bool for_compaction, + bool prefix_enabled) + : TwoLevelIteratorState(prefix_enabled), + table_cache_(table_cache), read_options_(read_options), + env_options_(env_options), icomparator_(icomparator), + for_compaction_(for_compaction) {} + + Iterator* NewSecondaryIterator(const Slice& meta_handle) override { + if (meta_handle.size() != sizeof(EncodedFileMetaData)) { + return NewErrorIterator( + Status::Corruption("FileReader invoked with unexpected value")); + } else { + const EncodedFileMetaData* encoded_meta = + reinterpret_cast(meta_handle.data()); + FileMetaData meta(encoded_meta->number, encoded_meta->file_size); + meta.table_reader = encoded_meta->table_reader; + return table_cache_->NewIterator(read_options_, env_options_, + icomparator_, meta, nullptr /* don't need reference to table*/, + for_compaction_); + } } -} -bool Version::PrefixMayMatch(const ReadOptions& options, - const EnvOptions& soptions, - const Slice& internal_prefix, - Iterator* level_iter) const { - bool may_match = true; - level_iter->Seek(internal_prefix); - if (!level_iter->Valid()) { - // we're past end of level - may_match = false; - } else if (ExtractUserKey(level_iter->key()).starts_with( - ExtractUserKey(internal_prefix))) { - // TODO(tylerharter): do we need this case? Or are we guaranteed - // key() will always be the biggest value for this SST? - may_match = true; - } else { - const EncodedFileMetaData* encoded_meta = - reinterpret_cast( - level_iter->value().data()); - FileMetaData meta(encoded_meta->number, encoded_meta->file_size); - meta.table_reader = encoded_meta->table_reader; - may_match = cfd_->table_cache()->PrefixMayMatch( - options, cfd_->internal_comparator(), meta, internal_prefix, nullptr); - } - return may_match; -} + bool PrefixMayMatch(const Slice& internal_key) override { + return true; + } + + private: + TableCache* table_cache_; + const ReadOptions read_options_; + const EnvOptions& env_options_; + const InternalKeyComparator& icomparator_; + bool for_compaction_; +}; Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) { auto table_cache = cfd_->table_cache(); @@ -314,15 +308,6 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) { return Status::OK(); } -Iterator* Version::NewConcatenatingIterator(const ReadOptions& options, - const EnvOptions& soptions, - int level) const { - Iterator* level_iter = - new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level]); - return NewTwoLevelIterator(level_iter, &GetFileIterator, cfd_->table_cache(), - options, soptions, cfd_->internal_comparator()); -} - void Version::AddIterators(const ReadOptions& options, const EnvOptions& soptions, std::vector* iters) { @@ -337,7 +322,11 @@ void Version::AddIterators(const ReadOptions& options, // lazily. for (int level = 1; level < num_levels_; level++) { if (!files_[level].empty()) { - iters->push_back(NewConcatenatingIterator(options, soptions, level)); + iters->push_back(NewTwoLevelIterator(new LevelFileIteratorState( + cfd_->table_cache(), options, soptions, + cfd_->internal_comparator(), false /* for_compaction */, + cfd_->options()->prefix_extractor != nullptr), + new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level]))); } } } @@ -2638,10 +2627,11 @@ void VersionSet::AddLiveFiles(std::vector* live_list) { } Iterator* VersionSet::MakeInputIterator(Compaction* c) { - ReadOptions options; - options.verify_checksums = - c->column_family_data()->options()->verify_checksums_in_compaction; - options.fill_cache = false; + auto cfd = c->column_family_data(); + ReadOptions read_options; + read_options.verify_checksums = + cfd->options()->verify_checksums_in_compaction; + read_options.fill_cache = false; // Level-0 files have to be merged together. For other levels, // we will make a concatenating iterator per level. @@ -2653,20 +2643,19 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) { if (!c->inputs(which)->empty()) { if (c->level() + which == 0) { for (const auto& file : *c->inputs(which)) { - list[num++] = c->column_family_data()->table_cache()->NewIterator( - options, storage_options_compactions_, - c->column_family_data()->internal_comparator(), *file, nullptr, + list[num++] = cfd->table_cache()->NewIterator( + read_options, storage_options_compactions_, + cfd->internal_comparator(), *file, nullptr, true /* for compaction */); } } else { // Create concatenating iterator for the files from this level - list[num++] = NewTwoLevelIterator( - new Version::LevelFileNumIterator( - c->column_family_data()->internal_comparator(), - c->inputs(which)), - &GetFileIterator, c->column_family_data()->table_cache(), options, - storage_options_, c->column_family_data()->internal_comparator(), - true /* for compaction */); + list[num++] = NewTwoLevelIterator(new Version::LevelFileIteratorState( + cfd->table_cache(), read_options, storage_options_, + cfd->internal_comparator(), true /* for_compaction */, + false /* prefix enabled */), + new Version::LevelFileNumIterator(cfd->internal_comparator(), + c->inputs(which))); } } } diff --git a/db/version_set.h b/db/version_set.h index 8076e6bc6..c8297f8ec 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -219,11 +219,10 @@ class Version { friend class UniversalCompactionPicker; class LevelFileNumIterator; - Iterator* NewConcatenatingIterator(const ReadOptions&, - const EnvOptions& soptions, - int level) const; - bool PrefixMayMatch(const ReadOptions& options, const EnvOptions& soptions, - const Slice& internal_prefix, Iterator* level_iter) const; + struct LevelFileIteratorState; + + bool PrefixMayMatch(const ReadOptions& options, Iterator* level_iter, + const Slice& internal_prefix) const; // Sort all files for this version based on their file size and // record results in files_by_size_. The largest files are listed first. diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index d932a9281..f75726108 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -642,94 +642,6 @@ FilterBlockReader* BlockBasedTable::ReadFilter ( rep->options, block.data, block.heap_allocated); } -// Convert an index iterator value (i.e., an encoded BlockHandle) -// into an iterator over the contents of the corresponding block. -Iterator* BlockBasedTable::DataBlockReader(void* arg, - const ReadOptions& options, - const Slice& index_value, - bool* didIO, bool for_compaction) { - const bool no_io = (options.read_tier == kBlockCacheTier); - BlockBasedTable* table = reinterpret_cast(arg); - Cache* block_cache = table->rep_->options.block_cache.get(); - Cache* block_cache_compressed = table->rep_->options. - block_cache_compressed.get(); - CachableEntry block; - - BlockHandle handle; - Slice input = index_value; - // We intentionally allow extra stuff in index_value so that we - // can add more features in the future. - Status s = handle.DecodeFrom(&input); - - if (!s.ok()) { - return NewErrorIterator(s); - } - - // If either block cache is enabled, we'll try to read from it. - if (block_cache != nullptr || block_cache_compressed != nullptr) { - Statistics* statistics = table->rep_->options.statistics.get(); - char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; - char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; - Slice key, /* key to the block cache */ - ckey /* key to the compressed block cache */; - - // create key for block cache - if (block_cache != nullptr) { - key = GetCacheKey(table->rep_->cache_key_prefix, - table->rep_->cache_key_prefix_size, handle, cache_key); - } - - if (block_cache_compressed != nullptr) { - ckey = GetCacheKey(table->rep_->compressed_cache_key_prefix, - table->rep_->compressed_cache_key_prefix_size, handle, - compressed_cache_key); - } - - s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed, - statistics, options, &block); - - if (block.value == nullptr && !no_io && options.fill_cache) { - Histograms histogram = for_compaction ? - READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS; - Block* raw_block = nullptr; - { - StopWatch sw(table->rep_->options.env, statistics, histogram); - s = ReadBlockFromFile(table->rep_->file.get(), options, handle, - &raw_block, table->rep_->options.env, didIO, - block_cache_compressed == nullptr); - } - - if (s.ok()) { - s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed, - options, statistics, &block, raw_block); - } - } - } - - // Didn't get any data from block caches. - if (block.value == nullptr) { - if (no_io) { - // Could not read from block_cache and can't do IO - return NewErrorIterator(Status::Incomplete("no blocking io")); - } - s = ReadBlockFromFile(table->rep_->file.get(), options, handle, - &block.value, table->rep_->options.env, didIO); - } - - Iterator* iter; - if (block.value != nullptr) { - iter = block.value->NewIterator(&table->rep_->internal_comparator); - if (block.cache_handle != nullptr) { - iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, - block.cache_handle); - } else { - iter->RegisterCleanup(&DeleteHeldResource, block.value, nullptr); - } - } else { - iter = NewErrorIterator(s); - } - return iter; -} BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( bool no_io) const { @@ -838,13 +750,115 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) { return iter; } -Iterator* BlockBasedTable::DataBlockReader( - void* arg, const ReadOptions& options, const EnvOptions& soptions, - const InternalKeyComparator& icomparator, const Slice& index_value, - bool for_compaction) { - return DataBlockReader(arg, options, index_value, nullptr, for_compaction); +// Convert an index iterator value (i.e., an encoded BlockHandle) +// into an iterator over the contents of the corresponding block. +Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep, + const ReadOptions& ro, bool* didIO, const Slice& index_value) { + const bool no_io = (ro.read_tier == kBlockCacheTier); + Cache* block_cache = rep->options.block_cache.get(); + Cache* block_cache_compressed = rep->options. + block_cache_compressed.get(); + CachableEntry block; + + BlockHandle handle; + Slice input = index_value; + // We intentionally allow extra stuff in index_value so that we + // can add more features in the future. + Status s = handle.DecodeFrom(&input); + + if (!s.ok()) { + return NewErrorIterator(s); + } + + // If either block cache is enabled, we'll try to read from it. + if (block_cache != nullptr || block_cache_compressed != nullptr) { + Statistics* statistics = rep->options.statistics.get(); + char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; + char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; + Slice key, /* key to the block cache */ + ckey /* key to the compressed block cache */; + + // create key for block cache + if (block_cache != nullptr) { + key = GetCacheKey(rep->cache_key_prefix, + rep->cache_key_prefix_size, handle, cache_key); + } + + if (block_cache_compressed != nullptr) { + ckey = GetCacheKey(rep->compressed_cache_key_prefix, + rep->compressed_cache_key_prefix_size, handle, + compressed_cache_key); + } + + s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed, + statistics, ro, &block); + + if (block.value == nullptr && !no_io && ro.fill_cache) { + Histograms histogram = READ_BLOCK_GET_MICROS; + Block* raw_block = nullptr; + { + StopWatch sw(rep->options.env, statistics, histogram); + s = ReadBlockFromFile(rep->file.get(), ro, handle, + &raw_block, rep->options.env, didIO, + block_cache_compressed == nullptr); + } + + if (s.ok()) { + s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed, + ro, statistics, &block, raw_block); + } + } + } + + // Didn't get any data from block caches. + if (block.value == nullptr) { + if (no_io) { + // Could not read from block_cache and can't do IO + return NewErrorIterator(Status::Incomplete("no blocking io")); + } + s = ReadBlockFromFile(rep->file.get(), ro, handle, + &block.value, rep->options.env, didIO); + } + + Iterator* iter; + if (block.value != nullptr) { + iter = block.value->NewIterator(&rep->internal_comparator); + if (block.cache_handle != nullptr) { + iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, + block.cache_handle); + } else { + iter->RegisterCleanup(&DeleteHeldResource, block.value, nullptr); + } + } else { + iter = NewErrorIterator(s); + } + return iter; } +class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState { + public: + BlockEntryIteratorState(BlockBasedTable* table, + const ReadOptions& read_options, bool* did_io) + : TwoLevelIteratorState(table->rep_->options.prefix_extractor != nullptr), + table_(table), read_options_(read_options), did_io_(did_io) {} + + Iterator* NewSecondaryIterator(const Slice& index_value) override { + return NewDataBlockIterator(table_->rep_, read_options_, did_io_, + index_value); + } + + bool PrefixMayMatch(const Slice& internal_key) override { + return table_->PrefixMayMatch(internal_key); + } + + private: + // Don't own table_ + BlockBasedTable* table_; + const ReadOptions read_options_; + // Don't own did_io_ + bool* did_io_; +}; + // This will be broken if the user specifies an unusual implementation // of Options.comparator, or if the user specifies an unusual // definition of prefixes in Options.filter_policy. In particular, we @@ -857,7 +871,13 @@ Iterator* BlockBasedTable::DataBlockReader( // Otherwise, this method guarantees no I/O will be incurred. // // REQUIRES: this method shouldn't be called while the DB lock is held. -bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) { +bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) { + assert(rep_->options.prefix_extractor != nullptr); + auto prefix = rep_->options.prefix_extractor->Transform( + ExtractUserKey(internal_key)); + InternalKey internal_key_prefix(prefix, 0, kTypeValue); + auto internal_prefix = internal_key_prefix.Encode(); + bool may_match = true; Status s; @@ -918,11 +938,10 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) { return may_match; } -Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) { - return NewTwoLevelIterator(NewIndexIterator(options), - &BlockBasedTable::DataBlockReader, - const_cast(this), options, - rep_->soptions, rep_->internal_comparator); +Iterator* BlockBasedTable::NewIterator(const ReadOptions& read_options) { + return NewTwoLevelIterator(new BlockEntryIteratorState(this, read_options, + nullptr), + NewIndexIterator(read_options)); } Status BlockBasedTable::Get( @@ -953,7 +972,7 @@ Status BlockBasedTable::Get( } else { bool didIO = false; unique_ptr block_iter( - DataBlockReader(this, read_options, iiter->value(), &didIO)); + NewDataBlockIterator(rep_, read_options, &didIO, iiter->value())); if (read_options.read_tier && block_iter->status().IsIncomplete()) { // couldn't get block from block_cache @@ -1050,10 +1069,8 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) { return HashIndexReader::Create( file, index_handle, env, comparator, [&](Iterator* index_iter) { - return NewTwoLevelIterator( - index_iter, &BlockBasedTable::DataBlockReader, - const_cast(this), ReadOptions(), - rep_->soptions, rep_->internal_comparator); + return NewTwoLevelIterator(new BlockEntryIteratorState(this, + ReadOptions(), nullptr), index_iter); }, rep_->internal_prefix_transform.get(), index_reader); } diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index d48c5d2c7..23f6cb149 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -63,7 +63,7 @@ class BlockBasedTable : public TableReader { unique_ptr&& file, uint64_t file_size, unique_ptr* table_reader); - bool PrefixMayMatch(const Slice& internal_prefix) override; + bool PrefixMayMatch(const Slice& internal_key) override; // Returns a new iterator over the table contents. // The result of NewIterator() is initially invalid (caller must @@ -111,13 +111,9 @@ class BlockBasedTable : public TableReader { Rep* rep_; bool compaction_optimized_; - static Iterator* DataBlockReader(void*, const ReadOptions&, - const EnvOptions& soptions, - const InternalKeyComparator& icomparator, - const Slice&, bool for_compaction); - - static Iterator* DataBlockReader(void*, const ReadOptions&, const Slice&, - bool* didIO, bool for_compaction = false); + struct BlockEntryIteratorState; + static Iterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro, + bool* didIO, const Slice& index_value); // For the following two functions: // if `no_io == true`, we will not try to read filter/index from sst file diff --git a/table/two_level_iterator.cc b/table/two_level_iterator.cc index 65a58ad93..7c3c99d42 100644 --- a/table/two_level_iterator.cc +++ b/table/two_level_iterator.cc @@ -13,26 +13,17 @@ #include "rocksdb/table.h" #include "table/block.h" #include "table/format.h" -#include "table/iterator_wrapper.h" namespace rocksdb { namespace { -typedef Iterator* (*BlockFunction)(void*, const ReadOptions&, - const EnvOptions& soptions, - const InternalKeyComparator& icomparator, - const Slice&, bool for_compaction); - class TwoLevelIterator: public Iterator { public: - TwoLevelIterator(Iterator* index_iter, BlockFunction block_function, - void* arg, const ReadOptions& options, - const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, - bool for_compaction); + explicit TwoLevelIterator(TwoLevelIteratorState* state, + Iterator* first_level_iter); - virtual ~TwoLevelIterator(); + virtual ~TwoLevelIterator() {} virtual void Seek(const Slice& target); virtual void SeekToFirst(); @@ -41,22 +32,23 @@ class TwoLevelIterator: public Iterator { virtual void Prev(); virtual bool Valid() const { - return data_iter_.Valid(); + return second_level_iter_.Valid(); } virtual Slice key() const { assert(Valid()); - return data_iter_.key(); + return second_level_iter_.key(); } virtual Slice value() const { assert(Valid()); - return data_iter_.value(); + return second_level_iter_.value(); } virtual Status status() const { // It'd be nice if status() returned a const Status& instead of a Status - if (!index_iter_.status().ok()) { - return index_iter_.status(); - } else if (data_iter_.iter() != nullptr && !data_iter_.status().ok()) { - return data_iter_.status(); + if (!first_level_iter_.status().ok()) { + return first_level_iter_.status(); + } else if (second_level_iter_.iter() != nullptr && + !second_level_iter_.status().ok()) { + return second_level_iter_.status(); } else { return status_; } @@ -68,135 +60,129 @@ class TwoLevelIterator: public Iterator { } void SkipEmptyDataBlocksForward(); void SkipEmptyDataBlocksBackward(); - void SetDataIterator(Iterator* data_iter); + void SetSecondLevelIterator(Iterator* iter); void InitDataBlock(); - BlockFunction block_function_; - void* arg_; - const ReadOptions options_; - const EnvOptions& soptions_; - const InternalKeyComparator& internal_comparator_; + std::unique_ptr state_; + IteratorWrapper first_level_iter_; + IteratorWrapper second_level_iter_; // May be nullptr Status status_; - IteratorWrapper index_iter_; - IteratorWrapper data_iter_; // May be nullptr - // If data_iter_ is non-nullptr, then "data_block_handle_" holds the - // "index_value" passed to block_function_ to create the data_iter_. + // If second_level_iter is non-nullptr, then "data_block_handle_" holds the + // "index_value" passed to block_function_ to create the second_level_iter. std::string data_block_handle_; - bool for_compaction_; }; -TwoLevelIterator::TwoLevelIterator( - Iterator* index_iter, BlockFunction block_function, void* arg, - const ReadOptions& options, const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, bool for_compaction) - : block_function_(block_function), - arg_(arg), - options_(options), - soptions_(soptions), - internal_comparator_(internal_comparator), - index_iter_(index_iter), - data_iter_(nullptr), - for_compaction_(for_compaction) {} - -TwoLevelIterator::~TwoLevelIterator() { -} +TwoLevelIterator::TwoLevelIterator(TwoLevelIteratorState* state, + Iterator* first_level_iter) + : state_(state), first_level_iter_(first_level_iter) {} void TwoLevelIterator::Seek(const Slice& target) { - index_iter_.Seek(target); - InitDataBlock(); - if (data_iter_.iter() != nullptr) data_iter_.Seek(target); - SkipEmptyDataBlocksForward(); + if (state_->prefix_enabled && !state_->PrefixMayMatch(target)) { + SetSecondLevelIterator(nullptr); + } else { + first_level_iter_.Seek(target); + InitDataBlock(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.Seek(target); + } + SkipEmptyDataBlocksForward(); + } } void TwoLevelIterator::SeekToFirst() { - index_iter_.SeekToFirst(); + first_level_iter_.SeekToFirst(); InitDataBlock(); - if (data_iter_.iter() != nullptr) data_iter_.SeekToFirst(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToFirst(); + } SkipEmptyDataBlocksForward(); } void TwoLevelIterator::SeekToLast() { - index_iter_.SeekToLast(); + first_level_iter_.SeekToLast(); InitDataBlock(); - if (data_iter_.iter() != nullptr) data_iter_.SeekToLast(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToLast(); + } SkipEmptyDataBlocksBackward(); } void TwoLevelIterator::Next() { assert(Valid()); - data_iter_.Next(); + second_level_iter_.Next(); SkipEmptyDataBlocksForward(); } void TwoLevelIterator::Prev() { assert(Valid()); - data_iter_.Prev(); + second_level_iter_.Prev(); SkipEmptyDataBlocksBackward(); } void TwoLevelIterator::SkipEmptyDataBlocksForward() { - while (data_iter_.iter() == nullptr || (!data_iter_.Valid() && - !data_iter_.status().IsIncomplete())) { + while (second_level_iter_.iter() == nullptr || + (!second_level_iter_.Valid() && + !second_level_iter_.status().IsIncomplete())) { // Move to next block - if (!index_iter_.Valid()) { - SetDataIterator(nullptr); + if (!first_level_iter_.Valid()) { + SetSecondLevelIterator(nullptr); return; } - index_iter_.Next(); + first_level_iter_.Next(); InitDataBlock(); - if (data_iter_.iter() != nullptr) data_iter_.SeekToFirst(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToFirst(); + } } } void TwoLevelIterator::SkipEmptyDataBlocksBackward() { - while (data_iter_.iter() == nullptr || (!data_iter_.Valid() && - !data_iter_.status().IsIncomplete())) { + while (second_level_iter_.iter() == nullptr || + (!second_level_iter_.Valid() && + !second_level_iter_.status().IsIncomplete())) { // Move to next block - if (!index_iter_.Valid()) { - SetDataIterator(nullptr); + if (!first_level_iter_.Valid()) { + SetSecondLevelIterator(nullptr); return; } - index_iter_.Prev(); + first_level_iter_.Prev(); InitDataBlock(); - if (data_iter_.iter() != nullptr) data_iter_.SeekToLast(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToLast(); + } } } -void TwoLevelIterator::SetDataIterator(Iterator* data_iter) { - if (data_iter_.iter() != nullptr) SaveError(data_iter_.status()); - data_iter_.Set(data_iter); +void TwoLevelIterator::SetSecondLevelIterator(Iterator* iter) { + if (second_level_iter_.iter() != nullptr) { + SaveError(second_level_iter_.status()); + } + second_level_iter_.Set(iter); } void TwoLevelIterator::InitDataBlock() { - if (!index_iter_.Valid()) { - SetDataIterator(nullptr); + if (!first_level_iter_.Valid()) { + SetSecondLevelIterator(nullptr); } else { - Slice handle = index_iter_.value(); - if (data_iter_.iter() != nullptr + Slice handle = first_level_iter_.value(); + if (second_level_iter_.iter() != nullptr && handle.compare(data_block_handle_) == 0) { - // data_iter_ is already constructed with this iterator, so + // second_level_iter is already constructed with this iterator, so // no need to change anything } else { - Iterator* iter = - (*block_function_)(arg_, options_, soptions_, internal_comparator_, - handle, for_compaction_); + Iterator* iter = state_->NewSecondaryIterator(handle); data_block_handle_.assign(handle.data(), handle.size()); - SetDataIterator(iter); + SetSecondLevelIterator(iter); } } } } // namespace -Iterator* NewTwoLevelIterator(Iterator* index_iter, - BlockFunction block_function, void* arg, - const ReadOptions& options, - const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, - bool for_compaction) { - return new TwoLevelIterator(index_iter, block_function, arg, options, - soptions, internal_comparator, for_compaction); +Iterator* NewTwoLevelIterator(TwoLevelIteratorState* state, + Iterator* first_level_iter) { + return new TwoLevelIterator(state, first_level_iter); } } // namespace rocksdb diff --git a/table/two_level_iterator.h b/table/two_level_iterator.h index d313dcb18..ac9f3d6a0 100644 --- a/table/two_level_iterator.h +++ b/table/two_level_iterator.h @@ -10,12 +10,25 @@ #pragma once #include "rocksdb/iterator.h" #include "rocksdb/env.h" +#include "table/iterator_wrapper.h" namespace rocksdb { struct ReadOptions; class InternalKeyComparator; +struct TwoLevelIteratorState { + explicit TwoLevelIteratorState(bool prefix_enabled) + : prefix_enabled(prefix_enabled) {} + + virtual ~TwoLevelIteratorState() {} + virtual Iterator* NewSecondaryIterator(const Slice& handle) = 0; + virtual bool PrefixMayMatch(const Slice& internal_key) = 0; + + bool prefix_enabled; +}; + + // Return a new two level iterator. A two-level iterator contains an // index iterator whose values point to a sequence of blocks where // each block is itself a sequence of key,value pairs. The returned @@ -25,14 +38,7 @@ class InternalKeyComparator; // // Uses a supplied function to convert an index_iter value into // an iterator over the contents of the corresponding block. -extern Iterator* NewTwoLevelIterator( - Iterator* index_iter, - Iterator* (*block_function)( - void* arg, const ReadOptions& options, const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, - const Slice& index_value, bool for_compaction), - void* arg, const ReadOptions& options, const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, - bool for_compaction = false); +extern Iterator* NewTwoLevelIterator(TwoLevelIteratorState* state, + Iterator* first_level_iter); } // namespace rocksdb