Check PrefixMayMatch on Seek()

Summary:
As a follow-up diff for https://reviews.facebook.net/D17805, add
optimization to check PrefixMayMatch on Seek()

Test Plan: make all check

Reviewers: igor, haobo, sdong, yhchiang, dhruba

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D17853
main
Lei Jin 10 years ago
parent 3995e801ab
commit d642c60bdc
  1. 2
      db/db_test.cc
  2. 4
      db/table_cache.cc
  3. 119
      db/version_set.cc
  4. 9
      db/version_set.h
  5. 225
      table/block_based_table_reader.cc
  6. 12
      table/block_based_table_reader.h
  7. 160
      table/two_level_iterator.cc
  8. 24
      table/two_level_iterator.h

@ -6481,7 +6481,7 @@ TEST(DBTest, PrefixScan) {
ASSERT_OK(iter->status());
delete iter;
ASSERT_EQ(count, 2);
ASSERT_EQ(env_->random_read_counter_.Read(), 11);
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
Close();
delete options.filter_policy;
}

@ -193,7 +193,7 @@ Status TableCache::GetTableProperties(
bool TableCache::PrefixMayMatch(const ReadOptions& options,
const InternalKeyComparator& icomparator,
const FileMetaData& file_meta,
const Slice& internal_prefix, bool* table_io) {
const Slice& internal_key, bool* table_io) {
bool may_match = true;
auto table_reader = file_meta.table_reader;
Cache::Handle* table_handle = nullptr;
@ -207,7 +207,7 @@ bool TableCache::PrefixMayMatch(const ReadOptions& options,
table_reader = GetTableReaderFromHandle(table_handle);
}
may_match = table_reader->PrefixMayMatch(internal_prefix);
may_match = table_reader->PrefixMayMatch(internal_key);
if (table_handle != nullptr) {
// Need to release handle if it is generated from here.

@ -217,49 +217,43 @@ class Version::LevelFileNumIterator : public Iterator {
mutable EncodedFileMetaData current_value_;
};
static Iterator* GetFileIterator(void* arg, const ReadOptions& options,
const EnvOptions& soptions,
const InternalKeyComparator& icomparator,
const Slice& file_value, bool for_compaction) {
TableCache* cache = reinterpret_cast<TableCache*>(arg);
if (file_value.size() != sizeof(EncodedFileMetaData)) {
return NewErrorIterator(
Status::Corruption("FileReader invoked with unexpected value"));
} else {
const EncodedFileMetaData* encoded_meta =
reinterpret_cast<const EncodedFileMetaData*>(file_value.data());
FileMetaData meta(encoded_meta->number, encoded_meta->file_size);
meta.table_reader = encoded_meta->table_reader;
return cache->NewIterator(options, soptions, icomparator, meta,
nullptr /* don't need reference to table*/, for_compaction);
class Version::LevelFileIteratorState : public TwoLevelIteratorState {
public:
LevelFileIteratorState(TableCache* table_cache,
const ReadOptions& read_options, const EnvOptions& env_options,
const InternalKeyComparator& icomparator, bool for_compaction,
bool prefix_enabled)
: TwoLevelIteratorState(prefix_enabled),
table_cache_(table_cache), read_options_(read_options),
env_options_(env_options), icomparator_(icomparator),
for_compaction_(for_compaction) {}
Iterator* NewSecondaryIterator(const Slice& meta_handle) override {
if (meta_handle.size() != sizeof(EncodedFileMetaData)) {
return NewErrorIterator(
Status::Corruption("FileReader invoked with unexpected value"));
} else {
const EncodedFileMetaData* encoded_meta =
reinterpret_cast<const EncodedFileMetaData*>(meta_handle.data());
FileMetaData meta(encoded_meta->number, encoded_meta->file_size);
meta.table_reader = encoded_meta->table_reader;
return table_cache_->NewIterator(read_options_, env_options_,
icomparator_, meta, nullptr /* don't need reference to table*/,
for_compaction_);
}
}
}
bool Version::PrefixMayMatch(const ReadOptions& options,
const EnvOptions& soptions,
const Slice& internal_prefix,
Iterator* level_iter) const {
bool may_match = true;
level_iter->Seek(internal_prefix);
if (!level_iter->Valid()) {
// we're past end of level
may_match = false;
} else if (ExtractUserKey(level_iter->key()).starts_with(
ExtractUserKey(internal_prefix))) {
// TODO(tylerharter): do we need this case? Or are we guaranteed
// key() will always be the biggest value for this SST?
may_match = true;
} else {
const EncodedFileMetaData* encoded_meta =
reinterpret_cast<const EncodedFileMetaData*>(
level_iter->value().data());
FileMetaData meta(encoded_meta->number, encoded_meta->file_size);
meta.table_reader = encoded_meta->table_reader;
may_match = cfd_->table_cache()->PrefixMayMatch(
options, cfd_->internal_comparator(), meta, internal_prefix, nullptr);
}
return may_match;
}
bool PrefixMayMatch(const Slice& internal_key) override {
return true;
}
private:
TableCache* table_cache_;
const ReadOptions read_options_;
const EnvOptions& env_options_;
const InternalKeyComparator& icomparator_;
bool for_compaction_;
};
Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) {
auto table_cache = cfd_->table_cache();
@ -314,15 +308,6 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) {
return Status::OK();
}
Iterator* Version::NewConcatenatingIterator(const ReadOptions& options,
const EnvOptions& soptions,
int level) const {
Iterator* level_iter =
new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level]);
return NewTwoLevelIterator(level_iter, &GetFileIterator, cfd_->table_cache(),
options, soptions, cfd_->internal_comparator());
}
void Version::AddIterators(const ReadOptions& options,
const EnvOptions& soptions,
std::vector<Iterator*>* iters) {
@ -337,7 +322,11 @@ void Version::AddIterators(const ReadOptions& options,
// lazily.
for (int level = 1; level < num_levels_; level++) {
if (!files_[level].empty()) {
iters->push_back(NewConcatenatingIterator(options, soptions, level));
iters->push_back(NewTwoLevelIterator(new LevelFileIteratorState(
cfd_->table_cache(), options, soptions,
cfd_->internal_comparator(), false /* for_compaction */,
cfd_->options()->prefix_extractor != nullptr),
new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level])));
}
}
}
@ -2638,10 +2627,11 @@ void VersionSet::AddLiveFiles(std::vector<uint64_t>* live_list) {
}
Iterator* VersionSet::MakeInputIterator(Compaction* c) {
ReadOptions options;
options.verify_checksums =
c->column_family_data()->options()->verify_checksums_in_compaction;
options.fill_cache = false;
auto cfd = c->column_family_data();
ReadOptions read_options;
read_options.verify_checksums =
cfd->options()->verify_checksums_in_compaction;
read_options.fill_cache = false;
// Level-0 files have to be merged together. For other levels,
// we will make a concatenating iterator per level.
@ -2653,20 +2643,19 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
if (!c->inputs(which)->empty()) {
if (c->level() + which == 0) {
for (const auto& file : *c->inputs(which)) {
list[num++] = c->column_family_data()->table_cache()->NewIterator(
options, storage_options_compactions_,
c->column_family_data()->internal_comparator(), *file, nullptr,
list[num++] = cfd->table_cache()->NewIterator(
read_options, storage_options_compactions_,
cfd->internal_comparator(), *file, nullptr,
true /* for compaction */);
}
} else {
// Create concatenating iterator for the files from this level
list[num++] = NewTwoLevelIterator(
new Version::LevelFileNumIterator(
c->column_family_data()->internal_comparator(),
c->inputs(which)),
&GetFileIterator, c->column_family_data()->table_cache(), options,
storage_options_, c->column_family_data()->internal_comparator(),
true /* for compaction */);
list[num++] = NewTwoLevelIterator(new Version::LevelFileIteratorState(
cfd->table_cache(), read_options, storage_options_,
cfd->internal_comparator(), true /* for_compaction */,
false /* prefix enabled */),
new Version::LevelFileNumIterator(cfd->internal_comparator(),
c->inputs(which)));
}
}
}

@ -219,11 +219,10 @@ class Version {
friend class UniversalCompactionPicker;
class LevelFileNumIterator;
Iterator* NewConcatenatingIterator(const ReadOptions&,
const EnvOptions& soptions,
int level) const;
bool PrefixMayMatch(const ReadOptions& options, const EnvOptions& soptions,
const Slice& internal_prefix, Iterator* level_iter) const;
struct LevelFileIteratorState;
bool PrefixMayMatch(const ReadOptions& options, Iterator* level_iter,
const Slice& internal_prefix) const;
// Sort all files for this version based on their file size and
// record results in files_by_size_. The largest files are listed first.

@ -642,94 +642,6 @@ FilterBlockReader* BlockBasedTable::ReadFilter (
rep->options, block.data, block.heap_allocated);
}
// Convert an index iterator value (i.e., an encoded BlockHandle)
// into an iterator over the contents of the corresponding block.
Iterator* BlockBasedTable::DataBlockReader(void* arg,
const ReadOptions& options,
const Slice& index_value,
bool* didIO, bool for_compaction) {
const bool no_io = (options.read_tier == kBlockCacheTier);
BlockBasedTable* table = reinterpret_cast<BlockBasedTable*>(arg);
Cache* block_cache = table->rep_->options.block_cache.get();
Cache* block_cache_compressed = table->rep_->options.
block_cache_compressed.get();
CachableEntry<Block> block;
BlockHandle handle;
Slice input = index_value;
// We intentionally allow extra stuff in index_value so that we
// can add more features in the future.
Status s = handle.DecodeFrom(&input);
if (!s.ok()) {
return NewErrorIterator(s);
}
// If either block cache is enabled, we'll try to read from it.
if (block_cache != nullptr || block_cache_compressed != nullptr) {
Statistics* statistics = table->rep_->options.statistics.get();
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
Slice key, /* key to the block cache */
ckey /* key to the compressed block cache */;
// create key for block cache
if (block_cache != nullptr) {
key = GetCacheKey(table->rep_->cache_key_prefix,
table->rep_->cache_key_prefix_size, handle, cache_key);
}
if (block_cache_compressed != nullptr) {
ckey = GetCacheKey(table->rep_->compressed_cache_key_prefix,
table->rep_->compressed_cache_key_prefix_size, handle,
compressed_cache_key);
}
s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed,
statistics, options, &block);
if (block.value == nullptr && !no_io && options.fill_cache) {
Histograms histogram = for_compaction ?
READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS;
Block* raw_block = nullptr;
{
StopWatch sw(table->rep_->options.env, statistics, histogram);
s = ReadBlockFromFile(table->rep_->file.get(), options, handle,
&raw_block, table->rep_->options.env, didIO,
block_cache_compressed == nullptr);
}
if (s.ok()) {
s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed,
options, statistics, &block, raw_block);
}
}
}
// Didn't get any data from block caches.
if (block.value == nullptr) {
if (no_io) {
// Could not read from block_cache and can't do IO
return NewErrorIterator(Status::Incomplete("no blocking io"));
}
s = ReadBlockFromFile(table->rep_->file.get(), options, handle,
&block.value, table->rep_->options.env, didIO);
}
Iterator* iter;
if (block.value != nullptr) {
iter = block.value->NewIterator(&table->rep_->internal_comparator);
if (block.cache_handle != nullptr) {
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
block.cache_handle);
} else {
iter->RegisterCleanup(&DeleteHeldResource<Block>, block.value, nullptr);
}
} else {
iter = NewErrorIterator(s);
}
return iter;
}
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
bool no_io) const {
@ -838,13 +750,115 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
return iter;
}
Iterator* BlockBasedTable::DataBlockReader(
void* arg, const ReadOptions& options, const EnvOptions& soptions,
const InternalKeyComparator& icomparator, const Slice& index_value,
bool for_compaction) {
return DataBlockReader(arg, options, index_value, nullptr, for_compaction);
// Convert an index iterator value (i.e., an encoded BlockHandle)
// into an iterator over the contents of the corresponding block.
Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep,
const ReadOptions& ro, bool* didIO, const Slice& index_value) {
const bool no_io = (ro.read_tier == kBlockCacheTier);
Cache* block_cache = rep->options.block_cache.get();
Cache* block_cache_compressed = rep->options.
block_cache_compressed.get();
CachableEntry<Block> block;
BlockHandle handle;
Slice input = index_value;
// We intentionally allow extra stuff in index_value so that we
// can add more features in the future.
Status s = handle.DecodeFrom(&input);
if (!s.ok()) {
return NewErrorIterator(s);
}
// If either block cache is enabled, we'll try to read from it.
if (block_cache != nullptr || block_cache_compressed != nullptr) {
Statistics* statistics = rep->options.statistics.get();
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
Slice key, /* key to the block cache */
ckey /* key to the compressed block cache */;
// create key for block cache
if (block_cache != nullptr) {
key = GetCacheKey(rep->cache_key_prefix,
rep->cache_key_prefix_size, handle, cache_key);
}
if (block_cache_compressed != nullptr) {
ckey = GetCacheKey(rep->compressed_cache_key_prefix,
rep->compressed_cache_key_prefix_size, handle,
compressed_cache_key);
}
s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed,
statistics, ro, &block);
if (block.value == nullptr && !no_io && ro.fill_cache) {
Histograms histogram = READ_BLOCK_GET_MICROS;
Block* raw_block = nullptr;
{
StopWatch sw(rep->options.env, statistics, histogram);
s = ReadBlockFromFile(rep->file.get(), ro, handle,
&raw_block, rep->options.env, didIO,
block_cache_compressed == nullptr);
}
if (s.ok()) {
s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed,
ro, statistics, &block, raw_block);
}
}
}
// Didn't get any data from block caches.
if (block.value == nullptr) {
if (no_io) {
// Could not read from block_cache and can't do IO
return NewErrorIterator(Status::Incomplete("no blocking io"));
}
s = ReadBlockFromFile(rep->file.get(), ro, handle,
&block.value, rep->options.env, didIO);
}
Iterator* iter;
if (block.value != nullptr) {
iter = block.value->NewIterator(&rep->internal_comparator);
if (block.cache_handle != nullptr) {
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
block.cache_handle);
} else {
iter->RegisterCleanup(&DeleteHeldResource<Block>, block.value, nullptr);
}
} else {
iter = NewErrorIterator(s);
}
return iter;
}
class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState {
public:
BlockEntryIteratorState(BlockBasedTable* table,
const ReadOptions& read_options, bool* did_io)
: TwoLevelIteratorState(table->rep_->options.prefix_extractor != nullptr),
table_(table), read_options_(read_options), did_io_(did_io) {}
Iterator* NewSecondaryIterator(const Slice& index_value) override {
return NewDataBlockIterator(table_->rep_, read_options_, did_io_,
index_value);
}
bool PrefixMayMatch(const Slice& internal_key) override {
return table_->PrefixMayMatch(internal_key);
}
private:
// Don't own table_
BlockBasedTable* table_;
const ReadOptions read_options_;
// Don't own did_io_
bool* did_io_;
};
// This will be broken if the user specifies an unusual implementation
// of Options.comparator, or if the user specifies an unusual
// definition of prefixes in Options.filter_policy. In particular, we
@ -857,7 +871,13 @@ Iterator* BlockBasedTable::DataBlockReader(
// Otherwise, this method guarantees no I/O will be incurred.
//
// REQUIRES: this method shouldn't be called while the DB lock is held.
bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
assert(rep_->options.prefix_extractor != nullptr);
auto prefix = rep_->options.prefix_extractor->Transform(
ExtractUserKey(internal_key));
InternalKey internal_key_prefix(prefix, 0, kTypeValue);
auto internal_prefix = internal_key_prefix.Encode();
bool may_match = true;
Status s;
@ -918,11 +938,10 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
return may_match;
}
Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) {
return NewTwoLevelIterator(NewIndexIterator(options),
&BlockBasedTable::DataBlockReader,
const_cast<BlockBasedTable*>(this), options,
rep_->soptions, rep_->internal_comparator);
Iterator* BlockBasedTable::NewIterator(const ReadOptions& read_options) {
return NewTwoLevelIterator(new BlockEntryIteratorState(this, read_options,
nullptr),
NewIndexIterator(read_options));
}
Status BlockBasedTable::Get(
@ -953,7 +972,7 @@ Status BlockBasedTable::Get(
} else {
bool didIO = false;
unique_ptr<Iterator> block_iter(
DataBlockReader(this, read_options, iiter->value(), &didIO));
NewDataBlockIterator(rep_, read_options, &didIO, iiter->value()));
if (read_options.read_tier && block_iter->status().IsIncomplete()) {
// couldn't get block from block_cache
@ -1050,10 +1069,8 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) {
return HashIndexReader::Create(
file, index_handle, env, comparator,
[&](Iterator* index_iter) {
return NewTwoLevelIterator(
index_iter, &BlockBasedTable::DataBlockReader,
const_cast<BlockBasedTable*>(this), ReadOptions(),
rep_->soptions, rep_->internal_comparator);
return NewTwoLevelIterator(new BlockEntryIteratorState(this,
ReadOptions(), nullptr), index_iter);
},
rep_->internal_prefix_transform.get(), index_reader);
}

@ -63,7 +63,7 @@ class BlockBasedTable : public TableReader {
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader);
bool PrefixMayMatch(const Slice& internal_prefix) override;
bool PrefixMayMatch(const Slice& internal_key) override;
// Returns a new iterator over the table contents.
// The result of NewIterator() is initially invalid (caller must
@ -111,13 +111,9 @@ class BlockBasedTable : public TableReader {
Rep* rep_;
bool compaction_optimized_;
static Iterator* DataBlockReader(void*, const ReadOptions&,
const EnvOptions& soptions,
const InternalKeyComparator& icomparator,
const Slice&, bool for_compaction);
static Iterator* DataBlockReader(void*, const ReadOptions&, const Slice&,
bool* didIO, bool for_compaction = false);
struct BlockEntryIteratorState;
static Iterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro,
bool* didIO, const Slice& index_value);
// For the following two functions:
// if `no_io == true`, we will not try to read filter/index from sst file

@ -13,26 +13,17 @@
#include "rocksdb/table.h"
#include "table/block.h"
#include "table/format.h"
#include "table/iterator_wrapper.h"
namespace rocksdb {
namespace {
typedef Iterator* (*BlockFunction)(void*, const ReadOptions&,
const EnvOptions& soptions,
const InternalKeyComparator& icomparator,
const Slice&, bool for_compaction);
class TwoLevelIterator: public Iterator {
public:
TwoLevelIterator(Iterator* index_iter, BlockFunction block_function,
void* arg, const ReadOptions& options,
const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
bool for_compaction);
explicit TwoLevelIterator(TwoLevelIteratorState* state,
Iterator* first_level_iter);
virtual ~TwoLevelIterator();
virtual ~TwoLevelIterator() {}
virtual void Seek(const Slice& target);
virtual void SeekToFirst();
@ -41,22 +32,23 @@ class TwoLevelIterator: public Iterator {
virtual void Prev();
virtual bool Valid() const {
return data_iter_.Valid();
return second_level_iter_.Valid();
}
virtual Slice key() const {
assert(Valid());
return data_iter_.key();
return second_level_iter_.key();
}
virtual Slice value() const {
assert(Valid());
return data_iter_.value();
return second_level_iter_.value();
}
virtual Status status() const {
// It'd be nice if status() returned a const Status& instead of a Status
if (!index_iter_.status().ok()) {
return index_iter_.status();
} else if (data_iter_.iter() != nullptr && !data_iter_.status().ok()) {
return data_iter_.status();
if (!first_level_iter_.status().ok()) {
return first_level_iter_.status();
} else if (second_level_iter_.iter() != nullptr &&
!second_level_iter_.status().ok()) {
return second_level_iter_.status();
} else {
return status_;
}
@ -68,135 +60,129 @@ class TwoLevelIterator: public Iterator {
}
void SkipEmptyDataBlocksForward();
void SkipEmptyDataBlocksBackward();
void SetDataIterator(Iterator* data_iter);
void SetSecondLevelIterator(Iterator* iter);
void InitDataBlock();
BlockFunction block_function_;
void* arg_;
const ReadOptions options_;
const EnvOptions& soptions_;
const InternalKeyComparator& internal_comparator_;
std::unique_ptr<TwoLevelIteratorState> state_;
IteratorWrapper first_level_iter_;
IteratorWrapper second_level_iter_; // May be nullptr
Status status_;
IteratorWrapper index_iter_;
IteratorWrapper data_iter_; // May be nullptr
// If data_iter_ is non-nullptr, then "data_block_handle_" holds the
// "index_value" passed to block_function_ to create the data_iter_.
// If second_level_iter is non-nullptr, then "data_block_handle_" holds the
// "index_value" passed to block_function_ to create the second_level_iter.
std::string data_block_handle_;
bool for_compaction_;
};
TwoLevelIterator::TwoLevelIterator(
Iterator* index_iter, BlockFunction block_function, void* arg,
const ReadOptions& options, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator, bool for_compaction)
: block_function_(block_function),
arg_(arg),
options_(options),
soptions_(soptions),
internal_comparator_(internal_comparator),
index_iter_(index_iter),
data_iter_(nullptr),
for_compaction_(for_compaction) {}
TwoLevelIterator::~TwoLevelIterator() {
}
TwoLevelIterator::TwoLevelIterator(TwoLevelIteratorState* state,
Iterator* first_level_iter)
: state_(state), first_level_iter_(first_level_iter) {}
void TwoLevelIterator::Seek(const Slice& target) {
index_iter_.Seek(target);
InitDataBlock();
if (data_iter_.iter() != nullptr) data_iter_.Seek(target);
SkipEmptyDataBlocksForward();
if (state_->prefix_enabled && !state_->PrefixMayMatch(target)) {
SetSecondLevelIterator(nullptr);
} else {
first_level_iter_.Seek(target);
InitDataBlock();
if (second_level_iter_.iter() != nullptr) {
second_level_iter_.Seek(target);
}
SkipEmptyDataBlocksForward();
}
}
void TwoLevelIterator::SeekToFirst() {
index_iter_.SeekToFirst();
first_level_iter_.SeekToFirst();
InitDataBlock();
if (data_iter_.iter() != nullptr) data_iter_.SeekToFirst();
if (second_level_iter_.iter() != nullptr) {
second_level_iter_.SeekToFirst();
}
SkipEmptyDataBlocksForward();
}
void TwoLevelIterator::SeekToLast() {
index_iter_.SeekToLast();
first_level_iter_.SeekToLast();
InitDataBlock();
if (data_iter_.iter() != nullptr) data_iter_.SeekToLast();
if (second_level_iter_.iter() != nullptr) {
second_level_iter_.SeekToLast();
}
SkipEmptyDataBlocksBackward();
}
void TwoLevelIterator::Next() {
assert(Valid());
data_iter_.Next();
second_level_iter_.Next();
SkipEmptyDataBlocksForward();
}
void TwoLevelIterator::Prev() {
assert(Valid());
data_iter_.Prev();
second_level_iter_.Prev();
SkipEmptyDataBlocksBackward();
}
void TwoLevelIterator::SkipEmptyDataBlocksForward() {
while (data_iter_.iter() == nullptr || (!data_iter_.Valid() &&
!data_iter_.status().IsIncomplete())) {
while (second_level_iter_.iter() == nullptr ||
(!second_level_iter_.Valid() &&
!second_level_iter_.status().IsIncomplete())) {
// Move to next block
if (!index_iter_.Valid()) {
SetDataIterator(nullptr);
if (!first_level_iter_.Valid()) {
SetSecondLevelIterator(nullptr);
return;
}
index_iter_.Next();
first_level_iter_.Next();
InitDataBlock();
if (data_iter_.iter() != nullptr) data_iter_.SeekToFirst();
if (second_level_iter_.iter() != nullptr) {
second_level_iter_.SeekToFirst();
}
}
}
void TwoLevelIterator::SkipEmptyDataBlocksBackward() {
while (data_iter_.iter() == nullptr || (!data_iter_.Valid() &&
!data_iter_.status().IsIncomplete())) {
while (second_level_iter_.iter() == nullptr ||
(!second_level_iter_.Valid() &&
!second_level_iter_.status().IsIncomplete())) {
// Move to next block
if (!index_iter_.Valid()) {
SetDataIterator(nullptr);
if (!first_level_iter_.Valid()) {
SetSecondLevelIterator(nullptr);
return;
}
index_iter_.Prev();
first_level_iter_.Prev();
InitDataBlock();
if (data_iter_.iter() != nullptr) data_iter_.SeekToLast();
if (second_level_iter_.iter() != nullptr) {
second_level_iter_.SeekToLast();
}
}
}
void TwoLevelIterator::SetDataIterator(Iterator* data_iter) {
if (data_iter_.iter() != nullptr) SaveError(data_iter_.status());
data_iter_.Set(data_iter);
void TwoLevelIterator::SetSecondLevelIterator(Iterator* iter) {
if (second_level_iter_.iter() != nullptr) {
SaveError(second_level_iter_.status());
}
second_level_iter_.Set(iter);
}
void TwoLevelIterator::InitDataBlock() {
if (!index_iter_.Valid()) {
SetDataIterator(nullptr);
if (!first_level_iter_.Valid()) {
SetSecondLevelIterator(nullptr);
} else {
Slice handle = index_iter_.value();
if (data_iter_.iter() != nullptr
Slice handle = first_level_iter_.value();
if (second_level_iter_.iter() != nullptr
&& handle.compare(data_block_handle_) == 0) {
// data_iter_ is already constructed with this iterator, so
// second_level_iter is already constructed with this iterator, so
// no need to change anything
} else {
Iterator* iter =
(*block_function_)(arg_, options_, soptions_, internal_comparator_,
handle, for_compaction_);
Iterator* iter = state_->NewSecondaryIterator(handle);
data_block_handle_.assign(handle.data(), handle.size());
SetDataIterator(iter);
SetSecondLevelIterator(iter);
}
}
}
} // namespace
Iterator* NewTwoLevelIterator(Iterator* index_iter,
BlockFunction block_function, void* arg,
const ReadOptions& options,
const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
bool for_compaction) {
return new TwoLevelIterator(index_iter, block_function, arg, options,
soptions, internal_comparator, for_compaction);
Iterator* NewTwoLevelIterator(TwoLevelIteratorState* state,
Iterator* first_level_iter) {
return new TwoLevelIterator(state, first_level_iter);
}
} // namespace rocksdb

@ -10,12 +10,25 @@
#pragma once
#include "rocksdb/iterator.h"
#include "rocksdb/env.h"
#include "table/iterator_wrapper.h"
namespace rocksdb {
struct ReadOptions;
class InternalKeyComparator;
struct TwoLevelIteratorState {
explicit TwoLevelIteratorState(bool prefix_enabled)
: prefix_enabled(prefix_enabled) {}
virtual ~TwoLevelIteratorState() {}
virtual Iterator* NewSecondaryIterator(const Slice& handle) = 0;
virtual bool PrefixMayMatch(const Slice& internal_key) = 0;
bool prefix_enabled;
};
// Return a new two level iterator. A two-level iterator contains an
// index iterator whose values point to a sequence of blocks where
// each block is itself a sequence of key,value pairs. The returned
@ -25,14 +38,7 @@ class InternalKeyComparator;
//
// Uses a supplied function to convert an index_iter value into
// an iterator over the contents of the corresponding block.
extern Iterator* NewTwoLevelIterator(
Iterator* index_iter,
Iterator* (*block_function)(
void* arg, const ReadOptions& options, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
const Slice& index_value, bool for_compaction),
void* arg, const ReadOptions& options, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
bool for_compaction = false);
extern Iterator* NewTwoLevelIterator(TwoLevelIteratorState* state,
Iterator* first_level_iter);
} // namespace rocksdb

Loading…
Cancel
Save