Refactor BlockIter (#4121)

Summary:
BlockIter is getting crowded, as it includes details that are specific to either index blocks or data blocks only. This patch moves such details down into DataBlockIter and IndexBlockIter, both of which inherit from BlockIter.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4121

Differential Revision: D8816832

Pulled By: maysamyabandeh

fbshipit-source-id: d492e74155c11d8a0c1c85cd7ee33d24c7456197
main
Maysam Yabandeh 6 years ago committed by Facebook Github Bot
parent 63904434eb
commit d4ad32d7bd
  1. 110
      table/block.cc
  2. 182
      table/block.h
  3. 214
      table/block_based_table_reader.cc
  4. 31
      table/block_based_table_reader.h
  5. 23
      table/block_test.cc
  6. 15
      table/meta_blocks.cc
  7. 24
      table/partitioned_filter_block.cc
  8. 7
      table/table_test.cc

@ -140,6 +140,31 @@ void BlockIter::Prev() {
} }
void BlockIter::Seek(const Slice& target) { void BlockIter::Seek(const Slice& target) {
Slice seek_key = target;
if (!key_includes_seq_) {
seek_key = ExtractUserKey(target);
}
PERF_TIMER_GUARD(block_seek_nanos);
if (data_ == nullptr) { // Not init yet
return;
}
uint32_t index = 0;
bool ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index);
if (!ok) {
return;
}
SeekToRestartPoint(index);
// Linear search (within restart block) for first key >= target
while (true) {
if (!ParseNextKey() || Compare(key_, seek_key) >= 0) {
return;
}
}
}
void IndexBlockIter::Seek(const Slice& target) {
Slice seek_key = target; Slice seek_key = target;
if (!key_includes_seq_) { if (!key_includes_seq_) {
seek_key = ExtractUserKey(target); seek_key = ExtractUserKey(target);
@ -329,7 +354,7 @@ bool BlockIter::BinarySeek(const Slice& target, uint32_t left, uint32_t right,
// Compare target key and the block key of the block of `block_index`. // Compare target key and the block key of the block of `block_index`.
// Return -1 if error. // Return -1 if error.
int BlockIter::CompareBlockKey(uint32_t block_index, const Slice& target) { int IndexBlockIter::CompareBlockKey(uint32_t block_index, const Slice& target) {
uint32_t region_offset = GetRestartPoint(block_index); uint32_t region_offset = GetRestartPoint(block_index);
uint32_t shared, non_shared, value_length; uint32_t shared, non_shared, value_length;
const char* key_ptr = DecodeEntry(data_ + region_offset, data_ + restarts_, const char* key_ptr = DecodeEntry(data_ + region_offset, data_ + restarts_,
@ -344,9 +369,9 @@ int BlockIter::CompareBlockKey(uint32_t block_index, const Slice& target) {
// Binary search in block_ids to find the first block // Binary search in block_ids to find the first block
// with a key >= target // with a key >= target
bool BlockIter::BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids, bool IndexBlockIter::BinaryBlockIndexSeek(const Slice& target,
uint32_t left, uint32_t right, uint32_t* block_ids, uint32_t left,
uint32_t* index) { uint32_t right, uint32_t* index) {
assert(left <= right); assert(left <= right);
uint32_t left_bound = left; uint32_t left_bound = left;
@ -394,7 +419,7 @@ bool BlockIter::BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids,
} }
} }
bool BlockIter::PrefixSeek(const Slice& target, uint32_t* index) { bool IndexBlockIter::PrefixSeek(const Slice& target, uint32_t* index) {
assert(prefix_index_); assert(prefix_index_);
Slice seek_key = target; Slice seek_key = target;
if (!key_includes_seq_) { if (!key_includes_seq_) {
@ -445,9 +470,12 @@ Block::Block(BlockContents&& contents, SequenceNumber _global_seqno,
} }
} }
template <>
BlockIter* Block::NewIterator(const Comparator* cmp, const Comparator* ucmp, BlockIter* Block::NewIterator(const Comparator* cmp, const Comparator* ucmp,
BlockIter* iter, bool total_order_seek, BlockIter* iter, Statistics* /*stats*/,
Statistics* stats, bool key_includes_seq) { bool /*total_order_seek*/,
bool /*key_includes_seq*/,
BlockPrefixIndex* /*prefix_index*/) {
BlockIter* ret_iter; BlockIter* ret_iter;
if (iter != nullptr) { if (iter != nullptr) {
ret_iter = iter; ret_iter = iter;
@ -463,12 +491,37 @@ BlockIter* Block::NewIterator(const Comparator* cmp, const Comparator* ucmp,
ret_iter->Invalidate(Status::OK()); ret_iter->Invalidate(Status::OK());
return ret_iter; return ret_iter;
} else { } else {
BlockPrefixIndex* prefix_index_ptr = const bool kKeyIncludesSeq = true;
total_order_seek ? nullptr : prefix_index_.get(); ret_iter->InitializeBase(cmp, ucmp, data_, restart_offset_, num_restarts_,
ret_iter->Initialize(cmp, ucmp, data_, restart_offset_, num_restarts_, global_seqno_, kKeyIncludesSeq, cachable());
prefix_index_ptr, global_seqno_, }
read_amp_bitmap_.get(), key_includes_seq, cachable());
return ret_iter;
}
template <>
DataBlockIter* Block::NewIterator(const Comparator* cmp, const Comparator* ucmp,
DataBlockIter* iter, Statistics* stats,
bool /*total_order_seek*/,
bool /*key_includes_seq*/,
BlockPrefixIndex* /*prefix_index*/) {
DataBlockIter* ret_iter;
if (iter != nullptr) {
ret_iter = iter;
} else {
ret_iter = new DataBlockIter;
}
if (size_ < 2 * sizeof(uint32_t)) {
ret_iter->Invalidate(Status::Corruption("bad block contents"));
return ret_iter;
}
if (num_restarts_ == 0) {
// Empty block.
ret_iter->Invalidate(Status::OK());
return ret_iter;
} else {
ret_iter->Initialize(cmp, ucmp, data_, restart_offset_, num_restarts_,
global_seqno_, read_amp_bitmap_.get(), cachable());
if (read_amp_bitmap_) { if (read_amp_bitmap_) {
if (read_amp_bitmap_->GetStatistics() != stats) { if (read_amp_bitmap_->GetStatistics() != stats) {
// DB changed the Statistics pointer, we need to notify read_amp_bitmap_ // DB changed the Statistics pointer, we need to notify read_amp_bitmap_
@ -480,8 +533,34 @@ BlockIter* Block::NewIterator(const Comparator* cmp, const Comparator* ucmp,
return ret_iter; return ret_iter;
} }
void Block::SetBlockPrefixIndex(BlockPrefixIndex* prefix_index) { template <>
prefix_index_.reset(prefix_index); IndexBlockIter* Block::NewIterator(const Comparator* cmp,
const Comparator* ucmp, IndexBlockIter* iter,
Statistics* /*stats*/, bool total_order_seek,
bool key_includes_seq,
BlockPrefixIndex* prefix_index) {
IndexBlockIter* ret_iter;
if (iter != nullptr) {
ret_iter = iter;
} else {
ret_iter = new IndexBlockIter;
}
if (size_ < 2 * sizeof(uint32_t)) {
ret_iter->Invalidate(Status::Corruption("bad block contents"));
return ret_iter;
}
if (num_restarts_ == 0) {
// Empty block.
ret_iter->Invalidate(Status::OK());
return ret_iter;
} else {
BlockPrefixIndex* prefix_index_ptr =
total_order_seek ? nullptr : prefix_index;
ret_iter->Initialize(cmp, ucmp, data_, restart_offset_, num_restarts_,
prefix_index_ptr, key_includes_seq, cachable());
}
return ret_iter;
} }
size_t Block::ApproximateMemoryUsage() const { size_t Block::ApproximateMemoryUsage() const {
@ -491,9 +570,6 @@ size_t Block::ApproximateMemoryUsage() const {
#else #else
usage += sizeof(*this); usage += sizeof(*this);
#endif // ROCKSDB_MALLOC_USABLE_SIZE #endif // ROCKSDB_MALLOC_USABLE_SIZE
if (prefix_index_) {
usage += prefix_index_->ApproximateMemoryUsage();
}
if (read_amp_bitmap_) { if (read_amp_bitmap_) {
usage += read_amp_bitmap_->ApproximateMemoryUsage(); usage += read_amp_bitmap_->ApproximateMemoryUsage();
} }

@ -36,6 +36,8 @@ namespace rocksdb {
struct BlockContents; struct BlockContents;
class Comparator; class Comparator;
class BlockIter; class BlockIter;
class DataBlockIter;
class IndexBlockIter;
class BlockPrefixIndex; class BlockPrefixIndex;
// BlockReadAmpBitmap is a bitmap that map the rocksdb::Block data bytes to // BlockReadAmpBitmap is a bitmap that map the rocksdb::Block data bytes to
@ -156,29 +158,30 @@ class Block {
return contents_.compression_type; return contents_.compression_type;
} }
// If hash index lookup is enabled and `use_hash_index` is true. This block
// will do hash lookup for the key prefix.
//
// NOTE: for the hash based lookup, if a key prefix doesn't match any key,
// the iterator will simply be set as "invalid", rather than returning
// the key that is just pass the target key.
//
// If comparator is InternalKeyComparator, user_comparator is its user // If comparator is InternalKeyComparator, user_comparator is its user
// comparator; they are equal otherwise. // comparator; they are equal otherwise.
// //
// If iter is null, return new Iterator // If iter is null, return new Iterator
// If iter is not null, update this one and return it as Iterator* // If iter is not null, update this one and return it as Iterator*
// //
// If total_order_seek is true, hash_index_ and prefix_index_ are ignored. // NewIterator<DataBlockIter>
// This option only applies for index block. For data block, hash_index_ // Same as above but also updates read_amp_bitmap_ if it is not nullptr.
// and prefix_index_ are null, so this option does not matter. //
BlockIter* NewIterator(const Comparator* comparator, // NewIterator<IndexBlockIter>
const Comparator* user_comparator, // If `prefix_index` is not nullptr this block will do hash lookup for the key
BlockIter* iter = nullptr, // prefix. If total_order_seek is true, prefix_index_ is ignored.
bool total_order_seek = true, //
Statistics* stats = nullptr, // NOTE: for the hash based lookup, if a key prefix doesn't match any key,
bool key_includes_seq = true); // the iterator will simply be set as "invalid", rather than returning
void SetBlockPrefixIndex(BlockPrefixIndex* prefix_index); // the key that is just pass the target key.
template <typename TBlockIter>
TBlockIter* NewIterator(const Comparator* comparator,
const Comparator* user_comparator,
TBlockIter* iter = nullptr,
Statistics* stats = nullptr,
bool total_order_seek = true,
bool key_includes_seq = true,
BlockPrefixIndex* prefix_index = nullptr);
// Report an approximation of how much memory has been used. // Report an approximation of how much memory has been used.
size_t ApproximateMemoryUsage() const; size_t ApproximateMemoryUsage() const;
@ -191,7 +194,6 @@ class Block {
size_t size_; // contents_.data.size() size_t size_; // contents_.data.size()
uint32_t restart_offset_; // Offset in data_ of restart array uint32_t restart_offset_; // Offset in data_ of restart array
uint32_t num_restarts_; uint32_t num_restarts_;
std::unique_ptr<BlockPrefixIndex> prefix_index_;
std::unique_ptr<BlockReadAmpBitmap> read_amp_bitmap_; std::unique_ptr<BlockReadAmpBitmap> read_amp_bitmap_;
// All keys in the block will have seqno = global_seqno_, regardless of // All keys in the block will have seqno = global_seqno_, regardless of
// the encoded value (kDisableGlobalSequenceNumber means disabled) // the encoded value (kDisableGlobalSequenceNumber means disabled)
@ -202,7 +204,7 @@ class Block {
void operator=(const Block&) = delete; void operator=(const Block&) = delete;
}; };
class BlockIter final : public InternalIterator { class BlockIter : public InternalIterator {
public: public:
// Object created using this constructor will behave like an iterator // Object created using this constructor will behave like an iterator
// against an empty block. The state after the creation: Valid()=false // against an empty block. The state after the creation: Valid()=false
@ -211,36 +213,30 @@ class BlockIter final : public InternalIterator {
: comparator_(nullptr), : comparator_(nullptr),
user_comparator_(nullptr), user_comparator_(nullptr),
data_(nullptr), data_(nullptr),
restarts_(0),
num_restarts_(0), num_restarts_(0),
current_(0),
restart_index_(0), restart_index_(0),
restarts_(0),
current_(0),
status_(Status::OK()), status_(Status::OK()),
prefix_index_(nullptr),
key_pinned_(false), key_pinned_(false),
block_contents_pinned_(false),
key_includes_seq_(true), key_includes_seq_(true),
global_seqno_(kDisableGlobalSequenceNumber), global_seqno_(kDisableGlobalSequenceNumber) {}
read_amp_bitmap_(nullptr),
last_bitmap_offset_(0),
block_contents_pinned_(false) {}
BlockIter(const Comparator* comparator, const Comparator* user_comparator, BlockIter(const Comparator* comparator, const Comparator* user_comparator,
const char* data, uint32_t restarts, uint32_t num_restarts, const char* data, uint32_t restarts, uint32_t num_restarts,
BlockPrefixIndex* prefix_index, SequenceNumber global_seqno, SequenceNumber global_seqno, bool key_includes_seq,
BlockReadAmpBitmap* read_amp_bitmap, bool key_includes_seq,
bool block_contents_pinned) bool block_contents_pinned)
: BlockIter() { : BlockIter() {
Initialize(comparator, user_comparator, data, restarts, num_restarts, InitializeBase(comparator, user_comparator, data, restarts, num_restarts,
prefix_index, global_seqno, read_amp_bitmap, key_includes_seq, global_seqno, key_includes_seq, block_contents_pinned);
block_contents_pinned);
} }
void Initialize(const Comparator* comparator, void InitializeBase(const Comparator* comparator,
const Comparator* user_comparator, const char* data, const Comparator* user_comparator, const char* data,
uint32_t restarts, uint32_t num_restarts, uint32_t restarts, uint32_t num_restarts,
BlockPrefixIndex* prefix_index, SequenceNumber global_seqno, SequenceNumber global_seqno, bool key_includes_seq,
BlockReadAmpBitmap* read_amp_bitmap, bool key_includes_seq, bool block_contents_pinned) {
bool block_contents_pinned) {
assert(data_ == nullptr); // Ensure it is called only once assert(data_ == nullptr); // Ensure it is called only once
assert(num_restarts > 0); // Ensure the param is valid assert(num_restarts > 0); // Ensure the param is valid
@ -251,10 +247,7 @@ class BlockIter final : public InternalIterator {
num_restarts_ = num_restarts; num_restarts_ = num_restarts;
current_ = restarts_; current_ = restarts_;
restart_index_ = num_restarts_; restart_index_ = num_restarts_;
prefix_index_ = prefix_index;
global_seqno_ = global_seqno; global_seqno_ = global_seqno;
read_amp_bitmap_ = read_amp_bitmap;
last_bitmap_offset_ = current_ + 1;
key_includes_seq_ = key_includes_seq; key_includes_seq_ = key_includes_seq;
block_contents_pinned_ = block_contents_pinned; block_contents_pinned_ = block_contents_pinned;
} }
@ -283,16 +276,10 @@ class BlockIter final : public InternalIterator {
virtual Status status() const override { return status_; } virtual Status status() const override { return status_; }
virtual Slice key() const override { virtual Slice key() const override {
assert(Valid()); assert(Valid());
return key_includes_seq_ ? key_.GetInternalKey() : key_.GetUserKey(); return key_.GetInternalKey();
} }
virtual Slice value() const override { virtual Slice value() const override {
assert(Valid()); assert(Valid());
if (read_amp_bitmap_ && current_ < restarts_ &&
current_ != last_bitmap_offset_) {
read_amp_bitmap_->Mark(current_ /* current entry offset */,
NextEntryOffset() - 1);
last_bitmap_offset_ = current_;
}
return value_; return value_;
} }
@ -309,7 +296,7 @@ class BlockIter final : public InternalIterator {
virtual void SeekToLast() override; virtual void SeekToLast() override;
#ifndef NDEBUG #ifndef NDEBUG
~BlockIter() { virtual ~BlockIter() {
// Assert that the BlockIter is never deleted while Pinning is Enabled. // Assert that the BlockIter is never deleted while Pinning is Enabled.
assert(!pinned_iters_mgr_ || assert(!pinned_iters_mgr_ ||
(pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled())); (pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled()));
@ -333,36 +320,29 @@ class BlockIter final : public InternalIterator {
return static_cast<uint32_t>(value_.data() - data_); return static_cast<uint32_t>(value_.data() - data_);
} }
private: protected:
// Note: The type could be changed to InternalKeyComparator but we see a weird // Note: The type could be changed to InternalKeyComparator but we see a weird
// performance drop by that. // performance drop by that.
const Comparator* comparator_; const Comparator* comparator_;
// Same as comparator_ if comparator_ is not InernalKeyComparator // Same as comparator_ if comparator_ is not InernalKeyComparator
const Comparator* user_comparator_; const Comparator* user_comparator_;
const char* data_; // underlying block contents const char* data_; // underlying block contents
uint32_t restarts_; // Offset of restart array (list of fixed32)
uint32_t num_restarts_; // Number of uint32_t entries in restart array uint32_t num_restarts_; // Number of uint32_t entries in restart array
uint32_t restart_index_; // Index of restart block in which current_ falls
uint32_t restarts_; // Offset of restart array (list of fixed32)
// current_ is offset in data_ of current entry. >= restarts_ if !Valid // current_ is offset in data_ of current entry. >= restarts_ if !Valid
uint32_t current_; uint32_t current_;
uint32_t restart_index_; // Index of restart block in which current_ falls
IterKey key_; IterKey key_;
Slice value_; Slice value_;
Status status_; Status status_;
BlockPrefixIndex* prefix_index_;
bool key_pinned_; bool key_pinned_;
// whether the block data is guaranteed to outlive this iterator
bool block_contents_pinned_;
// Key is in InternalKey format // Key is in InternalKey format
bool key_includes_seq_; bool key_includes_seq_;
SequenceNumber global_seqno_; SequenceNumber global_seqno_;
public:
// read-amp bitmap
BlockReadAmpBitmap* read_amp_bitmap_;
// last `current_` value we report to read-amp bitmp
mutable uint32_t last_bitmap_offset_;
// whether the block data is guaranteed to outlive this iterator
bool block_contents_pinned_;
struct CachedPrevEntry { struct CachedPrevEntry {
explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr, explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr,
size_t _key_offset, size_t _key_size, Slice _value) size_t _key_offset, size_t _key_size, Slice _value)
@ -387,6 +367,7 @@ class BlockIter final : public InternalIterator {
std::vector<CachedPrevEntry> prev_entries_; std::vector<CachedPrevEntry> prev_entries_;
int32_t prev_entries_idx_ = -1; int32_t prev_entries_idx_ = -1;
public:
inline int Compare(const Slice& a, const Slice& b) const { inline int Compare(const Slice& a, const Slice& b) const {
if (key_includes_seq_) { if (key_includes_seq_) {
return comparator_->Compare(a, b); return comparator_->Compare(a, b);
@ -430,15 +411,90 @@ class BlockIter final : public InternalIterator {
bool BinarySeek(const Slice& target, uint32_t left, uint32_t right, bool BinarySeek(const Slice& target, uint32_t left, uint32_t right,
uint32_t* index); uint32_t* index);
};
int CompareBlockKey(uint32_t block_index, const Slice& target); class DataBlockIter final : public BlockIter {
public:
DataBlockIter()
: BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {}
DataBlockIter(const Comparator* comparator, const Comparator* user_comparator,
const char* data, uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno,
BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned)
: DataBlockIter() {
Initialize(comparator, user_comparator, data, restarts, num_restarts,
global_seqno, read_amp_bitmap, block_contents_pinned);
}
void Initialize(const Comparator* comparator,
const Comparator* user_comparator, const char* data,
uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno,
BlockReadAmpBitmap* read_amp_bitmap,
bool block_contents_pinned) {
const bool kKeyIncludesSeq = true;
InitializeBase(comparator, user_comparator, data, restarts, num_restarts,
global_seqno, kKeyIncludesSeq, block_contents_pinned);
read_amp_bitmap_ = read_amp_bitmap;
last_bitmap_offset_ = current_ + 1;
}
virtual Slice value() const override {
assert(Valid());
if (read_amp_bitmap_ && current_ < restarts_ &&
current_ != last_bitmap_offset_) {
read_amp_bitmap_->Mark(current_ /* current entry offset */,
NextEntryOffset() - 1);
last_bitmap_offset_ = current_;
}
return value_;
}
private:
// read-amp bitmap
BlockReadAmpBitmap* read_amp_bitmap_;
// last `current_` value we report to read-amp bitmp
mutable uint32_t last_bitmap_offset_;
};
class IndexBlockIter final : public BlockIter {
public:
IndexBlockIter() : BlockIter(), prefix_index_(nullptr) {}
virtual Slice key() const override {
assert(Valid());
return key_includes_seq_ ? key_.GetInternalKey() : key_.GetUserKey();
}
IndexBlockIter(const Comparator* comparator,
const Comparator* user_comparator, const char* data,
uint32_t restarts, uint32_t num_restarts,
BlockPrefixIndex* prefix_index, bool key_includes_seq,
bool block_contents_pinned)
: IndexBlockIter() {
Initialize(comparator, user_comparator, data, restarts, num_restarts,
prefix_index, key_includes_seq, block_contents_pinned);
}
void Initialize(const Comparator* comparator,
const Comparator* user_comparator, const char* data,
uint32_t restarts, uint32_t num_restarts,
BlockPrefixIndex* prefix_index, bool key_includes_seq,
bool block_contents_pinned) {
InitializeBase(comparator, user_comparator, data, restarts, num_restarts,
kDisableGlobalSequenceNumber, key_includes_seq,
block_contents_pinned);
prefix_index_ = prefix_index;
}
virtual void Seek(const Slice& target) override;
private:
bool PrefixSeek(const Slice& target, uint32_t* index);
bool BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids, bool BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids,
uint32_t left, uint32_t right, uint32_t left, uint32_t right,
uint32_t* index); uint32_t* index);
int CompareBlockKey(uint32_t block_index, const Slice& target);
bool PrefixSeek(const Slice& target, uint32_t* index); BlockPrefixIndex* prefix_index_;
}; };
} // namespace rocksdb } // namespace rocksdb

@ -229,26 +229,27 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
} }
// return a two-level iterator: first level is on the partition index // return a two-level iterator: first level is on the partition index
virtual InternalIterator* NewIterator(BlockIter* /*iter*/ = nullptr, virtual InternalIterator* NewIterator(IndexBlockIter* /*iter*/ = nullptr,
bool /*dont_care*/ = true, bool /*dont_care*/ = true,
bool fill_cache = true) override { bool fill_cache = true) override {
Statistics* kNullStats = nullptr;
// Filters are already checked before seeking the index // Filters are already checked before seeking the index
if (!partition_map_.empty()) { if (!partition_map_.empty()) {
return NewTwoLevelIterator( return NewTwoLevelIterator(
new BlockBasedTable::PartitionedIndexIteratorState( new BlockBasedTable::PartitionedIndexIteratorState(
table_, &partition_map_, index_key_includes_seq_), table_, &partition_map_, index_key_includes_seq_),
index_block_->NewIterator(icomparator_, index_block_->NewIterator<IndexBlockIter>(
icomparator_->user_comparator(), nullptr, icomparator_, icomparator_->user_comparator(), nullptr,
true, nullptr, index_key_includes_seq_)); kNullStats, true, index_key_includes_seq_));
} else { } else {
auto ro = ReadOptions(); auto ro = ReadOptions();
ro.fill_cache = fill_cache; ro.fill_cache = fill_cache;
bool kIsIndex = true; bool kIsIndex = true;
return new BlockBasedTableIterator( return new BlockBasedTableIterator<IndexBlockIter>(
table_, ro, *icomparator_, table_, ro, *icomparator_,
index_block_->NewIterator(icomparator_, index_block_->NewIterator<IndexBlockIter>(
icomparator_->user_comparator(), nullptr, icomparator_, icomparator_->user_comparator(), nullptr,
true, nullptr, index_key_includes_seq_), kNullStats, true, index_key_includes_seq_),
false, true, /* prefix_extractor */ nullptr, kIsIndex, false, true, /* prefix_extractor */ nullptr, kIsIndex,
index_key_includes_seq_); index_key_includes_seq_);
} }
@ -261,10 +262,12 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
virtual void CacheDependencies(bool pin) override { virtual void CacheDependencies(bool pin) override {
// Before read partitions, prefetch them to avoid lots of IOs // Before read partitions, prefetch them to avoid lots of IOs
auto rep = table_->rep_; auto rep = table_->rep_;
BlockIter biter; IndexBlockIter biter;
BlockHandle handle; BlockHandle handle;
index_block_->NewIterator(icomparator_, icomparator_->user_comparator(), Statistics* kNullStats = nullptr;
&biter, true, nullptr, index_key_includes_seq_); index_block_->NewIterator<IndexBlockIter>(
icomparator_, icomparator_->user_comparator(), &biter, kNullStats, true,
index_key_includes_seq_);
// Index partitions are assumed to be consecuitive. Prefetch them all. // Index partitions are assumed to be consecuitive. Prefetch them all.
// Read the first block offset // Read the first block offset
biter.SeekToFirst(); biter.SeekToFirst();
@ -415,12 +418,13 @@ class BinarySearchIndexReader : public IndexReader {
return s; return s;
} }
virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, virtual InternalIterator* NewIterator(IndexBlockIter* iter = nullptr,
bool /*dont_care*/ = true, bool /*dont_care*/ = true,
bool /*dont_care*/ = true) override { bool /*dont_care*/ = true) override {
return index_block_->NewIterator(icomparator_, Statistics* kNullStats = nullptr;
icomparator_->user_comparator(), iter, return index_block_->NewIterator<IndexBlockIter>(
true, nullptr, index_key_includes_seq_); icomparator_, icomparator_->user_comparator(), iter, kNullStats, true,
index_key_includes_seq_);
} }
virtual size_t size() const override { return index_block_->size(); } virtual size_t size() const override { return index_block_->size(); }
@ -532,18 +536,19 @@ class HashIndexReader : public IndexReader {
prefixes_meta_contents.data, &prefix_index); prefixes_meta_contents.data, &prefix_index);
// TODO: log error // TODO: log error
if (s.ok()) { if (s.ok()) {
new_index_reader->index_block_->SetBlockPrefixIndex(prefix_index); new_index_reader->prefix_index_.reset(prefix_index);
} }
return Status::OK(); return Status::OK();
} }
virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, virtual InternalIterator* NewIterator(IndexBlockIter* iter = nullptr,
bool total_order_seek = true, bool total_order_seek = true,
bool /*dont_care*/ = true) override { bool /*dont_care*/ = true) override {
return index_block_->NewIterator( Statistics* kNullStats = nullptr;
icomparator_, icomparator_->user_comparator(), iter, total_order_seek, return index_block_->NewIterator<IndexBlockIter>(
nullptr, index_key_includes_seq_); icomparator_, icomparator_->user_comparator(), iter, kNullStats,
total_order_seek, index_key_includes_seq_, prefix_index_.get());
} }
virtual size_t size() const override { return index_block_->size(); } virtual size_t size() const override { return index_block_->size(); }
@ -558,6 +563,9 @@ class HashIndexReader : public IndexReader {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE #ifdef ROCKSDB_MALLOC_USABLE_SIZE
usage += malloc_usable_size((void*)this); usage += malloc_usable_size((void*)this);
#else #else
if (prefix_index_) {
usage += prefix_index_->ApproximateMemoryUsage();
}
usage += sizeof(*this); usage += sizeof(*this);
#endif // ROCKSDB_MALLOC_USABLE_SIZE #endif // ROCKSDB_MALLOC_USABLE_SIZE
return usage; return usage;
@ -577,6 +585,7 @@ class HashIndexReader : public IndexReader {
} }
std::unique_ptr<Block> index_block_; std::unique_ptr<Block> index_block_;
std::unique_ptr<BlockPrefixIndex> prefix_index_;
BlockContents prefixes_contents_; BlockContents prefixes_contents_;
const bool index_key_includes_seq_; const bool index_key_includes_seq_;
}; };
@ -1110,8 +1119,8 @@ Status BlockBasedTable::ReadMetaBlock(Rep* rep,
*meta_block = std::move(meta); *meta_block = std::move(meta);
// meta block uses bytewise comparator. // meta block uses bytewise comparator.
iter->reset(meta_block->get()->NewIterator(BytewiseComparator(), iter->reset(meta_block->get()->NewIterator<BlockIter>(BytewiseComparator(),
BytewiseComparator())); BytewiseComparator()));
return Status::OK(); return Status::OK();
} }
@ -1477,7 +1486,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
// differs from the one in mutable_cf_options and index type is HashBasedIndex // differs from the one in mutable_cf_options and index type is HashBasedIndex
InternalIterator* BlockBasedTable::NewIndexIterator( InternalIterator* BlockBasedTable::NewIndexIterator(
const ReadOptions& read_options, bool disable_prefix_seek, const ReadOptions& read_options, bool disable_prefix_seek,
BlockIter* input_iter, CachableEntry<IndexReader>* index_entry, IndexBlockIter* input_iter, CachableEntry<IndexReader>* index_entry,
GetContext* get_context) { GetContext* get_context) {
// index reader has already been pre-populated. // index reader has already been pre-populated.
if (rep_->index_reader) { if (rep_->index_reader) {
@ -1578,28 +1587,29 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
return iter; return iter;
} }
BlockIter* BlockBasedTable::NewDataBlockIterator( template <typename TBlockIter>
TBlockIter* BlockBasedTable::NewDataBlockIterator(
Rep* rep, const ReadOptions& ro, const Slice& index_value, Rep* rep, const ReadOptions& ro, const Slice& index_value,
BlockIter* input_iter, bool is_index, bool key_includes_seq, TBlockIter* input_iter, bool is_index, bool key_includes_seq,
GetContext* get_context, GetContext* get_context, FilePrefetchBuffer* prefetch_buffer) {
FilePrefetchBuffer* prefetch_buffer) {
BlockHandle handle; BlockHandle handle;
Slice input = index_value; Slice input = index_value;
// We intentionally allow extra stuff in index_value so that we // We intentionally allow extra stuff in index_value so that we
// can add more features in the future. // can add more features in the future.
Status s = handle.DecodeFrom(&input); Status s = handle.DecodeFrom(&input);
return NewDataBlockIterator(rep, ro, handle, input_iter, is_index, return NewDataBlockIterator<TBlockIter>(rep, ro, handle, input_iter, is_index,
key_includes_seq, get_context, s, key_includes_seq, get_context, s,
prefetch_buffer); prefetch_buffer);
} }
// Convert an index iterator value (i.e., an encoded BlockHandle) // Convert an index iterator value (i.e., an encoded BlockHandle)
// into an iterator over the contents of the corresponding block. // into an iterator over the contents of the corresponding block.
// If input_iter is null, new a iterator // If input_iter is null, new a iterator
// If input_iter is not null, update this iter and return it // If input_iter is not null, update this iter and return it
BlockIter* BlockBasedTable::NewDataBlockIterator( template <typename TBlockIter>
TBlockIter* BlockBasedTable::NewDataBlockIterator(
Rep* rep, const ReadOptions& ro, const BlockHandle& handle, Rep* rep, const ReadOptions& ro, const BlockHandle& handle,
BlockIter* input_iter, bool is_index, bool key_includes_seq, TBlockIter* input_iter, bool is_index, bool key_includes_seq,
GetContext* get_context, Status s, FilePrefetchBuffer* prefetch_buffer) { GetContext* get_context, Status s, FilePrefetchBuffer* prefetch_buffer) {
PERF_TIMER_GUARD(new_table_block_iter_nanos); PERF_TIMER_GUARD(new_table_block_iter_nanos);
@ -1616,11 +1626,11 @@ BlockIter* BlockBasedTable::NewDataBlockIterator(
get_context); get_context);
} }
BlockIter* iter; TBlockIter* iter;
if (input_iter != nullptr) { if (input_iter != nullptr) {
iter = input_iter; iter = input_iter;
} else { } else {
iter = new BlockIter; iter = new TBlockIter;
} }
// Didn't get any data from block caches. // Didn't get any data from block caches.
if (s.ok() && block.value == nullptr) { if (s.ok() && block.value == nullptr) {
@ -1647,9 +1657,10 @@ BlockIter* BlockBasedTable::NewDataBlockIterator(
if (s.ok()) { if (s.ok()) {
assert(block.value != nullptr); assert(block.value != nullptr);
iter = block.value->NewIterator( const bool kTotalOrderSeek = true;
iter = block.value->NewIterator<TBlockIter>(
&rep->internal_comparator, rep->internal_comparator.user_comparator(), &rep->internal_comparator, rep->internal_comparator.user_comparator(),
iter, true, rep->ioptions.statistics, key_includes_seq); iter, rep->ioptions.statistics, kTotalOrderSeek, key_includes_seq);
if (block.cache_handle != nullptr) { if (block.cache_handle != nullptr) {
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
block.cache_handle); block.cache_handle);
@ -1769,7 +1780,9 @@ BlockBasedTable::PartitionedIndexIteratorState::PartitionedIndexIteratorState(
block_map_(block_map), block_map_(block_map),
index_key_includes_seq_(index_key_includes_seq) {} index_key_includes_seq_(index_key_includes_seq) {}
const size_t BlockBasedTableIterator::kMaxReadaheadSize = 256 * 1024; template <class TBlockIter>
const size_t BlockBasedTableIterator<TBlockIter>::kMaxReadaheadSize =
256 * 1024;
InternalIterator* InternalIterator*
BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator( BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator(
@ -1790,9 +1803,10 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator(
assert(block_cache); assert(block_cache);
RecordTick(rep->ioptions.statistics, BLOCK_CACHE_BYTES_READ, RecordTick(rep->ioptions.statistics, BLOCK_CACHE_BYTES_READ,
block_cache->GetUsage(block->second.cache_handle)); block_cache->GetUsage(block->second.cache_handle));
return block->second.value->NewIterator( Statistics* kNullStats = nullptr;
return block->second.value->NewIterator<IndexBlockIter>(
&rep->internal_comparator, rep->internal_comparator.user_comparator(), &rep->internal_comparator, rep->internal_comparator.user_comparator(),
nullptr, true, rep->ioptions.statistics, index_key_includes_seq_); nullptr, kNullStats, true, index_key_includes_seq_);
} }
// Create an empty iterator // Create an empty iterator
return new BlockIter(); return new BlockIter();
@ -1928,7 +1942,8 @@ bool BlockBasedTable::PrefixMayMatch(
return may_match; return may_match;
} }
void BlockBasedTableIterator::Seek(const Slice& target) { template <class TBlockIter>
void BlockBasedTableIterator<TBlockIter>::Seek(const Slice& target) {
if (!CheckPrefixMayMatch(target)) { if (!CheckPrefixMayMatch(target)) {
ResetDataIter(); ResetDataIter();
return; return;
@ -1945,18 +1960,19 @@ void BlockBasedTableIterator::Seek(const Slice& target) {
InitDataBlock(); InitDataBlock();
data_block_iter_.Seek(target); block_iter_.Seek(target);
FindKeyForward(); FindKeyForward();
assert(!data_block_iter_.Valid() || assert(
(key_includes_seq_ && !block_iter_.Valid() ||
icomp_.Compare(target, data_block_iter_.key()) <= 0) || (key_includes_seq_ && icomp_.Compare(target, block_iter_.key()) <= 0) ||
(!key_includes_seq_ && (!key_includes_seq_ &&
icomp_.user_comparator()->Compare(ExtractUserKey(target), icomp_.user_comparator()->Compare(ExtractUserKey(target),
data_block_iter_.key()) <= 0)); block_iter_.key()) <= 0));
} }
void BlockBasedTableIterator::SeekForPrev(const Slice& target) { template <class TBlockIter>
void BlockBasedTableIterator<TBlockIter>::SeekForPrev(const Slice& target) {
if (!CheckPrefixMayMatch(target)) { if (!CheckPrefixMayMatch(target)) {
ResetDataIter(); ResetDataIter();
return; return;
@ -1990,14 +2006,15 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
InitDataBlock(); InitDataBlock();
data_block_iter_.SeekForPrev(target); block_iter_.SeekForPrev(target);
FindKeyBackward(); FindKeyBackward();
assert(!data_block_iter_.Valid() || assert(!block_iter_.Valid() ||
icomp_.Compare(target, data_block_iter_.key()) >= 0); icomp_.Compare(target, block_iter_.key()) >= 0);
} }
void BlockBasedTableIterator::SeekToFirst() { template <class TBlockIter>
void BlockBasedTableIterator<TBlockIter>::SeekToFirst() {
SavePrevIndexValue(); SavePrevIndexValue();
index_iter_->SeekToFirst(); index_iter_->SeekToFirst();
if (!index_iter_->Valid()) { if (!index_iter_->Valid()) {
@ -2005,11 +2022,12 @@ void BlockBasedTableIterator::SeekToFirst() {
return; return;
} }
InitDataBlock(); InitDataBlock();
data_block_iter_.SeekToFirst(); block_iter_.SeekToFirst();
FindKeyForward(); FindKeyForward();
} }
void BlockBasedTableIterator::SeekToLast() { template <class TBlockIter>
void BlockBasedTableIterator<TBlockIter>::SeekToLast() {
SavePrevIndexValue(); SavePrevIndexValue();
index_iter_->SeekToLast(); index_iter_->SeekToLast();
if (!index_iter_->Valid()) { if (!index_iter_->Valid()) {
@ -2017,29 +2035,32 @@ void BlockBasedTableIterator::SeekToLast() {
return; return;
} }
InitDataBlock(); InitDataBlock();
data_block_iter_.SeekToLast(); block_iter_.SeekToLast();
FindKeyBackward(); FindKeyBackward();
} }
void BlockBasedTableIterator::Next() { template <class TBlockIter>
void BlockBasedTableIterator<TBlockIter>::Next() {
assert(block_iter_points_to_real_block_); assert(block_iter_points_to_real_block_);
data_block_iter_.Next(); block_iter_.Next();
FindKeyForward(); FindKeyForward();
} }
void BlockBasedTableIterator::Prev() { template <class TBlockIter>
void BlockBasedTableIterator<TBlockIter>::Prev() {
assert(block_iter_points_to_real_block_); assert(block_iter_points_to_real_block_);
data_block_iter_.Prev(); block_iter_.Prev();
FindKeyBackward(); FindKeyBackward();
} }
void BlockBasedTableIterator::InitDataBlock() { template <class TBlockIter>
void BlockBasedTableIterator<TBlockIter>::InitDataBlock() {
BlockHandle data_block_handle; BlockHandle data_block_handle;
Slice handle_slice = index_iter_->value(); Slice handle_slice = index_iter_->value();
if (!block_iter_points_to_real_block_ || if (!block_iter_points_to_real_block_ ||
handle_slice.compare(prev_index_value_) != 0 || handle_slice.compare(prev_index_value_) != 0 ||
// if previous attempt of reading the block missed cache, try again // if previous attempt of reading the block missed cache, try again
data_block_iter_.status().IsIncomplete()) { block_iter_.status().IsIncomplete()) {
if (block_iter_points_to_real_block_) { if (block_iter_points_to_real_block_) {
ResetDataIter(); ResetDataIter();
} }
@ -2075,21 +2096,21 @@ void BlockBasedTableIterator::InitDataBlock() {
} }
} }
BlockBasedTable::NewDataBlockIterator(rep, read_options_, data_block_handle, BlockBasedTable::NewDataBlockIterator<TBlockIter>(
&data_block_iter_, is_index_, rep, read_options_, data_block_handle, &block_iter_, is_index_,
key_includes_seq_, key_includes_seq_,
/* get_context */ nullptr, s, /* get_context */ nullptr, s, prefetch_buffer_.get());
prefetch_buffer_.get());
block_iter_points_to_real_block_ = true; block_iter_points_to_real_block_ = true;
} }
} }
void BlockBasedTableIterator::FindKeyForward() { template <class TBlockIter>
void BlockBasedTableIterator<TBlockIter>::FindKeyForward() {
is_out_of_bound_ = false; is_out_of_bound_ = false;
// TODO the while loop inherits from two-level-iterator. We don't know // TODO the while loop inherits from two-level-iterator. We don't know
// whether a block can be empty so it can be replaced by an "if". // whether a block can be empty so it can be replaced by an "if".
while (!data_block_iter_.Valid()) { while (!block_iter_.Valid()) {
if (!data_block_iter_.status().ok()) { if (!block_iter_.status().ok()) {
return; return;
} }
ResetDataIter(); ResetDataIter();
@ -2101,7 +2122,7 @@ void BlockBasedTableIterator::FindKeyForward() {
if (index_iter_->Valid()) { if (index_iter_->Valid()) {
InitDataBlock(); InitDataBlock();
data_block_iter_.SeekToFirst(); block_iter_.SeekToFirst();
} else { } else {
return; return;
} }
@ -2110,8 +2131,8 @@ void BlockBasedTableIterator::FindKeyForward() {
// Check upper bound on the current key // Check upper bound on the current key
bool reached_upper_bound = bool reached_upper_bound =
(read_options_.iterate_upper_bound != nullptr && (read_options_.iterate_upper_bound != nullptr &&
block_iter_points_to_real_block_ && data_block_iter_.Valid() && block_iter_points_to_real_block_ && block_iter_.Valid() &&
icomp_.user_comparator()->Compare(ExtractUserKey(data_block_iter_.key()), icomp_.user_comparator()->Compare(ExtractUserKey(block_iter_.key()),
*read_options_.iterate_upper_bound) >= *read_options_.iterate_upper_bound) >=
0); 0);
TEST_SYNC_POINT_CALLBACK( TEST_SYNC_POINT_CALLBACK(
@ -2123,9 +2144,10 @@ void BlockBasedTableIterator::FindKeyForward() {
} }
} }
void BlockBasedTableIterator::FindKeyBackward() { template <class TBlockIter>
while (!data_block_iter_.Valid()) { void BlockBasedTableIterator<TBlockIter>::FindKeyBackward() {
if (!data_block_iter_.status().ok()) { while (!block_iter_.Valid()) {
if (!block_iter_.status().ok()) {
return; return;
} }
@ -2134,7 +2156,7 @@ void BlockBasedTableIterator::FindKeyBackward() {
if (index_iter_->Valid()) { if (index_iter_->Valid()) {
InitDataBlock(); InitDataBlock();
data_block_iter_.SeekToLast(); block_iter_.SeekToLast();
} else { } else {
return; return;
} }
@ -2151,7 +2173,7 @@ InternalIterator* BlockBasedTable::NewIterator(
PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor); PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor);
const bool kIsNotIndex = false; const bool kIsNotIndex = false;
if (arena == nullptr) { if (arena == nullptr) {
return new BlockBasedTableIterator( return new BlockBasedTableIterator<DataBlockIter>(
this, read_options, rep_->internal_comparator, this, read_options, rep_->internal_comparator,
NewIndexIterator( NewIndexIterator(
read_options, read_options,
@ -2162,8 +2184,9 @@ InternalIterator* BlockBasedTable::NewIterator(
need_upper_bound_check, prefix_extractor, kIsNotIndex, need_upper_bound_check, prefix_extractor, kIsNotIndex,
true /*key_includes_seq*/, for_compaction); true /*key_includes_seq*/, for_compaction);
} else { } else {
auto* mem = arena->AllocateAligned(sizeof(BlockBasedTableIterator)); auto* mem =
return new (mem) BlockBasedTableIterator( arena->AllocateAligned(sizeof(BlockBasedTableIterator<DataBlockIter>));
return new (mem) BlockBasedTableIterator<DataBlockIter>(
this, read_options, rep_->internal_comparator, this, read_options, rep_->internal_comparator,
NewIndexIterator(read_options, need_upper_bound_check), NewIndexIterator(read_options, need_upper_bound_check),
!skip_filters && !read_options.total_order_seek && !skip_filters && !read_options.total_order_seek &&
@ -2188,10 +2211,9 @@ InternalIterator* BlockBasedTable::NewRangeTombstoneIterator(
Cache* block_cache = rep_->table_options.block_cache.get(); Cache* block_cache = rep_->table_options.block_cache.get();
assert(block_cache != nullptr); assert(block_cache != nullptr);
if (block_cache->Ref(rep_->range_del_entry.cache_handle)) { if (block_cache->Ref(rep_->range_del_entry.cache_handle)) {
auto iter = rep_->range_del_entry.value->NewIterator( auto iter = rep_->range_del_entry.value->NewIterator<BlockIter>(
&rep_->internal_comparator, &rep_->internal_comparator,
rep_->internal_comparator.user_comparator(), nullptr /* iter */, rep_->internal_comparator.user_comparator());
true /* total_order_seek */, rep_->ioptions.statistics);
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
rep_->range_del_entry.cache_handle); rep_->range_del_entry.cache_handle);
return iter; return iter;
@ -2201,7 +2223,7 @@ InternalIterator* BlockBasedTable::NewRangeTombstoneIterator(
rep_->range_del_handle.EncodeTo(&str); rep_->range_del_handle.EncodeTo(&str);
// The meta-block exists but isn't in uncompressed block cache (maybe // The meta-block exists but isn't in uncompressed block cache (maybe
// because it is disabled), so go through the full lookup process. // because it is disabled), so go through the full lookup process.
return NewDataBlockIterator(rep_, read_options, Slice(str)); return NewDataBlockIterator<BlockIter>(rep_, read_options, Slice(str));
} }
bool BlockBasedTable::FullFilterKeyMayMatch( bool BlockBasedTable::FullFilterKeyMayMatch(
@ -2253,7 +2275,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
prefix_extractor)) { prefix_extractor)) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
} else { } else {
BlockIter iiter_on_stack; IndexBlockIter iiter_on_stack;
// if prefix_extractor found in block differs from options, disable // if prefix_extractor found in block differs from options, disable
// BlockPrefixIndex. Only do this check when index_type is kHashSearch. // BlockPrefixIndex. Only do this check when index_type is kHashSearch.
bool need_upper_bound_check = false; bool need_upper_bound_check = false;
@ -2288,9 +2310,10 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
break; break;
} else { } else {
BlockIter biter; DataBlockIter biter;
NewDataBlockIterator(rep_, read_options, iiter->value(), &biter, false, NewDataBlockIterator<DataBlockIter>(
true /* key_includes_seq */, get_context); rep_, read_options, iiter->value(), &biter, false,
true /* key_includes_seq */, get_context);
if (read_options.read_tier == kBlockCacheTier && if (read_options.read_tier == kBlockCacheTier &&
biter.status().IsIncomplete()) { biter.status().IsIncomplete()) {
@ -2352,7 +2375,7 @@ Status BlockBasedTable::Prefetch(const Slice* const begin,
return Status::InvalidArgument(*begin, *end); return Status::InvalidArgument(*begin, *end);
} }
BlockIter iiter_on_stack; IndexBlockIter iiter_on_stack;
auto iiter = NewIndexIterator(ReadOptions(), false, &iiter_on_stack); auto iiter = NewIndexIterator(ReadOptions(), false, &iiter_on_stack);
std::unique_ptr<InternalIterator> iiter_unique_ptr; std::unique_ptr<InternalIterator> iiter_unique_ptr;
if (iiter != &iiter_on_stack) { if (iiter != &iiter_on_stack) {
@ -2386,8 +2409,9 @@ Status BlockBasedTable::Prefetch(const Slice* const begin,
} }
// Load the block specified by the block_handle into the block cache // Load the block specified by the block_handle into the block cache
BlockIter biter; DataBlockIter biter;
NewDataBlockIterator(rep_, ReadOptions(), block_handle, &biter); NewDataBlockIterator<DataBlockIter>(rep_, ReadOptions(), block_handle,
&biter);
if (!biter.status().ok()) { if (!biter.status().ok()) {
// there was an unexpected error while pre-fetching // there was an unexpected error while pre-fetching
@ -2413,7 +2437,7 @@ Status BlockBasedTable::VerifyChecksum() {
return s; return s;
} }
// Check Data blocks // Check Data blocks
BlockIter iiter_on_stack; IndexBlockIter iiter_on_stack;
InternalIterator* iiter = InternalIterator* iiter =
NewIndexIterator(ReadOptions(), false, &iiter_on_stack); NewIndexIterator(ReadOptions(), false, &iiter_on_stack);
std::unique_ptr<InternalIterator> iiter_unique_ptr; std::unique_ptr<InternalIterator> iiter_unique_ptr;
@ -2645,8 +2669,8 @@ Status BlockBasedTable::GetKVPairsFromDataBlocks(
} }
std::unique_ptr<InternalIterator> datablock_iter; std::unique_ptr<InternalIterator> datablock_iter;
datablock_iter.reset( datablock_iter.reset(NewDataBlockIterator<DataBlockIter>(
NewDataBlockIterator(rep_, ReadOptions(), blockhandles_iter->value())); rep_, ReadOptions(), blockhandles_iter->value()));
s = datablock_iter->status(); s = datablock_iter->status();
if (!s.ok()) { if (!s.ok()) {
@ -2927,8 +2951,8 @@ Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) {
out_file->Append("--------------------------------------\n"); out_file->Append("--------------------------------------\n");
std::unique_ptr<InternalIterator> datablock_iter; std::unique_ptr<InternalIterator> datablock_iter;
datablock_iter.reset( datablock_iter.reset(NewDataBlockIterator<DataBlockIter>(
NewDataBlockIterator(rep_, ReadOptions(), blockhandles_iter->value())); rep_, ReadOptions(), blockhandles_iter->value()));
s = datablock_iter->status(); s = datablock_iter->status();
if (!s.ok()) { if (!s.ok()) {

@ -175,7 +175,7 @@ class BlockBasedTable : public TableReader {
// to // to
// a different object then iter and the callee has the ownership of the // a different object then iter and the callee has the ownership of the
// returned object. // returned object.
virtual InternalIterator* NewIterator(BlockIter* iter = nullptr, virtual InternalIterator* NewIterator(IndexBlockIter* iter = nullptr,
bool total_order_seek = true, bool total_order_seek = true,
bool fill_cache = true) = 0; bool fill_cache = true) = 0;
@ -217,14 +217,16 @@ class BlockBasedTable : public TableReader {
Rep* get_rep() { return rep_; } Rep* get_rep() { return rep_; }
// input_iter: if it is not null, update this one and return it as Iterator // input_iter: if it is not null, update this one and return it as Iterator
static BlockIter* NewDataBlockIterator( template <typename TBlockIter>
static TBlockIter* NewDataBlockIterator(
Rep* rep, const ReadOptions& ro, const Slice& index_value, Rep* rep, const ReadOptions& ro, const Slice& index_value,
BlockIter* input_iter = nullptr, bool is_index = false, TBlockIter* input_iter = nullptr, bool is_index = false,
bool key_includes_seq = true, GetContext* get_context = nullptr, bool key_includes_seq = true, GetContext* get_context = nullptr,
FilePrefetchBuffer* prefetch_buffer = nullptr); FilePrefetchBuffer* prefetch_buffer = nullptr);
static BlockIter* NewDataBlockIterator( template <typename TBlockIter>
static TBlockIter* NewDataBlockIterator(
Rep* rep, const ReadOptions& ro, const BlockHandle& block_hanlde, Rep* rep, const ReadOptions& ro, const BlockHandle& block_hanlde,
BlockIter* input_iter = nullptr, bool is_index = false, TBlockIter* input_iter = nullptr, bool is_index = false,
bool key_includes_seq = true, GetContext* get_context = nullptr, bool key_includes_seq = true, GetContext* get_context = nullptr,
Status s = Status(), FilePrefetchBuffer* prefetch_buffer = nullptr); Status s = Status(), FilePrefetchBuffer* prefetch_buffer = nullptr);
@ -281,7 +283,7 @@ class BlockBasedTable : public TableReader {
// kBlockCacheTier // kBlockCacheTier
InternalIterator* NewIndexIterator( InternalIterator* NewIndexIterator(
const ReadOptions& read_options, bool need_upper_bound_check = false, const ReadOptions& read_options, bool need_upper_bound_check = false,
BlockIter* input_iter = nullptr, IndexBlockIter* input_iter = nullptr,
CachableEntry<IndexReader>* index_entry = nullptr, CachableEntry<IndexReader>* index_entry = nullptr,
GetContext* get_context = nullptr); GetContext* get_context = nullptr);
@ -516,6 +518,7 @@ struct BlockBasedTable::Rep {
const bool immortal_table; const bool immortal_table;
}; };
template <class TBlockIter>
class BlockBasedTableIterator : public InternalIterator { class BlockBasedTableIterator : public InternalIterator {
public: public:
BlockBasedTableIterator(BlockBasedTable* table, BlockBasedTableIterator(BlockBasedTable* table,
@ -549,21 +552,21 @@ class BlockBasedTableIterator : public InternalIterator {
void Prev() override; void Prev() override;
bool Valid() const override { bool Valid() const override {
return !is_out_of_bound_ && block_iter_points_to_real_block_ && return !is_out_of_bound_ && block_iter_points_to_real_block_ &&
data_block_iter_.Valid(); block_iter_.Valid();
} }
Slice key() const override { Slice key() const override {
assert(Valid()); assert(Valid());
return data_block_iter_.key(); return block_iter_.key();
} }
Slice value() const override { Slice value() const override {
assert(Valid()); assert(Valid());
return data_block_iter_.value(); return block_iter_.value();
} }
Status status() const override { Status status() const override {
if (!index_iter_->status().ok()) { if (!index_iter_->status().ok()) {
return index_iter_->status(); return index_iter_->status();
} else if (block_iter_points_to_real_block_) { } else if (block_iter_points_to_real_block_) {
return data_block_iter_.status(); return block_iter_.status();
} else { } else {
return Status::OK(); return Status::OK();
} }
@ -576,7 +579,7 @@ class BlockBasedTableIterator : public InternalIterator {
} }
bool IsKeyPinned() const override { bool IsKeyPinned() const override {
return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
block_iter_points_to_real_block_ && data_block_iter_.IsKeyPinned(); block_iter_points_to_real_block_ && block_iter_.IsKeyPinned();
} }
bool IsValuePinned() const override { bool IsValuePinned() const override {
// BlockIter::IsValuePinned() is always true. No need to check // BlockIter::IsValuePinned() is always true. No need to check
@ -601,9 +604,9 @@ class BlockBasedTableIterator : public InternalIterator {
void ResetDataIter() { void ResetDataIter() {
if (block_iter_points_to_real_block_) { if (block_iter_points_to_real_block_) {
if (pinned_iters_mgr_ != nullptr && pinned_iters_mgr_->PinningEnabled()) { if (pinned_iters_mgr_ != nullptr && pinned_iters_mgr_->PinningEnabled()) {
data_block_iter_.DelegateCleanupsTo(pinned_iters_mgr_); block_iter_.DelegateCleanupsTo(pinned_iters_mgr_);
} }
data_block_iter_.Invalidate(Status::OK()); block_iter_.Invalidate(Status::OK());
block_iter_points_to_real_block_ = false; block_iter_points_to_real_block_ = false;
} }
} }
@ -627,7 +630,7 @@ class BlockBasedTableIterator : public InternalIterator {
const InternalKeyComparator& icomp_; const InternalKeyComparator& icomp_;
InternalIterator* index_iter_; InternalIterator* index_iter_;
PinnedIteratorsManager* pinned_iters_mgr_; PinnedIteratorsManager* pinned_iters_mgr_;
BlockIter data_block_iter_; TBlockIter block_iter_;
bool block_iter_points_to_real_block_; bool block_iter_points_to_real_block_;
bool is_out_of_bound_ = false; bool is_out_of_bound_ = false;
bool check_filter_; bool check_filter_;

@ -100,7 +100,7 @@ TEST_F(BlockTest, SimpleTest) {
// read contents of block sequentially // read contents of block sequentially
int count = 0; int count = 0;
InternalIterator *iter = InternalIterator *iter =
reader.NewIterator(options.comparator, options.comparator); reader.NewIterator<BlockIter>(options.comparator, options.comparator);
for (iter->SeekToFirst();iter->Valid(); count++, iter->Next()) { for (iter->SeekToFirst();iter->Valid(); count++, iter->Next()) {
// read kv from block // read kv from block
@ -114,7 +114,7 @@ TEST_F(BlockTest, SimpleTest) {
delete iter; delete iter;
// read block contents randomly // read block contents randomly
iter = reader.NewIterator(options.comparator, options.comparator); iter = reader.NewIterator<BlockIter>(options.comparator, options.comparator);
for (int i = 0; i < num_records; i++) { for (int i = 0; i < num_records; i++) {
// find a random key in the lookaside array // find a random key in the lookaside array
@ -163,8 +163,8 @@ void CheckBlockContents(BlockContents contents, const int max_key,
std::unique_ptr<const SliceTransform> prefix_extractor( std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(prefix_size)); NewFixedPrefixTransform(prefix_size));
std::unique_ptr<InternalIterator> regular_iter( std::unique_ptr<InternalIterator> regular_iter(reader2.NewIterator<BlockIter>(
reader2.NewIterator(BytewiseComparator(), BytewiseComparator())); BytewiseComparator(), BytewiseComparator()));
// Seek existent keys // Seek existent keys
for (size_t i = 0; i < keys.size(); i++) { for (size_t i = 0; i < keys.size(); i++) {
@ -389,8 +389,9 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) {
// read contents of block sequentially // read contents of block sequentially
size_t read_bytes = 0; size_t read_bytes = 0;
BlockIter *iter = static_cast<BlockIter *>(reader.NewIterator( DataBlockIter *iter =
options.comparator, options.comparator, nullptr, true, stats.get())); static_cast<DataBlockIter *>(reader.NewIterator<DataBlockIter>(
options.comparator, options.comparator, nullptr, stats.get()));
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
iter->value(); iter->value();
read_bytes += iter->TEST_CurrentEntrySize(); read_bytes += iter->TEST_CurrentEntrySize();
@ -422,8 +423,9 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) {
kBytesPerBit, stats.get()); kBytesPerBit, stats.get());
size_t read_bytes = 0; size_t read_bytes = 0;
BlockIter *iter = static_cast<BlockIter *>(reader.NewIterator( DataBlockIter *iter =
options.comparator, options.comparator, nullptr, true, stats.get())); static_cast<DataBlockIter *>(reader.NewIterator<DataBlockIter>(
options.comparator, options.comparator, nullptr, stats.get()));
for (int i = 0; i < num_records; i++) { for (int i = 0; i < num_records; i++) {
Slice k(keys[i]); Slice k(keys[i]);
@ -458,8 +460,9 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) {
kBytesPerBit, stats.get()); kBytesPerBit, stats.get());
size_t read_bytes = 0; size_t read_bytes = 0;
BlockIter *iter = static_cast<BlockIter *>(reader.NewIterator( DataBlockIter *iter =
options.comparator, options.comparator, nullptr, true, stats.get())); static_cast<DataBlockIter *>(reader.NewIterator<DataBlockIter>(
options.comparator, options.comparator, nullptr, stats.get()));
std::unordered_set<int> read_keys; std::unordered_set<int> read_keys;
for (int i = 0; i < num_records; i++) { for (int i = 0; i < num_records; i++) {
int index = rnd.Uniform(num_records); int index = rnd.Uniform(num_records);

@ -204,8 +204,8 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file,
Block properties_block(std::move(block_contents), Block properties_block(std::move(block_contents),
kDisableGlobalSequenceNumber); kDisableGlobalSequenceNumber);
BlockIter iter; BlockIter iter;
properties_block.NewIterator(BytewiseComparator(), BytewiseComparator(), properties_block.NewIterator<BlockIter>(BytewiseComparator(),
&iter); BytewiseComparator(), &iter);
auto new_table_properties = new TableProperties(); auto new_table_properties = new TableProperties();
// All pre-defined properties of type uint64_t // All pre-defined properties of type uint64_t
@ -335,7 +335,8 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
Block metaindex_block(std::move(metaindex_contents), Block metaindex_block(std::move(metaindex_contents),
kDisableGlobalSequenceNumber); kDisableGlobalSequenceNumber);
std::unique_ptr<InternalIterator> meta_iter( std::unique_ptr<InternalIterator> meta_iter(
metaindex_block.NewIterator(BytewiseComparator(), BytewiseComparator())); metaindex_block.NewIterator<BlockIter>(BytewiseComparator(),
BytewiseComparator()));
// -- Read property block // -- Read property block
bool found_properties_block = true; bool found_properties_block = true;
@ -404,8 +405,8 @@ Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
kDisableGlobalSequenceNumber); kDisableGlobalSequenceNumber);
std::unique_ptr<InternalIterator> meta_iter; std::unique_ptr<InternalIterator> meta_iter;
meta_iter.reset( meta_iter.reset(metaindex_block.NewIterator<BlockIter>(BytewiseComparator(),
metaindex_block.NewIterator(BytewiseComparator(), BytewiseComparator())); BytewiseComparator()));
return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle); return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle);
} }
@ -451,8 +452,8 @@ Status ReadMetaBlock(RandomAccessFileReader* file,
kDisableGlobalSequenceNumber); kDisableGlobalSequenceNumber);
std::unique_ptr<InternalIterator> meta_iter; std::unique_ptr<InternalIterator> meta_iter;
meta_iter.reset( meta_iter.reset(metaindex_block.NewIterator<BlockIter>(BytewiseComparator(),
metaindex_block.NewIterator(BytewiseComparator(), BytewiseComparator())); BytewiseComparator()));
BlockHandle block_handle; BlockHandle block_handle;
status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle); status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle);

@ -129,10 +129,12 @@ PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
return; return;
} }
char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length]; char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length];
BlockIter biter; IndexBlockIter biter;
BlockHandle handle; BlockHandle handle;
idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(), Statistics* kNullStats = nullptr;
&biter, true, nullptr, index_key_includes_seq_); idx_on_fltr_blk_->NewIterator<IndexBlockIter>(
&comparator_, comparator_.user_comparator(), &biter, kNullStats, true,
index_key_includes_seq_);
biter.SeekToFirst(); biter.SeekToFirst();
for (; biter.Valid(); biter.Next()) { for (; biter.Valid(); biter.Next()) {
auto input = biter.value(); auto input = biter.value();
@ -225,9 +227,11 @@ bool PartitionedFilterBlockReader::PrefixMayMatch(
Slice PartitionedFilterBlockReader::GetFilterPartitionHandle( Slice PartitionedFilterBlockReader::GetFilterPartitionHandle(
const Slice& entry) { const Slice& entry) {
BlockIter iter; IndexBlockIter iter;
idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(), Statistics* kNullStats = nullptr;
&iter, true, nullptr, index_key_includes_seq_); idx_on_fltr_blk_->NewIterator<IndexBlockIter>(
&comparator_, comparator_.user_comparator(), &iter, kNullStats, true,
index_key_includes_seq_);
iter.Seek(entry); iter.Seek(entry);
if (UNLIKELY(!iter.Valid())) { if (UNLIKELY(!iter.Valid())) {
return Slice(); return Slice();
@ -294,10 +298,12 @@ void PartitionedFilterBlockReader::CacheDependencies(
bool pin, const SliceTransform* prefix_extractor) { bool pin, const SliceTransform* prefix_extractor) {
// Before read partitions, prefetch them to avoid lots of IOs // Before read partitions, prefetch them to avoid lots of IOs
auto rep = table_->rep_; auto rep = table_->rep_;
BlockIter biter; IndexBlockIter biter;
BlockHandle handle; BlockHandle handle;
idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(), Statistics* kNullStats = nullptr;
&biter, true, nullptr, index_key_includes_seq_); idx_on_fltr_blk_->NewIterator<IndexBlockIter>(
&comparator_, comparator_.user_comparator(), &biter, kNullStats, true,
index_key_includes_seq_);
// Index partitions are assumed to be consecuitive. Prefetch them all. // Index partitions are assumed to be consecuitive. Prefetch them all.
// Read the first block offset // Read the first block offset
biter.SeekToFirst(); biter.SeekToFirst();

@ -238,7 +238,7 @@ class BlockConstructor: public Constructor {
} }
virtual InternalIterator* NewIterator( virtual InternalIterator* NewIterator(
const SliceTransform* /*prefix_extractor*/) const override { const SliceTransform* /*prefix_extractor*/) const override {
return block_->NewIterator(comparator_, comparator_); return block_->NewIterator<BlockIter>(comparator_, comparator_);
} }
private: private:
@ -3473,8 +3473,9 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
Block metaindex_block(std::move(metaindex_contents), Block metaindex_block(std::move(metaindex_contents),
kDisableGlobalSequenceNumber); kDisableGlobalSequenceNumber);
std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewIterator( std::unique_ptr<InternalIterator> meta_iter(
BytewiseComparator(), BytewiseComparator())); metaindex_block.NewIterator<BlockIter>(BytewiseComparator(),
BytewiseComparator()));
bool found_properties_block = true; bool found_properties_block = true;
ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block)); ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block));
ASSERT_TRUE(found_properties_block); ASSERT_TRUE(found_properties_block);

Loading…
Cancel
Save