Separate internal and user key comparators in `BlockIter` (#6944)

Summary:
Replace `BlockIter::comparator_` and `IndexBlockIter::user_comparator_wrapper_` with a concrete `UserComparatorWrapper` and `InternalKeyComparator`. The motivation for this change was the inconvenience of not knowing the concrete type of `BlockIter::comparator_`, which prevented calling specialized internal key comparison functions to optimize comparison of keys with global seqno applied.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/6944

Test Plan:
benchmark setup -- single file DBs, in-memory, no compression. "normal_db"
created by regular flush; "ingestion_db" created by ingesting a file. Both
DBs have same contents.

```
$ TEST_TMPDIR=/dev/shm/normal_db/ ./db_bench -benchmarks=fillrandom,compact -write_buffer_size=10485760000 -disable_auto_compactions=true -compression_type=none -num=1000000
$ ./ldb write_extern_sst ./tmp.sst --db=/dev/shm/ingestion_db/dbbench/ --compression_type=no --hex --create_if_missing < <(./sst_dump --command=scan --output_hex --file=/dev/shm/normal_db/dbbench/000007.sst | awk 'began {print "0x" substr($1, 2, length($1) - 2), "==>", "0x" $5} ; /^Sst file format: block-based/ {began=1}')
$ ./ldb ingest_extern_sst ./tmp.sst --db=/dev/shm/ingestion_db/dbbench/
```

benchmark run command:

```
$ TEST_TMPDIR=/dev/shm/$DB/ ./db_bench -benchmarks=seekrandom -seek_nexts=$SEEK_NEXT -use_existing_db=true -cache_index_and_filter_blocks=false -num=1000000 -cache_size=0 -threads=1 -reads=200000000 -mmap_read=1 -verify_checksum=false
```

results: perf improved marginally for ingestion_db and did not change significantly for normal_db:

SEEK_NEXT | DB | code | ops/sec | % change
-- | -- | -- | -- | --
0 | normal_db | master | 350880 |  
0 | normal_db | PR6944 | 351040 | 0.0
0 | ingestion_db | master | 343255 |  
0 | ingestion_db | PR6944 | 349424 | 1.8
10 | normal_db | master | 218711 |  
10 | normal_db | PR6944 | 217892 | -0.4
10 | ingestion_db | master | 220334 |  
10 | ingestion_db | PR6944 | 226437 | 2.8

Reviewed By: pdillinger

Differential Revision: D21924676

Pulled By: ajkr

fbshipit-source-id: ea4288a2eefa8112eb6c651a671c1de18c12e538
main
Andrew Kryczka 4 years ago committed by Facebook GitHub Bot
parent 4202c48f80
commit dd29ad4223
  1. 3
      HISTORY.md
  2. 21
      db/dbformat.cc
  3. 68
      db/dbformat.h
  4. 2
      table/block_based/binary_search_index_reader.cc
  5. 126
      table/block_based/block.cc
  6. 305
      table/block_based/block.h
  7. 17
      table/block_based/block_based_table_reader.cc
  8. 46
      table/block_based/block_test.cc
  9. 6
      table/block_based/data_block_hash_index_test.cc
  10. 2
      table/block_based/hash_index_reader.cc
  11. 10
      table/block_based/partitioned_filter_block.cc
  12. 6
      table/block_based/partitioned_index_reader.cc
  13. 11
      table/meta_blocks.cc
  14. 97
      table/table_test.cc
  15. 4
      util/user_comparator_wrapper.h

@ -26,6 +26,9 @@
### Bug Fixes
* Fail recovery and report once hitting a physical log record checksum mismatch, while reading MANIFEST. RocksDB should not continue processing the MANIFEST any further.
### Performance Improvements
* Eliminate key copies for internal comparisons while accessing ingested block-based tables.
## 6.11 (6/12/2020)
### Bug Fixes
* Fix consistency checking error swallowing in some cases when options.force_consistency_checks = true.

@ -26,12 +26,6 @@ namespace ROCKSDB_NAMESPACE {
const ValueType kValueTypeForSeek = kTypeDeletionWithTimestamp;
const ValueType kValueTypeForSeekForPrev = kTypeDeletion;
uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
assert(seq <= kMaxSequenceNumber);
assert(IsExtendedValueType(t));
return (seq << 8) | t;
}
EntryType GetEntryType(ValueType value_type) {
switch (value_type) {
case kTypeValue:
@ -62,14 +56,6 @@ bool ParseFullKey(const Slice& internal_key, FullKey* fkey) {
return true;
}
void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t) {
*seq = packed >> 8;
*t = static_cast<ValueType>(packed & 0xff);
assert(*seq <= kMaxSequenceNumber);
assert(IsExtendedValueType(*t));
}
void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
result->append(key.user_key.data(), key.user_key.size());
PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
@ -111,7 +97,12 @@ std::string InternalKey::DebugString(bool hex) const {
return result;
}
const char* InternalKeyComparator::Name() const { return name_.c_str(); }
const char* InternalKeyComparator::Name() const {
if (name_.empty()) {
return "rocksdb.anonymous.InternalKeyComparator";
}
return name_.c_str();
}
int InternalKeyComparator::Compare(const ParsedInternalKey& a,
const ParsedInternalKey& b) const {

@ -124,11 +124,22 @@ inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
}
// Pack a sequence number and a ValueType into a uint64_t
extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t);
inline uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
assert(seq <= kMaxSequenceNumber);
assert(IsExtendedValueType(t));
return (seq << 8) | t;
}
// Given the result of PackSequenceAndType, store the sequence number in *seq
// and the ValueType in *t.
extern void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t);
inline void UnPackSequenceAndType(uint64_t packed, uint64_t* seq,
ValueType* t) {
*seq = packed >> 8;
*t = static_cast<ValueType>(packed & 0xff);
assert(*seq <= kMaxSequenceNumber);
assert(IsExtendedValueType(*t));
}
EntryType GetEntryType(ValueType value_type);
@ -200,11 +211,22 @@ class InternalKeyComparator
std::string name_;
public:
explicit InternalKeyComparator(const Comparator* c)
: Comparator(c->timestamp_size()),
user_comparator_(c),
name_("rocksdb.InternalKeyComparator:" +
std::string(user_comparator_.Name())) {}
// `InternalKeyComparator`s constructed with the default constructor are not
// usable and will segfault on any attempt to use them for comparisons.
InternalKeyComparator() = default;
// @param named If true, assign a name to this comparator based on the
// underlying comparator's name. This involves an allocation and copy in
// this constructor to precompute the result of `Name()`. To avoid this
// overhead, set `named` to false. In that case, `Name()` will return a
// generic name that is non-specific to the underlying comparator.
explicit InternalKeyComparator(const Comparator* c, bool named = true)
: Comparator(c->timestamp_size()), user_comparator_(c) {
if (named) {
name_ = "rocksdb.InternalKeyComparator:" +
std::string(user_comparator_.Name());
}
}
virtual ~InternalKeyComparator() {}
virtual const char* Name() const override;
@ -221,6 +243,12 @@ class InternalKeyComparator
int Compare(const InternalKey& a, const InternalKey& b) const;
int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const;
// In this `Compare()` overload, the sequence numbers provided in
// `a_global_seqno` and `b_global_seqno` override the sequence numbers in `a`
// and `b`, respectively. To disable sequence number override(s), provide the
// value `kDisableGlobalSequenceNumber`.
int Compare(const Slice& a, SequenceNumber a_global_seqno, const Slice& b,
SequenceNumber b_global_seqno) const;
virtual const Comparator* GetRootComparator() const override {
return user_comparator_.GetRootComparator();
}
@ -677,6 +705,32 @@ inline int InternalKeyComparator::CompareKeySeq(const Slice& akey,
return r;
}
inline int InternalKeyComparator::Compare(const Slice& a,
SequenceNumber a_global_seqno,
const Slice& b,
SequenceNumber b_global_seqno) const {
int r = user_comparator_.Compare(ExtractUserKey(a), ExtractUserKey(b));
if (r == 0) {
uint64_t a_footer, b_footer;
if (a_global_seqno == kDisableGlobalSequenceNumber) {
a_footer = ExtractInternalKeyFooter(a);
} else {
a_footer = PackSequenceAndType(a_global_seqno, ExtractValueType(a));
}
if (b_global_seqno == kDisableGlobalSequenceNumber) {
b_footer = ExtractInternalKeyFooter(b);
} else {
b_footer = PackSequenceAndType(b_global_seqno, ExtractValueType(b));
}
if (a_footer > b_footer) {
r = -1;
} else if (a_footer < b_footer) {
r = +1;
}
}
return r;
}
// Wrap InternalKeyComparator as a comparator class for ParsedInternalKey.
struct ParsedInternalKeyComparator {
explicit ParsedInternalKeyComparator(const InternalKeyComparator* c)

@ -61,7 +61,7 @@ InternalIteratorBase<IndexValue>* BinarySearchIndexReader::NewIterator(
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
auto it = index_block.GetValue()->NewIndexIterator(
internal_comparator(), internal_comparator()->user_comparator(),
internal_comparator()->user_comparator(),
rep->get_global_seqno(BlockType::kIndex), iter, kNullStats, true,
index_has_first_key(), index_key_includes_seq(), index_value_is_full());

@ -127,19 +127,15 @@ struct DecodeKeyV4 {
}
};
void DataBlockIter::Next() {
ParseNextDataKey<DecodeEntry>();
}
void DataBlockIter::NextImpl() { ParseNextDataKey<DecodeEntry>(); }
void DataBlockIter::NextOrReport() {
void DataBlockIter::NextOrReportImpl() {
ParseNextDataKey<CheckAndDecodeEntry>();
}
void IndexBlockIter::Next() {
ParseNextIndexKey();
}
void IndexBlockIter::NextImpl() { ParseNextIndexKey(); }
void IndexBlockIter::Prev() {
void IndexBlockIter::PrevImpl() {
assert(Valid());
// Scan backwards to a restart point before current_
const uint32_t original = current_;
@ -158,8 +154,8 @@ void IndexBlockIter::Prev() {
}
}
// Similar to IndexBlockIter::Prev but also caches the prev entries
void DataBlockIter::Prev() {
// Similar to IndexBlockIter::PrevImpl but also caches the prev entries
void DataBlockIter::PrevImpl() {
assert(Valid());
assert(prev_entries_idx_ == -1 ||
@ -173,25 +169,26 @@ void DataBlockIter::Prev() {
prev_entries_[prev_entries_idx_];
const char* key_ptr = nullptr;
bool raw_key_cached;
if (current_prev_entry.key_ptr != nullptr) {
// The key is not delta encoded and stored in the data block
key_ptr = current_prev_entry.key_ptr;
key_pinned_ = true;
raw_key_cached = false;
} else {
// The key is delta encoded and stored in prev_entries_keys_buff_
key_ptr = prev_entries_keys_buff_.data() + current_prev_entry.key_offset;
key_pinned_ = false;
raw_key_cached = true;
}
const Slice current_key(key_ptr, current_prev_entry.key_size);
current_ = current_prev_entry.offset;
raw_key_.SetKey(current_key, false /* copy */);
// TODO(ajkr): the copy when `raw_key_cached` is done here for convenience,
// not necessity. It is convenient since this class treats keys as pinned
// when `raw_key_` points to an outside buffer. So we cannot allow
// `raw_key_` point into Prev cache as it is a transient outside buffer
// (i.e., keys in it are not actually pinned).
raw_key_.SetKey(current_key, raw_key_cached /* copy */);
value_ = current_prev_entry.value;
key_ = applied_key_.UpdateAndGetKey();
// This is kind of odd in that applied_key_ may say the key is pinned while
// key_pinned_ ends up being false. That'll only happen when the key resides
// in a transient caching buffer.
key_pinned_ = key_pinned_ && applied_key_.IsKeyPinned();
return;
}
@ -238,7 +235,7 @@ void DataBlockIter::Prev() {
prev_entries_idx_ = static_cast<int32_t>(prev_entries_.size()) - 1;
}
void DataBlockIter::Seek(const Slice& target) {
void DataBlockIter::SeekImpl(const Slice& target) {
Slice seek_key = target;
PERF_TIMER_GUARD(block_seek_nanos);
if (data_ == nullptr) { // Not init yet
@ -247,12 +244,12 @@ void DataBlockIter::Seek(const Slice& target) {
uint32_t index = 0;
bool skip_linear_scan = false;
bool ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan, comparator_);
&skip_linear_scan);
if (!ok) {
return;
}
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_);
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan);
}
// Optimized Seek for point lookup for an internal key `target`
@ -286,7 +283,7 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
if (entry == kCollision) {
// HashSeek not effective, falling back
Seek(target);
SeekImpl(target);
return true;
}
@ -330,7 +327,7 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
// TODO(fwu): check the left and write boundary of the restart interval
// to avoid linear seek a target key that is out of range.
if (!ParseNextDataKey<DecodeEntry>(limit) ||
comparator_->Compare(applied_key_.UpdateAndGetKey(), target) >= 0) {
CompareCurrentKey(target) >= 0) {
// we stop at the first potential matching user key.
break;
}
@ -355,18 +352,18 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
return true;
}
if (user_comparator_->Compare(raw_key_.GetUserKey(), target_user_key) != 0) {
if (ucmp_wrapper_.Compare(raw_key_.GetUserKey(), target_user_key) != 0) {
// the key is not in this block and cannot be at the next block either.
return false;
}
// Here we are conservative and only support a limited set of cases
ValueType value_type = ExtractValueType(applied_key_.UpdateAndGetKey());
ValueType value_type = ExtractValueType(raw_key_.GetInternalKey());
if (value_type != ValueType::kTypeValue &&
value_type != ValueType::kTypeDeletion &&
value_type != ValueType::kTypeSingleDeletion &&
value_type != ValueType::kTypeBlobIndex) {
Seek(target);
SeekImpl(target);
return true;
}
@ -374,14 +371,14 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
return true;
}
void IndexBlockIter::Seek(const Slice& target) {
void IndexBlockIter::SeekImpl(const Slice& target) {
TEST_SYNC_POINT("IndexBlockIter::Seek:0");
PERF_TIMER_GUARD(block_seek_nanos);
if (data_ == nullptr) { // Not init yet
return;
}
Slice seek_key = target;
if (!key_includes_seq_) {
if (raw_key_.IsUserKey()) {
seek_key = ExtractUserKey(target);
}
status_ = Status::OK();
@ -403,19 +400,19 @@ void IndexBlockIter::Seek(const Slice& target) {
skip_linear_scan = true;
} else if (value_delta_encoded_) {
ok = BinarySeek<DecodeKeyV4>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan, comparator_);
&skip_linear_scan);
} else {
ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan, comparator_);
&skip_linear_scan);
}
if (!ok) {
return;
}
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_);
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan);
}
void DataBlockIter::SeekForPrev(const Slice& target) {
void DataBlockIter::SeekForPrevImpl(const Slice& target) {
PERF_TIMER_GUARD(block_seek_nanos);
Slice seek_key = target;
if (data_ == nullptr) { // Not init yet
@ -424,24 +421,23 @@ void DataBlockIter::SeekForPrev(const Slice& target) {
uint32_t index = 0;
bool skip_linear_scan = false;
bool ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan, comparator_);
&skip_linear_scan);
if (!ok) {
return;
}
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_);
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan);
if (!Valid()) {
SeekToLast();
SeekToLastImpl();
} else {
while (Valid() &&
comparator_->Compare(applied_key_.UpdateAndGetKey(), seek_key) > 0) {
Prev();
while (Valid() && CompareCurrentKey(seek_key) > 0) {
PrevImpl();
}
}
}
void DataBlockIter::SeekToFirst() {
void DataBlockIter::SeekToFirstImpl() {
if (data_ == nullptr) { // Not init yet
return;
}
@ -449,7 +445,7 @@ void DataBlockIter::SeekToFirst() {
ParseNextDataKey<DecodeEntry>();
}
void DataBlockIter::SeekToFirstOrReport() {
void DataBlockIter::SeekToFirstOrReportImpl() {
if (data_ == nullptr) { // Not init yet
return;
}
@ -457,7 +453,7 @@ void DataBlockIter::SeekToFirstOrReport() {
ParseNextDataKey<CheckAndDecodeEntry>();
}
void IndexBlockIter::SeekToFirst() {
void IndexBlockIter::SeekToFirstImpl() {
if (data_ == nullptr) { // Not init yet
return;
}
@ -466,7 +462,7 @@ void IndexBlockIter::SeekToFirst() {
ParseNextIndexKey();
}
void DataBlockIter::SeekToLast() {
void DataBlockIter::SeekToLastImpl() {
if (data_ == nullptr) { // Not init yet
return;
}
@ -476,7 +472,7 @@ void DataBlockIter::SeekToLast() {
}
}
void IndexBlockIter::SeekToLast() {
void IndexBlockIter::SeekToLastImpl() {
if (data_ == nullptr) { // Not init yet
return;
}
@ -526,8 +522,6 @@ bool DataBlockIter::ParseNextDataKey(const char* limit) {
// This key share `shared` bytes with prev key, we need to decode it
raw_key_.TrimAppend(shared, p, non_shared);
}
key_ = applied_key_.UpdateAndGetKey();
key_pinned_ = applied_key_.IsKeyPinned();
#ifndef NDEBUG
if (global_seqno_ != kDisableGlobalSequenceNumber) {
@ -590,8 +584,6 @@ bool IndexBlockIter::ParseNextIndexKey() {
// This key share `shared` bytes with prev key, we need to decode it
raw_key_.TrimAppend(shared, p, non_shared);
}
key_ = applied_key_.UpdateAndGetKey();
key_pinned_ = applied_key_.IsKeyPinned();
value_ = Slice(p + non_shared, value_length);
if (shared == 0) {
while (restart_index_ + 1 < num_restarts_ &&
@ -651,12 +643,12 @@ void IndexBlockIter::DecodeCurrentValue(uint32_t shared) {
template <class TValue>
void BlockIter<TValue>::FindKeyAfterBinarySeek(const Slice& target,
uint32_t index,
bool skip_linear_scan,
const Comparator* comp) {
bool skip_linear_scan) {
// SeekToRestartPoint() only does the lookup in the restart block. We need
// to follow it up with Next() to position the iterator at the restart key.
// to follow it up with NextImpl() to position the iterator at the restart
// key.
SeekToRestartPoint(index);
Next();
NextImpl();
if (!skip_linear_scan) {
// Linear search (within restart block) for first key >= target
@ -672,14 +664,14 @@ void BlockIter<TValue>::FindKeyAfterBinarySeek(const Slice& target,
max_offset = port::kMaxUint32;
}
while (true) {
Next();
NextImpl();
if (!Valid()) {
break;
}
if (current_ == max_offset) {
assert(comp->Compare(applied_key_.UpdateAndGetKey(), target) > 0);
assert(CompareCurrentKey(target) > 0);
break;
} else if (comp->Compare(applied_key_.UpdateAndGetKey(), target) >= 0) {
} else if (CompareCurrentKey(target) >= 0) {
break;
}
}
@ -698,8 +690,7 @@ template <class TValue>
template <typename DecodeKeyFunc>
bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
uint32_t right, uint32_t* index,
bool* skip_linear_scan,
const Comparator* comp) {
bool* skip_linear_scan) {
assert(left <= right);
if (restarts_ == 0) {
// SST files dedicated to range tombstones are written with index blocks
@ -724,7 +715,7 @@ bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
}
Slice mid_key(key_ptr, non_shared);
raw_key_.SetKey(mid_key, false /* copy */);
int cmp = comp->Compare(applied_key_.UpdateAndGetKey(), target);
int cmp = CompareCurrentKey(target);
if (cmp < 0) {
// Key at "mid" is smaller than "target". Therefore all
// blocks before "mid" are uninteresting.
@ -757,7 +748,7 @@ bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
}
Slice first_key(key_ptr, non_shared);
raw_key_.SetKey(first_key, false /* copy */);
int cmp = comp->Compare(applied_key_.UpdateAndGetKey(), target);
int cmp = CompareCurrentKey(target);
*skip_linear_scan = cmp >= 0;
}
return true;
@ -780,7 +771,7 @@ int IndexBlockIter::CompareBlockKey(uint32_t block_index, const Slice& target) {
}
Slice block_key(key_ptr, non_shared);
raw_key_.SetKey(block_key, false /* copy */);
return comparator_->Compare(applied_key_.UpdateAndGetKey(), target);
return CompareCurrentKey(target);
}
// Binary search in block_ids to find the first block
@ -874,7 +865,7 @@ bool IndexBlockIter::PrefixSeek(const Slice& target, uint32_t* index,
assert(prefix_index_);
*prefix_may_exist = true;
Slice seek_key = target;
if (!key_includes_seq_) {
if (raw_key_.IsUserKey()) {
seek_key = ExtractUserKey(target);
}
uint32_t* block_ids = nullptr;
@ -989,8 +980,7 @@ Block::Block(BlockContents&& contents, size_t read_amp_bytes_per_bit,
}
}
DataBlockIter* Block::NewDataIterator(const Comparator* cmp,
const Comparator* ucmp,
DataBlockIter* Block::NewDataIterator(const Comparator* ucmp,
SequenceNumber global_seqno,
DataBlockIter* iter, Statistics* stats,
bool block_contents_pinned) {
@ -1010,7 +1000,7 @@ DataBlockIter* Block::NewDataIterator(const Comparator* cmp,
return ret_iter;
} else {
ret_iter->Initialize(
cmp, ucmp, data_, restart_offset_, num_restarts_, global_seqno,
ucmp, data_, restart_offset_, num_restarts_, global_seqno,
read_amp_bitmap_.get(), block_contents_pinned,
data_block_hash_index_.Valid() ? &data_block_hash_index_ : nullptr);
if (read_amp_bitmap_) {
@ -1025,10 +1015,10 @@ DataBlockIter* Block::NewDataIterator(const Comparator* cmp,
}
IndexBlockIter* Block::NewIndexIterator(
const Comparator* cmp, const Comparator* ucmp, SequenceNumber global_seqno,
IndexBlockIter* iter, Statistics* /*stats*/, bool total_order_seek,
bool have_first_key, bool key_includes_seq, bool value_is_full,
bool block_contents_pinned, BlockPrefixIndex* prefix_index) {
const Comparator* ucmp, SequenceNumber global_seqno, IndexBlockIter* iter,
Statistics* /*stats*/, bool total_order_seek, bool have_first_key,
bool key_includes_seq, bool value_is_full, bool block_contents_pinned,
BlockPrefixIndex* prefix_index) {
IndexBlockIter* ret_iter;
if (iter != nullptr) {
ret_iter = iter;
@ -1046,7 +1036,7 @@ IndexBlockIter* Block::NewIndexIterator(
} else {
BlockPrefixIndex* prefix_index_ptr =
total_order_seek ? nullptr : prefix_index;
ret_iter->Initialize(cmp, ucmp, data_, restart_offset_, num_restarts_,
ret_iter->Initialize(ucmp, data_, restart_offset_, num_restarts_,
global_seqno, prefix_index_ptr, have_first_key,
key_includes_seq, value_is_full,
block_contents_pinned);

@ -168,8 +168,8 @@ class Block {
BlockBasedTableOptions::DataBlockIndexType IndexType() const;
// If comparator is InternalKeyComparator, user_comparator is its user
// comparator; they are equal otherwise.
// ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key
// comparator.
//
// If iter is null, return new Iterator
// If iter is not null, update this one and return it as Iterator*
@ -187,13 +187,15 @@ class Block {
// NOTE: for the hash based lookup, if a key prefix doesn't match any key,
// the iterator will simply be set as "invalid", rather than returning
// the key that is just pass the target key.
DataBlockIter* NewDataIterator(const Comparator* comparator,
const Comparator* user_comparator,
DataBlockIter* NewDataIterator(const Comparator* ucmp,
SequenceNumber global_seqno,
DataBlockIter* iter = nullptr,
Statistics* stats = nullptr,
bool block_contents_pinned = false);
// ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key
// comparator.
//
// key_includes_seq, default true, means that the keys are in internal key
// format.
// value_is_full, default true, means that no delta encoding is
@ -206,8 +208,7 @@ class Block {
// first_internal_key. It affects data serialization format, so the same value
// have_first_key must be used when writing and reading index.
// It is determined by IndexType property of the table.
IndexBlockIter* NewIndexIterator(const Comparator* comparator,
const Comparator* user_comparator,
IndexBlockIter* NewIndexIterator(const Comparator* ucmp,
SequenceNumber global_seqno,
IndexBlockIter* iter, Statistics* stats,
bool total_order_seek, bool have_first_key,
@ -228,61 +229,36 @@ class Block {
DataBlockHashIndex data_block_hash_index_;
};
// A GlobalSeqnoAppliedKey exposes a key with global sequence number applied
// if configured with `global_seqno != kDisableGlobalSequenceNumber`. It may
// hold a user key or an internal key since `format_version>=3` index blocks
// contain user keys. In case it holds user keys, it must be configured with
// `global_seqno == kDisableGlobalSequenceNumber`.
class GlobalSeqnoAppliedKey {
public:
void Initialize(IterKey* key, SequenceNumber global_seqno) {
key_ = key;
global_seqno_ = global_seqno;
#ifndef NDEBUG
init_ = true;
#endif // NDEBUG
}
Slice UpdateAndGetKey() {
assert(init_);
if (global_seqno_ == kDisableGlobalSequenceNumber) {
return key_->GetKey();
}
ParsedInternalKey parsed(Slice(), 0, kTypeValue);
if (!ParseInternalKey(key_->GetInternalKey(), &parsed)) {
assert(false); // error not handled in optimized builds
return Slice();
}
parsed.sequence = global_seqno_;
scratch_.SetInternalKey(parsed);
return scratch_.GetInternalKey();
}
bool IsKeyPinned() const {
return global_seqno_ == kDisableGlobalSequenceNumber && key_->IsKeyPinned();
}
private:
const IterKey* key_;
SequenceNumber global_seqno_;
IterKey scratch_;
#ifndef NDEBUG
bool init_ = false;
#endif // NDEBUG
};
// A `BlockIter` iterates over the entries in a `Block`'s data buffer. The
// format of this data buffer is an uncompressed, sorted sequence of key-value
// pairs (see `Block` API for more details).
//
// Notably, the keys may either be in internal key format or user key format.
// Subclasses are responsible for configuring the key format.
//
// `BlockIter` intends to provide final overrides for all of
// `InternalIteratorBase` functions that can move the iterator. It does
// this to guarantee `UpdateKey()` is called exactly once after each key
// movement potentially visible to users. In this step, the key is prepared
// (e.g., serialized if global seqno is in effect) so it can be returned
// immediately when the user asks for it via calling `key() const`.
//
// For its subclasses, it provides protected variants of the above-mentioned
// final-overridden methods. They are named with the "Impl" suffix, e.g.,
// `Seek()` logic would be implemented by subclasses in `SeekImpl()`. These
// "Impl" functions are responsible for positioning `raw_key_` but not
// invoking `UpdateKey()`.
template <class TValue>
class BlockIter : public InternalIteratorBase<TValue> {
public:
void InitializeBase(const Comparator* comparator, const char* data,
void InitializeBase(const Comparator* ucmp, const char* data,
uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno, bool block_contents_pinned) {
assert(data_ == nullptr); // Ensure it is called only once
assert(num_restarts > 0); // Ensure the param is valid
applied_key_.Initialize(&raw_key_, global_seqno);
comparator_ = comparator;
ucmp_wrapper_ = UserComparatorWrapper(ucmp);
icmp_ = InternalKeyComparator(ucmp, false /* named */);
data_ = data;
restarts_ = restarts;
num_restarts_ = num_restarts;
@ -309,6 +285,43 @@ class BlockIter : public InternalIteratorBase<TValue> {
}
bool Valid() const override { return current_ < restarts_; }
virtual void SeekToFirst() override final {
SeekToFirstImpl();
UpdateKey();
}
virtual void SeekToLast() override final {
SeekToLastImpl();
UpdateKey();
}
virtual void Seek(const Slice& target) override final {
SeekImpl(target);
UpdateKey();
}
virtual void SeekForPrev(const Slice& target) override final {
SeekForPrevImpl(target);
UpdateKey();
}
virtual void Next() override final {
NextImpl();
UpdateKey();
}
virtual bool NextAndGetResult(IterateResult* result) override final {
// This does not need to call `UpdateKey()` as the parent class only has
// access to the `UpdateKey()`-invoking functions.
return InternalIteratorBase<TValue>::NextAndGetResult(result);
}
virtual void Prev() override final {
PrevImpl();
UpdateKey();
}
Status status() const override { return status_; }
Slice key() const override {
assert(Valid());
@ -343,12 +356,10 @@ class BlockIter : public InternalIteratorBase<TValue> {
Cache::Handle* cache_handle() { return cache_handle_; }
virtual void Next() override = 0;
protected:
// Note: The type could be changed to InternalKeyComparator but we see a weird
// performance drop by that.
const Comparator* comparator_;
UserComparatorWrapper ucmp_wrapper_;
InternalKeyComparator icmp_;
const char* data_; // underlying block contents
uint32_t num_restarts_; // Number of uint32_t entries in restart array
@ -359,13 +370,12 @@ class BlockIter : public InternalIteratorBase<TValue> {
uint32_t current_;
// Raw key from block.
IterKey raw_key_;
// raw_key_ with global seqno applied if necessary. Use this one for
// comparisons.
GlobalSeqnoAppliedKey applied_key_;
// Key to be exposed to users.
Slice key_;
// Buffer for key data when global seqno assignment is enabled.
IterKey key_buf_;
Slice value_;
Status status_;
// Key to be exposed to users.
Slice key_;
bool key_pinned_;
// Whether the block data is guaranteed to outlive this iterator, and
// as long as the cleanup functions are transferred to another class,
@ -373,6 +383,50 @@ class BlockIter : public InternalIteratorBase<TValue> {
bool block_contents_pinned_;
SequenceNumber global_seqno_;
virtual void SeekToFirstImpl() = 0;
virtual void SeekToLastImpl() = 0;
virtual void SeekImpl(const Slice& target) = 0;
virtual void SeekForPrevImpl(const Slice& target) = 0;
virtual void NextImpl() = 0;
virtual void PrevImpl() = 0;
// Must be called every time a key is found that needs to be returned to user,
// and may be called when no key is found (as a no-op). Updates `key_`,
// `key_buf_`, and `key_pinned_` with info about the found key.
void UpdateKey() {
key_buf_.Clear();
if (!Valid()) {
return;
}
if (raw_key_.IsUserKey()) {
assert(global_seqno_ == kDisableGlobalSequenceNumber);
key_ = raw_key_.GetUserKey();
key_pinned_ = raw_key_.IsKeyPinned();
} else if (global_seqno_ == kDisableGlobalSequenceNumber) {
key_ = raw_key_.GetInternalKey();
key_pinned_ = raw_key_.IsKeyPinned();
} else {
key_buf_.SetInternalKey(raw_key_.GetUserKey(), global_seqno_,
ExtractValueType(raw_key_.GetInternalKey()));
key_ = key_buf_.GetInternalKey();
key_pinned_ = false;
}
}
// Returns the result of `Comparator::Compare()`, where the appropriate
// comparator is used for the block contents, the LHS argument is the current
// key with global seqno applied, and the RHS argument is `other`.
int CompareCurrentKey(const Slice& other) {
if (raw_key_.IsUserKey()) {
assert(global_seqno_ == kDisableGlobalSequenceNumber);
return ucmp_wrapper_.Compare(raw_key_.GetUserKey(), other);
} else if (global_seqno_ == kDisableGlobalSequenceNumber) {
return icmp_.Compare(raw_key_.GetInternalKey(), other);
}
return icmp_.Compare(raw_key_.GetInternalKey(), global_seqno_, other,
kDisableGlobalSequenceNumber);
}
private:
// Store the cache handle, if the block is cached. We need this since the
// only other place the handle is stored is as an argument to the Cleanable
@ -408,37 +462,31 @@ class BlockIter : public InternalIteratorBase<TValue> {
protected:
template <typename DecodeKeyFunc>
inline bool BinarySeek(const Slice& target, uint32_t left, uint32_t right,
uint32_t* index, bool* is_index_key_result,
const Comparator* comp);
uint32_t* index, bool* is_index_key_result);
void FindKeyAfterBinarySeek(const Slice& target, uint32_t index,
bool is_index_key_result, const Comparator* comp);
bool is_index_key_result);
};
class DataBlockIter final : public BlockIter<Slice> {
public:
DataBlockIter()
: BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {}
DataBlockIter(const Comparator* comparator, const Comparator* user_comparator,
const char* data, uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno,
DataBlockIter(const Comparator* ucmp, const char* data, uint32_t restarts,
uint32_t num_restarts, SequenceNumber global_seqno,
BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned,
DataBlockHashIndex* data_block_hash_index)
: DataBlockIter() {
Initialize(comparator, user_comparator, data, restarts, num_restarts,
global_seqno, read_amp_bitmap, block_contents_pinned,
data_block_hash_index);
}
void Initialize(const Comparator* comparator,
const Comparator* user_comparator, const char* data,
uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno,
Initialize(ucmp, data, restarts, num_restarts, global_seqno,
read_amp_bitmap, block_contents_pinned, data_block_hash_index);
}
void Initialize(const Comparator* ucmp, const char* data, uint32_t restarts,
uint32_t num_restarts, SequenceNumber global_seqno,
BlockReadAmpBitmap* read_amp_bitmap,
bool block_contents_pinned,
DataBlockHashIndex* data_block_hash_index) {
InitializeBase(comparator, data, restarts, num_restarts, global_seqno,
InitializeBase(ucmp, data, restarts, num_restarts, global_seqno,
block_contents_pinned);
user_comparator_ = user_comparator;
raw_key_.SetIsUserKey(false);
read_amp_bitmap_ = read_amp_bitmap;
last_bitmap_offset_ = current_ + 1;
@ -456,36 +504,32 @@ class DataBlockIter final : public BlockIter<Slice> {
return value_;
}
void Seek(const Slice& target) override;
inline bool SeekForGet(const Slice& target) {
if (!data_block_hash_index_) {
Seek(target);
SeekImpl(target);
UpdateKey();
return true;
}
return SeekForGetImpl(target);
bool res = SeekForGetImpl(target);
UpdateKey();
return res;
}
void SeekForPrev(const Slice& target) override;
void Prev() override;
void Next() final override;
// Try to advance to the next entry in the block. If there is data corruption
// or error, report it to the caller instead of aborting the process. May
// incur higher CPU overhead because we need to perform check on every entry.
void NextOrReport();
void SeekToFirst() override;
void NextOrReport() {
NextOrReportImpl();
UpdateKey();
}
// Try to seek to the first entry in the block. If there is data corruption
// or error, report it to caller instead of aborting the process. May incur
// higher CPU overhead because we need to perform check on every entry.
void SeekToFirstOrReport();
void SeekToLast() override;
void SeekToFirstOrReport() {
SeekToFirstOrReportImpl();
UpdateKey();
}
void Invalidate(Status s) {
InvalidateBase(s);
@ -495,6 +539,14 @@ class DataBlockIter final : public BlockIter<Slice> {
prev_entries_idx_ = -1;
}
protected:
virtual void SeekToFirstImpl() override;
virtual void SeekToLastImpl() override;
virtual void SeekImpl(const Slice& target) override;
virtual void SeekForPrevImpl(const Slice& target) override;
virtual void NextImpl() override;
virtual void PrevImpl() override;
private:
// read-amp bitmap
BlockReadAmpBitmap* read_amp_bitmap_;
@ -525,12 +577,13 @@ class DataBlockIter final : public BlockIter<Slice> {
int32_t prev_entries_idx_ = -1;
DataBlockHashIndex* data_block_hash_index_;
const Comparator* user_comparator_;
template <typename DecodeEntryFunc>
inline bool ParseNextDataKey(const char* limit = nullptr);
bool SeekForGetImpl(const Slice& target);
void NextOrReportImpl();
void SeekToFirstOrReportImpl();
};
class IndexBlockIter final : public BlockIter<IndexValue> {
@ -541,22 +594,14 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
// format.
// value_is_full, default true, means that no delta encoding is
// applied to values.
void Initialize(const Comparator* comparator,
const Comparator* user_comparator, const char* data,
uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno, BlockPrefixIndex* prefix_index,
bool have_first_key, bool key_includes_seq,
bool value_is_full, bool block_contents_pinned) {
if (!key_includes_seq) {
user_comparator_wrapper_ = std::unique_ptr<UserComparatorWrapper>(
new UserComparatorWrapper(user_comparator));
}
InitializeBase(
key_includes_seq ? comparator : user_comparator_wrapper_.get(), data,
restarts, num_restarts, kDisableGlobalSequenceNumber,
block_contents_pinned);
key_includes_seq_ = key_includes_seq;
raw_key_.SetIsUserKey(!key_includes_seq_);
void Initialize(const Comparator* ucmp, const char* data, uint32_t restarts,
uint32_t num_restarts, SequenceNumber global_seqno,
BlockPrefixIndex* prefix_index, bool have_first_key,
bool key_includes_seq, bool value_is_full,
bool block_contents_pinned) {
InitializeBase(ucmp, data, restarts, num_restarts,
kDisableGlobalSequenceNumber, block_contents_pinned);
raw_key_.SetIsUserKey(!key_includes_seq);
prefix_index_ = prefix_index;
value_delta_encoded_ = !value_is_full;
have_first_key_ = have_first_key;
@ -568,10 +613,8 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
}
Slice user_key() const override {
if (key_includes_seq_) {
return ExtractUserKey(key());
}
return key();
assert(Valid());
return raw_key_.GetUserKey();
}
IndexValue value() const override {
@ -588,6 +631,13 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
}
}
void Invalidate(Status s) { InvalidateBase(s); }
bool IsValuePinned() const override {
return global_seqno_state_ != nullptr ? false : BlockIter::IsValuePinned();
}
protected:
// IndexBlockIter follows a different contract for prefix iterator
// from data iterators.
// If prefix of the seek key `target` exists in the file, it must
@ -595,9 +645,9 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
// If the prefix of `target` doesn't exist in the file, it can either
// return the result of total order seek, or set both of Valid() = false
// and status() = NotFound().
void Seek(const Slice& target) override;
void SeekImpl(const Slice& target) override;
void SeekForPrev(const Slice&) override {
void SeekForPrevImpl(const Slice&) override {
assert(false);
current_ = restarts_;
restart_index_ = num_restarts_;
@ -608,24 +658,15 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
value_.clear();
}
void Prev() override;
void PrevImpl() override;
void Next() override;
void NextImpl() override;
void SeekToFirst() override;
void SeekToFirstImpl() override;
void SeekToLast() override;
void Invalidate(Status s) { InvalidateBase(s); }
bool IsValuePinned() const override {
return global_seqno_state_ != nullptr ? false : BlockIter::IsValuePinned();
}
void SeekToLastImpl() override;
private:
std::unique_ptr<UserComparatorWrapper> user_comparator_wrapper_;
// Key is in InternalKey format
bool key_includes_seq_;
bool value_delta_encoded_;
bool have_first_key_; // value includes first_internal_key
BlockPrefixIndex* prefix_index_;

@ -1125,8 +1125,7 @@ Status BlockBasedTable::ReadMetaIndexBlock(
*metaindex_block = std::move(metaindex);
// meta block uses bytewise comparator.
iter->reset(metaindex_block->get()->NewDataIterator(
BytewiseComparator(), BytewiseComparator(),
kDisableGlobalSequenceNumber));
BytewiseComparator(), kDisableGlobalSequenceNumber));
return Status::OK();
}
@ -1381,15 +1380,14 @@ InternalIteratorBase<IndexValue>* BlockBasedTable::NewIndexIterator(
lookup_context);
}
template <>
DataBlockIter* BlockBasedTable::InitBlockIterator<DataBlockIter>(
const Rep* rep, Block* block, BlockType block_type,
DataBlockIter* input_iter, bool block_contents_pinned) {
return block->NewDataIterator(
&rep->internal_comparator, rep->internal_comparator.user_comparator(),
rep->get_global_seqno(block_type), input_iter, rep->ioptions.statistics,
block_contents_pinned);
return block->NewDataIterator(rep->internal_comparator.user_comparator(),
rep->get_global_seqno(block_type), input_iter,
rep->ioptions.statistics,
block_contents_pinned);
}
template <>
@ -1397,14 +1395,13 @@ IndexBlockIter* BlockBasedTable::InitBlockIterator<IndexBlockIter>(
const Rep* rep, Block* block, BlockType block_type,
IndexBlockIter* input_iter, bool block_contents_pinned) {
return block->NewIndexIterator(
&rep->internal_comparator, rep->internal_comparator.user_comparator(),
rep->internal_comparator.user_comparator(),
rep->get_global_seqno(block_type), input_iter, rep->ioptions.statistics,
/* total_order_seek */ true, rep->index_has_first_key,
rep->index_key_includes_seq, rep->index_value_is_full,
block_contents_pinned);
}
// If contents is nullptr, this function looks up the block caches for the
// data block referenced by handle, and read the block from disk if necessary.
// If contents is non-null, it skips the cache lookup and disk read, since
@ -1945,7 +1942,7 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator(
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
return block->second.GetValue()->NewIndexIterator(
&rep->internal_comparator, rep->internal_comparator.user_comparator(),
rep->internal_comparator.user_comparator(),
rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
rep->index_has_first_key, rep->index_key_includes_seq,
rep->index_value_is_full);

@ -34,8 +34,9 @@ static std::string RandomString(Random *rnd, int len) {
test::RandomString(rnd, len, &r);
return r;
}
std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
Random *rnd) {
std::string GenerateInternalKey(int primary_key, int secondary_key,
int padding_size, Random *rnd) {
char buf[50];
char *p = &buf[0];
snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key);
@ -43,6 +44,7 @@ std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
if (padding_size) {
k += RandomString(rnd, padding_size);
}
AppendInternalKeyFooter(&k, 0 /* seqno */, kTypeValue);
return k;
}
@ -61,7 +63,8 @@ void GenerateRandomKVs(std::vector<std::string> *keys,
for (int i = from; i < from + len; i += step) {
// generating keys that shares the prefix
for (int j = 0; j < keys_share_prefix; ++j) {
keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));
// `DataBlockIter` assumes it reads only internal keys.
keys->emplace_back(GenerateInternalKey(i, j, padding_size, &rnd));
// 100 bytes values
values->emplace_back(RandomString(&rnd, 100));
@ -97,8 +100,8 @@ TEST_F(BlockTest, SimpleTest) {
// read contents of block sequentially
int count = 0;
InternalIterator *iter = reader.NewDataIterator(
options.comparator, options.comparator, kDisableGlobalSequenceNumber);
InternalIterator *iter =
reader.NewDataIterator(options.comparator, kDisableGlobalSequenceNumber);
for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) {
// read kv from block
Slice k = iter->key();
@ -111,8 +114,8 @@ TEST_F(BlockTest, SimpleTest) {
delete iter;
// read block contents randomly
iter = reader.NewDataIterator(options.comparator, options.comparator,
kDisableGlobalSequenceNumber);
iter =
reader.NewDataIterator(options.comparator, kDisableGlobalSequenceNumber);
for (int i = 0; i < num_records; i++) {
// find a random key in the lookaside array
int index = rnd.Uniform(num_records);
@ -158,9 +161,8 @@ void CheckBlockContents(BlockContents contents, const int max_key,
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(prefix_size));
std::unique_ptr<InternalIterator> regular_iter(
reader2.NewDataIterator(BytewiseComparator(), BytewiseComparator(),
kDisableGlobalSequenceNumber));
std::unique_ptr<InternalIterator> regular_iter(reader2.NewDataIterator(
BytewiseComparator(), kDisableGlobalSequenceNumber));
// Seek existent keys
for (size_t i = 0; i < keys.size(); i++) {
@ -177,7 +179,8 @@ void CheckBlockContents(BlockContents contents, const int max_key,
// simply be set as invalid; whereas the binary search based iterator will
// return the one that is closest.
for (int i = 1; i < max_key - 1; i += 2) {
auto key = GenerateKey(i, 0, 0, nullptr);
// `DataBlockIter` assumes its APIs receive only internal keys.
auto key = GenerateInternalKey(i, 0, 0, nullptr);
regular_iter->Seek(key);
ASSERT_TRUE(regular_iter->Valid());
}
@ -382,8 +385,7 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) {
// read contents of block sequentially
size_t read_bytes = 0;
DataBlockIter *iter = reader.NewDataIterator(
options.comparator, options.comparator, kDisableGlobalSequenceNumber,
nullptr, stats.get());
options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get());
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
iter->value();
read_bytes += iter->TEST_CurrentEntrySize();
@ -414,8 +416,7 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) {
size_t read_bytes = 0;
DataBlockIter *iter = reader.NewDataIterator(
options.comparator, options.comparator, kDisableGlobalSequenceNumber,
nullptr, stats.get());
options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get());
for (int i = 0; i < num_records; i++) {
Slice k(keys[i]);
@ -449,8 +450,7 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) {
size_t read_bytes = 0;
DataBlockIter *iter = reader.NewDataIterator(
options.comparator, options.comparator, kDisableGlobalSequenceNumber,
nullptr, stats.get());
options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get());
std::unordered_set<int> read_keys;
for (int i = 0; i < num_records; i++) {
int index = rnd.Uniform(num_records);
@ -574,9 +574,8 @@ TEST_P(IndexBlockTest, IndexValueEncodingTest) {
Statistics *kNullStats = nullptr;
// read contents of block sequentially
InternalIteratorBase<IndexValue> *iter = reader.NewIndexIterator(
options.comparator, options.comparator, kDisableGlobalSequenceNumber,
kNullIter, kNullStats, kTotalOrderSeek, includeFirstKey(), kIncludesSeq,
kValueIsFull);
options.comparator, kDisableGlobalSequenceNumber, kNullIter, kNullStats,
kTotalOrderSeek, includeFirstKey(), kIncludesSeq, kValueIsFull);
iter->SeekToFirst();
for (int index = 0; index < num_records; ++index) {
ASSERT_TRUE(iter->Valid());
@ -595,10 +594,9 @@ TEST_P(IndexBlockTest, IndexValueEncodingTest) {
delete iter;
// read block contents randomly
iter = reader.NewIndexIterator(options.comparator, options.comparator,
kDisableGlobalSequenceNumber, kNullIter,
kNullStats, kTotalOrderSeek, includeFirstKey(),
kIncludesSeq, kValueIsFull);
iter = reader.NewIndexIterator(
options.comparator, kDisableGlobalSequenceNumber, kNullIter, kNullStats,
kTotalOrderSeek, includeFirstKey(), kIncludesSeq, kValueIsFull);
for (int i = 0; i < num_records * 2; i++) {
// find a random key in the lookaside array
int index = rnd.Uniform(num_records);

@ -391,7 +391,7 @@ TEST(DataBlockHashIndex, BlockTestSingleKey) {
Block reader(std::move(contents));
const InternalKeyComparator icmp(BytewiseComparator());
auto iter = reader.NewDataIterator(&icmp, icmp.user_comparator(),
auto iter = reader.NewDataIterator(icmp.user_comparator(),
kDisableGlobalSequenceNumber);
bool may_exist;
// search in block for the key just inserted
@ -475,7 +475,7 @@ TEST(DataBlockHashIndex, BlockTestLarge) {
// random seek existent keys
for (int i = 0; i < num_records; i++) {
auto iter = reader.NewDataIterator(&icmp, icmp.user_comparator(),
auto iter = reader.NewDataIterator(icmp.user_comparator(),
kDisableGlobalSequenceNumber);
// find a random key in the lookaside array
int index = rnd.Uniform(num_records);
@ -513,7 +513,7 @@ TEST(DataBlockHashIndex, BlockTestLarge) {
// C true false
for (int i = 0; i < num_records; i++) {
auto iter = reader.NewDataIterator(&icmp, icmp.user_comparator(),
auto iter = reader.NewDataIterator(icmp.user_comparator(),
kDisableGlobalSequenceNumber);
// find a random key in the lookaside array
int index = rnd.Uniform(num_records);

@ -133,7 +133,7 @@ InternalIteratorBase<IndexValue>* HashIndexReader::NewIterator(
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
auto it = index_block.GetValue()->NewIndexIterator(
internal_comparator(), internal_comparator()->user_comparator(),
internal_comparator()->user_comparator(),
rep->get_global_seqno(BlockType::kIndex), iter, kNullStats,
total_order_seek, index_has_first_key(), index_key_includes_seq(),
index_value_is_full(), false /* block_contents_pinned */,

@ -238,7 +238,7 @@ BlockHandle PartitionedFilterBlockReader::GetFilterPartitionHandle(
const InternalKeyComparator* const comparator = internal_comparator();
Statistics* kNullStats = nullptr;
filter_block.GetValue()->NewIndexIterator(
comparator, comparator->user_comparator(),
comparator->user_comparator(),
table()->get_rep()->get_global_seqno(BlockType::kFilter), &iter,
kNullStats, true /* total_order_seek */, false /* have_first_key */,
index_key_includes_seq(), index_value_is_full());
@ -441,10 +441,10 @@ void PartitionedFilterBlockReader::CacheDependencies(const ReadOptions& ro,
const InternalKeyComparator* const comparator = internal_comparator();
Statistics* kNullStats = nullptr;
filter_block.GetValue()->NewIndexIterator(
comparator, comparator->user_comparator(),
rep->get_global_seqno(BlockType::kFilter), &biter, kNullStats,
true /* total_order_seek */, false /* have_first_key */,
index_key_includes_seq(), index_value_is_full());
comparator->user_comparator(), rep->get_global_seqno(BlockType::kFilter),
&biter, kNullStats, true /* total_order_seek */,
false /* have_first_key */, index_key_includes_seq(),
index_value_is_full());
// Index partitions are assumed to be consecuitive. Prefetch them all.
// Read the first block offset
biter.SeekToFirst();

@ -70,7 +70,7 @@ InternalIteratorBase<IndexValue>* PartitionIndexReader::NewIterator(
new BlockBasedTable::PartitionedIndexIteratorState(table(),
&partition_map_),
index_block.GetValue()->NewIndexIterator(
internal_comparator(), internal_comparator()->user_comparator(),
internal_comparator()->user_comparator(),
rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
index_has_first_key(), index_key_includes_seq(),
index_value_is_full()));
@ -82,7 +82,7 @@ InternalIteratorBase<IndexValue>* PartitionIndexReader::NewIterator(
// to set `block_contents_pinned`.
std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter(
index_block.GetValue()->NewIndexIterator(
internal_comparator(), internal_comparator()->user_comparator(),
internal_comparator()->user_comparator(),
rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
index_has_first_key(), index_key_includes_seq(),
index_value_is_full()));
@ -126,7 +126,7 @@ void PartitionIndexReader::CacheDependencies(const ReadOptions& ro, bool pin) {
// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
index_block.GetValue()->NewIndexIterator(
internal_comparator(), internal_comparator()->user_comparator(),
internal_comparator()->user_comparator(),
rep->get_global_seqno(BlockType::kIndex), &biter, kNullStats, true,
index_has_first_key(), index_key_includes_seq(), index_value_is_full());
// Index partitions are assumed to be consecuitive. Prefetch them all.

@ -231,7 +231,7 @@ Status ReadProperties(const ReadOptions& read_options,
Block properties_block(std::move(block_contents));
DataBlockIter iter;
properties_block.NewDataIterator(BytewiseComparator(), BytewiseComparator(),
properties_block.NewDataIterator(BytewiseComparator(),
kDisableGlobalSequenceNumber, &iter);
auto new_table_properties = new TableProperties();
@ -395,8 +395,7 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
// are to compress it.
Block metaindex_block(std::move(metaindex_contents));
std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
BytewiseComparator(), BytewiseComparator(),
kDisableGlobalSequenceNumber));
BytewiseComparator(), kDisableGlobalSequenceNumber));
// -- Read property block
bool found_properties_block = true;
@ -468,8 +467,7 @@ Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
std::unique_ptr<InternalIterator> meta_iter;
meta_iter.reset(metaindex_block.NewDataIterator(
BytewiseComparator(), BytewiseComparator(),
kDisableGlobalSequenceNumber));
BytewiseComparator(), kDisableGlobalSequenceNumber));
return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle);
}
@ -514,8 +512,7 @@ Status ReadMetaBlock(RandomAccessFileReader* file,
std::unique_ptr<InternalIterator> meta_iter;
meta_iter.reset(metaindex_block.NewDataIterator(
BytewiseComparator(), BytewiseComparator(),
kDisableGlobalSequenceNumber));
BytewiseComparator(), kDisableGlobalSequenceNumber));
BlockHandle block_handle;
status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle);

@ -211,47 +211,6 @@ class Constructor {
stl_wrappers::KVMap data_;
};
class BlockConstructor: public Constructor {
public:
explicit BlockConstructor(const Comparator* cmp)
: Constructor(cmp),
comparator_(cmp),
block_(nullptr) { }
~BlockConstructor() override { delete block_; }
Status FinishImpl(const Options& /*options*/,
const ImmutableCFOptions& /*ioptions*/,
const MutableCFOptions& /*moptions*/,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& /*internal_comparator*/,
const stl_wrappers::KVMap& kv_map) override {
delete block_;
block_ = nullptr;
BlockBuilder builder(table_options.block_restart_interval);
for (const auto& kv : kv_map) {
builder.Add(kv.first, kv.second);
}
// Open the block
data_ = builder.Finish().ToString();
BlockContents contents;
contents.data = data_;
block_ = new Block(std::move(contents));
return Status::OK();
}
InternalIterator* NewIterator(
const SliceTransform* /*prefix_extractor*/) const override {
return block_->NewDataIterator(comparator_, comparator_,
kDisableGlobalSequenceNumber);
}
private:
const Comparator* comparator_;
std::string data_;
Block* block_;
BlockConstructor();
};
// A helper class that converts internal format keys into user keys
class KeyConvertingIterator : public InternalIterator {
public:
@ -309,7 +268,56 @@ class KeyConvertingIterator : public InternalIterator {
void operator=(const KeyConvertingIterator&);
};
class TableConstructor: public Constructor {
// `BlockConstructor` APIs always accept/return user keys.
class BlockConstructor : public Constructor {
public:
explicit BlockConstructor(const Comparator* cmp)
: Constructor(cmp), comparator_(cmp), block_(nullptr) {}
~BlockConstructor() override { delete block_; }
Status FinishImpl(const Options& /*options*/,
const ImmutableCFOptions& /*ioptions*/,
const MutableCFOptions& /*moptions*/,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& /*internal_comparator*/,
const stl_wrappers::KVMap& kv_map) override {
delete block_;
block_ = nullptr;
BlockBuilder builder(table_options.block_restart_interval);
for (const auto& kv : kv_map) {
// `DataBlockIter` assumes it reads only internal keys. `BlockConstructor`
// clients provide user keys, so we need to convert to internal key format
// before writing the data block.
ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
std::string encoded;
AppendInternalKey(&encoded, ikey);
builder.Add(encoded, kv.second);
}
// Open the block
data_ = builder.Finish().ToString();
BlockContents contents;
contents.data = data_;
block_ = new Block(std::move(contents));
return Status::OK();
}
InternalIterator* NewIterator(
const SliceTransform* /*prefix_extractor*/) const override {
// `DataBlockIter` returns the internal keys it reads.
// `KeyConvertingIterator` converts them to user keys before they are
// exposed to the `BlockConstructor` clients.
return new KeyConvertingIterator(
block_->NewDataIterator(comparator_, kDisableGlobalSequenceNumber));
}
private:
const Comparator* comparator_;
std::string data_;
Block* block_;
BlockConstructor();
};
class TableConstructor : public Constructor {
public:
explicit TableConstructor(const Comparator* cmp,
bool convert_to_internal_key = false,
@ -4350,8 +4358,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
Block metaindex_block(std::move(metaindex_contents));
std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
BytewiseComparator(), BytewiseComparator(),
kDisableGlobalSequenceNumber));
BytewiseComparator(), kDisableGlobalSequenceNumber));
bool found_properties_block = true;
ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block));
ASSERT_TRUE(found_properties_block);
@ -4430,7 +4437,7 @@ TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
// verify properties block comes last
std::unique_ptr<InternalIterator> metaindex_iter{
metaindex_block.NewDataIterator(options.comparator, options.comparator,
metaindex_block.NewDataIterator(options.comparator,
kDisableGlobalSequenceNumber)};
uint64_t max_offset = 0;
std::string key_at_max_offset;

@ -17,6 +17,10 @@ namespace ROCKSDB_NAMESPACE {
// perf_context.user_key_comparison_count.
class UserComparatorWrapper final : public Comparator {
public:
// `UserComparatorWrapper`s constructed with the default constructor are not
// usable and will segfault on any attempt to use them for comparisons.
UserComparatorWrapper() : user_comparator_(nullptr) {}
explicit UserComparatorWrapper(const Comparator* const user_cmp)
: Comparator(user_cmp->timestamp_size()), user_comparator_(user_cmp) {}

Loading…
Cancel
Save