Separate internal and user key comparators in `BlockIter` (#6944)

Summary:
Replace `BlockIter::comparator_` and `IndexBlockIter::user_comparator_wrapper_` with a concrete `UserComparatorWrapper` and `InternalKeyComparator`. The motivation for this change was the inconvenience of not knowing the concrete type of `BlockIter::comparator_`, which prevented calling specialized internal key comparison functions to optimize comparison of keys with global seqno applied.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/6944

Test Plan:
benchmark setup -- single file DBs, in-memory, no compression. "normal_db"
created by regular flush; "ingestion_db" created by ingesting a file. Both
DBs have same contents.

```
$ TEST_TMPDIR=/dev/shm/normal_db/ ./db_bench -benchmarks=fillrandom,compact -write_buffer_size=10485760000 -disable_auto_compactions=true -compression_type=none -num=1000000
$ ./ldb write_extern_sst ./tmp.sst --db=/dev/shm/ingestion_db/dbbench/ --compression_type=no --hex --create_if_missing < <(./sst_dump --command=scan --output_hex --file=/dev/shm/normal_db/dbbench/000007.sst | awk 'began {print "0x" substr($1, 2, length($1) - 2), "==>", "0x" $5} ; /^Sst file format: block-based/ {began=1}')
$ ./ldb ingest_extern_sst ./tmp.sst --db=/dev/shm/ingestion_db/dbbench/
```

benchmark run command:

```
$ TEST_TMPDIR=/dev/shm/$DB/ ./db_bench -benchmarks=seekrandom -seek_nexts=$SEEK_NEXT -use_existing_db=true -cache_index_and_filter_blocks=false -num=1000000 -cache_size=0 -threads=1 -reads=200000000 -mmap_read=1 -verify_checksum=false
```

results: perf improved marginally for ingestion_db and did not change significantly for normal_db:

SEEK_NEXT | DB | code | ops/sec | % change
-- | -- | -- | -- | --
0 | normal_db | master | 350880 |  
0 | normal_db | PR6944 | 351040 | 0.0
0 | ingestion_db | master | 343255 |  
0 | ingestion_db | PR6944 | 349424 | 1.8
10 | normal_db | master | 218711 |  
10 | normal_db | PR6944 | 217892 | -0.4
10 | ingestion_db | master | 220334 |  
10 | ingestion_db | PR6944 | 226437 | 2.8

Reviewed By: pdillinger

Differential Revision: D21924676

Pulled By: ajkr

fbshipit-source-id: ea4288a2eefa8112eb6c651a671c1de18c12e538
main
Andrew Kryczka 5 years ago committed by Facebook GitHub Bot
parent 4202c48f80
commit dd29ad4223
  1. 3
      HISTORY.md
  2. 21
      db/dbformat.cc
  3. 68
      db/dbformat.h
  4. 2
      table/block_based/binary_search_index_reader.cc
  5. 126
      table/block_based/block.cc
  6. 305
      table/block_based/block.h
  7. 17
      table/block_based/block_based_table_reader.cc
  8. 46
      table/block_based/block_test.cc
  9. 6
      table/block_based/data_block_hash_index_test.cc
  10. 2
      table/block_based/hash_index_reader.cc
  11. 10
      table/block_based/partitioned_filter_block.cc
  12. 6
      table/block_based/partitioned_index_reader.cc
  13. 11
      table/meta_blocks.cc
  14. 97
      table/table_test.cc
  15. 4
      util/user_comparator_wrapper.h

@ -26,6 +26,9 @@
### Bug Fixes ### Bug Fixes
* Fail recovery and report once hitting a physical log record checksum mismatch, while reading MANIFEST. RocksDB should not continue processing the MANIFEST any further. * Fail recovery and report once hitting a physical log record checksum mismatch, while reading MANIFEST. RocksDB should not continue processing the MANIFEST any further.
### Performance Improvements
* Eliminate key copies for internal comparisons while accessing ingested block-based tables.
## 6.11 (6/12/2020) ## 6.11 (6/12/2020)
### Bug Fixes ### Bug Fixes
* Fix consistency checking error swallowing in some cases when options.force_consistency_checks = true. * Fix consistency checking error swallowing in some cases when options.force_consistency_checks = true.

@ -26,12 +26,6 @@ namespace ROCKSDB_NAMESPACE {
const ValueType kValueTypeForSeek = kTypeDeletionWithTimestamp; const ValueType kValueTypeForSeek = kTypeDeletionWithTimestamp;
const ValueType kValueTypeForSeekForPrev = kTypeDeletion; const ValueType kValueTypeForSeekForPrev = kTypeDeletion;
uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
assert(seq <= kMaxSequenceNumber);
assert(IsExtendedValueType(t));
return (seq << 8) | t;
}
EntryType GetEntryType(ValueType value_type) { EntryType GetEntryType(ValueType value_type) {
switch (value_type) { switch (value_type) {
case kTypeValue: case kTypeValue:
@ -62,14 +56,6 @@ bool ParseFullKey(const Slice& internal_key, FullKey* fkey) {
return true; return true;
} }
void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t) {
*seq = packed >> 8;
*t = static_cast<ValueType>(packed & 0xff);
assert(*seq <= kMaxSequenceNumber);
assert(IsExtendedValueType(*t));
}
void AppendInternalKey(std::string* result, const ParsedInternalKey& key) { void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
result->append(key.user_key.data(), key.user_key.size()); result->append(key.user_key.data(), key.user_key.size());
PutFixed64(result, PackSequenceAndType(key.sequence, key.type)); PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
@ -111,7 +97,12 @@ std::string InternalKey::DebugString(bool hex) const {
return result; return result;
} }
const char* InternalKeyComparator::Name() const { return name_.c_str(); } const char* InternalKeyComparator::Name() const {
if (name_.empty()) {
return "rocksdb.anonymous.InternalKeyComparator";
}
return name_.c_str();
}
int InternalKeyComparator::Compare(const ParsedInternalKey& a, int InternalKeyComparator::Compare(const ParsedInternalKey& a,
const ParsedInternalKey& b) const { const ParsedInternalKey& b) const {

@ -124,11 +124,22 @@ inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
} }
// Pack a sequence number and a ValueType into a uint64_t // Pack a sequence number and a ValueType into a uint64_t
extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t); inline uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
assert(seq <= kMaxSequenceNumber);
assert(IsExtendedValueType(t));
return (seq << 8) | t;
}
// Given the result of PackSequenceAndType, store the sequence number in *seq // Given the result of PackSequenceAndType, store the sequence number in *seq
// and the ValueType in *t. // and the ValueType in *t.
extern void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t); inline void UnPackSequenceAndType(uint64_t packed, uint64_t* seq,
ValueType* t) {
*seq = packed >> 8;
*t = static_cast<ValueType>(packed & 0xff);
assert(*seq <= kMaxSequenceNumber);
assert(IsExtendedValueType(*t));
}
EntryType GetEntryType(ValueType value_type); EntryType GetEntryType(ValueType value_type);
@ -200,11 +211,22 @@ class InternalKeyComparator
std::string name_; std::string name_;
public: public:
explicit InternalKeyComparator(const Comparator* c) // `InternalKeyComparator`s constructed with the default constructor are not
: Comparator(c->timestamp_size()), // usable and will segfault on any attempt to use them for comparisons.
user_comparator_(c), InternalKeyComparator() = default;
name_("rocksdb.InternalKeyComparator:" +
std::string(user_comparator_.Name())) {} // @param named If true, assign a name to this comparator based on the
// underlying comparator's name. This involves an allocation and copy in
// this constructor to precompute the result of `Name()`. To avoid this
// overhead, set `named` to false. In that case, `Name()` will return a
// generic name that is non-specific to the underlying comparator.
explicit InternalKeyComparator(const Comparator* c, bool named = true)
: Comparator(c->timestamp_size()), user_comparator_(c) {
if (named) {
name_ = "rocksdb.InternalKeyComparator:" +
std::string(user_comparator_.Name());
}
}
virtual ~InternalKeyComparator() {} virtual ~InternalKeyComparator() {}
virtual const char* Name() const override; virtual const char* Name() const override;
@ -221,6 +243,12 @@ class InternalKeyComparator
int Compare(const InternalKey& a, const InternalKey& b) const; int Compare(const InternalKey& a, const InternalKey& b) const;
int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const; int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const;
// In this `Compare()` overload, the sequence numbers provided in
// `a_global_seqno` and `b_global_seqno` override the sequence numbers in `a`
// and `b`, respectively. To disable sequence number override(s), provide the
// value `kDisableGlobalSequenceNumber`.
int Compare(const Slice& a, SequenceNumber a_global_seqno, const Slice& b,
SequenceNumber b_global_seqno) const;
virtual const Comparator* GetRootComparator() const override { virtual const Comparator* GetRootComparator() const override {
return user_comparator_.GetRootComparator(); return user_comparator_.GetRootComparator();
} }
@ -677,6 +705,32 @@ inline int InternalKeyComparator::CompareKeySeq(const Slice& akey,
return r; return r;
} }
inline int InternalKeyComparator::Compare(const Slice& a,
SequenceNumber a_global_seqno,
const Slice& b,
SequenceNumber b_global_seqno) const {
int r = user_comparator_.Compare(ExtractUserKey(a), ExtractUserKey(b));
if (r == 0) {
uint64_t a_footer, b_footer;
if (a_global_seqno == kDisableGlobalSequenceNumber) {
a_footer = ExtractInternalKeyFooter(a);
} else {
a_footer = PackSequenceAndType(a_global_seqno, ExtractValueType(a));
}
if (b_global_seqno == kDisableGlobalSequenceNumber) {
b_footer = ExtractInternalKeyFooter(b);
} else {
b_footer = PackSequenceAndType(b_global_seqno, ExtractValueType(b));
}
if (a_footer > b_footer) {
r = -1;
} else if (a_footer < b_footer) {
r = +1;
}
}
return r;
}
// Wrap InternalKeyComparator as a comparator class for ParsedInternalKey. // Wrap InternalKeyComparator as a comparator class for ParsedInternalKey.
struct ParsedInternalKeyComparator { struct ParsedInternalKeyComparator {
explicit ParsedInternalKeyComparator(const InternalKeyComparator* c) explicit ParsedInternalKeyComparator(const InternalKeyComparator* c)

@ -61,7 +61,7 @@ InternalIteratorBase<IndexValue>* BinarySearchIndexReader::NewIterator(
// We don't return pinned data from index blocks, so no need // We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`. // to set `block_contents_pinned`.
auto it = index_block.GetValue()->NewIndexIterator( auto it = index_block.GetValue()->NewIndexIterator(
internal_comparator(), internal_comparator()->user_comparator(), internal_comparator()->user_comparator(),
rep->get_global_seqno(BlockType::kIndex), iter, kNullStats, true, rep->get_global_seqno(BlockType::kIndex), iter, kNullStats, true,
index_has_first_key(), index_key_includes_seq(), index_value_is_full()); index_has_first_key(), index_key_includes_seq(), index_value_is_full());

@ -127,19 +127,15 @@ struct DecodeKeyV4 {
} }
}; };
void DataBlockIter::Next() { void DataBlockIter::NextImpl() { ParseNextDataKey<DecodeEntry>(); }
ParseNextDataKey<DecodeEntry>();
}
void DataBlockIter::NextOrReport() { void DataBlockIter::NextOrReportImpl() {
ParseNextDataKey<CheckAndDecodeEntry>(); ParseNextDataKey<CheckAndDecodeEntry>();
} }
void IndexBlockIter::Next() { void IndexBlockIter::NextImpl() { ParseNextIndexKey(); }
ParseNextIndexKey();
}
void IndexBlockIter::Prev() { void IndexBlockIter::PrevImpl() {
assert(Valid()); assert(Valid());
// Scan backwards to a restart point before current_ // Scan backwards to a restart point before current_
const uint32_t original = current_; const uint32_t original = current_;
@ -158,8 +154,8 @@ void IndexBlockIter::Prev() {
} }
} }
// Similar to IndexBlockIter::Prev but also caches the prev entries // Similar to IndexBlockIter::PrevImpl but also caches the prev entries
void DataBlockIter::Prev() { void DataBlockIter::PrevImpl() {
assert(Valid()); assert(Valid());
assert(prev_entries_idx_ == -1 || assert(prev_entries_idx_ == -1 ||
@ -173,25 +169,26 @@ void DataBlockIter::Prev() {
prev_entries_[prev_entries_idx_]; prev_entries_[prev_entries_idx_];
const char* key_ptr = nullptr; const char* key_ptr = nullptr;
bool raw_key_cached;
if (current_prev_entry.key_ptr != nullptr) { if (current_prev_entry.key_ptr != nullptr) {
// The key is not delta encoded and stored in the data block // The key is not delta encoded and stored in the data block
key_ptr = current_prev_entry.key_ptr; key_ptr = current_prev_entry.key_ptr;
key_pinned_ = true; raw_key_cached = false;
} else { } else {
// The key is delta encoded and stored in prev_entries_keys_buff_ // The key is delta encoded and stored in prev_entries_keys_buff_
key_ptr = prev_entries_keys_buff_.data() + current_prev_entry.key_offset; key_ptr = prev_entries_keys_buff_.data() + current_prev_entry.key_offset;
key_pinned_ = false; raw_key_cached = true;
} }
const Slice current_key(key_ptr, current_prev_entry.key_size); const Slice current_key(key_ptr, current_prev_entry.key_size);
current_ = current_prev_entry.offset; current_ = current_prev_entry.offset;
raw_key_.SetKey(current_key, false /* copy */); // TODO(ajkr): the copy when `raw_key_cached` is done here for convenience,
// not necessity. It is convenient since this class treats keys as pinned
// when `raw_key_` points to an outside buffer. So we cannot allow
// `raw_key_` to point into the Prev cache, as it is a transient outside buffer
// (i.e., keys in it are not actually pinned).
raw_key_.SetKey(current_key, raw_key_cached /* copy */);
value_ = current_prev_entry.value; value_ = current_prev_entry.value;
key_ = applied_key_.UpdateAndGetKey();
// This is kind of odd in that applied_key_ may say the key is pinned while
// key_pinned_ ends up being false. That'll only happen when the key resides
// in a transient caching buffer.
key_pinned_ = key_pinned_ && applied_key_.IsKeyPinned();
return; return;
} }
@ -238,7 +235,7 @@ void DataBlockIter::Prev() {
prev_entries_idx_ = static_cast<int32_t>(prev_entries_.size()) - 1; prev_entries_idx_ = static_cast<int32_t>(prev_entries_.size()) - 1;
} }
void DataBlockIter::Seek(const Slice& target) { void DataBlockIter::SeekImpl(const Slice& target) {
Slice seek_key = target; Slice seek_key = target;
PERF_TIMER_GUARD(block_seek_nanos); PERF_TIMER_GUARD(block_seek_nanos);
if (data_ == nullptr) { // Not init yet if (data_ == nullptr) { // Not init yet
@ -247,12 +244,12 @@ void DataBlockIter::Seek(const Slice& target) {
uint32_t index = 0; uint32_t index = 0;
bool skip_linear_scan = false; bool skip_linear_scan = false;
bool ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index, bool ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan, comparator_); &skip_linear_scan);
if (!ok) { if (!ok) {
return; return;
} }
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_); FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan);
} }
// Optimized Seek for point lookup for an internal key `target` // Optimized Seek for point lookup for an internal key `target`
@ -286,7 +283,7 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
if (entry == kCollision) { if (entry == kCollision) {
// HashSeek not effective, falling back // HashSeek not effective, falling back
Seek(target); SeekImpl(target);
return true; return true;
} }
@ -330,7 +327,7 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
// TODO(fwu): check the left and right boundaries of the restart interval // TODO(fwu): check the left and right boundaries of the restart interval
// to avoid a linear seek for a target key that is out of range. // to avoid a linear seek for a target key that is out of range.
if (!ParseNextDataKey<DecodeEntry>(limit) || if (!ParseNextDataKey<DecodeEntry>(limit) ||
comparator_->Compare(applied_key_.UpdateAndGetKey(), target) >= 0) { CompareCurrentKey(target) >= 0) {
// we stop at the first potential matching user key. // we stop at the first potential matching user key.
break; break;
} }
@ -355,18 +352,18 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
return true; return true;
} }
if (user_comparator_->Compare(raw_key_.GetUserKey(), target_user_key) != 0) { if (ucmp_wrapper_.Compare(raw_key_.GetUserKey(), target_user_key) != 0) {
// the key is not in this block and cannot be at the next block either. // the key is not in this block and cannot be at the next block either.
return false; return false;
} }
// Here we are conservative and only support a limited set of cases // Here we are conservative and only support a limited set of cases
ValueType value_type = ExtractValueType(applied_key_.UpdateAndGetKey()); ValueType value_type = ExtractValueType(raw_key_.GetInternalKey());
if (value_type != ValueType::kTypeValue && if (value_type != ValueType::kTypeValue &&
value_type != ValueType::kTypeDeletion && value_type != ValueType::kTypeDeletion &&
value_type != ValueType::kTypeSingleDeletion && value_type != ValueType::kTypeSingleDeletion &&
value_type != ValueType::kTypeBlobIndex) { value_type != ValueType::kTypeBlobIndex) {
Seek(target); SeekImpl(target);
return true; return true;
} }
@ -374,14 +371,14 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
return true; return true;
} }
void IndexBlockIter::Seek(const Slice& target) { void IndexBlockIter::SeekImpl(const Slice& target) {
TEST_SYNC_POINT("IndexBlockIter::Seek:0"); TEST_SYNC_POINT("IndexBlockIter::Seek:0");
PERF_TIMER_GUARD(block_seek_nanos); PERF_TIMER_GUARD(block_seek_nanos);
if (data_ == nullptr) { // Not init yet if (data_ == nullptr) { // Not init yet
return; return;
} }
Slice seek_key = target; Slice seek_key = target;
if (!key_includes_seq_) { if (raw_key_.IsUserKey()) {
seek_key = ExtractUserKey(target); seek_key = ExtractUserKey(target);
} }
status_ = Status::OK(); status_ = Status::OK();
@ -403,19 +400,19 @@ void IndexBlockIter::Seek(const Slice& target) {
skip_linear_scan = true; skip_linear_scan = true;
} else if (value_delta_encoded_) { } else if (value_delta_encoded_) {
ok = BinarySeek<DecodeKeyV4>(seek_key, 0, num_restarts_ - 1, &index, ok = BinarySeek<DecodeKeyV4>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan, comparator_); &skip_linear_scan);
} else { } else {
ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index, ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan, comparator_); &skip_linear_scan);
} }
if (!ok) { if (!ok) {
return; return;
} }
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_); FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan);
} }
void DataBlockIter::SeekForPrev(const Slice& target) { void DataBlockIter::SeekForPrevImpl(const Slice& target) {
PERF_TIMER_GUARD(block_seek_nanos); PERF_TIMER_GUARD(block_seek_nanos);
Slice seek_key = target; Slice seek_key = target;
if (data_ == nullptr) { // Not init yet if (data_ == nullptr) { // Not init yet
@ -424,24 +421,23 @@ void DataBlockIter::SeekForPrev(const Slice& target) {
uint32_t index = 0; uint32_t index = 0;
bool skip_linear_scan = false; bool skip_linear_scan = false;
bool ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index, bool ok = BinarySeek<DecodeKey>(seek_key, 0, num_restarts_ - 1, &index,
&skip_linear_scan, comparator_); &skip_linear_scan);
if (!ok) { if (!ok) {
return; return;
} }
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_); FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan);
if (!Valid()) { if (!Valid()) {
SeekToLast(); SeekToLastImpl();
} else { } else {
while (Valid() && while (Valid() && CompareCurrentKey(seek_key) > 0) {
comparator_->Compare(applied_key_.UpdateAndGetKey(), seek_key) > 0) { PrevImpl();
Prev();
} }
} }
} }
void DataBlockIter::SeekToFirst() { void DataBlockIter::SeekToFirstImpl() {
if (data_ == nullptr) { // Not init yet if (data_ == nullptr) { // Not init yet
return; return;
} }
@ -449,7 +445,7 @@ void DataBlockIter::SeekToFirst() {
ParseNextDataKey<DecodeEntry>(); ParseNextDataKey<DecodeEntry>();
} }
void DataBlockIter::SeekToFirstOrReport() { void DataBlockIter::SeekToFirstOrReportImpl() {
if (data_ == nullptr) { // Not init yet if (data_ == nullptr) { // Not init yet
return; return;
} }
@ -457,7 +453,7 @@ void DataBlockIter::SeekToFirstOrReport() {
ParseNextDataKey<CheckAndDecodeEntry>(); ParseNextDataKey<CheckAndDecodeEntry>();
} }
void IndexBlockIter::SeekToFirst() { void IndexBlockIter::SeekToFirstImpl() {
if (data_ == nullptr) { // Not init yet if (data_ == nullptr) { // Not init yet
return; return;
} }
@ -466,7 +462,7 @@ void IndexBlockIter::SeekToFirst() {
ParseNextIndexKey(); ParseNextIndexKey();
} }
void DataBlockIter::SeekToLast() { void DataBlockIter::SeekToLastImpl() {
if (data_ == nullptr) { // Not init yet if (data_ == nullptr) { // Not init yet
return; return;
} }
@ -476,7 +472,7 @@ void DataBlockIter::SeekToLast() {
} }
} }
void IndexBlockIter::SeekToLast() { void IndexBlockIter::SeekToLastImpl() {
if (data_ == nullptr) { // Not init yet if (data_ == nullptr) { // Not init yet
return; return;
} }
@ -526,8 +522,6 @@ bool DataBlockIter::ParseNextDataKey(const char* limit) {
// This key share `shared` bytes with prev key, we need to decode it // This key share `shared` bytes with prev key, we need to decode it
raw_key_.TrimAppend(shared, p, non_shared); raw_key_.TrimAppend(shared, p, non_shared);
} }
key_ = applied_key_.UpdateAndGetKey();
key_pinned_ = applied_key_.IsKeyPinned();
#ifndef NDEBUG #ifndef NDEBUG
if (global_seqno_ != kDisableGlobalSequenceNumber) { if (global_seqno_ != kDisableGlobalSequenceNumber) {
@ -590,8 +584,6 @@ bool IndexBlockIter::ParseNextIndexKey() {
// This key share `shared` bytes with prev key, we need to decode it // This key share `shared` bytes with prev key, we need to decode it
raw_key_.TrimAppend(shared, p, non_shared); raw_key_.TrimAppend(shared, p, non_shared);
} }
key_ = applied_key_.UpdateAndGetKey();
key_pinned_ = applied_key_.IsKeyPinned();
value_ = Slice(p + non_shared, value_length); value_ = Slice(p + non_shared, value_length);
if (shared == 0) { if (shared == 0) {
while (restart_index_ + 1 < num_restarts_ && while (restart_index_ + 1 < num_restarts_ &&
@ -651,12 +643,12 @@ void IndexBlockIter::DecodeCurrentValue(uint32_t shared) {
template <class TValue> template <class TValue>
void BlockIter<TValue>::FindKeyAfterBinarySeek(const Slice& target, void BlockIter<TValue>::FindKeyAfterBinarySeek(const Slice& target,
uint32_t index, uint32_t index,
bool skip_linear_scan, bool skip_linear_scan) {
const Comparator* comp) {
// SeekToRestartPoint() only does the lookup in the restart block. We need // SeekToRestartPoint() only does the lookup in the restart block. We need
// to follow it up with Next() to position the iterator at the restart key. // to follow it up with NextImpl() to position the iterator at the restart
// key.
SeekToRestartPoint(index); SeekToRestartPoint(index);
Next(); NextImpl();
if (!skip_linear_scan) { if (!skip_linear_scan) {
// Linear search (within restart block) for first key >= target // Linear search (within restart block) for first key >= target
@ -672,14 +664,14 @@ void BlockIter<TValue>::FindKeyAfterBinarySeek(const Slice& target,
max_offset = port::kMaxUint32; max_offset = port::kMaxUint32;
} }
while (true) { while (true) {
Next(); NextImpl();
if (!Valid()) { if (!Valid()) {
break; break;
} }
if (current_ == max_offset) { if (current_ == max_offset) {
assert(comp->Compare(applied_key_.UpdateAndGetKey(), target) > 0); assert(CompareCurrentKey(target) > 0);
break; break;
} else if (comp->Compare(applied_key_.UpdateAndGetKey(), target) >= 0) { } else if (CompareCurrentKey(target) >= 0) {
break; break;
} }
} }
@ -698,8 +690,7 @@ template <class TValue>
template <typename DecodeKeyFunc> template <typename DecodeKeyFunc>
bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left, bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
uint32_t right, uint32_t* index, uint32_t right, uint32_t* index,
bool* skip_linear_scan, bool* skip_linear_scan) {
const Comparator* comp) {
assert(left <= right); assert(left <= right);
if (restarts_ == 0) { if (restarts_ == 0) {
// SST files dedicated to range tombstones are written with index blocks // SST files dedicated to range tombstones are written with index blocks
@ -724,7 +715,7 @@ bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
} }
Slice mid_key(key_ptr, non_shared); Slice mid_key(key_ptr, non_shared);
raw_key_.SetKey(mid_key, false /* copy */); raw_key_.SetKey(mid_key, false /* copy */);
int cmp = comp->Compare(applied_key_.UpdateAndGetKey(), target); int cmp = CompareCurrentKey(target);
if (cmp < 0) { if (cmp < 0) {
// Key at "mid" is smaller than "target". Therefore all // Key at "mid" is smaller than "target". Therefore all
// blocks before "mid" are uninteresting. // blocks before "mid" are uninteresting.
@ -757,7 +748,7 @@ bool BlockIter<TValue>::BinarySeek(const Slice& target, uint32_t left,
} }
Slice first_key(key_ptr, non_shared); Slice first_key(key_ptr, non_shared);
raw_key_.SetKey(first_key, false /* copy */); raw_key_.SetKey(first_key, false /* copy */);
int cmp = comp->Compare(applied_key_.UpdateAndGetKey(), target); int cmp = CompareCurrentKey(target);
*skip_linear_scan = cmp >= 0; *skip_linear_scan = cmp >= 0;
} }
return true; return true;
@ -780,7 +771,7 @@ int IndexBlockIter::CompareBlockKey(uint32_t block_index, const Slice& target) {
} }
Slice block_key(key_ptr, non_shared); Slice block_key(key_ptr, non_shared);
raw_key_.SetKey(block_key, false /* copy */); raw_key_.SetKey(block_key, false /* copy */);
return comparator_->Compare(applied_key_.UpdateAndGetKey(), target); return CompareCurrentKey(target);
} }
// Binary search in block_ids to find the first block // Binary search in block_ids to find the first block
@ -874,7 +865,7 @@ bool IndexBlockIter::PrefixSeek(const Slice& target, uint32_t* index,
assert(prefix_index_); assert(prefix_index_);
*prefix_may_exist = true; *prefix_may_exist = true;
Slice seek_key = target; Slice seek_key = target;
if (!key_includes_seq_) { if (raw_key_.IsUserKey()) {
seek_key = ExtractUserKey(target); seek_key = ExtractUserKey(target);
} }
uint32_t* block_ids = nullptr; uint32_t* block_ids = nullptr;
@ -989,8 +980,7 @@ Block::Block(BlockContents&& contents, size_t read_amp_bytes_per_bit,
} }
} }
DataBlockIter* Block::NewDataIterator(const Comparator* cmp, DataBlockIter* Block::NewDataIterator(const Comparator* ucmp,
const Comparator* ucmp,
SequenceNumber global_seqno, SequenceNumber global_seqno,
DataBlockIter* iter, Statistics* stats, DataBlockIter* iter, Statistics* stats,
bool block_contents_pinned) { bool block_contents_pinned) {
@ -1010,7 +1000,7 @@ DataBlockIter* Block::NewDataIterator(const Comparator* cmp,
return ret_iter; return ret_iter;
} else { } else {
ret_iter->Initialize( ret_iter->Initialize(
cmp, ucmp, data_, restart_offset_, num_restarts_, global_seqno, ucmp, data_, restart_offset_, num_restarts_, global_seqno,
read_amp_bitmap_.get(), block_contents_pinned, read_amp_bitmap_.get(), block_contents_pinned,
data_block_hash_index_.Valid() ? &data_block_hash_index_ : nullptr); data_block_hash_index_.Valid() ? &data_block_hash_index_ : nullptr);
if (read_amp_bitmap_) { if (read_amp_bitmap_) {
@ -1025,10 +1015,10 @@ DataBlockIter* Block::NewDataIterator(const Comparator* cmp,
} }
IndexBlockIter* Block::NewIndexIterator( IndexBlockIter* Block::NewIndexIterator(
const Comparator* cmp, const Comparator* ucmp, SequenceNumber global_seqno, const Comparator* ucmp, SequenceNumber global_seqno, IndexBlockIter* iter,
IndexBlockIter* iter, Statistics* /*stats*/, bool total_order_seek, Statistics* /*stats*/, bool total_order_seek, bool have_first_key,
bool have_first_key, bool key_includes_seq, bool value_is_full, bool key_includes_seq, bool value_is_full, bool block_contents_pinned,
bool block_contents_pinned, BlockPrefixIndex* prefix_index) { BlockPrefixIndex* prefix_index) {
IndexBlockIter* ret_iter; IndexBlockIter* ret_iter;
if (iter != nullptr) { if (iter != nullptr) {
ret_iter = iter; ret_iter = iter;
@ -1046,7 +1036,7 @@ IndexBlockIter* Block::NewIndexIterator(
} else { } else {
BlockPrefixIndex* prefix_index_ptr = BlockPrefixIndex* prefix_index_ptr =
total_order_seek ? nullptr : prefix_index; total_order_seek ? nullptr : prefix_index;
ret_iter->Initialize(cmp, ucmp, data_, restart_offset_, num_restarts_, ret_iter->Initialize(ucmp, data_, restart_offset_, num_restarts_,
global_seqno, prefix_index_ptr, have_first_key, global_seqno, prefix_index_ptr, have_first_key,
key_includes_seq, value_is_full, key_includes_seq, value_is_full,
block_contents_pinned); block_contents_pinned);

@ -168,8 +168,8 @@ class Block {
BlockBasedTableOptions::DataBlockIndexType IndexType() const; BlockBasedTableOptions::DataBlockIndexType IndexType() const;
// If comparator is InternalKeyComparator, user_comparator is its user // ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key
// comparator; they are equal otherwise. // comparator.
// //
// If iter is null, return new Iterator // If iter is null, return new Iterator
// If iter is not null, update this one and return it as Iterator* // If iter is not null, update this one and return it as Iterator*
@ -187,13 +187,15 @@ class Block {
// NOTE: for the hash based lookup, if a key prefix doesn't match any key, // NOTE: for the hash based lookup, if a key prefix doesn't match any key,
// the iterator will simply be set as "invalid", rather than returning // the iterator will simply be set as "invalid", rather than returning
// the key that is just past the target key. // the key that is just past the target key.
DataBlockIter* NewDataIterator(const Comparator* comparator, DataBlockIter* NewDataIterator(const Comparator* ucmp,
const Comparator* user_comparator,
SequenceNumber global_seqno, SequenceNumber global_seqno,
DataBlockIter* iter = nullptr, DataBlockIter* iter = nullptr,
Statistics* stats = nullptr, Statistics* stats = nullptr,
bool block_contents_pinned = false); bool block_contents_pinned = false);
// ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key
// comparator.
//
// key_includes_seq, default true, means that the keys are in internal key // key_includes_seq, default true, means that the keys are in internal key
// format. // format.
// value_is_full, default true, means that no delta encoding is // value_is_full, default true, means that no delta encoding is
@ -206,8 +208,7 @@ class Block {
// first_internal_key. It affects data serialization format, so the same value // first_internal_key. It affects data serialization format, so the same value
// have_first_key must be used when writing and reading index. // have_first_key must be used when writing and reading index.
// It is determined by IndexType property of the table. // It is determined by IndexType property of the table.
IndexBlockIter* NewIndexIterator(const Comparator* comparator, IndexBlockIter* NewIndexIterator(const Comparator* ucmp,
const Comparator* user_comparator,
SequenceNumber global_seqno, SequenceNumber global_seqno,
IndexBlockIter* iter, Statistics* stats, IndexBlockIter* iter, Statistics* stats,
bool total_order_seek, bool have_first_key, bool total_order_seek, bool have_first_key,
@ -228,61 +229,36 @@ class Block {
DataBlockHashIndex data_block_hash_index_; DataBlockHashIndex data_block_hash_index_;
}; };
// A GlobalSeqnoAppliedKey exposes a key with global sequence number applied // A `BlockIter` iterates over the entries in a `Block`'s data buffer. The
// if configured with `global_seqno != kDisableGlobalSequenceNumber`. It may // format of this data buffer is an uncompressed, sorted sequence of key-value
// hold a user key or an internal key since `format_version>=3` index blocks // pairs (see `Block` API for more details).
// contain user keys. In case it holds user keys, it must be configured with //
// `global_seqno == kDisableGlobalSequenceNumber`. // Notably, the keys may either be in internal key format or user key format.
class GlobalSeqnoAppliedKey { // Subclasses are responsible for configuring the key format.
public: //
void Initialize(IterKey* key, SequenceNumber global_seqno) { // `BlockIter` intends to provide final overrides for all of
key_ = key; // `InternalIteratorBase` functions that can move the iterator. It does
global_seqno_ = global_seqno; // this to guarantee `UpdateKey()` is called exactly once after each key
#ifndef NDEBUG // movement potentially visible to users. In this step, the key is prepared
init_ = true; // (e.g., serialized if global seqno is in effect) so it can be returned
#endif // NDEBUG // immediately when the user asks for it via calling `key() const`.
} //
// For its subclasses, it provides protected variants of the above-mentioned
Slice UpdateAndGetKey() { // final-overridden methods. They are named with the "Impl" suffix, e.g.,
assert(init_); // `Seek()` logic would be implemented by subclasses in `SeekImpl()`. These
if (global_seqno_ == kDisableGlobalSequenceNumber) { // "Impl" functions are responsible for positioning `raw_key_` but not
return key_->GetKey(); // invoking `UpdateKey()`.
}
ParsedInternalKey parsed(Slice(), 0, kTypeValue);
if (!ParseInternalKey(key_->GetInternalKey(), &parsed)) {
assert(false); // error not handled in optimized builds
return Slice();
}
parsed.sequence = global_seqno_;
scratch_.SetInternalKey(parsed);
return scratch_.GetInternalKey();
}
// True only when no global sequence number is configured and the wrapped
// `key_` itself reports being pinned.
bool IsKeyPinned() const {
  if (global_seqno_ != kDisableGlobalSequenceNumber) {
    return false;
  }
  return key_->IsKeyPinned();
}
private:
const IterKey* key_;
SequenceNumber global_seqno_;
IterKey scratch_;
#ifndef NDEBUG
bool init_ = false;
#endif // NDEBUG
};
template <class TValue> template <class TValue>
class BlockIter : public InternalIteratorBase<TValue> { class BlockIter : public InternalIteratorBase<TValue> {
public: public:
void InitializeBase(const Comparator* comparator, const char* data, void InitializeBase(const Comparator* ucmp, const char* data,
uint32_t restarts, uint32_t num_restarts, uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno, bool block_contents_pinned) { SequenceNumber global_seqno, bool block_contents_pinned) {
assert(data_ == nullptr); // Ensure it is called only once assert(data_ == nullptr); // Ensure it is called only once
assert(num_restarts > 0); // Ensure the param is valid assert(num_restarts > 0); // Ensure the param is valid
applied_key_.Initialize(&raw_key_, global_seqno); ucmp_wrapper_ = UserComparatorWrapper(ucmp);
icmp_ = InternalKeyComparator(ucmp, false /* named */);
comparator_ = comparator;
data_ = data; data_ = data;
restarts_ = restarts; restarts_ = restarts;
num_restarts_ = num_restarts; num_restarts_ = num_restarts;
@ -309,6 +285,43 @@ class BlockIter : public InternalIteratorBase<TValue> {
} }
bool Valid() const override { return current_ < restarts_; } bool Valid() const override { return current_ < restarts_; }
// Final overrides of the iterator-movement entry points. Each one delegates
// the positioning work to the matching `*Impl()` hook and then calls
// `UpdateKey()`, so the hook and the key refresh always run as a pair.
// Being `final`, these cannot be overridden again by subclasses.

// Runs `SeekToFirstImpl()`, then `UpdateKey()`.
virtual void SeekToFirst() override final {
SeekToFirstImpl();
UpdateKey();
}
// Runs `SeekToLastImpl()`, then `UpdateKey()`.
virtual void SeekToLast() override final {
SeekToLastImpl();
UpdateKey();
}
// Runs `SeekImpl(target)`, then `UpdateKey()`.
virtual void Seek(const Slice& target) override final {
SeekImpl(target);
UpdateKey();
}
// Runs `SeekForPrevImpl(target)`, then `UpdateKey()`.
virtual void SeekForPrev(const Slice& target) override final {
SeekForPrevImpl(target);
UpdateKey();
}
// Runs `NextImpl()`, then `UpdateKey()`.
virtual void Next() override final {
NextImpl();
UpdateKey();
}
// Forwards unchanged to the `InternalIteratorBase<TValue>` implementation and
// returns its result; no `UpdateKey()` call is made here (reason below).
virtual bool NextAndGetResult(IterateResult* result) override final {
// This does not need to call `UpdateKey()` as the parent class only has
// access to the `UpdateKey()`-invoking functions.
return InternalIteratorBase<TValue>::NextAndGetResult(result);
}
// Runs `PrevImpl()`, then `UpdateKey()`.
virtual void Prev() override final {
PrevImpl();
UpdateKey();
}
Status status() const override { return status_; } Status status() const override { return status_; }
Slice key() const override { Slice key() const override {
assert(Valid()); assert(Valid());
@ -343,12 +356,10 @@ class BlockIter : public InternalIteratorBase<TValue> {
Cache::Handle* cache_handle() { return cache_handle_; } Cache::Handle* cache_handle() { return cache_handle_; }
virtual void Next() override = 0;
protected: protected:
// Note: The type could be changed to InternalKeyComparator but we see a weird UserComparatorWrapper ucmp_wrapper_;
// performance drop by that. InternalKeyComparator icmp_;
const Comparator* comparator_;
const char* data_; // underlying block contents const char* data_; // underlying block contents
uint32_t num_restarts_; // Number of uint32_t entries in restart array uint32_t num_restarts_; // Number of uint32_t entries in restart array
@ -359,13 +370,12 @@ class BlockIter : public InternalIteratorBase<TValue> {
uint32_t current_; uint32_t current_;
// Raw key from block. // Raw key from block.
IterKey raw_key_; IterKey raw_key_;
// raw_key_ with global seqno applied if necessary. Use this one for // Buffer for key data when global seqno assignment is enabled.
// comparisons. IterKey key_buf_;
GlobalSeqnoAppliedKey applied_key_;
// Key to be exposed to users.
Slice key_;
Slice value_; Slice value_;
Status status_; Status status_;
// Key to be exposed to users.
Slice key_;
bool key_pinned_; bool key_pinned_;
// Whether the block data is guaranteed to outlive this iterator, and // Whether the block data is guaranteed to outlive this iterator, and
// as long as the cleanup functions are transferred to another class, // as long as the cleanup functions are transferred to another class,
@ -373,6 +383,50 @@ class BlockIter : public InternalIteratorBase<TValue> {
bool block_contents_pinned_; bool block_contents_pinned_;
SequenceNumber global_seqno_; SequenceNumber global_seqno_;
// Positioning hooks supplied by subclasses. Each public movement method of
// this class calls the matching hook and then `UpdateKey()`, so a hook only
// has to position the iterator and must leave the `UpdateKey()` call to its
// caller.
virtual void SeekToFirstImpl() = 0;
virtual void SeekToLastImpl() = 0;
virtual void SeekImpl(const Slice& target) = 0;
virtual void SeekForPrevImpl(const Slice& target) = 0;
virtual void NextImpl() = 0;
virtual void PrevImpl() = 0;
// Refreshes the key state exposed to users (`key_`, `key_buf_`,
// `key_pinned_`) from `raw_key_`. It has to run after every repositioning
// whose result may be handed to the caller. When the iterator is not valid,
// the only effect is that `key_buf_` is cleared.
void UpdateKey() {
  key_buf_.Clear();
  if (!Valid()) {
    return;
  }

  const bool holds_user_key = raw_key_.IsUserKey();
  const bool seqno_overridden =
      global_seqno_ != kDisableGlobalSequenceNumber;

  if (!holds_user_key && seqno_overridden) {
    // Internal key with a global seqno in effect: build a copy in `key_buf_`
    // that carries `global_seqno_` together with the stored value type. The
    // exposed key then points into `key_buf_` and is reported as not pinned.
    key_buf_.SetInternalKey(raw_key_.GetUserKey(), global_seqno_,
                            ExtractValueType(raw_key_.GetInternalKey()));
    key_ = key_buf_.GetInternalKey();
    key_pinned_ = false;
    return;
  }

  // No rewrite needed: expose `raw_key_` as-is and inherit its pinned state.
  if (holds_user_key) {
    // A user-key block must not have a global seqno configured.
    assert(global_seqno_ == kDisableGlobalSequenceNumber);
    key_ = raw_key_.GetUserKey();
  } else {
    key_ = raw_key_.GetInternalKey();
  }
  key_pinned_ = raw_key_.IsKeyPinned();
}
// Three-way comparison of the current key (left-hand side, with the global
// seqno applied when one is configured) against `other` (right-hand side).
// User-key blocks are compared with `ucmp_wrapper_`, internal-key blocks
// with `icmp_`. The return value is whatever the chosen comparator's
// `Compare()` returns.
int CompareCurrentKey(const Slice& other) {
  if (raw_key_.IsUserKey()) {
    // A user-key block must not have a global seqno configured.
    assert(global_seqno_ == kDisableGlobalSequenceNumber);
    return ucmp_wrapper_.Compare(raw_key_.GetUserKey(), other);
  }

  if (global_seqno_ != kDisableGlobalSequenceNumber) {
    // Hand the global seqno to the comparator for the left-hand key only;
    // `other` is compared with no seqno override.
    return icmp_.Compare(raw_key_.GetInternalKey(), global_seqno_, other,
                         kDisableGlobalSequenceNumber);
  }
  return icmp_.Compare(raw_key_.GetInternalKey(), other);
}
private: private:
// Store the cache handle, if the block is cached. We need this since the // Store the cache handle, if the block is cached. We need this since the
// only other place the handle is stored is as an argument to the Cleanable // only other place the handle is stored is as an argument to the Cleanable
@ -408,37 +462,31 @@ class BlockIter : public InternalIteratorBase<TValue> {
protected: protected:
template <typename DecodeKeyFunc> template <typename DecodeKeyFunc>
inline bool BinarySeek(const Slice& target, uint32_t left, uint32_t right, inline bool BinarySeek(const Slice& target, uint32_t left, uint32_t right,
uint32_t* index, bool* is_index_key_result, uint32_t* index, bool* is_index_key_result);
const Comparator* comp);
void FindKeyAfterBinarySeek(const Slice& target, uint32_t index, void FindKeyAfterBinarySeek(const Slice& target, uint32_t index,
bool is_index_key_result, const Comparator* comp); bool is_index_key_result);
}; };
class DataBlockIter final : public BlockIter<Slice> { class DataBlockIter final : public BlockIter<Slice> {
public: public:
DataBlockIter() DataBlockIter()
: BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {} : BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {}
DataBlockIter(const Comparator* comparator, const Comparator* user_comparator, DataBlockIter(const Comparator* ucmp, const char* data, uint32_t restarts,
const char* data, uint32_t restarts, uint32_t num_restarts, uint32_t num_restarts, SequenceNumber global_seqno,
SequenceNumber global_seqno,
BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned, BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned,
DataBlockHashIndex* data_block_hash_index) DataBlockHashIndex* data_block_hash_index)
: DataBlockIter() { : DataBlockIter() {
Initialize(comparator, user_comparator, data, restarts, num_restarts, Initialize(ucmp, data, restarts, num_restarts, global_seqno,
global_seqno, read_amp_bitmap, block_contents_pinned, read_amp_bitmap, block_contents_pinned, data_block_hash_index);
data_block_hash_index); }
} void Initialize(const Comparator* ucmp, const char* data, uint32_t restarts,
void Initialize(const Comparator* comparator, uint32_t num_restarts, SequenceNumber global_seqno,
const Comparator* user_comparator, const char* data,
uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno,
BlockReadAmpBitmap* read_amp_bitmap, BlockReadAmpBitmap* read_amp_bitmap,
bool block_contents_pinned, bool block_contents_pinned,
DataBlockHashIndex* data_block_hash_index) { DataBlockHashIndex* data_block_hash_index) {
InitializeBase(comparator, data, restarts, num_restarts, global_seqno, InitializeBase(ucmp, data, restarts, num_restarts, global_seqno,
block_contents_pinned); block_contents_pinned);
user_comparator_ = user_comparator;
raw_key_.SetIsUserKey(false); raw_key_.SetIsUserKey(false);
read_amp_bitmap_ = read_amp_bitmap; read_amp_bitmap_ = read_amp_bitmap;
last_bitmap_offset_ = current_ + 1; last_bitmap_offset_ = current_ + 1;
@ -456,36 +504,32 @@ class DataBlockIter final : public BlockIter<Slice> {
return value_; return value_;
} }
void Seek(const Slice& target) override;
inline bool SeekForGet(const Slice& target) { inline bool SeekForGet(const Slice& target) {
if (!data_block_hash_index_) { if (!data_block_hash_index_) {
Seek(target); SeekImpl(target);
UpdateKey();
return true; return true;
} }
bool res = SeekForGetImpl(target);
return SeekForGetImpl(target); UpdateKey();
return res;
} }
void SeekForPrev(const Slice& target) override;
void Prev() override;
void Next() final override;
// Try to advance to the next entry in the block. If there is data corruption // Try to advance to the next entry in the block. If there is data corruption
// or error, report it to the caller instead of aborting the process. May // or error, report it to the caller instead of aborting the process. May
// incur higher CPU overhead because we need to perform check on every entry. // incur higher CPU overhead because we need to perform check on every entry.
void NextOrReport(); void NextOrReport() {
NextOrReportImpl();
void SeekToFirst() override; UpdateKey();
}
// Try to seek to the first entry in the block. If there is data corruption // Try to seek to the first entry in the block. If there is data corruption
// or error, report it to caller instead of aborting the process. May incur // or error, report it to caller instead of aborting the process. May incur
// higher CPU overhead because we need to perform check on every entry. // higher CPU overhead because we need to perform check on every entry.
void SeekToFirstOrReport(); void SeekToFirstOrReport() {
SeekToFirstOrReportImpl();
void SeekToLast() override; UpdateKey();
}
void Invalidate(Status s) { void Invalidate(Status s) {
InvalidateBase(s); InvalidateBase(s);
@ -495,6 +539,14 @@ class DataBlockIter final : public BlockIter<Slice> {
prev_entries_idx_ = -1; prev_entries_idx_ = -1;
} }
protected:
// Data-block overrides of the `*Impl` positioning hooks. They are declared
// here without bodies.
virtual void SeekToFirstImpl() override;
virtual void SeekToLastImpl() override;
virtual void SeekImpl(const Slice& target) override;
virtual void SeekForPrevImpl(const Slice& target) override;
virtual void NextImpl() override;
virtual void PrevImpl() override;
private: private:
// read-amp bitmap // read-amp bitmap
BlockReadAmpBitmap* read_amp_bitmap_; BlockReadAmpBitmap* read_amp_bitmap_;
@ -525,12 +577,13 @@ class DataBlockIter final : public BlockIter<Slice> {
int32_t prev_entries_idx_ = -1; int32_t prev_entries_idx_ = -1;
DataBlockHashIndex* data_block_hash_index_; DataBlockHashIndex* data_block_hash_index_;
const Comparator* user_comparator_;
template <typename DecodeEntryFunc> template <typename DecodeEntryFunc>
inline bool ParseNextDataKey(const char* limit = nullptr); inline bool ParseNextDataKey(const char* limit = nullptr);
bool SeekForGetImpl(const Slice& target); bool SeekForGetImpl(const Slice& target);
void NextOrReportImpl();
void SeekToFirstOrReportImpl();
}; };
class IndexBlockIter final : public BlockIter<IndexValue> { class IndexBlockIter final : public BlockIter<IndexValue> {
@ -541,22 +594,14 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
// format. // format.
// value_is_full, default true, means that no delta encoding is // value_is_full, default true, means that no delta encoding is
// applied to values. // applied to values.
void Initialize(const Comparator* comparator, void Initialize(const Comparator* ucmp, const char* data, uint32_t restarts,
const Comparator* user_comparator, const char* data, uint32_t num_restarts, SequenceNumber global_seqno,
uint32_t restarts, uint32_t num_restarts, BlockPrefixIndex* prefix_index, bool have_first_key,
SequenceNumber global_seqno, BlockPrefixIndex* prefix_index, bool key_includes_seq, bool value_is_full,
bool have_first_key, bool key_includes_seq, bool block_contents_pinned) {
bool value_is_full, bool block_contents_pinned) { InitializeBase(ucmp, data, restarts, num_restarts,
if (!key_includes_seq) { kDisableGlobalSequenceNumber, block_contents_pinned);
user_comparator_wrapper_ = std::unique_ptr<UserComparatorWrapper>( raw_key_.SetIsUserKey(!key_includes_seq);
new UserComparatorWrapper(user_comparator));
}
InitializeBase(
key_includes_seq ? comparator : user_comparator_wrapper_.get(), data,
restarts, num_restarts, kDisableGlobalSequenceNumber,
block_contents_pinned);
key_includes_seq_ = key_includes_seq;
raw_key_.SetIsUserKey(!key_includes_seq_);
prefix_index_ = prefix_index; prefix_index_ = prefix_index;
value_delta_encoded_ = !value_is_full; value_delta_encoded_ = !value_is_full;
have_first_key_ = have_first_key; have_first_key_ = have_first_key;
@ -568,10 +613,8 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
} }
Slice user_key() const override { Slice user_key() const override {
if (key_includes_seq_) { assert(Valid());
return ExtractUserKey(key()); return raw_key_.GetUserKey();
}
return key();
} }
IndexValue value() const override { IndexValue value() const override {
@ -588,6 +631,13 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
} }
} }
// Forwards `s` to `InvalidateBase()`; no index-specific work is added here.
void Invalidate(Status s) { InvalidateBase(s); }
// Always false while `global_seqno_state_` is set; otherwise defers to
// `BlockIter::IsValuePinned()`.
bool IsValuePinned() const override {
  if (global_seqno_state_ != nullptr) {
    return false;
  }
  return BlockIter::IsValuePinned();
}
protected:
// IndexBlockIter follows a different contract for prefix iterator // IndexBlockIter follows a different contract for prefix iterator
// from data iterators. // from data iterators.
// If prefix of the seek key `target` exists in the file, it must // If prefix of the seek key `target` exists in the file, it must
@ -595,9 +645,9 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
// If the prefix of `target` doesn't exist in the file, it can either // If the prefix of `target` doesn't exist in the file, it can either
// return the result of total order seek, or set both of Valid() = false // return the result of total order seek, or set both of Valid() = false
// and status() = NotFound(). // and status() = NotFound().
void Seek(const Slice& target) override; void SeekImpl(const Slice& target) override;
void SeekForPrev(const Slice&) override { void SeekForPrevImpl(const Slice&) override {
assert(false); assert(false);
current_ = restarts_; current_ = restarts_;
restart_index_ = num_restarts_; restart_index_ = num_restarts_;
@ -608,24 +658,15 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
value_.clear(); value_.clear();
} }
void Prev() override; void PrevImpl() override;
void Next() override; void NextImpl() override;
void SeekToFirst() override; void SeekToFirstImpl() override;
void SeekToLast() override; void SeekToLastImpl() override;
// Forwards `s` to `InvalidateBase()`; no index-specific work is added here.
void Invalidate(Status s) { InvalidateBase(s); }
// Always false while `global_seqno_state_` is set; otherwise defers to
// `BlockIter::IsValuePinned()`.
bool IsValuePinned() const override {
  if (global_seqno_state_ != nullptr) {
    return false;
  }
  return BlockIter::IsValuePinned();
}
private: private:
std::unique_ptr<UserComparatorWrapper> user_comparator_wrapper_;
// Key is in InternalKey format
bool key_includes_seq_;
bool value_delta_encoded_; bool value_delta_encoded_;
bool have_first_key_; // value includes first_internal_key bool have_first_key_; // value includes first_internal_key
BlockPrefixIndex* prefix_index_; BlockPrefixIndex* prefix_index_;

@ -1125,8 +1125,7 @@ Status BlockBasedTable::ReadMetaIndexBlock(
*metaindex_block = std::move(metaindex); *metaindex_block = std::move(metaindex);
// meta block uses bytewise comparator. // meta block uses bytewise comparator.
iter->reset(metaindex_block->get()->NewDataIterator( iter->reset(metaindex_block->get()->NewDataIterator(
BytewiseComparator(), BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber));
kDisableGlobalSequenceNumber));
return Status::OK(); return Status::OK();
} }
@ -1381,15 +1380,14 @@ InternalIteratorBase<IndexValue>* BlockBasedTable::NewIndexIterator(
lookup_context); lookup_context);
} }
template <> template <>
DataBlockIter* BlockBasedTable::InitBlockIterator<DataBlockIter>( DataBlockIter* BlockBasedTable::InitBlockIterator<DataBlockIter>(
const Rep* rep, Block* block, BlockType block_type, const Rep* rep, Block* block, BlockType block_type,
DataBlockIter* input_iter, bool block_contents_pinned) { DataBlockIter* input_iter, bool block_contents_pinned) {
return block->NewDataIterator( return block->NewDataIterator(rep->internal_comparator.user_comparator(),
&rep->internal_comparator, rep->internal_comparator.user_comparator(), rep->get_global_seqno(block_type), input_iter,
rep->get_global_seqno(block_type), input_iter, rep->ioptions.statistics, rep->ioptions.statistics,
block_contents_pinned); block_contents_pinned);
} }
template <> template <>
@ -1397,14 +1395,13 @@ IndexBlockIter* BlockBasedTable::InitBlockIterator<IndexBlockIter>(
const Rep* rep, Block* block, BlockType block_type, const Rep* rep, Block* block, BlockType block_type,
IndexBlockIter* input_iter, bool block_contents_pinned) { IndexBlockIter* input_iter, bool block_contents_pinned) {
return block->NewIndexIterator( return block->NewIndexIterator(
&rep->internal_comparator, rep->internal_comparator.user_comparator(), rep->internal_comparator.user_comparator(),
rep->get_global_seqno(block_type), input_iter, rep->ioptions.statistics, rep->get_global_seqno(block_type), input_iter, rep->ioptions.statistics,
/* total_order_seek */ true, rep->index_has_first_key, /* total_order_seek */ true, rep->index_has_first_key,
rep->index_key_includes_seq, rep->index_value_is_full, rep->index_key_includes_seq, rep->index_value_is_full,
block_contents_pinned); block_contents_pinned);
} }
// If contents is nullptr, this function looks up the block caches for the // If contents is nullptr, this function looks up the block caches for the
// data block referenced by handle, and read the block from disk if necessary. // data block referenced by handle, and read the block from disk if necessary.
// If contents is non-null, it skips the cache lookup and disk read, since // If contents is non-null, it skips the cache lookup and disk read, since
@ -1945,7 +1942,7 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator(
// We don't return pinned data from index blocks, so no need // We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`. // to set `block_contents_pinned`.
return block->second.GetValue()->NewIndexIterator( return block->second.GetValue()->NewIndexIterator(
&rep->internal_comparator, rep->internal_comparator.user_comparator(), rep->internal_comparator.user_comparator(),
rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true, rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
rep->index_has_first_key, rep->index_key_includes_seq, rep->index_has_first_key, rep->index_key_includes_seq,
rep->index_value_is_full); rep->index_value_is_full);

@ -34,8 +34,9 @@ static std::string RandomString(Random *rnd, int len) {
test::RandomString(rnd, len, &r); test::RandomString(rnd, len, &r);
return r; return r;
} }
std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
Random *rnd) { std::string GenerateInternalKey(int primary_key, int secondary_key,
int padding_size, Random *rnd) {
char buf[50]; char buf[50];
char *p = &buf[0]; char *p = &buf[0];
snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key); snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key);
@ -43,6 +44,7 @@ std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
if (padding_size) { if (padding_size) {
k += RandomString(rnd, padding_size); k += RandomString(rnd, padding_size);
} }
AppendInternalKeyFooter(&k, 0 /* seqno */, kTypeValue);
return k; return k;
} }
@ -61,7 +63,8 @@ void GenerateRandomKVs(std::vector<std::string> *keys,
for (int i = from; i < from + len; i += step) { for (int i = from; i < from + len; i += step) {
// generating keys that shares the prefix // generating keys that shares the prefix
for (int j = 0; j < keys_share_prefix; ++j) { for (int j = 0; j < keys_share_prefix; ++j) {
keys->emplace_back(GenerateKey(i, j, padding_size, &rnd)); // `DataBlockIter` assumes it reads only internal keys.
keys->emplace_back(GenerateInternalKey(i, j, padding_size, &rnd));
// 100 bytes values // 100 bytes values
values->emplace_back(RandomString(&rnd, 100)); values->emplace_back(RandomString(&rnd, 100));
@ -97,8 +100,8 @@ TEST_F(BlockTest, SimpleTest) {
// read contents of block sequentially // read contents of block sequentially
int count = 0; int count = 0;
InternalIterator *iter = reader.NewDataIterator( InternalIterator *iter =
options.comparator, options.comparator, kDisableGlobalSequenceNumber); reader.NewDataIterator(options.comparator, kDisableGlobalSequenceNumber);
for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) { for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) {
// read kv from block // read kv from block
Slice k = iter->key(); Slice k = iter->key();
@ -111,8 +114,8 @@ TEST_F(BlockTest, SimpleTest) {
delete iter; delete iter;
// read block contents randomly // read block contents randomly
iter = reader.NewDataIterator(options.comparator, options.comparator, iter =
kDisableGlobalSequenceNumber); reader.NewDataIterator(options.comparator, kDisableGlobalSequenceNumber);
for (int i = 0; i < num_records; i++) { for (int i = 0; i < num_records; i++) {
// find a random key in the lookaside array // find a random key in the lookaside array
int index = rnd.Uniform(num_records); int index = rnd.Uniform(num_records);
@ -158,9 +161,8 @@ void CheckBlockContents(BlockContents contents, const int max_key,
std::unique_ptr<const SliceTransform> prefix_extractor( std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(prefix_size)); NewFixedPrefixTransform(prefix_size));
std::unique_ptr<InternalIterator> regular_iter( std::unique_ptr<InternalIterator> regular_iter(reader2.NewDataIterator(
reader2.NewDataIterator(BytewiseComparator(), BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber));
kDisableGlobalSequenceNumber));
// Seek existent keys // Seek existent keys
for (size_t i = 0; i < keys.size(); i++) { for (size_t i = 0; i < keys.size(); i++) {
@ -177,7 +179,8 @@ void CheckBlockContents(BlockContents contents, const int max_key,
// simply be set as invalid; whereas the binary search based iterator will // simply be set as invalid; whereas the binary search based iterator will
// return the one that is closest. // return the one that is closest.
for (int i = 1; i < max_key - 1; i += 2) { for (int i = 1; i < max_key - 1; i += 2) {
auto key = GenerateKey(i, 0, 0, nullptr); // `DataBlockIter` assumes its APIs receive only internal keys.
auto key = GenerateInternalKey(i, 0, 0, nullptr);
regular_iter->Seek(key); regular_iter->Seek(key);
ASSERT_TRUE(regular_iter->Valid()); ASSERT_TRUE(regular_iter->Valid());
} }
@ -382,8 +385,7 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) {
// read contents of block sequentially // read contents of block sequentially
size_t read_bytes = 0; size_t read_bytes = 0;
DataBlockIter *iter = reader.NewDataIterator( DataBlockIter *iter = reader.NewDataIterator(
options.comparator, options.comparator, kDisableGlobalSequenceNumber, options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get());
nullptr, stats.get());
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
iter->value(); iter->value();
read_bytes += iter->TEST_CurrentEntrySize(); read_bytes += iter->TEST_CurrentEntrySize();
@ -414,8 +416,7 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) {
size_t read_bytes = 0; size_t read_bytes = 0;
DataBlockIter *iter = reader.NewDataIterator( DataBlockIter *iter = reader.NewDataIterator(
options.comparator, options.comparator, kDisableGlobalSequenceNumber, options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get());
nullptr, stats.get());
for (int i = 0; i < num_records; i++) { for (int i = 0; i < num_records; i++) {
Slice k(keys[i]); Slice k(keys[i]);
@ -449,8 +450,7 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) {
size_t read_bytes = 0; size_t read_bytes = 0;
DataBlockIter *iter = reader.NewDataIterator( DataBlockIter *iter = reader.NewDataIterator(
options.comparator, options.comparator, kDisableGlobalSequenceNumber, options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get());
nullptr, stats.get());
std::unordered_set<int> read_keys; std::unordered_set<int> read_keys;
for (int i = 0; i < num_records; i++) { for (int i = 0; i < num_records; i++) {
int index = rnd.Uniform(num_records); int index = rnd.Uniform(num_records);
@ -574,9 +574,8 @@ TEST_P(IndexBlockTest, IndexValueEncodingTest) {
Statistics *kNullStats = nullptr; Statistics *kNullStats = nullptr;
// read contents of block sequentially // read contents of block sequentially
InternalIteratorBase<IndexValue> *iter = reader.NewIndexIterator( InternalIteratorBase<IndexValue> *iter = reader.NewIndexIterator(
options.comparator, options.comparator, kDisableGlobalSequenceNumber, options.comparator, kDisableGlobalSequenceNumber, kNullIter, kNullStats,
kNullIter, kNullStats, kTotalOrderSeek, includeFirstKey(), kIncludesSeq, kTotalOrderSeek, includeFirstKey(), kIncludesSeq, kValueIsFull);
kValueIsFull);
iter->SeekToFirst(); iter->SeekToFirst();
for (int index = 0; index < num_records; ++index) { for (int index = 0; index < num_records; ++index) {
ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(iter->Valid());
@ -595,10 +594,9 @@ TEST_P(IndexBlockTest, IndexValueEncodingTest) {
delete iter; delete iter;
// read block contents randomly // read block contents randomly
iter = reader.NewIndexIterator(options.comparator, options.comparator, iter = reader.NewIndexIterator(
kDisableGlobalSequenceNumber, kNullIter, options.comparator, kDisableGlobalSequenceNumber, kNullIter, kNullStats,
kNullStats, kTotalOrderSeek, includeFirstKey(), kTotalOrderSeek, includeFirstKey(), kIncludesSeq, kValueIsFull);
kIncludesSeq, kValueIsFull);
for (int i = 0; i < num_records * 2; i++) { for (int i = 0; i < num_records * 2; i++) {
// find a random key in the lookaside array // find a random key in the lookaside array
int index = rnd.Uniform(num_records); int index = rnd.Uniform(num_records);

@ -391,7 +391,7 @@ TEST(DataBlockHashIndex, BlockTestSingleKey) {
Block reader(std::move(contents)); Block reader(std::move(contents));
const InternalKeyComparator icmp(BytewiseComparator()); const InternalKeyComparator icmp(BytewiseComparator());
auto iter = reader.NewDataIterator(&icmp, icmp.user_comparator(), auto iter = reader.NewDataIterator(icmp.user_comparator(),
kDisableGlobalSequenceNumber); kDisableGlobalSequenceNumber);
bool may_exist; bool may_exist;
// search in block for the key just inserted // search in block for the key just inserted
@ -475,7 +475,7 @@ TEST(DataBlockHashIndex, BlockTestLarge) {
// random seek existent keys // random seek existent keys
for (int i = 0; i < num_records; i++) { for (int i = 0; i < num_records; i++) {
auto iter = reader.NewDataIterator(&icmp, icmp.user_comparator(), auto iter = reader.NewDataIterator(icmp.user_comparator(),
kDisableGlobalSequenceNumber); kDisableGlobalSequenceNumber);
// find a random key in the lookaside array // find a random key in the lookaside array
int index = rnd.Uniform(num_records); int index = rnd.Uniform(num_records);
@ -513,7 +513,7 @@ TEST(DataBlockHashIndex, BlockTestLarge) {
// C true false // C true false
for (int i = 0; i < num_records; i++) { for (int i = 0; i < num_records; i++) {
auto iter = reader.NewDataIterator(&icmp, icmp.user_comparator(), auto iter = reader.NewDataIterator(icmp.user_comparator(),
kDisableGlobalSequenceNumber); kDisableGlobalSequenceNumber);
// find a random key in the lookaside array // find a random key in the lookaside array
int index = rnd.Uniform(num_records); int index = rnd.Uniform(num_records);

@ -133,7 +133,7 @@ InternalIteratorBase<IndexValue>* HashIndexReader::NewIterator(
// We don't return pinned data from index blocks, so no need // We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`. // to set `block_contents_pinned`.
auto it = index_block.GetValue()->NewIndexIterator( auto it = index_block.GetValue()->NewIndexIterator(
internal_comparator(), internal_comparator()->user_comparator(), internal_comparator()->user_comparator(),
rep->get_global_seqno(BlockType::kIndex), iter, kNullStats, rep->get_global_seqno(BlockType::kIndex), iter, kNullStats,
total_order_seek, index_has_first_key(), index_key_includes_seq(), total_order_seek, index_has_first_key(), index_key_includes_seq(),
index_value_is_full(), false /* block_contents_pinned */, index_value_is_full(), false /* block_contents_pinned */,

@ -238,7 +238,7 @@ BlockHandle PartitionedFilterBlockReader::GetFilterPartitionHandle(
const InternalKeyComparator* const comparator = internal_comparator(); const InternalKeyComparator* const comparator = internal_comparator();
Statistics* kNullStats = nullptr; Statistics* kNullStats = nullptr;
filter_block.GetValue()->NewIndexIterator( filter_block.GetValue()->NewIndexIterator(
comparator, comparator->user_comparator(), comparator->user_comparator(),
table()->get_rep()->get_global_seqno(BlockType::kFilter), &iter, table()->get_rep()->get_global_seqno(BlockType::kFilter), &iter,
kNullStats, true /* total_order_seek */, false /* have_first_key */, kNullStats, true /* total_order_seek */, false /* have_first_key */,
index_key_includes_seq(), index_value_is_full()); index_key_includes_seq(), index_value_is_full());
@ -441,10 +441,10 @@ void PartitionedFilterBlockReader::CacheDependencies(const ReadOptions& ro,
const InternalKeyComparator* const comparator = internal_comparator(); const InternalKeyComparator* const comparator = internal_comparator();
Statistics* kNullStats = nullptr; Statistics* kNullStats = nullptr;
filter_block.GetValue()->NewIndexIterator( filter_block.GetValue()->NewIndexIterator(
comparator, comparator->user_comparator(), comparator->user_comparator(), rep->get_global_seqno(BlockType::kFilter),
rep->get_global_seqno(BlockType::kFilter), &biter, kNullStats, &biter, kNullStats, true /* total_order_seek */,
true /* total_order_seek */, false /* have_first_key */, false /* have_first_key */, index_key_includes_seq(),
index_key_includes_seq(), index_value_is_full()); index_value_is_full());
// Index partitions are assumed to be consecuitive. Prefetch them all. // Index partitions are assumed to be consecuitive. Prefetch them all.
// Read the first block offset // Read the first block offset
biter.SeekToFirst(); biter.SeekToFirst();

@ -70,7 +70,7 @@ InternalIteratorBase<IndexValue>* PartitionIndexReader::NewIterator(
new BlockBasedTable::PartitionedIndexIteratorState(table(), new BlockBasedTable::PartitionedIndexIteratorState(table(),
&partition_map_), &partition_map_),
index_block.GetValue()->NewIndexIterator( index_block.GetValue()->NewIndexIterator(
internal_comparator(), internal_comparator()->user_comparator(), internal_comparator()->user_comparator(),
rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true, rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
index_has_first_key(), index_key_includes_seq(), index_has_first_key(), index_key_includes_seq(),
index_value_is_full())); index_value_is_full()));
@ -82,7 +82,7 @@ InternalIteratorBase<IndexValue>* PartitionIndexReader::NewIterator(
// to set `block_contents_pinned`. // to set `block_contents_pinned`.
std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter( std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter(
index_block.GetValue()->NewIndexIterator( index_block.GetValue()->NewIndexIterator(
internal_comparator(), internal_comparator()->user_comparator(), internal_comparator()->user_comparator(),
rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true, rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
index_has_first_key(), index_key_includes_seq(), index_has_first_key(), index_key_includes_seq(),
index_value_is_full())); index_value_is_full()));
@ -126,7 +126,7 @@ void PartitionIndexReader::CacheDependencies(const ReadOptions& ro, bool pin) {
// We don't return pinned data from index blocks, so no need // We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`. // to set `block_contents_pinned`.
index_block.GetValue()->NewIndexIterator( index_block.GetValue()->NewIndexIterator(
internal_comparator(), internal_comparator()->user_comparator(), internal_comparator()->user_comparator(),
rep->get_global_seqno(BlockType::kIndex), &biter, kNullStats, true, rep->get_global_seqno(BlockType::kIndex), &biter, kNullStats, true,
index_has_first_key(), index_key_includes_seq(), index_value_is_full()); index_has_first_key(), index_key_includes_seq(), index_value_is_full());
// Index partitions are assumed to be consecuitive. Prefetch them all. // Index partitions are assumed to be consecuitive. Prefetch them all.

@ -231,7 +231,7 @@ Status ReadProperties(const ReadOptions& read_options,
Block properties_block(std::move(block_contents)); Block properties_block(std::move(block_contents));
DataBlockIter iter; DataBlockIter iter;
properties_block.NewDataIterator(BytewiseComparator(), BytewiseComparator(), properties_block.NewDataIterator(BytewiseComparator(),
kDisableGlobalSequenceNumber, &iter); kDisableGlobalSequenceNumber, &iter);
auto new_table_properties = new TableProperties(); auto new_table_properties = new TableProperties();
@ -395,8 +395,7 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
// are to compress it. // are to compress it.
Block metaindex_block(std::move(metaindex_contents)); Block metaindex_block(std::move(metaindex_contents));
std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator( std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
BytewiseComparator(), BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber));
kDisableGlobalSequenceNumber));
// -- Read property block // -- Read property block
bool found_properties_block = true; bool found_properties_block = true;
@ -468,8 +467,7 @@ Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
std::unique_ptr<InternalIterator> meta_iter; std::unique_ptr<InternalIterator> meta_iter;
meta_iter.reset(metaindex_block.NewDataIterator( meta_iter.reset(metaindex_block.NewDataIterator(
BytewiseComparator(), BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber));
kDisableGlobalSequenceNumber));
return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle); return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle);
} }
@ -514,8 +512,7 @@ Status ReadMetaBlock(RandomAccessFileReader* file,
std::unique_ptr<InternalIterator> meta_iter; std::unique_ptr<InternalIterator> meta_iter;
meta_iter.reset(metaindex_block.NewDataIterator( meta_iter.reset(metaindex_block.NewDataIterator(
BytewiseComparator(), BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber));
kDisableGlobalSequenceNumber));
BlockHandle block_handle; BlockHandle block_handle;
status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle); status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle);

@ -211,47 +211,6 @@ class Constructor {
stl_wrappers::KVMap data_; stl_wrappers::KVMap data_;
}; };
class BlockConstructor: public Constructor {
public:
explicit BlockConstructor(const Comparator* cmp)
: Constructor(cmp),
comparator_(cmp),
block_(nullptr) { }
~BlockConstructor() override { delete block_; }
Status FinishImpl(const Options& /*options*/,
const ImmutableCFOptions& /*ioptions*/,
const MutableCFOptions& /*moptions*/,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& /*internal_comparator*/,
const stl_wrappers::KVMap& kv_map) override {
delete block_;
block_ = nullptr;
BlockBuilder builder(table_options.block_restart_interval);
for (const auto& kv : kv_map) {
builder.Add(kv.first, kv.second);
}
// Open the block
data_ = builder.Finish().ToString();
BlockContents contents;
contents.data = data_;
block_ = new Block(std::move(contents));
return Status::OK();
}
InternalIterator* NewIterator(
const SliceTransform* /*prefix_extractor*/) const override {
return block_->NewDataIterator(comparator_, comparator_,
kDisableGlobalSequenceNumber);
}
private:
const Comparator* comparator_;
std::string data_;
Block* block_;
BlockConstructor();
};
// A helper class that converts internal format keys into user keys // A helper class that converts internal format keys into user keys
class KeyConvertingIterator : public InternalIterator { class KeyConvertingIterator : public InternalIterator {
public: public:
@ -309,7 +268,56 @@ class KeyConvertingIterator : public InternalIterator {
void operator=(const KeyConvertingIterator&); void operator=(const KeyConvertingIterator&);
}; };
class TableConstructor: public Constructor { // `BlockConstructor` APIs always accept/return user keys.
class BlockConstructor : public Constructor {
public:
explicit BlockConstructor(const Comparator* cmp)
: Constructor(cmp), comparator_(cmp), block_(nullptr) {}
~BlockConstructor() override { delete block_; }
Status FinishImpl(const Options& /*options*/,
const ImmutableCFOptions& /*ioptions*/,
const MutableCFOptions& /*moptions*/,
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& /*internal_comparator*/,
const stl_wrappers::KVMap& kv_map) override {
delete block_;
block_ = nullptr;
BlockBuilder builder(table_options.block_restart_interval);
for (const auto& kv : kv_map) {
// `DataBlockIter` assumes it reads only internal keys. `BlockConstructor`
// clients provide user keys, so we need to convert to internal key format
// before writing the data block.
ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
std::string encoded;
AppendInternalKey(&encoded, ikey);
builder.Add(encoded, kv.second);
}
// Open the block
data_ = builder.Finish().ToString();
BlockContents contents;
contents.data = data_;
block_ = new Block(std::move(contents));
return Status::OK();
}
InternalIterator* NewIterator(
const SliceTransform* /*prefix_extractor*/) const override {
// `DataBlockIter` returns the internal keys it reads.
// `KeyConvertingIterator` converts them to user keys before they are
// exposed to the `BlockConstructor` clients.
return new KeyConvertingIterator(
block_->NewDataIterator(comparator_, kDisableGlobalSequenceNumber));
}
private:
const Comparator* comparator_;
std::string data_;
Block* block_;
BlockConstructor();
};
class TableConstructor : public Constructor {
public: public:
explicit TableConstructor(const Comparator* cmp, explicit TableConstructor(const Comparator* cmp,
bool convert_to_internal_key = false, bool convert_to_internal_key = false,
@ -4350,8 +4358,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
Block metaindex_block(std::move(metaindex_contents)); Block metaindex_block(std::move(metaindex_contents));
std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator( std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
BytewiseComparator(), BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber));
kDisableGlobalSequenceNumber));
bool found_properties_block = true; bool found_properties_block = true;
ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block)); ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block));
ASSERT_TRUE(found_properties_block); ASSERT_TRUE(found_properties_block);
@ -4430,7 +4437,7 @@ TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
// verify properties block comes last // verify properties block comes last
std::unique_ptr<InternalIterator> metaindex_iter{ std::unique_ptr<InternalIterator> metaindex_iter{
metaindex_block.NewDataIterator(options.comparator, options.comparator, metaindex_block.NewDataIterator(options.comparator,
kDisableGlobalSequenceNumber)}; kDisableGlobalSequenceNumber)};
uint64_t max_offset = 0; uint64_t max_offset = 0;
std::string key_at_max_offset; std::string key_at_max_offset;

@ -17,6 +17,10 @@ namespace ROCKSDB_NAMESPACE {
// perf_context.user_key_comparison_count. // perf_context.user_key_comparison_count.
class UserComparatorWrapper final : public Comparator { class UserComparatorWrapper final : public Comparator {
public: public:
// `UserComparatorWrapper`s constructed with the default constructor are not
// usable and will segfault on any attempt to use them for comparisons.
UserComparatorWrapper() : user_comparator_(nullptr) {}
explicit UserComparatorWrapper(const Comparator* const user_cmp) explicit UserComparatorWrapper(const Comparator* const user_cmp)
: Comparator(user_cmp->timestamp_size()), user_comparator_(user_cmp) {} : Comparator(user_cmp->timestamp_size()), user_comparator_(user_cmp) {}

Loading…
Cancel
Save