diff --git a/db/dbformat.h b/db/dbformat.h index 81cfceb03..2b88de732 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -346,6 +346,12 @@ class IterKey { ~IterKey() { ResetBuffer(); } + // The bool will be picked up by the next calls to SetKey + void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; } + + // Returns the key in whichever format that was provided to KeyIter + Slice GetKey() const { return Slice(key_, key_size_); } + Slice GetInternalKey() const { assert(!IsUserKey()); return Slice(key_, key_size_); @@ -395,6 +401,11 @@ class IterKey { key_size_ = total_size; } + Slice SetKey(const Slice& key, bool copy = true) { + // is_user_key_ expected to be set already via SetIsUserKey + return SetKeyImpl(key, copy); + } + Slice SetUserKey(const Slice& key, bool copy = true) { is_user_key_ = true; return SetKeyImpl(key, copy); diff --git a/table/block.cc b/table/block.cc index 0efb3b73b..c1891801c 100644 --- a/table/block.cc +++ b/table/block.cc @@ -56,12 +56,40 @@ static inline const char* DecodeEntry(const char* p, const char* limit, return p; } -void BlockIter::Next() { +void DataBlockIter::Next() { assert(Valid()); - ParseNextKey(); + ParseNextDataKey(); } -void BlockIter::Prev() { +void IndexBlockIter::Next() { + assert(Valid()); + ParseNextIndexKey(); +} + +void IndexBlockIter::Prev() { + assert(Valid()); + // Scan backwards to a restart point before current_ + const uint32_t original = current_; + while (GetRestartPoint(restart_index_) >= original) { + if (restart_index_ == 0) { + // No more entries + current_ = restarts_; + restart_index_ = num_restarts_; + return; + } + restart_index_--; + } + SeekToRestartPoint(restart_index_); + do { + if (!ParseNextIndexKey()) { + break; + } + // Loop until end of current entry hits the start of original entry + } while (NextEntryOffset() < original); +} + +// Similar to IndexBlockIter::Prev but also caches the prev entries +void DataBlockIter::Prev() { assert(Valid()); assert(prev_entries_idx_ == -1 || @@ -87,11 +115,7 @@ void BlockIter::Prev() { const Slice current_key(key_ptr, current_prev_entry.key_size); current_ = current_prev_entry.offset; - if (key_includes_seq_) { - key_.SetInternalKey(current_key, false /* copy */); - } else { - key_.SetUserKey(current_key, false /* copy */); - } + key_.SetKey(current_key, false /* copy */); value_ = current_prev_entry.value; return; @@ -117,7 +141,7 @@ void BlockIter::Prev() { SeekToRestartPoint(restart_index_); do { - if (!ParseNextKey()) { + if (!ParseNextDataKey()) { break; } Slice current_key = key(); @@ -155,7 +179,7 @@ void DataBlockIter::Seek(const Slice& target) { // Linear search (within restart block) for first key >= target while (true) { - if (!ParseNextKey() || Compare(key_, seek_key) >= 0) { + if (!ParseNextDataKey() || Compare(key_, seek_key) >= 0) { return; } } @@ -175,8 +199,7 @@ void IndexBlockIter::Seek(const Slice& target) { if (prefix_index_) { ok = PrefixSeek(target, &index); } else { - ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, - key_includes_seq_ ? comparator_ : user_comparator_); + ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, active_comparator_); } if (!ok) { @@ -186,7 +209,7 @@ void IndexBlockIter::Seek(const Slice& target) { // Linear search (within restart block) for first key >= target while (true) { - if (!ParseNextKey() || Compare(key_, seek_key) >= 0) { + if (!ParseNextIndexKey() || Compare(key_, seek_key) >= 0) { return; } } @@ -207,7 +230,7 @@ void DataBlockIter::SeekForPrev(const Slice& target) { SeekToRestartPoint(index); // Linear search (within restart block) for first key >= seek_key - while (ParseNextKey() && Compare(key_, seek_key) < 0) { + while (ParseNextDataKey() && Compare(key_, seek_key) < 0) { } if (!Valid()) { SeekToLast(); @@ -218,20 +241,38 @@ void DataBlockIter::SeekForPrev(const Slice& target) { } } -void BlockIter::SeekToFirst() { +void DataBlockIter::SeekToFirst() { + if (data_ == nullptr) { // Not init yet + return; + } + SeekToRestartPoint(0); + ParseNextDataKey(); +} + +void IndexBlockIter::SeekToFirst() { if (data_ == nullptr) { // Not init yet return; } SeekToRestartPoint(0); - ParseNextKey(); + ParseNextIndexKey(); } -void BlockIter::SeekToLast() { +void DataBlockIter::SeekToLast() { if (data_ == nullptr) { // Not init yet return; } SeekToRestartPoint(num_restarts_ - 1); - while (ParseNextKey() && NextEntryOffset() < restarts_) { + while (ParseNextDataKey() && NextEntryOffset() < restarts_) { + // Keep skipping + } +} + +void IndexBlockIter::SeekToLast() { + if (data_ == nullptr) { // Not init yet + return; + } + SeekToRestartPoint(num_restarts_ - 1); + while (ParseNextIndexKey() && NextEntryOffset() < restarts_) { // Keep skipping } } @@ -244,7 +285,7 @@ void BlockIter::CorruptionError() { value_.clear(); } -bool BlockIter::ParseNextKey() { +bool DataBlockIter::ParseNextDataKey() { current_ = NextEntryOffset(); const char* p = data_ + current_; const char* limit = data_ + restarts_; // Restarts come right after data @@ -265,11 +306,7 @@ bool BlockIter::ParseNextKey() { if (shared == 0) { // If this key dont share any bytes with prev key then we dont need // to decode it and can use it's address in the block directly. - if (key_includes_seq_) { - key_.SetInternalKey(Slice(p, non_shared), false /* copy */); - } else { - key_.SetUserKey(Slice(p, non_shared), false /* copy */); - } + key_.SetKey(Slice(p, non_shared), false /* copy */); key_pinned_ = true; } else { // This key share `shared` bytes with prev key, we need to decode it @@ -283,7 +320,7 @@ bool BlockIter::ParseNextKey() { // type is kTypeValue, kTypeMerge, kTypeDeletion, or kTypeRangeDeletion. assert(GetInternalKeySeqno(key_.GetInternalKey()) == 0); - ValueType value_type = ExtractValueType(key_.GetInternalKey()); + ValueType value_type = ExtractValueType(key_.GetKey()); assert(value_type == ValueType::kTypeValue || value_type == ValueType::kTypeMerge || value_type == ValueType::kTypeDeletion || @@ -311,6 +348,42 @@ bool BlockIter::ParseNextKey() { } } +bool IndexBlockIter::ParseNextIndexKey() { + current_ = NextEntryOffset(); + const char* p = data_ + current_; + const char* limit = data_ + restarts_; // Restarts come right after data + if (p >= limit) { + // No more entries to return. Mark as invalid. + current_ = restarts_; + restart_index_ = num_restarts_; + return false; + } + + // Decode next entry + uint32_t shared, non_shared, value_length; + p = DecodeEntry(p, limit, &shared, &non_shared, &value_length); + if (p == nullptr || key_.Size() < shared) { + CorruptionError(); + return false; + } + if (shared == 0) { + // If this key dont share any bytes with prev key then we dont need + // to decode it and can use it's address in the block directly. + key_.SetKey(Slice(p, non_shared), false /* copy */); + key_pinned_ = true; + } else { + // This key share `shared` bytes with prev key, we need to decode it + key_.TrimAppend(shared, p, non_shared); + key_pinned_ = false; + } + value_ = Slice(p + non_shared, value_length); + while (restart_index_ + 1 < num_restarts_ && + GetRestartPoint(restart_index_ + 1) < current_) { + ++restart_index_; + } + return true; +} + // Binary search in restart array to find the first restart point that // is either the last restart point with a key less than target, // which means the key of next restart point is larger than target, or @@ -349,7 +422,7 @@ bool BlockIter::BinarySeek(const Slice& target, uint32_t left, uint32_t right, } // Compare target key and the block key of the block of `block_index`. -// Return -1 if eror. +// Return -1 if error. int IndexBlockIter::CompareBlockKey(uint32_t block_index, const Slice& target) { uint32_t region_offset = GetRestartPoint(block_index); uint32_t shared, non_shared, value_length; diff --git a/table/block.h b/table/block.h index d1e04c6d0..8ee450ca9 100644 --- a/table/block.h +++ b/table/block.h @@ -208,8 +208,7 @@ class BlockIter : public InternalIterator { public: void InitializeBase(const Comparator* comparator, const char* data, uint32_t restarts, uint32_t num_restarts, - SequenceNumber global_seqno, bool key_includes_seq, - bool block_contents_pinned) { + SequenceNumber global_seqno, bool block_contents_pinned) { assert(data_ == nullptr); // Ensure it is called only once assert(num_restarts > 0); // Ensure the param is valid @@ -220,13 +219,12 @@ class BlockIter : public InternalIterator { current_ = restarts_; restart_index_ = num_restarts_; global_seqno_ = global_seqno; - key_includes_seq_ = key_includes_seq; block_contents_pinned_ = block_contents_pinned; } // Makes Valid() return false, status() return `s`, and Seek()/Prev()/etc do // nothing. Calls cleanup functions. - void Invalidate(Status s) { + void InvalidateBase(Status s) { // Assert that the BlockIter is never deleted while Pinning is Enabled. assert(!pinned_iters_mgr_ || (pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled())); @@ -237,32 +235,19 @@ class BlockIter : public InternalIterator { // Call cleanup callbacks. Cleanable::Reset(); - - // Clear prev entries cache. - prev_entries_keys_buff_.clear(); - prev_entries_.clear(); - prev_entries_idx_ = -1; } virtual bool Valid() const override { return current_ < restarts_; } virtual Status status() const override { return status_; } virtual Slice key() const override { assert(Valid()); - return key_.GetInternalKey(); + return key_.GetKey(); } virtual Slice value() const override { assert(Valid()); return value_; } - virtual void Next() override; - - virtual void Prev() override; - - virtual void SeekToFirst() override; - - virtual void SeekToLast() override; - #ifndef NDEBUG virtual ~BlockIter() { // Assert that the BlockIter is never deleted while Pinning is Enabled. @@ -305,34 +290,8 @@ class BlockIter : public InternalIterator { bool key_pinned_; // whether the block data is guaranteed to outlive this iterator bool block_contents_pinned_; - // Key is in InternalKey format - bool key_includes_seq_; SequenceNumber global_seqno_; - struct CachedPrevEntry { - explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr, - size_t _key_offset, size_t _key_size, Slice _value) - : offset(_offset), - key_ptr(_key_ptr), - key_offset(_key_offset), - key_size(_key_size), - value(_value) {} - - // offset of entry in block - uint32_t offset; - // Pointer to key data in block (nullptr if key is delta-encoded) - const char* key_ptr; - // offset of key in prev_entries_keys_buff_ (0 if key_ptr is not nullptr) - size_t key_offset; - // size of key - size_t key_size; - // value slice pointing to data in block - Slice value; - }; - std::string prev_entries_keys_buff_; - std::vector prev_entries_; - int32_t prev_entries_idx_ = -1; - public: // Return the offset in data_ just past the end of the current entry. inline uint32_t NextEntryOffset() const { @@ -357,7 +316,6 @@ class BlockIter : public InternalIterator { void CorruptionError(); - bool ParseNextKey(); bool BinarySeek(const Slice& target, uint32_t left, uint32_t right, uint32_t* index, const Comparator* comp); }; @@ -380,9 +338,9 @@ class DataBlockIter final : public BlockIter { SequenceNumber global_seqno, BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned) { - const bool kKeyIncludesSeq = true; InitializeBase(comparator, data, restarts, num_restarts, global_seqno, - kKeyIncludesSeq, block_contents_pinned); + block_contents_pinned); + key_.SetIsUserKey(false); read_amp_bitmap_ = read_amp_bitmap; last_bitmap_offset_ = current_ + 1; } @@ -402,11 +360,52 @@ class DataBlockIter final : public BlockIter { virtual void SeekForPrev(const Slice& target) override; + virtual void Prev() override; + + virtual void Next() override; + + virtual void SeekToFirst() override; + + virtual void SeekToLast() override; + + void Invalidate(Status s) { + InvalidateBase(s); + // Clear prev entries cache. + prev_entries_keys_buff_.clear(); + prev_entries_.clear(); + prev_entries_idx_ = -1; + } + private: // read-amp bitmap BlockReadAmpBitmap* read_amp_bitmap_; // last `current_` value we report to read-amp bitmp mutable uint32_t last_bitmap_offset_; + struct CachedPrevEntry { + explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr, + size_t _key_offset, size_t _key_size, Slice _value) + : offset(_offset), + key_ptr(_key_ptr), + key_offset(_key_offset), + key_size(_key_size), + value(_value) {} + + // offset of entry in block + uint32_t offset; + // Pointer to key data in block (nullptr if key is delta-encoded) + const char* key_ptr; + // offset of key in prev_entries_keys_buff_ (0 if key_ptr is not nullptr) + size_t key_offset; + // size of key + size_t key_size; + // value slice pointing to data in block + Slice value; + }; + std::string prev_entries_keys_buff_; + std::vector prev_entries_; + int32_t prev_entries_idx_ = -1; + + bool ParseNextDataKey(); inline int Compare(const IterKey& ikey, const Slice& b) const { return comparator_->Compare(ikey.GetInternalKey(), b); @@ -419,7 +418,7 @@ class IndexBlockIter final : public BlockIter { virtual Slice key() const override { assert(Valid()); - return key_includes_seq_ ? key_.GetInternalKey() : key_.GetUserKey(); + return key_.GetKey(); } IndexBlockIter(const Comparator* comparator, const Comparator* user_comparator, const char* data, @@ -436,10 +435,11 @@ class IndexBlockIter final : public BlockIter { uint32_t restarts, uint32_t num_restarts, BlockPrefixIndex* prefix_index, bool key_includes_seq, bool block_contents_pinned) { - user_comparator_ = user_comparator; InitializeBase(comparator, data, restarts, num_restarts, - kDisableGlobalSequenceNumber, key_includes_seq, - block_contents_pinned); + kDisableGlobalSequenceNumber, block_contents_pinned); + key_includes_seq_ = key_includes_seq; + active_comparator_ = key_includes_seq_ ? comparator_ : user_comparator; + key_.SetIsUserKey(!key_includes_seq_); prefix_index_ = prefix_index; } @@ -450,12 +450,22 @@ class IndexBlockIter final : public BlockIter { current_ = restarts_; restart_index_ = num_restarts_; status_ = Status::InvalidArgument( - "RocksDB internal error: should never all SeekForPrev() for index " + "RocksDB internal error: should never call SeekForPrev() on index " "blocks"); key_.Clear(); value_.clear(); } + virtual void Prev() override; + + virtual void Next() override; + + virtual void SeekToFirst() override; + + virtual void SeekToLast() override; + + void Invalidate(Status s) { InvalidateBase(s); } + private: bool PrefixSeek(const Slice& target, uint32_t* index); bool BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids, @@ -464,23 +474,19 @@ class IndexBlockIter final : public BlockIter { int CompareBlockKey(uint32_t block_index, const Slice& target); inline int Compare(const Slice& a, const Slice& b) const { - if (key_includes_seq_) { - return comparator_->Compare(a, b); - } else { - return user_comparator_->Compare(a, b); - } + return active_comparator_->Compare(a, b); } inline int Compare(const IterKey& ikey, const Slice& b) const { - if (key_includes_seq_) { - return comparator_->Compare(ikey.GetInternalKey(), b); - } else { - return user_comparator_->Compare(ikey.GetUserKey(), b); - } + return active_comparator_->Compare(ikey.GetKey(), b); } - // Same as comparator_ if comparator_ is not InernalKeyComparator - const Comparator* user_comparator_; + bool ParseNextIndexKey(); + + // Key is in InternalKey format + bool key_includes_seq_; + // key_includes_seq_ ? comparator_ : user_comparator_ + const Comparator* active_comparator_; BlockPrefixIndex* prefix_index_; };