diff --git a/table/block.cc b/table/block.cc index a6672d3ca..8b08ebbc6 100644 --- a/table/block.cc +++ b/table/block.cc @@ -63,6 +63,40 @@ void BlockIter::Next() { void BlockIter::Prev() { assert(Valid()); + assert(prev_entries_idx_ == -1 || + static_cast(prev_entries_idx_) < prev_entries_.size()); + // Check if we can use cached prev_entries_ + if (prev_entries_idx_ > 0 && + prev_entries_[prev_entries_idx_].offset == current_) { + // Read cached CachedPrevEntry + prev_entries_idx_--; + const CachedPrevEntry& current_prev_entry = + prev_entries_[prev_entries_idx_]; + + const char* key_ptr = current_prev_entry.key_ptr; + if (current_prev_entry.key_ptr != nullptr) { + // The key is not delta encoded and stored in the data block + key_ptr = current_prev_entry.key_ptr; + key_pinned_ = true; + } else { + // The key is delta encoded and stored in prev_entries_keys_buff_ + key_ptr = prev_entries_keys_buff_.data() + current_prev_entry.key_offset; + key_pinned_ = false; + } + const Slice current_key(key_ptr, current_prev_entry.key_size); + + current_ = current_prev_entry.offset; + key_.SetKey(current_key, false /* copy */); + value_ = current_prev_entry.value; + + return; + } + + // Clear prev entries cache + prev_entries_idx_ = -1; + prev_entries_.clear(); + prev_entries_keys_buff_.clear(); + // Scan backwards to a restart point before current_ const uint32_t original = current_; while (GetRestartPoint(restart_index_) >= original) { @@ -76,9 +110,28 @@ void BlockIter::Prev() { } SeekToRestartPoint(restart_index_); + do { + if (!ParseNextKey()) { + break; + } + Slice current_key = key(); + + if (key_.IsKeyPinned()) { + // The key is not delta encoded + prev_entries_.emplace_back(current_, current_key.data(), 0, + current_key.size(), value()); + } else { + // The key is delta encoded, cache decoded key in buffer + size_t new_key_offset = prev_entries_keys_buff_.size(); + prev_entries_keys_buff_.append(current_key.data(), current_key.size()); + + prev_entries_.emplace_back(current_, nullptr, new_key_offset, + current_key.size(), value()); + } // Loop until end of current entry hits the start of original entry - } while (ParseNextKey() && NextEntryOffset() < original); + } while (NextEntryOffset() < original); + prev_entries_idx_ = prev_entries_.size() - 1; } void BlockIter::Seek(const Slice& target) { @@ -155,9 +208,11 @@ bool BlockIter::ParseNextKey() { // If this key dont share any bytes with prev key then we dont need // to decode it and can use it's address in the block directly. key_.SetKey(Slice(p, non_shared), false /* copy */); + key_pinned_ = true; } else { // This key share `shared` bytes with prev key, we need to decode it key_.TrimAppend(shared, p, non_shared); + key_pinned_ = false; } value_ = Slice(p + non_shared, value_length); while (restart_index_ + 1 < num_restarts_ && diff --git a/table/block.h b/table/block.h index 200be753c..033b27ba8 100644 --- a/table/block.h +++ b/table/block.h @@ -10,6 +10,8 @@ #pragma once #include #include +#include +#include #ifdef ROCKSDB_MALLOC_USABLE_SIZE #include #endif @@ -96,7 +98,8 @@ class BlockIter : public InternalIterator { current_(0), restart_index_(0), status_(Status::OK()), - prefix_index_(nullptr) {} + prefix_index_(nullptr), + key_pinned_(false) {} BlockIter(const Comparator* comparator, const char* data, uint32_t restarts, uint32_t num_restarts, BlockPrefixIndex* prefix_index) @@ -157,7 +160,7 @@ class BlockIter : public InternalIterator { PinnedIteratorsManager* pinned_iters_mgr_ = nullptr; #endif - virtual bool IsKeyPinned() const override { return key_.IsKeyPinned(); } + virtual bool IsKeyPinned() const override { return key_pinned_; } private: const Comparator* comparator_; @@ -172,6 +175,31 @@ class BlockIter : public InternalIterator { Slice value_; Status status_; BlockPrefixIndex* prefix_index_; + bool key_pinned_; + + struct CachedPrevEntry { + explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr, + size_t _key_offset, size_t _key_size, Slice _value) + : offset(_offset), + key_ptr(_key_ptr), + key_offset(_key_offset), + key_size(_key_size), + value(_value) {} + + // offset of entry in block + uint32_t offset; + // Pointer to key data in block (nullptr if key is delta-encoded) + const char* key_ptr; + // offset of key in prev_entries_keys_buff_ (0 if key_ptr is not nullptr) + size_t key_offset; + // size of key + size_t key_size; + // value slice pointing to data in block + Slice value; + }; + std::string prev_entries_keys_buff_; + std::vector prev_entries_; + int32_t prev_entries_idx_ = -1; inline int Compare(const Slice& a, const Slice& b) const { return comparator_->Compare(a, b);