minimize BlockIter comparator scope (#7149)

Summary:
PR https://github.com/facebook/rocksdb/issues/6944 transitioned `BlockIter` from using `Comparator*` to using
concrete `UserComparatorWrapper` and `InternalKeyComparator`. However,
adding them as instance variables to `BlockIter` was not optimal.
Bloating `BlockIter` caused the `ArenaWrappedDBIter`'s arena allocator to do more heap
allocations (in certain cases) which harmed performance of `DB::NewIterator()`. This PR
pushes down the concrete comparator objects to the point of usage, which
forces them to be on the stack. As a result, the `BlockIter` is back to
its original size prior to https://github.com/facebook/rocksdb/issues/6944 (actually a bit smaller since there
were two `Comparator*` before).

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7149

Test Plan:
verified our internal `DB::NewIterator()`-heavy regression
test no longer reports regression.

Reviewed By: riversand963

Differential Revision: D22623189

Pulled By: ajkr

fbshipit-source-id: f6d69accfe5de51e0bd9874a480b32b29909bab6
main
Andrew Kryczka 4 years ago committed by Facebook GitHub Bot
parent 9870704420
commit 643c863b72
  1. 16
      table/block_based/block.cc
  2. 52
      table/block_based/block.h

@ -351,7 +351,7 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
return true; return true;
} }
if (ucmp_wrapper_.Compare(raw_key_.GetUserKey(), target_user_key) != 0) { if (ucmp().Compare(raw_key_.GetUserKey(), target_user_key) != 0) {
// the key is not in this block and cannot be at the next block either. // the key is not in this block and cannot be at the next block either.
return false; return false;
} }
@ -969,7 +969,7 @@ Block::Block(BlockContents&& contents, size_t read_amp_bytes_per_bit,
} }
} }
DataBlockIter* Block::NewDataIterator(const Comparator* ucmp, DataBlockIter* Block::NewDataIterator(const Comparator* raw_ucmp,
SequenceNumber global_seqno, SequenceNumber global_seqno,
DataBlockIter* iter, Statistics* stats, DataBlockIter* iter, Statistics* stats,
bool block_contents_pinned) { bool block_contents_pinned) {
@ -989,7 +989,7 @@ DataBlockIter* Block::NewDataIterator(const Comparator* ucmp,
return ret_iter; return ret_iter;
} else { } else {
ret_iter->Initialize( ret_iter->Initialize(
ucmp, data_, restart_offset_, num_restarts_, global_seqno, raw_ucmp, data_, restart_offset_, num_restarts_, global_seqno,
read_amp_bitmap_.get(), block_contents_pinned, read_amp_bitmap_.get(), block_contents_pinned,
data_block_hash_index_.Valid() ? &data_block_hash_index_ : nullptr); data_block_hash_index_.Valid() ? &data_block_hash_index_ : nullptr);
if (read_amp_bitmap_) { if (read_amp_bitmap_) {
@ -1004,10 +1004,10 @@ DataBlockIter* Block::NewDataIterator(const Comparator* ucmp,
} }
IndexBlockIter* Block::NewIndexIterator( IndexBlockIter* Block::NewIndexIterator(
const Comparator* ucmp, SequenceNumber global_seqno, IndexBlockIter* iter, const Comparator* raw_ucmp, SequenceNumber global_seqno,
Statistics* /*stats*/, bool total_order_seek, bool have_first_key, IndexBlockIter* iter, Statistics* /*stats*/, bool total_order_seek,
bool key_includes_seq, bool value_is_full, bool block_contents_pinned, bool have_first_key, bool key_includes_seq, bool value_is_full,
BlockPrefixIndex* prefix_index) { bool block_contents_pinned, BlockPrefixIndex* prefix_index) {
IndexBlockIter* ret_iter; IndexBlockIter* ret_iter;
if (iter != nullptr) { if (iter != nullptr) {
ret_iter = iter; ret_iter = iter;
@ -1025,7 +1025,7 @@ IndexBlockIter* Block::NewIndexIterator(
} else { } else {
BlockPrefixIndex* prefix_index_ptr = BlockPrefixIndex* prefix_index_ptr =
total_order_seek ? nullptr : prefix_index; total_order_seek ? nullptr : prefix_index;
ret_iter->Initialize(ucmp, data_, restart_offset_, num_restarts_, ret_iter->Initialize(raw_ucmp, data_, restart_offset_, num_restarts_,
global_seqno, prefix_index_ptr, have_first_key, global_seqno, prefix_index_ptr, have_first_key,
key_includes_seq, value_is_full, key_includes_seq, value_is_full,
block_contents_pinned); block_contents_pinned);

@ -168,7 +168,7 @@ class Block {
BlockBasedTableOptions::DataBlockIndexType IndexType() const; BlockBasedTableOptions::DataBlockIndexType IndexType() const;
// ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key // raw_ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key
// comparator. // comparator.
// //
// If iter is null, return new Iterator // If iter is null, return new Iterator
@ -187,13 +187,13 @@ class Block {
// NOTE: for the hash based lookup, if a key prefix doesn't match any key, // NOTE: for the hash based lookup, if a key prefix doesn't match any key,
// the iterator will simply be set as "invalid", rather than returning // the iterator will simply be set as "invalid", rather than returning
// the key that is just pass the target key. // the key that is just pass the target key.
DataBlockIter* NewDataIterator(const Comparator* ucmp, DataBlockIter* NewDataIterator(const Comparator* raw_ucmp,
SequenceNumber global_seqno, SequenceNumber global_seqno,
DataBlockIter* iter = nullptr, DataBlockIter* iter = nullptr,
Statistics* stats = nullptr, Statistics* stats = nullptr,
bool block_contents_pinned = false); bool block_contents_pinned = false);
// ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key // raw_ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key
// comparator. // comparator.
// //
// key_includes_seq, default true, means that the keys are in internal key // key_includes_seq, default true, means that the keys are in internal key
@ -208,7 +208,7 @@ class Block {
// first_internal_key. It affects data serialization format, so the same value // first_internal_key. It affects data serialization format, so the same value
// have_first_key must be used when writing and reading index. // have_first_key must be used when writing and reading index.
// It is determined by IndexType property of the table. // It is determined by IndexType property of the table.
IndexBlockIter* NewIndexIterator(const Comparator* ucmp, IndexBlockIter* NewIndexIterator(const Comparator* raw_ucmp,
SequenceNumber global_seqno, SequenceNumber global_seqno,
IndexBlockIter* iter, Statistics* stats, IndexBlockIter* iter, Statistics* stats,
bool total_order_seek, bool have_first_key, bool total_order_seek, bool have_first_key,
@ -251,14 +251,13 @@ class Block {
template <class TValue> template <class TValue>
class BlockIter : public InternalIteratorBase<TValue> { class BlockIter : public InternalIteratorBase<TValue> {
public: public:
void InitializeBase(const Comparator* ucmp, const char* data, void InitializeBase(const Comparator* raw_ucmp, const char* data,
uint32_t restarts, uint32_t num_restarts, uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno, bool block_contents_pinned) { SequenceNumber global_seqno, bool block_contents_pinned) {
assert(data_ == nullptr); // Ensure it is called only once assert(data_ == nullptr); // Ensure it is called only once
assert(num_restarts > 0); // Ensure the param is valid assert(num_restarts > 0); // Ensure the param is valid
ucmp_wrapper_ = UserComparatorWrapper(ucmp); raw_ucmp_ = raw_ucmp;
icmp_ = InternalKeyComparator(ucmp, false /* named */);
data_ = data; data_ = data;
restarts_ = restarts; restarts_ = restarts;
num_restarts_ = num_restarts; num_restarts_ = num_restarts;
@ -357,9 +356,6 @@ class BlockIter : public InternalIteratorBase<TValue> {
Cache::Handle* cache_handle() { return cache_handle_; } Cache::Handle* cache_handle() { return cache_handle_; }
protected: protected:
UserComparatorWrapper ucmp_wrapper_;
InternalKeyComparator icmp_;
const char* data_; // underlying block contents const char* data_; // underlying block contents
uint32_t num_restarts_; // Number of uint32_t entries in restart array uint32_t num_restarts_; // Number of uint32_t entries in restart array
@ -390,6 +386,12 @@ class BlockIter : public InternalIteratorBase<TValue> {
virtual void NextImpl() = 0; virtual void NextImpl() = 0;
virtual void PrevImpl() = 0; virtual void PrevImpl() = 0;
InternalKeyComparator icmp() {
return InternalKeyComparator(raw_ucmp_, false /* named */);
}
UserComparatorWrapper ucmp() { return UserComparatorWrapper(raw_ucmp_); }
// Must be called every time a key is found that needs to be returned to user, // Must be called every time a key is found that needs to be returned to user,
// and may be called when no key is found (as a no-op). Updates `key_`, // and may be called when no key is found (as a no-op). Updates `key_`,
// `key_buf_`, and `key_pinned_` with info about the found key. // `key_buf_`, and `key_pinned_` with info about the found key.
@ -419,15 +421,16 @@ class BlockIter : public InternalIteratorBase<TValue> {
int CompareCurrentKey(const Slice& other) { int CompareCurrentKey(const Slice& other) {
if (raw_key_.IsUserKey()) { if (raw_key_.IsUserKey()) {
assert(global_seqno_ == kDisableGlobalSequenceNumber); assert(global_seqno_ == kDisableGlobalSequenceNumber);
return ucmp_wrapper_.Compare(raw_key_.GetUserKey(), other); return ucmp().Compare(raw_key_.GetUserKey(), other);
} else if (global_seqno_ == kDisableGlobalSequenceNumber) { } else if (global_seqno_ == kDisableGlobalSequenceNumber) {
return icmp_.Compare(raw_key_.GetInternalKey(), other); return icmp().Compare(raw_key_.GetInternalKey(), other);
} }
return icmp_.Compare(raw_key_.GetInternalKey(), global_seqno_, other, return icmp().Compare(raw_key_.GetInternalKey(), global_seqno_, other,
kDisableGlobalSequenceNumber); kDisableGlobalSequenceNumber);
} }
private: private:
const Comparator* raw_ucmp_;
// Store the cache handle, if the block is cached. We need this since the // Store the cache handle, if the block is cached. We need this since the
// only other place the handle is stored is as an argument to the Cleanable // only other place the handle is stored is as an argument to the Cleanable
// function callback, which is hard to retrieve. When multiple value // function callback, which is hard to retrieve. When multiple value
@ -472,20 +475,21 @@ class DataBlockIter final : public BlockIter<Slice> {
public: public:
DataBlockIter() DataBlockIter()
: BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {} : BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {}
DataBlockIter(const Comparator* ucmp, const char* data, uint32_t restarts, DataBlockIter(const Comparator* raw_ucmp, const char* data, uint32_t restarts,
uint32_t num_restarts, SequenceNumber global_seqno, uint32_t num_restarts, SequenceNumber global_seqno,
BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned, BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned,
DataBlockHashIndex* data_block_hash_index) DataBlockHashIndex* data_block_hash_index)
: DataBlockIter() { : DataBlockIter() {
Initialize(ucmp, data, restarts, num_restarts, global_seqno, Initialize(raw_ucmp, data, restarts, num_restarts, global_seqno,
read_amp_bitmap, block_contents_pinned, data_block_hash_index); read_amp_bitmap, block_contents_pinned, data_block_hash_index);
} }
void Initialize(const Comparator* ucmp, const char* data, uint32_t restarts, void Initialize(const Comparator* raw_ucmp, const char* data,
uint32_t num_restarts, SequenceNumber global_seqno, uint32_t restarts, uint32_t num_restarts,
SequenceNumber global_seqno,
BlockReadAmpBitmap* read_amp_bitmap, BlockReadAmpBitmap* read_amp_bitmap,
bool block_contents_pinned, bool block_contents_pinned,
DataBlockHashIndex* data_block_hash_index) { DataBlockHashIndex* data_block_hash_index) {
InitializeBase(ucmp, data, restarts, num_restarts, global_seqno, InitializeBase(raw_ucmp, data, restarts, num_restarts, global_seqno,
block_contents_pinned); block_contents_pinned);
raw_key_.SetIsUserKey(false); raw_key_.SetIsUserKey(false);
read_amp_bitmap_ = read_amp_bitmap; read_amp_bitmap_ = read_amp_bitmap;
@ -594,12 +598,12 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
// format. // format.
// value_is_full, default true, means that no delta encoding is // value_is_full, default true, means that no delta encoding is
// applied to values. // applied to values.
void Initialize(const Comparator* ucmp, const char* data, uint32_t restarts, void Initialize(const Comparator* raw_ucmp, const char* data,
uint32_t num_restarts, SequenceNumber global_seqno, uint32_t restarts, uint32_t num_restarts,
BlockPrefixIndex* prefix_index, bool have_first_key, SequenceNumber global_seqno, BlockPrefixIndex* prefix_index,
bool key_includes_seq, bool value_is_full, bool have_first_key, bool key_includes_seq,
bool block_contents_pinned) { bool value_is_full, bool block_contents_pinned) {
InitializeBase(ucmp, data, restarts, num_restarts, InitializeBase(raw_ucmp, data, restarts, num_restarts,
kDisableGlobalSequenceNumber, block_contents_pinned); kDisableGlobalSequenceNumber, block_contents_pinned);
raw_key_.SetIsUserKey(!key_includes_seq); raw_key_.SetIsUserKey(!key_includes_seq);
prefix_index_ = prefix_index; prefix_index_ = prefix_index;

Loading…
Cancel
Save