From 643c863b72abbc3cae4382b106c4fe7b42d98a95 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 20 Jul 2020 14:05:21 -0700 Subject: [PATCH] minimize BlockIter comparator scope (#7149) Summary: PR https://github.com/facebook/rocksdb/issues/6944 transitioned `BlockIter` from using `Comparator*` to using concrete `UserComparatorWrapper` and `InternalKeyComparator`. However, adding them as instance variables to `BlockIter` was not optimal. Bloating `BlockIter` caused the `ArenaWrappedDBIter`'s arena allocator to do more heap allocations (in certain cases) which harmed performance of `DB::NewIterator()`. This PR pushes down the concrete comparator objects to the point of usage, which forces them to be on the stack. As a result, the `BlockIter` is back to its original size prior to https://github.com/facebook/rocksdb/issues/6944 (actually a bit smaller since there were two `Comparator*` before). Pull Request resolved: https://github.com/facebook/rocksdb/pull/7149 Test Plan: verified our internal `DB::NewIterator()`-heavy regression test no longer reports regression. Reviewed By: riversand963 Differential Revision: D22623189 Pulled By: ajkr fbshipit-source-id: f6d69accfe5de51e0bd9874a480b32b29909bab6 --- table/block_based/block.cc | 16 +++++------ table/block_based/block.h | 54 ++++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/table/block_based/block.cc b/table/block_based/block.cc index a8ad264a7..94a71adef 100644 --- a/table/block_based/block.cc +++ b/table/block_based/block.cc @@ -351,7 +351,7 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) { return true; } - if (ucmp_wrapper_.Compare(raw_key_.GetUserKey(), target_user_key) != 0) { + if (ucmp().Compare(raw_key_.GetUserKey(), target_user_key) != 0) { // the key is not in this block and cannot be at the next block either. return false; } @@ -969,7 +969,7 @@ Block::Block(BlockContents&& contents, size_t read_amp_bytes_per_bit, } } -DataBlockIter* Block::NewDataIterator(const Comparator* ucmp, +DataBlockIter* Block::NewDataIterator(const Comparator* raw_ucmp, SequenceNumber global_seqno, DataBlockIter* iter, Statistics* stats, bool block_contents_pinned) { @@ -989,7 +989,7 @@ DataBlockIter* Block::NewDataIterator(const Comparator* ucmp, return ret_iter; } else { ret_iter->Initialize( - ucmp, data_, restart_offset_, num_restarts_, global_seqno, + raw_ucmp, data_, restart_offset_, num_restarts_, global_seqno, read_amp_bitmap_.get(), block_contents_pinned, data_block_hash_index_.Valid() ? &data_block_hash_index_ : nullptr); if (read_amp_bitmap_) { @@ -1004,10 +1004,10 @@ DataBlockIter* Block::NewDataIterator(const Comparator* ucmp, } IndexBlockIter* Block::NewIndexIterator( - const Comparator* ucmp, SequenceNumber global_seqno, IndexBlockIter* iter, - Statistics* /*stats*/, bool total_order_seek, bool have_first_key, - bool key_includes_seq, bool value_is_full, bool block_contents_pinned, - BlockPrefixIndex* prefix_index) { + const Comparator* raw_ucmp, SequenceNumber global_seqno, + IndexBlockIter* iter, Statistics* /*stats*/, bool total_order_seek, + bool have_first_key, bool key_includes_seq, bool value_is_full, + bool block_contents_pinned, BlockPrefixIndex* prefix_index) { IndexBlockIter* ret_iter; if (iter != nullptr) { ret_iter = iter; @@ -1025,7 +1025,7 @@ IndexBlockIter* Block::NewIndexIterator( } else { BlockPrefixIndex* prefix_index_ptr = total_order_seek ? nullptr : prefix_index; - ret_iter->Initialize(ucmp, data_, restart_offset_, num_restarts_, + ret_iter->Initialize(raw_ucmp, data_, restart_offset_, num_restarts_, global_seqno, prefix_index_ptr, have_first_key, key_includes_seq, value_is_full, block_contents_pinned); diff --git a/table/block_based/block.h b/table/block_based/block.h index ae23ab182..651bdc739 100644 --- a/table/block_based/block.h +++ b/table/block_based/block.h @@ -168,7 +168,7 @@ class Block { BlockBasedTableOptions::DataBlockIndexType IndexType() const; - // ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key + // raw_ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key // comparator. // // If iter is null, return new Iterator @@ -187,13 +187,13 @@ class Block { // NOTE: for the hash based lookup, if a key prefix doesn't match any key, // the iterator will simply be set as "invalid", rather than returning // the key that is just pass the target key. - DataBlockIter* NewDataIterator(const Comparator* ucmp, + DataBlockIter* NewDataIterator(const Comparator* raw_ucmp, SequenceNumber global_seqno, DataBlockIter* iter = nullptr, Statistics* stats = nullptr, bool block_contents_pinned = false); - // ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key + // raw_ucmp is a raw (i.e., not wrapped by `UserComparatorWrapper`) user key // comparator. // // key_includes_seq, default true, means that the keys are in internal key @@ -208,7 +208,7 @@ class Block { // first_internal_key. It affects data serialization format, so the same value // have_first_key must be used when writing and reading index. // It is determined by IndexType property of the table. - IndexBlockIter* NewIndexIterator(const Comparator* ucmp, + IndexBlockIter* NewIndexIterator(const Comparator* raw_ucmp, SequenceNumber global_seqno, IndexBlockIter* iter, Statistics* stats, bool total_order_seek, bool have_first_key, @@ -251,14 +251,13 @@ class Block { template class BlockIter : public InternalIteratorBase { public: - void InitializeBase(const Comparator* ucmp, const char* data, + void InitializeBase(const Comparator* raw_ucmp, const char* data, uint32_t restarts, uint32_t num_restarts, SequenceNumber global_seqno, bool block_contents_pinned) { assert(data_ == nullptr); // Ensure it is called only once assert(num_restarts > 0); // Ensure the param is valid - ucmp_wrapper_ = UserComparatorWrapper(ucmp); - icmp_ = InternalKeyComparator(ucmp, false /* named */); + raw_ucmp_ = raw_ucmp; data_ = data; restarts_ = restarts; num_restarts_ = num_restarts; @@ -357,9 +356,6 @@ class BlockIter : public InternalIteratorBase { Cache::Handle* cache_handle() { return cache_handle_; } protected: - UserComparatorWrapper ucmp_wrapper_; - InternalKeyComparator icmp_; - const char* data_; // underlying block contents uint32_t num_restarts_; // Number of uint32_t entries in restart array @@ -390,6 +386,12 @@ class BlockIter : public InternalIteratorBase { virtual void NextImpl() = 0; virtual void PrevImpl() = 0; + InternalKeyComparator icmp() { + return InternalKeyComparator(raw_ucmp_, false /* named */); + } + + UserComparatorWrapper ucmp() { return UserComparatorWrapper(raw_ucmp_); } + // Must be called every time a key is found that needs to be returned to user, // and may be called when no key is found (as a no-op). Updates `key_`, // `key_buf_`, and `key_pinned_` with info about the found key. @@ -419,15 +421,16 @@ class BlockIter : public InternalIteratorBase { int CompareCurrentKey(const Slice& other) { if (raw_key_.IsUserKey()) { assert(global_seqno_ == kDisableGlobalSequenceNumber); - return ucmp_wrapper_.Compare(raw_key_.GetUserKey(), other); + return ucmp().Compare(raw_key_.GetUserKey(), other); } else if (global_seqno_ == kDisableGlobalSequenceNumber) { - return icmp_.Compare(raw_key_.GetInternalKey(), other); + return icmp().Compare(raw_key_.GetInternalKey(), other); } - return icmp_.Compare(raw_key_.GetInternalKey(), global_seqno_, other, - kDisableGlobalSequenceNumber); + return icmp().Compare(raw_key_.GetInternalKey(), global_seqno_, other, + kDisableGlobalSequenceNumber); } private: + const Comparator* raw_ucmp_; // Store the cache handle, if the block is cached. We need this since the // only other place the handle is stored is as an argument to the Cleanable // function callback, which is hard to retrieve. When multiple value @@ -472,20 +475,21 @@ class DataBlockIter final : public BlockIter { public: DataBlockIter() : BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {} - DataBlockIter(const Comparator* ucmp, const char* data, uint32_t restarts, + DataBlockIter(const Comparator* raw_ucmp, const char* data, uint32_t restarts, uint32_t num_restarts, SequenceNumber global_seqno, BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned, DataBlockHashIndex* data_block_hash_index) : DataBlockIter() { - Initialize(ucmp, data, restarts, num_restarts, global_seqno, + Initialize(raw_ucmp, data, restarts, num_restarts, global_seqno, read_amp_bitmap, block_contents_pinned, data_block_hash_index); } - void Initialize(const Comparator* ucmp, const char* data, uint32_t restarts, - uint32_t num_restarts, SequenceNumber global_seqno, + void Initialize(const Comparator* raw_ucmp, const char* data, + uint32_t restarts, uint32_t num_restarts, + SequenceNumber global_seqno, BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned, DataBlockHashIndex* data_block_hash_index) { - InitializeBase(ucmp, data, restarts, num_restarts, global_seqno, + InitializeBase(raw_ucmp, data, restarts, num_restarts, global_seqno, block_contents_pinned); raw_key_.SetIsUserKey(false); read_amp_bitmap_ = read_amp_bitmap; @@ -594,12 +598,12 @@ class IndexBlockIter final : public BlockIter { // format. // value_is_full, default true, means that no delta encoding is // applied to values. - void Initialize(const Comparator* ucmp, const char* data, uint32_t restarts, - uint32_t num_restarts, SequenceNumber global_seqno, - BlockPrefixIndex* prefix_index, bool have_first_key, - bool key_includes_seq, bool value_is_full, - bool block_contents_pinned) { - InitializeBase(ucmp, data, restarts, num_restarts, + void Initialize(const Comparator* raw_ucmp, const char* data, + uint32_t restarts, uint32_t num_restarts, + SequenceNumber global_seqno, BlockPrefixIndex* prefix_index, + bool have_first_key, bool key_includes_seq, + bool value_is_full, bool block_contents_pinned) { + InitializeBase(raw_ucmp, data, restarts, num_restarts, kDisableGlobalSequenceNumber, block_contents_pinned); raw_key_.SetIsUserKey(!key_includes_seq); prefix_index_ = prefix_index;