Avoid dynamic memory allocation on read path (#10453)

Summary:
The lambda function dynamically allocates memory from the heap if it needs to
capture multiple values, which could be expensive.
Switch to explicitly using a local functor allocated on the stack.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10453

Test Plan:
CI
db_bench shows ~2-3% read improvement:
```
# before the change
TEST_TMPDIR=/tmp/dbbench4 ./db_bench_main --benchmarks=filluniquerandom,readrandom -compression_type=none -max_background_jobs=12 -num=10000000
readrandom   :       8.528 micros/op 117265 ops/sec 85.277 seconds 10000000 operations;   13.0 MB/s (10000000 of 10000000 found)
# after the change
TEST_TMPDIR=/tmp/dbbench5 ./db_bench_new --benchmarks=filluniquerandom,readrandom -compression_type=none -max_background_jobs=12 -num=10000000
readrandom   :       8.263 micros/op 121015 ops/sec 82.634 seconds 10000000 operations;   13.4 MB/s (10000000 of 10000000 found)
```
details: https://gist.github.com/jay-zhuang/5ac0628db8fc9cbcb499e056d4cb5918

Micro-benchmark shows a similar improvement ~1-2%:
before the change:
https://gist.github.com/jay-zhuang/9dc0ebf51bbfbf4af82f6193d43cf75b
after the change:
https://gist.github.com/jay-zhuang/fc061f1813cd8f441109ad0b0fe7c185

Reviewed By: ajkr

Differential Revision: D38345056

Pulled By: jay-zhuang

fbshipit-source-id: f3597aeeee338a804d37bf2e81386d5a100665e0
main
Jay Zhuang 2 years ago committed by Facebook GitHub Bot
parent 0cc9e98bbb
commit 0d885e80d4
  1. 1
      HISTORY.md
  2. 13
      table/block_based/block_based_table_reader.cc
  3. 36
      table/block_based/block_like_traits.h

@ -30,6 +30,7 @@
### Performance Improvements ### Performance Improvements
* Instead of constructing `FragmentedRangeTombstoneList` during every read operation, it is now constructed once and stored in immutable memtables. This improves speed of querying range tombstones from immutable memtables. * Instead of constructing `FragmentedRangeTombstoneList` during every read operation, it is now constructed once and stored in immutable memtables. This improves speed of querying range tombstones from immutable memtables.
* Improve read performance by avoiding dynamic memory allocation.
## 7.5.0 (07/15/2022) ## 7.5.0 (07/15/2022)
### New Features ### New Features

@ -1253,8 +1253,12 @@ Status BlockBasedTable::GetDataBlockFromCache(
Statistics* statistics = rep_->ioptions.statistics.get(); Statistics* statistics = rep_->ioptions.statistics.get();
bool using_zstd = rep_->blocks_definitely_zstd_compressed; bool using_zstd = rep_->blocks_definitely_zstd_compressed;
const FilterPolicy* filter_policy = rep_->filter_policy; const FilterPolicy* filter_policy = rep_->filter_policy;
Cache::CreateCallback create_cb = GetCreateCallback<TBlocklike>( CacheCreateCallback<TBlocklike> callback(read_amp_bytes_per_bit, statistics,
read_amp_bytes_per_bit, statistics, using_zstd, filter_policy); using_zstd, filter_policy);
// avoid dynamic memory allocation by using the reference (std::ref) of the
// callback. Otherwise, binding a functor to std::function will allocate extra
// memory from heap.
Cache::CreateCallback create_cb(std::ref(callback));
// Lookup uncompressed cache first // Lookup uncompressed cache first
if (block_cache != nullptr) { if (block_cache != nullptr) {
@ -1284,8 +1288,11 @@ Status BlockBasedTable::GetDataBlockFromCache(
BlockContents contents; BlockContents contents;
if (rep_->ioptions.lowest_used_cache_tier == if (rep_->ioptions.lowest_used_cache_tier ==
CacheTier::kNonVolatileBlockTier) { CacheTier::kNonVolatileBlockTier) {
Cache::CreateCallback create_cb_special = GetCreateCallback<BlockContents>( CacheCreateCallback<BlockContents> special_callback(
read_amp_bytes_per_bit, statistics, using_zstd, filter_policy); read_amp_bytes_per_bit, statistics, using_zstd, filter_policy);
// avoid dynamic memory allocation by using the reference (std::ref) of the
// callback. Make sure the callback is only used within this code block.
Cache::CreateCallback create_cb_special(std::ref(special_callback));
block_cache_compressed_handle = block_cache_compressed->Lookup( block_cache_compressed_handle = block_cache_compressed->Lookup(
cache_key, cache_key,
BlocklikeTraits<BlockContents>::GetCacheItemHelper(block_type), BlocklikeTraits<BlockContents>::GetCacheItemHelper(block_type),

@ -21,24 +21,42 @@ template <typename T, CacheEntryRole R>
Cache::CacheItemHelper* GetCacheItemHelperForRole(); Cache::CacheItemHelper* GetCacheItemHelperForRole();
template <typename TBlocklike> template <typename TBlocklike>
Cache::CreateCallback GetCreateCallback(size_t read_amp_bytes_per_bit, class CacheCreateCallback {
public:
CacheCreateCallback() = delete;
CacheCreateCallback(const CacheCreateCallback&) = delete;
CacheCreateCallback(CacheCreateCallback&&) = delete;
CacheCreateCallback& operator=(const CacheCreateCallback&) = delete;
CacheCreateCallback& operator=(CacheCreateCallback&&) = delete;
explicit CacheCreateCallback(size_t read_amp_bytes_per_bit,
Statistics* statistics, bool using_zstd, Statistics* statistics, bool using_zstd,
const FilterPolicy* filter_policy) { const FilterPolicy* filter_policy)
return [read_amp_bytes_per_bit, statistics, using_zstd, filter_policy]( : read_amp_bytes_per_bit_(read_amp_bytes_per_bit),
const void* buf, size_t size, void** out_obj, statistics_(statistics),
size_t* charge) -> Status { using_zstd_(using_zstd),
filter_policy_(filter_policy) {}
Status operator()(const void* buf, size_t size, void** out_obj,
size_t* charge) {
assert(buf != nullptr); assert(buf != nullptr);
std::unique_ptr<char[]> buf_data(new char[size]()); std::unique_ptr<char[]> buf_data(new char[size]());
memcpy(buf_data.get(), buf, size); memcpy(buf_data.get(), buf, size);
BlockContents bc = BlockContents(std::move(buf_data), size); BlockContents bc = BlockContents(std::move(buf_data), size);
TBlocklike* ucd_ptr = BlocklikeTraits<TBlocklike>::Create( TBlocklike* ucd_ptr = BlocklikeTraits<TBlocklike>::Create(
std::move(bc), read_amp_bytes_per_bit, statistics, using_zstd, std::move(bc), read_amp_bytes_per_bit_, statistics_, using_zstd_,
filter_policy); filter_policy_);
*out_obj = reinterpret_cast<void*>(ucd_ptr); *out_obj = reinterpret_cast<void*>(ucd_ptr);
*charge = size; *charge = size;
return Status::OK(); return Status::OK();
}; }
}
private:
const size_t read_amp_bytes_per_bit_;
Statistics* statistics_;
const bool using_zstd_;
const FilterPolicy* filter_policy_;
};
template <> template <>
class BlocklikeTraits<BlockContents> { class BlocklikeTraits<BlockContents> {

Loading…
Cancel
Save