From 0d885e80d41f2ace03e87bd00dcc981868209509 Mon Sep 17 00:00:00 2001
From: Jay Zhuang
Date: Mon, 8 Aug 2022 12:59:31 -0700
Subject: [PATCH] Avoid dynamic memory allocation on read path (#10453)

Summary:
A lambda function dynamically allocates memory from the heap if it needs to
capture multiple values, which could be expensive.
Switch to explicitly using a local functor allocated on the stack.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10453

Test Plan: CI

db_bench shows a ~2-3% read improvement:
```
# before the change
TEST_TMPDIR=/tmp/dbbench4 ./db_bench_main --benchmarks=filluniquerandom,readrandom -compression_type=none -max_background_jobs=12 -num=10000000
readrandom   :   8.528 micros/op 117265 ops/sec 85.277 seconds 10000000 operations;   13.0 MB/s (10000000 of 10000000 found)

# after the change
TEST_TMPDIR=/tmp/dbbench5 ./db_bench_new --benchmarks=filluniquerandom,readrandom -compression_type=none -max_background_jobs=12 -num=10000000
readrandom   :   8.263 micros/op 121015 ops/sec 82.634 seconds 10000000 operations;   13.4 MB/s (10000000 of 10000000 found)
```

details: https://gist.github.com/jay-zhuang/5ac0628db8fc9cbcb499e056d4cb5918

A micro-benchmark shows a similar improvement of ~1-2%:
before the change: https://gist.github.com/jay-zhuang/9dc0ebf51bbfbf4af82f6193d43cf75b
after the change: https://gist.github.com/jay-zhuang/fc061f1813cd8f441109ad0b0fe7c185

Reviewed By: ajkr

Differential Revision: D38345056

Pulled By: jay-zhuang

fbshipit-source-id: f3597aeeee338a804d37bf2e81386d5a100665e0
---
 HISTORY.md                                    |  1 +
 table/block_based/block_based_table_reader.cc | 13 +++++--
 table/block_based/block_like_traits.h         | 38 ++++++++++++++-----
 3 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index 2f6adf235..ea937ea0c 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -30,6 +30,7 @@
 
 ### Performance Improvements
 * Instead of constructing `FragmentedRangeTombstoneList` during every read operation, it is now constructed once and stored in immutable memtables. This improves speed of querying range tombstones from immutable memtables.
+* Improve read performance by avoiding dynamic memory allocation.
 
 ## 7.5.0 (07/15/2022)
 ### New Features
diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index 4d53997ba..6983e1f7d 100644
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc
@@ -1253,8 +1253,12 @@ Status BlockBasedTable::GetDataBlockFromCache(
   Statistics* statistics = rep_->ioptions.statistics.get();
   bool using_zstd = rep_->blocks_definitely_zstd_compressed;
   const FilterPolicy* filter_policy = rep_->filter_policy;
-  Cache::CreateCallback create_cb = GetCreateCallback<TBlocklike>(
-      read_amp_bytes_per_bit, statistics, using_zstd, filter_policy);
+  CacheCreateCallback<TBlocklike> callback(read_amp_bytes_per_bit, statistics,
+                                           using_zstd, filter_policy);
+  // avoid dynamic memory allocation by using the reference (std::ref) of the
+  // callback. Otherwise, binding a functor to std::function will allocate extra
+  // memory from heap.
+  Cache::CreateCallback create_cb(std::ref(callback));
 
   // Lookup uncompressed cache first
   if (block_cache != nullptr) {
@@ -1284,8 +1288,11 @@ Status BlockBasedTable::GetDataBlockFromCache(
   BlockContents contents;
   if (rep_->ioptions.lowest_used_cache_tier ==
       CacheTier::kNonVolatileBlockTier) {
-    Cache::CreateCallback create_cb_special = GetCreateCallback<BlockContents>(
+    CacheCreateCallback<BlockContents> special_callback(
         read_amp_bytes_per_bit, statistics, using_zstd, filter_policy);
+    // avoid dynamic memory allocation by using the reference (std::ref) of the
+    // callback. Make sure the callback is only used within this code block.
+    Cache::CreateCallback create_cb_special(std::ref(special_callback));
     block_cache_compressed_handle = block_cache_compressed->Lookup(
         cache_key,
         BlocklikeTraits<BlockContents>::GetCacheItemHelper(block_type),
diff --git a/table/block_based/block_like_traits.h b/table/block_based/block_like_traits.h
index aeb255114..86df17db2 100644
--- a/table/block_based/block_like_traits.h
+++ b/table/block_based/block_like_traits.h
@@ -21,24 +21,42 @@ template <typename TBlocklike>
 Cache::CacheItemHelper* GetCacheItemHelperForRole();
 
 template <typename TBlocklike>
-Cache::CreateCallback GetCreateCallback(size_t read_amp_bytes_per_bit,
-                                        Statistics* statistics, bool using_zstd,
-                                        const FilterPolicy* filter_policy) {
-  return [read_amp_bytes_per_bit, statistics, using_zstd, filter_policy](
-             const void* buf, size_t size, void** out_obj,
-             size_t* charge) -> Status {
+class CacheCreateCallback {
+ public:
+  CacheCreateCallback() = delete;
+  CacheCreateCallback(const CacheCreateCallback&) = delete;
+  CacheCreateCallback(CacheCreateCallback&&) = delete;
+  CacheCreateCallback& operator=(const CacheCreateCallback&) = delete;
+  CacheCreateCallback& operator=(CacheCreateCallback&&) = delete;
+
+  explicit CacheCreateCallback(size_t read_amp_bytes_per_bit,
+                               Statistics* statistics, bool using_zstd,
+                               const FilterPolicy* filter_policy)
+      : read_amp_bytes_per_bit_(read_amp_bytes_per_bit),
+        statistics_(statistics),
+        using_zstd_(using_zstd),
+        filter_policy_(filter_policy) {}
+
+  Status operator()(const void* buf, size_t size, void** out_obj,
+                    size_t* charge) {
     assert(buf != nullptr);
     std::unique_ptr<char[]> buf_data(new char[size]());
     memcpy(buf_data.get(), buf, size);
     BlockContents bc = BlockContents(std::move(buf_data), size);
     TBlocklike* ucd_ptr = BlocklikeTraits<TBlocklike>::Create(
-        std::move(bc), read_amp_bytes_per_bit, statistics, using_zstd,
-        filter_policy);
+        std::move(bc), read_amp_bytes_per_bit_, statistics_, using_zstd_,
+        filter_policy_);
     *out_obj = reinterpret_cast<void*>(ucd_ptr);
     *charge = size;
     return Status::OK();
-  };
-}
+  }
+
+ private:
+  const size_t read_amp_bytes_per_bit_;
+  Statistics* statistics_;
+  const bool using_zstd_;
+  const FilterPolicy* filter_policy_;
+};
 
 template <>
 class BlocklikeTraits<BlockContents> {
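
Background (illustrative, not part of the patch): the reason `std::ref` helps is `std::function`'s small-object optimization. A closure capturing four values (a `size_t`, two pointers, and a `bool`) is larger than the small buffer of common `std::function` implementations, so constructing the `std::function` copies the closure onto the heap. A `std::reference_wrapper` produced by `std::ref` is a single pointer, and the standard requires constructing a `std::function` from it not to throw, which in practice means no heap allocation; the trade-off is that the referenced functor must outlive every use of the wrapper, which is why the patch comments stress that the callback is only used within the enclosing block. The standalone sketch below demonstrates the effect with a stand-in functor and an allocation counter; the names (`CreateCallback`, `g_new_calls`) are hypothetical, not RocksDB code, and exact allocation counts depend on the standard library.
```
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <new>

// Count heap allocations so we can see which std::function construction
// allocates. Replacing the global operator new like this is conformant.
static int g_new_calls = 0;

void* operator new(std::size_t size) {
  ++g_new_calls;
  if (void* p = std::malloc(size)) {
    return p;
  }
  throw std::bad_alloc();
}
void operator delete(void* p) noexcept { std::free(p); }
void operator delete(void* p, std::size_t) noexcept { std::free(p); }

// Hypothetical stand-in for the patch's CacheCreateCallback: a functor that
// carries four pieces of state and lives on the caller's stack.
struct CreateCallback {
  std::size_t read_amp_bytes_per_bit;
  void* statistics;
  bool using_zstd;
  const void* filter_policy;

  int operator()(const void* /*buf*/, std::size_t size) const {
    return static_cast<int>(size);  // placeholder for the real create work
  }
};

int main() {
  CreateCallback cb{0, nullptr, false, nullptr};

  g_new_calls = 0;
  // Capturing the state by value makes the closure ~32 bytes, larger than the
  // small-object buffer of typical std::function implementations, so this
  // construction usually allocates from the heap.
  std::function<int(const void*, std::size_t)> fat =
      [cb](const void* buf, std::size_t size) { return cb(buf, size); };
  std::printf("capturing lambda : %d allocation(s)\n", g_new_calls);

  g_new_calls = 0;
  // std::ref(cb) yields a reference_wrapper (one pointer), which std::function
  // stores without allocating. cb must outlive every call through 'thin'.
  std::function<int(const void*, std::size_t)> thin = std::ref(cb);
  std::printf("std::ref(functor): %d allocation(s)\n", g_new_calls);

  return fat(nullptr, 0) + thin(nullptr, 0);
}
```
Compiled with a current g++ or clang++ (`-std=c++17`), this typically reports one allocation for the capturing lambda and zero for the `std::ref` case, mirroring the allocation the patch removes from the read path.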