From b32d087dbb3b9d9f2c9597caa650d0ca9d2e2d7f Mon Sep 17 00:00:00 2001
From: Yi Wu
Date: Tue, 13 Nov 2018 13:46:15 -0800
Subject: [PATCH] Move MemoryAllocator option from Cache to BlockBasedTableOptions (#4676)

Summary:
Per offline discussion with siying, `MemoryAllocator` and `Cache` should be
decoupled: the memory allocator handles memory allocation, while the cache
handles cache policy. It is common for external cache libraries to couple
the two components for better optimization. If we want to integrate with
such a library in the future, we can write a wrapper around it that
implements both the `Cache` and `MemoryAllocator` interfaces. (A sketch of
such a wrapper, and of the new option's usage, appears after the patch.)

Pull Request resolved: https://github.com/facebook/rocksdb/pull/4676

Differential Revision: D13047662

Pulled By: yiwu-arbug

fbshipit-source-id: cd42e246d80ab600b4de47d073f7d2db308ce6dd
---
 HISTORY.md                         |  4 +++-
 cache/lru_cache.cc                 | 19 +++++++------------
 cache/lru_cache.h                  |  3 +--
 cache/sharded_cache.cc             |  7 ++-----
 cache/sharded_cache.h              |  3 +--
 db/version_set.cc                  |  3 ++-
 include/rocksdb/cache.h            | 27 +++++++--------------------
 include/rocksdb/table.h            |  5 +++++
 options/options_settable_test.cc   |  2 ++
 table/block_based_table_builder.cc |  2 +-
 table/block_based_table_factory.cc |  8 ++++++++
 table/block_based_table_factory.h  |  1 +
 table/block_based_table_reader.cc  |  4 +---
 table/table_test.cc                |  2 +-
 14 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index 850d83a35..0f3f9e5f1 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -3,9 +3,11 @@
 ### Public API Change
 * `NO_ITERATORS` is divided into two counters `NO_ITERATOR_CREATED` and `NO_ITERATOR_DELETE`. Both of them are only increasing now, just as other counters.
 
+### New Features
+* Introduced `MemoryAllocator`, which lets the user specify a custom memory allocator for block-based tables.
+
 ## 5.18.0 (11/12/2018)
 ### New Features
-* Introduced `Memoryllocator`, which lets the user specify custom allocator for memory in block cache.
 * Introduced `PerfContextByLevel` as part of `PerfContext` which allows storing perf context at each level. Also replaced `__thread` with `thread_local` keyword for perf_context. Added per-level perf context for bloom filter and `Get` query.
 * With level_compaction_dynamic_level_bytes = true, level multiplier may be adjusted automatically when Level 0 to 1 compaction is lagged behind.
 * Introduced DB option `atomic_flush`. If true, RocksDB supports flushing multiple column families and atomically committing the result to MANIFEST. Useful when WAL is disabled.
diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc
index e11ac6ddb..d4cbb9a45 100644
--- a/cache/lru_cache.cc
+++ b/cache/lru_cache.cc
@@ -461,10 +461,8 @@ std::string LRUCacheShard::GetPrintableOptions() const {
 }
 
 LRUCache::LRUCache(size_t capacity, int num_shard_bits,
-                   bool strict_capacity_limit, double high_pri_pool_ratio,
-                   std::shared_ptr<MemoryAllocator> allocator)
-    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit,
-                   std::move(allocator)) {
+                   bool strict_capacity_limit, double high_pri_pool_ratio)
+    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
   num_shards_ = 1 << num_shard_bits;
   shards_ = reinterpret_cast<LRUCacheShard*>(
       port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
@@ -539,14 +537,12 @@ double LRUCache::GetHighPriPoolRatio() {
 std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
   return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
                      cache_opts.strict_capacity_limit,
-                     cache_opts.high_pri_pool_ratio,
-                     cache_opts.memory_allocator);
+                     cache_opts.high_pri_pool_ratio);
 }
 
-std::shared_ptr<Cache> NewLRUCache(
-    size_t capacity, int num_shard_bits, bool strict_capacity_limit,
-    double high_pri_pool_ratio,
-    std::shared_ptr<MemoryAllocator> memory_allocator) {
+std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
+                                   bool strict_capacity_limit,
+                                   double high_pri_pool_ratio) {
   if (num_shard_bits >= 20) {
     return nullptr;  // the cache cannot be sharded into too many fine pieces
   }
@@ -558,8 +554,7 @@ std::shared_ptr<Cache> NewLRUCache(
     num_shard_bits = GetDefaultCacheShardBits(capacity);
   }
   return std::make_shared<LRUCache>(capacity, num_shard_bits,
-                                    strict_capacity_limit, high_pri_pool_ratio,
-                                    std::move(memory_allocator));
+                                    strict_capacity_limit, high_pri_pool_ratio);
 }
 
 }  // namespace rocksdb
diff --git a/cache/lru_cache.h b/cache/lru_cache.h
index 0194d26a5..3c067f0c1 100644
--- a/cache/lru_cache.h
+++ b/cache/lru_cache.h
@@ -279,8 +279,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard {
 class LRUCache : public ShardedCache {
  public:
   LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
-           double high_pri_pool_ratio,
-           std::shared_ptr<MemoryAllocator> memory_allocator = nullptr);
+           double high_pri_pool_ratio);
   virtual ~LRUCache();
   virtual const char* Name() const override { return "LRUCache"; }
   virtual CacheShard* GetShard(int shard) override;
diff --git a/cache/sharded_cache.cc b/cache/sharded_cache.cc
index a48a32185..e0011a848 100644
--- a/cache/sharded_cache.cc
+++ b/cache/sharded_cache.cc
@@ -20,9 +20,8 @@ namespace rocksdb {
 
 ShardedCache::ShardedCache(size_t capacity, int num_shard_bits,
-                           bool strict_capacity_limit,
-                           std::shared_ptr<MemoryAllocator> allocator)
-    : Cache(std::move(allocator)),
+                           bool strict_capacity_limit)
+    : Cache(),
       num_shard_bits_(num_shard_bits),
       capacity_(capacity),
       strict_capacity_limit_(strict_capacity_limit),
@@ -144,9 +143,6 @@ std::string ShardedCache::GetPrintableOptions() const {
              strict_capacity_limit_);
     ret.append(buffer);
   }
-  snprintf(buffer, kBufferSize, "    memory_allocator : %s\n",
-           memory_allocator() ? memory_allocator()->Name() : "None");
-  ret.append(buffer);
   ret.append(GetShard(0)->GetPrintableOptions());
   return ret;
 }
diff --git a/cache/sharded_cache.h b/cache/sharded_cache.h
index 543286b9e..4f9dea2ad 100644
--- a/cache/sharded_cache.h
+++ b/cache/sharded_cache.h
@@ -47,8 +47,7 @@ class CacheShard {
 // Keys are sharded by the highest num_shard_bits bits of hash value.
 class ShardedCache : public Cache {
  public:
-  ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
-               std::shared_ptr<MemoryAllocator> memory_allocator = nullptr);
+  ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit);
   virtual ~ShardedCache() = default;
   virtual const char* Name() const override = 0;
   virtual CacheShard* GetShard(int shard) = 0;
diff --git a/db/version_set.cc b/db/version_set.cc
index 5862bd235..1850a2b99 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -1259,7 +1259,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
       } else if (fp.GetHitFileLevel() >= 2) {
         RecordTick(db_statistics_, GET_HIT_L2_AND_UP);
       }
-      PERF_COUNTER_BY_LEVEL_ADD(user_key_return_count, 1, fp.GetHitFileLevel());
+      PERF_COUNTER_BY_LEVEL_ADD(user_key_return_count, 1,
+                                fp.GetHitFileLevel());
       return;
     case GetContext::kDeleted:
       // Use empty error message for speed
diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h
index 3ceda0d01..da3b934d8 100644
--- a/include/rocksdb/cache.h
+++ b/include/rocksdb/cache.h
@@ -25,7 +25,6 @@
 #include <stdint.h>
 #include <memory>
 #include <string>
-#include "rocksdb/memory_allocator.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/statistics.h"
 #include "rocksdb/status.h"
@@ -59,20 +58,13 @@ struct LRUCacheOptions {
   // BlockBasedTableOptions::cache_index_and_filter_blocks_with_high_priority.
   double high_pri_pool_ratio = 0.0;
 
-  // If non-nullptr will use this allocator instead of system allocator when
-  // allocating memory for cache blocks. Call this method before you start using
-  // the cache!
-  std::shared_ptr<MemoryAllocator> memory_allocator;
-
   LRUCacheOptions() {}
   LRUCacheOptions(size_t _capacity, int _num_shard_bits,
-                  bool _strict_capacity_limit, double _high_pri_pool_ratio,
-                  std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr)
+                  bool _strict_capacity_limit, double _high_pri_pool_ratio)
       : capacity(_capacity),
         num_shard_bits(_num_shard_bits),
         strict_capacity_limit(_strict_capacity_limit),
-        high_pri_pool_ratio(_high_pri_pool_ratio),
-        memory_allocator(std::move(_memory_allocator)) {}
+        high_pri_pool_ratio(_high_pri_pool_ratio) {}
 };
 
 // Create a new cache with a fixed size capacity. The cache is sharded
@@ -83,10 +75,10 @@ struct LRUCacheOptions {
 // high_pri_pool_pct.
 // num_shard_bits = -1 means it is automatically determined: every shard
 // will be at least 512KB and number of shard bits will not exceed 6.
-extern std::shared_ptr<Cache> NewLRUCache(
-    size_t capacity, int num_shard_bits = -1,
-    bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.0,
-    std::shared_ptr<MemoryAllocator> memory_allocator = nullptr);
+extern std::shared_ptr<Cache> NewLRUCache(size_t capacity,
+                                          int num_shard_bits = -1,
+                                          bool strict_capacity_limit = false,
+                                          double high_pri_pool_ratio = 0.0);
 
 extern std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts);
 
@@ -105,8 +97,7 @@ class Cache {
   // likely to get evicted than low priority entries.
   enum class Priority { HIGH, LOW };
 
-  Cache(std::shared_ptr<MemoryAllocator> allocator = nullptr)
-      : memory_allocator_(std::move(allocator)) {}
+  Cache() {}
 
   // Destroys all existing entries by calling the "deleter"
   // function that was passed via the Insert() function.
@@ -237,14 +228,10 @@ class Cache {
   virtual void TEST_mark_as_data_block(const Slice& /*key*/,
                                        size_t /*charge*/) {}
 
-  MemoryAllocator* memory_allocator() const { return memory_allocator_.get(); }
-
  private:
   // No copying allowed
   Cache(const Cache&);
   Cache& operator=(const Cache&);
-
-  std::shared_ptr<MemoryAllocator> memory_allocator_;
 };
 
 }  // namespace rocksdb
diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h
index 7e5bc6a11..7efe8ab2c 100644
--- a/include/rocksdb/table.h
+++ b/include/rocksdb/table.h
@@ -24,6 +24,7 @@
 #include "rocksdb/cache.h"
 #include "rocksdb/env.h"
 #include "rocksdb/iterator.h"
+#include "rocksdb/memory_allocator.h"
 #include "rocksdb/options.h"
 #include "rocksdb/status.h"
 
@@ -255,6 +256,10 @@ struct BlockBasedTableOptions {
 
   // Align data blocks on lesser of page size and block size
   bool block_align = false;
+
+  // If non-nullptr, will use this allocator instead of malloc/free when
+  // allocating memory for blocks.
+  std::shared_ptr<MemoryAllocator> memory_allocator;
 };
 
 // Table Properties that are specific to block-based table properties.
diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc
index cad1af3d7..4fa47d07b 100644
--- a/options/options_settable_test.cc
+++ b/options/options_settable_test.cc
@@ -97,6 +97,8 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
        sizeof(std::shared_ptr<Cache>)},
       {offsetof(struct BlockBasedTableOptions, filter_policy),
        sizeof(std::shared_ptr<const FilterPolicy>)},
+      {offsetof(struct BlockBasedTableOptions, memory_allocator),
+       sizeof(std::shared_ptr<MemoryAllocator>)},
   };
 
   // In this test, we catch a new option of BlockBasedTableOptions that is not
diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc
index 834bb2181..fb1f617d1 100644
--- a/table/block_based_table_builder.cc
+++ b/table/block_based_table_builder.cc
@@ -673,7 +673,7 @@ Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents,
     size_t size = block_contents.size();
 
     auto ubuf =
-        AllocateBlock(size + 1, block_cache_compressed->memory_allocator());
+        AllocateBlock(size + 1, r->table_options.memory_allocator.get());
     memcpy(ubuf.get(), block_contents.data(), size);
     ubuf[size] = type;
diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc
index fbb7406a3..16532085c 100644
--- a/table/block_based_table_factory.cc
+++ b/table/block_based_table_factory.cc
@@ -383,6 +383,14 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const {
   snprintf(buffer, kBufferSize, "  block_align: %d\n",
            table_options_.block_align);
   ret.append(buffer);
+  snprintf(buffer, kBufferSize, "  memory_allocator: %p\n",
+           table_options_.memory_allocator.get());
+  ret.append(buffer);
+  if (table_options_.memory_allocator) {
+    snprintf(buffer, kBufferSize, "  memory_allocator_name: %s\n",
+             table_options_.memory_allocator->Name());
+    ret.append(buffer);
+  }
   return ret;
 }
 
diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h
index cde6f6535..8899bf441 100644
--- a/table/block_based_table_factory.h
+++ b/table/block_based_table_factory.h
@@ -99,6 +99,7 @@ static std::unordered_map<std::string, OptionTypeInfo>
         /* currently not supported
           std::shared_ptr<Cache> block_cache = nullptr;
           std::shared_ptr<Cache> block_cache_compressed = nullptr;
+          std::shared_ptr<MemoryAllocator> memory_allocator = nullptr;
          */
         {"flush_block_policy_factory",
          {offsetof(struct BlockBasedTableOptions, flush_block_policy_factory),
diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc
index e0b967d6c..9870cc908 100644
--- a/table/block_based_table_reader.cc
+++ b/table/block_based_table_reader.cc
@@ -97,9 +97,7 @@ Status ReadBlockFromFile(
 
 inline MemoryAllocator* GetMemoryAllocator(
     const BlockBasedTableOptions& table_options) {
-  return table_options.block_cache.get()
-             ? table_options.block_cache->memory_allocator()
-             : nullptr;
+  return table_options.memory_allocator.get();
 }
 
 // Delete the resource that is held by the iterator.
diff --git a/table/table_test.cc b/table/table_test.cc
index d5d70ef33..9007ca7c9 100644
--- a/table/table_test.cc
+++ b/table/table_test.cc
@@ -2516,8 +2516,8 @@ TEST_P(BlockBasedTableTest, MemoryAllocator) {
   opt.compression = kNoCompression;
   BlockBasedTableOptions table_options;
   table_options.block_size = 1024;
+  table_options.memory_allocator = custom_memory_allocator;
   LRUCacheOptions lruOptions;
-  lruOptions.memory_allocator = custom_memory_allocator;
   lruOptions.capacity = 16 * 1024 * 1024;
   lruOptions.num_shard_bits = 4;
   table_options.block_cache = NewLRUCache(std::move(lruOptions));
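
The wrapper mentioned in the summary could look roughly like the sketch
below: one object exposed through both the `Cache` and `MemoryAllocator`
interfaces. This is illustrative only and not part of the patch;
CoupledCacheAllocator is a hypothetical name, the allocator half stands in
for an external library's allocator with plain malloc/free, and the cache
half simply forwards to an internal RocksDB LRUCache where a real wrapper
would call into the library.

#include <cstdlib>
#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/memory_allocator.h"

// Hypothetical wrapper: one object, two RocksDB interfaces.
class CoupledCacheAllocator : public rocksdb::Cache,
                              public rocksdb::MemoryAllocator {
 public:
  explicit CoupledCacheAllocator(size_t capacity)
      : cache_(rocksdb::NewLRUCache(capacity)) {}

  // A single override satisfies the identical pure virtual Name()
  // declared by both base classes.
  const char* Name() const override { return "CoupledCacheAllocator"; }

  // MemoryAllocator half: the external library's allocator would go here.
  void* Allocate(size_t size) override { return std::malloc(size); }
  void Deallocate(void* p) override { std::free(p); }

  // Cache half: forward every call to the wrapped cache.
  rocksdb::Status Insert(const rocksdb::Slice& key, void* value, size_t charge,
                         void (*deleter)(const rocksdb::Slice&, void*),
                         Handle** handle, Priority priority) override {
    return cache_->Insert(key, value, charge, deleter, handle, priority);
  }
  Handle* Lookup(const rocksdb::Slice& key,
                 rocksdb::Statistics* stats) override {
    return cache_->Lookup(key, stats);
  }
  bool Ref(Handle* handle) override { return cache_->Ref(handle); }
  bool Release(Handle* handle, bool force_erase) override {
    return cache_->Release(handle, force_erase);
  }
  void* Value(Handle* handle) override { return cache_->Value(handle); }
  void Erase(const rocksdb::Slice& key) override { cache_->Erase(key); }
  uint64_t NewId() override { return cache_->NewId(); }
  void SetCapacity(size_t capacity) override { cache_->SetCapacity(capacity); }
  void SetStrictCapacityLimit(bool strict) override {
    cache_->SetStrictCapacityLimit(strict);
  }
  bool HasStrictCapacityLimit() const override {
    return cache_->HasStrictCapacityLimit();
  }
  size_t GetCapacity() const override { return cache_->GetCapacity(); }
  size_t GetUsage() const override { return cache_->GetUsage(); }
  size_t GetUsage(Handle* handle) const override {
    return cache_->GetUsage(handle);
  }
  size_t GetPinnedUsage() const override { return cache_->GetPinnedUsage(); }
  void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                              bool thread_safe) override {
    cache_->ApplyToAllCacheEntries(callback, thread_safe);
  }
  void EraseUnRefEntries() override { cache_->EraseUnRefEntries(); }

 private:
  std::shared_ptr<rocksdb::Cache> cache_;
};

Because the two interfaces are now independent, such an object could be
handed to BlockBasedTableOptions twice: once as the block_cache and once as
the memory_allocator.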
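
And a minimal sketch of the new option's usage, assuming the
`MemoryAllocator` interface introduced in 5.18 (Name/Allocate/Deallocate);
MallocAllocator and the DB path are hypothetical, not code from the patch:

#include <cstdlib>
#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/memory_allocator.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

// Hypothetical allocator that forwards to malloc/free; a real one might
// hand out memory from a jemalloc arena or a preallocated pool.
class MallocAllocator : public rocksdb::MemoryAllocator {
 public:
  const char* Name() const override { return "MallocAllocator"; }
  void* Allocate(size_t size) override { return std::malloc(size); }
  void Deallocate(void* p) override { std::free(p); }
};

int main() {
  rocksdb::BlockBasedTableOptions table_options;
  // After this change the allocator is a table option; it no longer
  // rides along on the block cache.
  table_options.memory_allocator = std::make_shared<MallocAllocator>();
  table_options.block_cache = rocksdb::NewLRUCache(16 * 1024 * 1024);

  rocksdb::Options options;
  options.create_if_missing = true;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/allocator_demo", &db);
  if (s.ok()) {
    delete db;
  }
  return s.ok() ? 0 : 1;
}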