From 4a16c32ece0ab9872af3d4f6d0ec97d26a8e348f Mon Sep 17 00:00:00 2001 From: Yi Wu Date: Tue, 23 Aug 2016 13:44:13 -0700 Subject: [PATCH] Option to cache index/filter blocks with priority Summary: Add option to block based table to insert index/filter blocks to block cache with priority. Combined with LRUCache with high_pri_pool_ratio, we can reserve space for index/filter blocks, making them less likely to be evicted. Depends on D61977. Test Plan: See unit test. Reviewers: lightmark, IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, march, leveldb Differential Revision: https://reviews.facebook.net/D62241 --- db/db_block_cache_test.cc | 86 +++++++++++++++++++++++++++++++ include/rocksdb/table.h | 7 +++ table/block_based_table_reader.cc | 15 ++++-- util/lru_cache.cc | 61 ++++++++++------------ util/lru_cache.h | 17 ++++++ util/options_helper.h | 4 ++ util/options_settable_test.cc | 1 + 7 files changed, 153 insertions(+), 38 deletions(-) diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 96d5be980..f54764982 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -9,6 +9,7 @@ #include #include "db/db_test_util.h" #include "port/stack_trace.h" +#include "util/lru_cache.h" namespace rocksdb { @@ -323,6 +324,91 @@ TEST_F(DBBlockCacheTest, IndexAndFilterBlocksStats) { filter_bytes_insert); } +namespace { + +// A mock cache that wraps LRUCache and records how many entries have been +// inserted for each priority. 
+class MockCache : public LRUCache { + public: + static uint32_t high_pri_insert_count; + static uint32_t low_pri_insert_count; + + MockCache() : LRUCache(1 << 25, 0, false, 0.0) {} + + virtual Status Insert(const Slice& key, void* value, size_t charge, + void (*deleter)(const Slice& key, void* value), + Handle** handle, Priority priority) override { + if (priority == Priority::LOW) { + low_pri_insert_count++; + } else { + high_pri_insert_count++; + } + return LRUCache::Insert(key, value, charge, deleter, handle, priority); + } +}; + +uint32_t MockCache::high_pri_insert_count = 0; +uint32_t MockCache::low_pri_insert_count = 0; + +} // anonymous namespace + +TEST_F(DBBlockCacheTest, IndexAndFilterBlocksCachePriority) { + for (auto priority : {Cache::Priority::LOW, Cache::Priority::HIGH}) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = rocksdb::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.cache_index_and_filter_blocks = true; + table_options.block_cache.reset(new MockCache()); + table_options.filter_policy.reset(NewBloomFilterPolicy(20)); + table_options.cache_index_and_filter_blocks_with_high_priority = + priority == Cache::Priority::HIGH ? true : false; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + DestroyAndReopen(options); + + MockCache::high_pri_insert_count = 0; + MockCache::low_pri_insert_count = 0; + + // Create a new table. + ASSERT_OK(Put("foo", "value")); + ASSERT_OK(Put("bar", "value")); + ASSERT_OK(Flush()); + ASSERT_EQ(1, NumTableFilesAtLevel(0)); + + // index/filter blocks added to block cache right after table creation. 
+ ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(2, /* only index/filter were added */ + TestGetTickerCount(options, BLOCK_CACHE_ADD)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); + if (priority == Cache::Priority::LOW) { + ASSERT_EQ(0, MockCache::high_pri_insert_count); + ASSERT_EQ(2, MockCache::low_pri_insert_count); + } else { + ASSERT_EQ(2, MockCache::high_pri_insert_count); + ASSERT_EQ(0, MockCache::low_pri_insert_count); + } + + // Access data block. + ASSERT_EQ("value", Get("foo")); + + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(3, /*adding data block*/ + TestGetTickerCount(options, BLOCK_CACHE_ADD)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); + + // Data block should be inserted with low priority. + if (priority == Cache::Priority::LOW) { + ASSERT_EQ(0, MockCache::high_pri_insert_count); + ASSERT_EQ(3, MockCache::low_pri_insert_count); + } else { + ASSERT_EQ(2, MockCache::high_pri_insert_count); + ASSERT_EQ(1, MockCache::low_pri_insert_count); + } + } +} + TEST_F(DBBlockCacheTest, ParanoidFileChecks) { Options options = CurrentOptions(); options.create_if_missing = true; diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 333275316..215385756 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -20,6 +20,7 @@ #include #include +#include "rocksdb/cache.h" #include "rocksdb/env.h" #include "rocksdb/immutable_options.h" #include "rocksdb/iterator.h" @@ -65,6 +66,12 @@ struct BlockBasedTableOptions { // block during table initialization. bool cache_index_and_filter_blocks = false; + // If cache_index_and_filter_blocks is enabled, cache index and filter + // blocks with high priority. 
If set to true, depending on implementation of + // block cache, index and filter blocks may be less likely to be evicted + // than data blocks. + bool cache_index_and_filter_blocks_with_high_priority = false; + // if cache_index_and_filter_blocks is true and the below is true, then // filter and index blocks are stored in the cache, but a reference is // held in the "table reader" object so the blocks are pinned and only diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index b29da97d9..4f1e49b5c 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1029,8 +1029,11 @@ BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( filter = ReadFilter(rep_); if (filter != nullptr) { assert(filter->size() > 0); - Status s = block_cache->Insert(key, filter, filter->size(), - &DeleteCachedFilterEntry, &cache_handle); + Status s = block_cache->Insert( + key, filter, filter->size(), &DeleteCachedFilterEntry, &cache_handle, + rep_->table_options.cache_index_and_filter_blocks_with_high_priority + ? Cache::Priority::HIGH + : Cache::Priority::LOW); if (s.ok()) { RecordTick(statistics, BLOCK_CACHE_ADD); RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, filter->size()); @@ -1092,8 +1095,12 @@ InternalIterator* BlockBasedTable::NewIndexIterator( s = CreateIndexReader(&index_reader); if (s.ok()) { assert(index_reader != nullptr); - s = block_cache->Insert(key, index_reader, index_reader->usable_size(), - &DeleteCachedIndexEntry, &cache_handle); + s = block_cache->Insert( + key, index_reader, index_reader->usable_size(), + &DeleteCachedIndexEntry, &cache_handle, + rep_->table_options.cache_index_and_filter_blocks_with_high_priority + ? 
Cache::Priority::HIGH + : Cache::Priority::LOW); } if (s.ok()) { diff --git a/util/lru_cache.cc b/util/lru_cache.cc index d9e37cbe3..dd8fd5a9d 100644 --- a/util/lru_cache.cc +++ b/util/lru_cache.cc @@ -405,48 +405,41 @@ size_t LRUCacheShard::GetPinnedUsage() const { return usage_ - lru_usage_; } -class LRUCache : public ShardedCache { - public: - LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, - double high_pri_pool_ratio) - : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) { - int num_shards = 1 << num_shard_bits; - shards_ = new LRUCacheShard[num_shards]; - SetCapacity(capacity); - SetStrictCapacityLimit(strict_capacity_limit); - for (int i = 0; i < num_shards; i++) { - shards_[i].SetHighPriorityPoolRatio(high_pri_pool_ratio); - } +LRUCache::LRUCache(size_t capacity, int num_shard_bits, + bool strict_capacity_limit, double high_pri_pool_ratio) + : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) { + int num_shards = 1 << num_shard_bits; + shards_ = new LRUCacheShard[num_shards]; + SetCapacity(capacity); + SetStrictCapacityLimit(strict_capacity_limit); + for (int i = 0; i < num_shards; i++) { + shards_[i].SetHighPriorityPoolRatio(high_pri_pool_ratio); } +} - virtual ~LRUCache() { delete[] shards_; } - - virtual const char* Name() const override { return "LRUCache"; } - virtual CacheShard* GetShard(int shard) override { - return reinterpret_cast(&shards_[shard]); - } +LRUCache::~LRUCache() { delete[] shards_; } - virtual const CacheShard* GetShard(int shard) const override { - return reinterpret_cast(&shards_[shard]); - } +CacheShard* LRUCache::GetShard(int shard) { + return reinterpret_cast(&shards_[shard]); +} - virtual void* Value(Handle* handle) override { - return reinterpret_cast(handle)->value; - } +const CacheShard* LRUCache::GetShard(int shard) const { + return reinterpret_cast(&shards_[shard]); +} - virtual size_t GetCharge(Handle* handle) const override { - return reinterpret_cast(handle)->charge; - } 
+void* LRUCache::Value(Handle* handle) { + return reinterpret_cast(handle)->value; +} - virtual uint32_t GetHash(Handle* handle) const override { - return reinterpret_cast(handle)->hash; - } +size_t LRUCache::GetCharge(Handle* handle) const { + return reinterpret_cast(handle)->charge; +} - virtual void DisownData() override { shards_ = nullptr; } +uint32_t LRUCache::GetHash(Handle* handle) const { + return reinterpret_cast(handle)->hash; +} - private: - LRUCacheShard* shards_; -}; +void LRUCache::DisownData() { shards_ = nullptr; } std::shared_ptr NewLRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, diff --git a/util/lru_cache.h b/util/lru_cache.h index 5a2415236..807af9cbb 100644 --- a/util/lru_cache.h +++ b/util/lru_cache.h @@ -248,4 +248,21 @@ class LRUCacheShard : public CacheShard { LRUHandleTable table_; }; +class LRUCache : public ShardedCache { + public: + LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, + double high_pri_pool_ratio); + virtual ~LRUCache(); + virtual const char* Name() const override { return "LRUCache"; } + virtual CacheShard* GetShard(int shard) override; + virtual const CacheShard* GetShard(int shard) const override; + virtual void* Value(Handle* handle) override; + virtual size_t GetCharge(Handle* handle) const override; + virtual uint32_t GetHash(Handle* handle) const override; + virtual void DisownData() override; + + private: + LRUCacheShard* shards_; +}; + } // namespace rocksdb diff --git a/util/options_helper.h b/util/options_helper.h index 67979ab25..221f6ee6d 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -511,6 +511,10 @@ static std::unordered_map {offsetof(struct BlockBasedTableOptions, cache_index_and_filter_blocks), OptionType::kBoolean, OptionVerificationType::kNormal}}, + {"cache_index_and_filter_blocks_with_high_priority", + {offsetof(struct BlockBasedTableOptions, + cache_index_and_filter_blocks_with_high_priority), + OptionType::kBoolean, 
OptionVerificationType::kNormal}}, {"pin_l0_filter_and_index_blocks_in_cache", {offsetof(struct BlockBasedTableOptions, pin_l0_filter_and_index_blocks_in_cache), diff --git a/util/options_settable_test.cc b/util/options_settable_test.cc index 2d5e238c0..320bd6657 100644 --- a/util/options_settable_test.cc +++ b/util/options_settable_test.cc @@ -149,6 +149,7 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) { ASSERT_OK(GetBlockBasedTableOptionsFromString( *bbto, "cache_index_and_filter_blocks=1;" + "cache_index_and_filter_blocks_with_high_priority=true;" "pin_l0_filter_and_index_blocks_in_cache=1;" "index_type=kHashSearch;" "checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"