Option to cache index/filter blocks with priority

Summary:
Add option to block based table to insert index/filter blocks to block cache with priority. Combined with LRUCache with high_pri_pool_ratio, we can reserved space for index/filter blocks, make them less likely to be evicted.

Depends on D61977.

Test Plan: See unit test.

Reviewers: lightmark, IslamAbdelRahman, sdong

Reviewed By: sdong

Subscribers: andrewkr, dhruba, march, leveldb

Differential Revision: https://reviews.facebook.net/D62241
main
Yi Wu 9 years ago
parent 99c4af7160
commit 4a16c32ece
  1. 86
      db/db_block_cache_test.cc
  2. 7
      include/rocksdb/table.h
  3. 15
      table/block_based_table_reader.cc
  4. 61
      util/lru_cache.cc
  5. 17
      util/lru_cache.h
  6. 4
      util/options_helper.h
  7. 1
      util/options_settable_test.cc

@ -9,6 +9,7 @@
#include <cstdlib>
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "util/lru_cache.h"
namespace rocksdb {
@ -323,6 +324,91 @@ TEST_F(DBBlockCacheTest, IndexAndFilterBlocksStats) {
filter_bytes_insert);
}
namespace {
// A mock cache wraps LRUCache, and record how many entries have been
// inserted for each priority.
class MockCache : public LRUCache {
public:
static uint32_t high_pri_insert_count;
static uint32_t low_pri_insert_count;
MockCache() : LRUCache(1 << 25, 0, false, 0.0) {}
virtual Status Insert(const Slice& key, void* value, size_t charge,
void (*deleter)(const Slice& key, void* value),
Handle** handle, Priority priority) override {
if (priority == Priority::LOW) {
low_pri_insert_count++;
} else {
high_pri_insert_count++;
}
return LRUCache::Insert(key, value, charge, deleter, handle, priority);
}
};
uint32_t MockCache::high_pri_insert_count = 0;
uint32_t MockCache::low_pri_insert_count = 0;
} // anonymous namespace
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksCachePriority) {
for (auto priority : {Cache::Priority::LOW, Cache::Priority::HIGH}) {
Options options = CurrentOptions();
options.create_if_missing = true;
options.statistics = rocksdb::CreateDBStatistics();
BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true;
table_options.block_cache.reset(new MockCache());
table_options.filter_policy.reset(NewBloomFilterPolicy(20));
table_options.cache_index_and_filter_blocks_with_high_priority =
priority == Cache::Priority::HIGH ? true : false;
options.table_factory.reset(new BlockBasedTableFactory(table_options));
DestroyAndReopen(options);
MockCache::high_pri_insert_count = 0;
MockCache::low_pri_insert_count = 0;
// Create a new table.
ASSERT_OK(Put("foo", "value"));
ASSERT_OK(Put("bar", "value"));
ASSERT_OK(Flush());
ASSERT_EQ(1, NumTableFilesAtLevel(0));
// index/filter blocks added to block cache right after table creation.
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
ASSERT_EQ(2, /* only index/filter were added */
TestGetTickerCount(options, BLOCK_CACHE_ADD));
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
if (priority == Cache::Priority::LOW) {
ASSERT_EQ(0, MockCache::high_pri_insert_count);
ASSERT_EQ(2, MockCache::low_pri_insert_count);
} else {
ASSERT_EQ(2, MockCache::high_pri_insert_count);
ASSERT_EQ(0, MockCache::low_pri_insert_count);
}
// Access data block.
ASSERT_EQ("value", Get("foo"));
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
ASSERT_EQ(3, /*adding data block*/
TestGetTickerCount(options, BLOCK_CACHE_ADD));
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
// Data block should be inserted with low priority.
if (priority == Cache::Priority::LOW) {
ASSERT_EQ(0, MockCache::high_pri_insert_count);
ASSERT_EQ(3, MockCache::low_pri_insert_count);
} else {
ASSERT_EQ(2, MockCache::high_pri_insert_count);
ASSERT_EQ(1, MockCache::low_pri_insert_count);
}
}
}
TEST_F(DBBlockCacheTest, ParanoidFileChecks) {
Options options = CurrentOptions();
options.create_if_missing = true;

@ -20,6 +20,7 @@
#include <string>
#include <unordered_map>
#include "rocksdb/cache.h"
#include "rocksdb/env.h"
#include "rocksdb/immutable_options.h"
#include "rocksdb/iterator.h"
@ -65,6 +66,12 @@ struct BlockBasedTableOptions {
// block during table initialization.
bool cache_index_and_filter_blocks = false;
// If cache_index_and_filter_blocks is enabled, cache index and filter
// blocks with high priority. If set to true, depending on implementation of
// block cache, index and filter blocks may be less likely to be eviected
// than data blocks.
bool cache_index_and_filter_blocks_with_high_priority = false;
// if cache_index_and_filter_blocks is true and the below is true, then
// filter and index blocks are stored in the cache, but a reference is
// held in the "table reader" object so the blocks are pinned and only

@ -1029,8 +1029,11 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
filter = ReadFilter(rep_);
if (filter != nullptr) {
assert(filter->size() > 0);
Status s = block_cache->Insert(key, filter, filter->size(),
&DeleteCachedFilterEntry, &cache_handle);
Status s = block_cache->Insert(
key, filter, filter->size(), &DeleteCachedFilterEntry, &cache_handle,
rep_->table_options.cache_index_and_filter_blocks_with_high_priority
? Cache::Priority::HIGH
: Cache::Priority::LOW);
if (s.ok()) {
RecordTick(statistics, BLOCK_CACHE_ADD);
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, filter->size());
@ -1092,8 +1095,12 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
s = CreateIndexReader(&index_reader);
if (s.ok()) {
assert(index_reader != nullptr);
s = block_cache->Insert(key, index_reader, index_reader->usable_size(),
&DeleteCachedIndexEntry, &cache_handle);
s = block_cache->Insert(
key, index_reader, index_reader->usable_size(),
&DeleteCachedIndexEntry, &cache_handle,
rep_->table_options.cache_index_and_filter_blocks_with_high_priority
? Cache::Priority::HIGH
: Cache::Priority::LOW);
}
if (s.ok()) {

@ -405,48 +405,41 @@ size_t LRUCacheShard::GetPinnedUsage() const {
return usage_ - lru_usage_;
}
class LRUCache : public ShardedCache {
public:
LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
int num_shards = 1 << num_shard_bits;
shards_ = new LRUCacheShard[num_shards];
SetCapacity(capacity);
SetStrictCapacityLimit(strict_capacity_limit);
for (int i = 0; i < num_shards; i++) {
shards_[i].SetHighPriorityPoolRatio(high_pri_pool_ratio);
}
LRUCache::LRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit, double high_pri_pool_ratio)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
int num_shards = 1 << num_shard_bits;
shards_ = new LRUCacheShard[num_shards];
SetCapacity(capacity);
SetStrictCapacityLimit(strict_capacity_limit);
for (int i = 0; i < num_shards; i++) {
shards_[i].SetHighPriorityPoolRatio(high_pri_pool_ratio);
}
}
virtual ~LRUCache() { delete[] shards_; }
virtual const char* Name() const override { return "LRUCache"; }
virtual CacheShard* GetShard(int shard) override {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
LRUCache::~LRUCache() { delete[] shards_; }
virtual const CacheShard* GetShard(int shard) const override {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
CacheShard* LRUCache::GetShard(int shard) {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
virtual void* Value(Handle* handle) override {
return reinterpret_cast<const LRUHandle*>(handle)->value;
}
const CacheShard* LRUCache::GetShard(int shard) const {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
virtual size_t GetCharge(Handle* handle) const override {
return reinterpret_cast<const LRUHandle*>(handle)->charge;
}
void* LRUCache::Value(Handle* handle) {
return reinterpret_cast<const LRUHandle*>(handle)->value;
}
virtual uint32_t GetHash(Handle* handle) const override {
return reinterpret_cast<const LRUHandle*>(handle)->hash;
}
size_t LRUCache::GetCharge(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->charge;
}
virtual void DisownData() override { shards_ = nullptr; }
uint32_t LRUCache::GetHash(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->hash;
}
private:
LRUCacheShard* shards_;
};
void LRUCache::DisownData() { shards_ = nullptr; }
std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit,

@ -248,4 +248,21 @@ class LRUCacheShard : public CacheShard {
LRUHandleTable table_;
};
class LRUCache : public ShardedCache {
public:
LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio);
virtual ~LRUCache();
virtual const char* Name() const override { return "LRUCache"; }
virtual CacheShard* GetShard(int shard) override;
virtual const CacheShard* GetShard(int shard) const override;
virtual void* Value(Handle* handle) override;
virtual size_t GetCharge(Handle* handle) const override;
virtual uint32_t GetHash(Handle* handle) const override;
virtual void DisownData() override;
private:
LRUCacheShard* shards_;
};
} // namespace rocksdb

@ -511,6 +511,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
{offsetof(struct BlockBasedTableOptions,
cache_index_and_filter_blocks),
OptionType::kBoolean, OptionVerificationType::kNormal}},
{"cache_index_and_filter_blocks_with_high_priority",
{offsetof(struct BlockBasedTableOptions,
cache_index_and_filter_blocks_with_high_priority),
OptionType::kBoolean, OptionVerificationType::kNormal}},
{"pin_l0_filter_and_index_blocks_in_cache",
{offsetof(struct BlockBasedTableOptions,
pin_l0_filter_and_index_blocks_in_cache),

@ -149,6 +149,7 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
ASSERT_OK(GetBlockBasedTableOptionsFromString(
*bbto,
"cache_index_and_filter_blocks=1;"
"cache_index_and_filter_blocks_with_high_priority=true;"
"pin_l0_filter_and_index_blocks_in_cache=1;"
"index_type=kHashSearch;"
"checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"

Loading…
Cancel
Save