Object lifetime in cache

Summary:
Any object that is not a raw data block and depends on the table must be
    destructed before the table closes. There was a bug where this was not done
    for the filter object. This patch fixes the bug and adds a unit test to
    prevent such bugs in the future.
Closes https://github.com/facebook/rocksdb/pull/2246

Differential Revision: D5001318

Pulled By: maysamyabandeh

fbshipit-source-id: 6d8772e58765485868094b92964da82ef9730b6d
main
Maysam Yabandeh 8 years ago committed by Facebook Github Bot
parent fdaefa0309
commit 40af2381ec
  1. 4
      include/rocksdb/cache.h
  2. 6
      table/block_based_table_reader.cc
  3. 4
      table/block_based_table_reader.h
  4. 39
      table/partitioned_filter_block.cc
  5. 6
      table/partitioned_filter_block.h
  6. 133
      table/table_test.cc

@ -187,6 +187,10 @@ class Cache {
virtual std::string GetPrintableOptions() const { return ""; } virtual std::string GetPrintableOptions() const { return ""; }
// Mark the last inserted object as being a raw data block. This will be used
// in tests. The default implementation does nothing.
virtual void TEST_mark_as_data_block(const Slice& key, size_t charge) {}
private: private:
// No copying allowed // No copying allowed
Cache(const Cache&); Cache(const Cache&);

@ -913,6 +913,8 @@ Status BlockBasedTable::GetDataBlockFromCache(
s = block_cache->Insert( s = block_cache->Insert(
block_cache_key, block->value, block->value->usable_size(), block_cache_key, block->value, block->value->usable_size(),
&DeleteCachedEntry<Block>, &(block->cache_handle)); &DeleteCachedEntry<Block>, &(block->cache_handle));
block_cache->TEST_mark_as_data_block(block_cache_key,
block->value->usable_size());
if (s.ok()) { if (s.ok()) {
RecordTick(statistics, BLOCK_CACHE_ADD); RecordTick(statistics, BLOCK_CACHE_ADD);
if (is_index) { if (is_index) {
@ -994,6 +996,8 @@ Status BlockBasedTable::PutDataBlockToCache(
s = block_cache->Insert( s = block_cache->Insert(
block_cache_key, block->value, block->value->usable_size(), block_cache_key, block->value, block->value->usable_size(),
&DeleteCachedEntry<Block>, &(block->cache_handle), priority); &DeleteCachedEntry<Block>, &(block->cache_handle), priority);
block_cache->TEST_mark_as_data_block(block_cache_key,
block->value->usable_size());
if (s.ok()) { if (s.ok()) {
assert(block->cache_handle != nullptr); assert(block->cache_handle != nullptr);
RecordTick(statistics, BLOCK_CACHE_ADD); RecordTick(statistics, BLOCK_CACHE_ADD);
@ -2090,7 +2094,7 @@ void BlockBasedTable::Close() {
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
// Get the filter block key // Get the filter block key
auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
rep_->footer.metaindex_handle(), cache_key); rep_->filter_handle, cache_key);
rep_->table_options.block_cache.get()->Erase(key); rep_->table_options.block_cache.get()->Erase(key);
// Get the index block key // Get the index block key
key = GetCacheKeyFromOffset(rep_->cache_key_prefix, key = GetCacheKeyFromOffset(rep_->cache_key_prefix,

@ -376,9 +376,9 @@ struct BlockBasedTable::CachableEntry {
CachableEntry(TValue* _value, Cache::Handle* _cache_handle) CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
: value(_value), cache_handle(_cache_handle) {} : value(_value), cache_handle(_cache_handle) {}
CachableEntry() : CachableEntry(nullptr, nullptr) {} CachableEntry() : CachableEntry(nullptr, nullptr) {}
void Release(Cache* cache) { void Release(Cache* cache, bool force_erase = false) {
if (cache_handle) { if (cache_handle) {
cache->Release(cache_handle); cache->Release(cache_handle, force_erase);
value = nullptr; value = nullptr;
cache_handle = nullptr; cache_handle = nullptr;
} }

@ -89,9 +89,19 @@ PartitionedFilterBlockReader::PartitionedFilterBlockReader(
} }
PartitionedFilterBlockReader::~PartitionedFilterBlockReader() { PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
ReadLock rl(&mu_); {
for (auto it = handle_list_.begin(); it != handle_list_.end(); ++it) { ReadLock rl(&mu_);
table_->rep_->table_options.block_cache.get()->Release(*it); for (auto it = handle_list_.begin(); it != handle_list_.end(); ++it) {
table_->rep_->table_options.block_cache.get()->Release(*it);
}
}
char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length];
for (auto it = filter_block_set_.begin(); it != filter_block_set_.end();
++it) {
auto key = BlockBasedTable::GetCacheKey(table_->rep_->cache_key_prefix,
table_->rep_->cache_key_prefix_size,
*it, cache_key);
table_->rep_->table_options.block_cache.get()->Erase(key);
} }
} }
@ -106,8 +116,6 @@ bool PartitionedFilterBlockReader::KeyMayMatch(
if (UNLIKELY(idx_on_fltr_blk_->size() == 0)) { if (UNLIKELY(idx_on_fltr_blk_->size() == 0)) {
return true; return true;
} }
// This is the user key vs. the full key in the partition index. We assume
// that user key <= full key
auto filter_handle = GetFilterPartitionHandle(*const_ikey_ptr); auto filter_handle = GetFilterPartitionHandle(*const_ikey_ptr);
if (UNLIKELY(filter_handle.size() == 0)) { // key is out of range if (UNLIKELY(filter_handle.size() == 0)) { // key is out of range
return false; return false;
@ -198,15 +206,18 @@ PartitionedFilterBlockReader::GetFilterPartition(Slice* handle_value,
} }
auto filter = auto filter =
table_->GetFilter(fltr_blk_handle, is_a_filter_partition, no_io); table_->GetFilter(fltr_blk_handle, is_a_filter_partition, no_io);
if (pin_cached_filters && filter.IsSet()) { if (filter.IsSet()) {
WriteLock wl(&mu_); filter_block_set_.insert(fltr_blk_handle);
std::pair<uint64_t, FilterBlockReader*> pair(fltr_blk_handle.offset(), if (pin_cached_filters) {
filter.value); WriteLock wl(&mu_);
auto succ = filter_cache_.insert(pair).second; std::pair<uint64_t, FilterBlockReader*> pair(fltr_blk_handle.offset(),
if (succ) { filter.value);
handle_list_.push_back(filter.cache_handle); auto succ = filter_cache_.insert(pair).second;
} // Otherwise it is already inserted by a concurrent thread if (succ) {
*cached = true; handle_list_.push_back(filter.cache_handle);
} // Otherwise it is already inserted by a concurrent thread
*cached = true;
}
} }
return filter; return filter;
} else { } else {

@ -87,6 +87,12 @@ class PartitionedFilterBlockReader : public FilterBlockReader {
const BlockBasedTable* table_; const BlockBasedTable* table_;
std::unordered_map<uint64_t, FilterBlockReader*> filter_cache_; std::unordered_map<uint64_t, FilterBlockReader*> filter_cache_;
autovector<Cache::Handle*> handle_list_; autovector<Cache::Handle*> handle_list_;
// Ordering functor for the BlockHandle set below. Handles are ordered by
// their offset alone, so two handles with equal offsets are treated as the
// same element by the set.
struct BlockHandleCmp {
  bool operator()(const BlockHandle& lhs, const BlockHandle& rhs) const {
    const auto lhs_offset = lhs.offset();
    const auto rhs_offset = rhs.offset();
    return lhs_offset < rhs_offset;
  }
};
std::set<BlockHandle, BlockHandleCmp> filter_block_set_;
port::RWMutex mu_; port::RWMutex mu_;
}; };

@ -9,7 +9,6 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <inttypes.h>
#include <stdio.h> #include <stdio.h>
#include <algorithm> #include <algorithm>
@ -19,6 +18,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "cache/lru_cache.h"
#include "db/dbformat.h" #include "db/dbformat.h"
#include "db/memtable.h" #include "db/memtable.h"
#include "db/write_batch_internal.h" #include "db/write_batch_internal.h"
@ -2127,6 +2127,137 @@ TEST_F(BlockBasedTableTest, BlockReadCountTest) {
} }
} }
// A wrapper around LRUCache that also keeps track of data blocks (in contrast
// with the objects) in the cache. The class is very simple and can be used only
// for trivial tests.
// Test-only LRUCache that tracks how many bytes of the cache are occupied by
// entries the application has marked as raw data blocks.
//
// Every inserted entry has its deleter replaced by MockDeleter, which updates
// the accounting and then forwards to the deleter the caller supplied.
//
// Limitations: the bookkeeping is keyed by cache key only and lives in
// static, unsynchronized maps shared by all MockCache instances, so it is
// meant for simple single-threaded tests only.
class MockCache : public LRUCache {
 public:
  using DeleterFunc = void (*)(const Slice& key, void* value);

  MockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
            double high_pri_pool_ratio)
      : LRUCache(capacity, num_shard_bits, strict_capacity_limit,
                 high_pri_pool_ratio) {}

  // Remembers the caller's deleter for `key` and inserts the entry with
  // MockDeleter instead, so that erasures of data blocks can be observed.
  virtual Status Insert(const Slice& key, void* value, size_t charge,
                        void (*deleter)(const Slice& key, void* value),
                        Handle** handle = nullptr,
                        Priority priority = Priority::LOW) override {
    deleters_[key.ToString()] = deleter;
    return ShardedCache::Insert(key, value, charge, &MockDeleter, handle,
                                priority);
  }

  // This is called by the application right after inserting a data block.
  // Re-marking a key replaces its previously recorded charge instead of
  // adding to it, so the same key is never counted twice.
  virtual void TEST_mark_as_data_block(const Slice& key,
                                       size_t charge) override {
    // operator[] value-initializes the charge to 0 for a new key.
    size_t& recorded_charge = marked_data_in_cache_[key.ToString()];
    marked_size_ -= recorded_charge;
    recorded_charge = charge;
    marked_size_ += charge;
  }

  // Original deleter of each inserted key.
  static std::map<std::string, DeleterFunc> deleters_;
  // Charge of each key currently marked as a data block.
  static std::map<std::string, size_t> marked_data_in_cache_;
  // Sum of the charges in marked_data_in_cache_.
  static size_t marked_size_;

  // Deleter installed for every entry: updates the data block accounting and
  // then invokes the deleter originally passed to Insert().
  static void MockDeleter(const Slice& key, void* value) {
    const std::string key_str = key.ToString();
    // If the item was marked as a data block, remove its charge from the
    // total and forget the mark, so that a later unmarked entry reusing the
    // same key is not mistaken for a data block.
    auto marked = marked_data_in_cache_.find(key_str);
    if (marked != marked_data_in_cache_.end()) {
      marked_size_ -= marked->second;
      marked_data_in_cache_.erase(marked);
    }
    // Then call the original deleter. The entry in deleters_ is kept on
    // purpose: the key may have been re-inserted while this older value was
    // still referenced, and the newer value will need the deleter as well.
    auto original = deleters_.find(key_str);
    assert(original != deleters_.end());
    original->second(key, value);
  }
};

size_t MockCache::marked_size_ = 0;
std::map<std::string, MockCache::DeleterFunc> MockCache::deleters_;
std::map<std::string, size_t> MockCache::marked_data_in_cache_;
// Block cache can contain raw data blocks as well as general objects. If an
// object depends on the table to be live, it then must be destructed before the
// table is closed. This test makes sure that the only items remaining in the
// cache after the table is closed are raw data blocks.
// Builds a one-key table under every supported combination of index type,
// filter type, filter partitioning, index/filter caching and L0 pinning,
// reads the key so that index/filter objects get loaded, closes the table and
// then checks that the block cache holds nothing but marked data blocks.
TEST_F(BlockBasedTableTest, NoObjectInCacheAfterTableClose) {
  for (auto index_type :
       {BlockBasedTableOptions::IndexType::kBinarySearch,
        BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch}) {
    for (bool block_based_filter : {true, false}) {
      for (bool partition_filter : {true, false}) {
        // Partitioned filters are only exercised with full (not block-based)
        // filters on top of a two-level index.
        if (partition_filter &&
            (block_based_filter ||
             index_type !=
                 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch)) {
          continue;
        }
        for (bool index_and_filter_in_cache : {true, false}) {
          for (bool pin_l0 : {true, false}) {
            // Pinning is only exercised when index/filter blocks are cached.
            if (pin_l0 && !index_and_filter_in_cache) {
              continue;
            }
            // Create a table
            Options opt;
            unique_ptr<InternalKeyComparator> ikc;
            ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
            opt.compression = kNoCompression;
            BlockBasedTableOptions table_options;
            table_options.block_size = 1024;
            // Use the index type of this iteration; hard-coding one value
            // here would leave the other index type untested.
            table_options.index_type = index_type;
            table_options.pin_l0_filter_and_index_blocks_in_cache = pin_l0;
            table_options.partition_filters = partition_filter;
            table_options.cache_index_and_filter_blocks =
                index_and_filter_in_cache;
            // big enough so we don't ever lose cached values.
            table_options.block_cache = std::shared_ptr<rocksdb::Cache>(
                new MockCache(16 * 1024 * 1024, 4, false, 0.0));
            table_options.filter_policy.reset(
                rocksdb::NewBloomFilterPolicy(10, block_based_filter));
            opt.table_factory.reset(NewBlockBasedTableFactory(table_options));

            TableConstructor c(BytewiseComparator());
            std::string user_key = "k01";
            std::string key =
                InternalKey(user_key, 0, kTypeValue).Encode().ToString();
            c.Add(key, "hello");
            std::vector<std::string> keys;
            stl_wrappers::KVMap kvmap;
            const ImmutableCFOptions ioptions(opt);
            c.Finish(opt, ioptions, table_options, *ikc, &keys, &kvmap);

            // Doing a read to make index/filter loaded into the cache
            auto table_reader =
                dynamic_cast<BlockBasedTable*>(c.GetTableReader());
            PinnableSlice value;
            GetContext get_context(opt.comparator, nullptr, nullptr, nullptr,
                                   GetContext::kNotFound, user_key, &value,
                                   nullptr, nullptr, nullptr, nullptr);
            auto s = table_reader->Get(ReadOptions(), key, &get_context);
            ASSERT_TRUE(s.ok());
            ASSERT_EQ(get_context.State(), GetContext::kFound);
            // Compare by length-aware string: a slice is not guaranteed to be
            // NUL-terminated, so it must not be read as a C string.
            ASSERT_EQ(value.ToString(), "hello");

            // Close the table
            c.ResetTableReader();

            auto usage = table_options.block_cache->GetUsage();
            auto pinned_usage = table_options.block_cache->GetPinnedUsage();
            // The only usage must be for marked data blocks
            ASSERT_EQ(usage, MockCache::marked_size_);
            // There must be some pinned data since PinnableSlice has not
            // released them yet
            ASSERT_GT(pinned_usage, 0);
            // Release pinnable slice resources
            value.Reset();
            pinned_usage = table_options.block_cache->GetPinnedUsage();
            ASSERT_EQ(pinned_usage, 0);
          }
        }
      }
    }
  }
}
TEST_F(BlockBasedTableTest, BlockCacheLeak) { TEST_F(BlockBasedTableTest, BlockCacheLeak) {
// Check that when we reopen a table we don't lose access to blocks already // Check that when we reopen a table we don't lose access to blocks already
// in the cache. This test checks whether the Table actually makes use of the // in the cache. This test checks whether the Table actually makes use of the

Loading…
Cancel
Save