Towards a production-quality ClockCache (#10418)

Summary:
In this PR we bring ClockCache closer to production quality. We implement the following changes:
1. Fixed a few bugs in ClockCache.
2. ClockCache now fully supports ``strict_capacity_limit == false``: When an insertion over capacity is commanded, we allocate a handle separately from the hash table.
3. ClockCache now runs on almost every test in cache_test. The only exceptions are a test that requires the LRU policy and a test that dynamically increases the table capacity.
4. ClockCache now supports dynamically decreasing capacity via SetCapacity. (This is easy: we shrink the capacity upper bound and run the clock algorithm.)
5. Old FastLRUCache tests in lru_cache_test.cc are now also used on ClockCache.

As a byproduct of 1. and 2. we are able to turn on ClockCache in the stress tests.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10418

Test Plan:
- ``make -j24 USE_CLANG=1 COMPILE_WITH_ASAN=1 COMPILE_WITH_UBSAN=1 check``
- ``make -j24 USE_CLANG=1 COMPILE_WITH_TSAN=1 check``
- ``make -j24 USE_CLANG=1 COMPILE_WITH_ASAN=1 COMPILE_WITH_UBSAN=1 CRASH_TEST_EXT_ARGS="--duration=960 --cache_type=clock_cache" blackbox_crash_test_with_atomic_flush``
- ``make -j24 USE_CLANG=1 COMPILE_WITH_TSAN=1 CRASH_TEST_EXT_ARGS="--duration=960 --cache_type=clock_cache" blackbox_crash_test_with_atomic_flush``

Reviewed By: pdillinger

Differential Revision: D38170673

Pulled By: guidotag

fbshipit-source-id: 508987b9dc9d9d68f1a03eefac769820b680340a
main
Guido Tagliavini Ponce 2 years ago committed by Facebook GitHub Bot
parent 8db8b98f98
commit 9d7de6517c
  1. 151
      cache/cache_test.cc
  2. 112
      cache/clock_cache.cc
  3. 76
      cache/clock_cache.h
  4. 4
      cache/fast_lru_cache.cc
  5. 196
      cache/lru_cache_test.cc
  6. 4
      tools/db_crashtest.py

151
cache/cache_test.cc vendored

@ -23,6 +23,11 @@
#include "util/coding.h"
#include "util/string_util.h"
// FastLRUCache and ClockCache only support 16-byte keys, so some of
// the tests originally written for LRUCache do not work on the other caches.
// Those tests were adapted to use 16-byte keys. We kept the original ones.
// TODO: Remove the original tests if they ever become unused.
namespace ROCKSDB_NAMESPACE {
namespace {
@ -58,17 +63,22 @@ int DecodeValue(void* v) {
return static_cast<int>(reinterpret_cast<uintptr_t>(v));
}
const std::string kLRU = "lru";
const std::string kClock = "clock";
const std::string kFast = "fast";
void dumbDeleter(const Slice& /*key*/, void* /*value*/) {}
void DumbDeleter(const Slice& /*key*/, void* /*value*/) {}
void eraseDeleter(const Slice& /*key*/, void* value) {
void EraseDeleter1(const Slice& /*key*/, void* value) {
Cache* cache = reinterpret_cast<Cache*>(value);
cache->Erase("foo");
}
// Deleter that re-enters the cache while an entry is being destroyed.
// `value` must be the Cache* that owns the entry (the EraseFromDeleter test
// passes cache.get()); the deleter erases the entry stored under the 16-byte
// encoding of 1234 from that same cache. The key argument is unused.
void EraseDeleter2(const Slice& /*key*/, void* value) {
  auto* owning_cache = static_cast<Cache*>(value);
  owning_cache->Erase(EncodeKey16Bytes(1234));
}
const std::string kLRU = "lru";
const std::string kClock = "clock";
const std::string kFast = "fast";
} // anonymous namespace
class CacheTest : public testing::TestWithParam<std::string> {
@ -223,13 +233,9 @@ class LRUCacheTest : public CacheTest {};
TEST_P(CacheTest, UsageTest) {
auto type = GetParam();
if (type == kFast || type == kClock) {
ROCKSDB_GTEST_BYPASS("FastLRUCache and ClockCache require 16-byte keys.");
return;
}
// cache is std::shared_ptr and will be automatically cleaned up.
const uint64_t kCapacity = 100000;
const size_t kCapacity = 100000;
auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata);
ASSERT_EQ(0, cache->GetUsage());
@ -239,12 +245,17 @@ TEST_P(CacheTest, UsageTest) {
char value[10] = "abcdef";
// make sure everything will be cached
for (int i = 1; i < 100; ++i) {
std::string key(i, 'a');
std::string key;
if (type == kLRU) {
key = std::string(i, 'a');
} else {
key = EncodeKey(i);
}
auto kv_size = key.size() + 5;
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
dumbDeleter));
DumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
kv_size, dumbDeleter));
kv_size, DumbDeleter));
usage += kv_size;
ASSERT_EQ(usage, cache->GetUsage());
ASSERT_LT(usage, precise_cache->GetUsage());
@ -256,12 +267,17 @@ TEST_P(CacheTest, UsageTest) {
ASSERT_EQ(0, precise_cache->GetUsage());
// make sure the cache will be overloaded
for (uint64_t i = 1; i < kCapacity; ++i) {
auto key = std::to_string(i);
for (size_t i = 1; i < kCapacity; ++i) {
std::string key;
if (type == kLRU) {
key = std::to_string(i);
} else {
key = EncodeKey(static_cast<int>(1000 + i));
}
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter));
DumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
key.size() + 5, dumbDeleter));
key.size() + 5, DumbDeleter));
}
// the usage should be close to the capacity
@ -271,15 +287,18 @@ TEST_P(CacheTest, UsageTest) {
ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage());
}
// TODO: This test takes longer than expected on ClockCache. This is
// because the value size estimate given at construction is too sloppy.
// Fix this.
// Why is it so slow? The cache is constructed with an estimate of 1, but
// then the charge is claimed to be 21. This will cause the hash table
// to be extremely sparse, which in turn means clock needs to scan too
// many slots to find victims.
TEST_P(CacheTest, PinnedUsageTest) {
auto type = GetParam();
if (type == kFast || type == kClock) {
ROCKSDB_GTEST_BYPASS("FastLRUCache and ClockCache require 16-byte keys.");
return;
}
// cache is std::shared_ptr and will be automatically cleaned up.
const uint64_t kCapacity = 200000;
const size_t kCapacity = 200000;
auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata);
@ -292,15 +311,20 @@ TEST_P(CacheTest, PinnedUsageTest) {
// Add entries. Unpin some of them after insertion. Then, pin some of them
// again. Check GetPinnedUsage().
for (int i = 1; i < 100; ++i) {
std::string key(i, 'a');
std::string key;
if (type == kLRU) {
key = std::string(i, 'a');
} else {
key = EncodeKey(i);
}
auto kv_size = key.size() + 5;
Cache::Handle* handle;
Cache::Handle* handle_in_precise_cache;
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
dumbDeleter, &handle));
DumbDeleter, &handle));
assert(handle);
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
kv_size, dumbDeleter,
kv_size, DumbDeleter,
&handle_in_precise_cache));
assert(handle_in_precise_cache);
pinned_usage += kv_size;
@ -334,12 +358,17 @@ TEST_P(CacheTest, PinnedUsageTest) {
ASSERT_LT(pinned_usage, precise_cache_pinned_usage);
// check that overloading the cache does not change the pinned usage
for (uint64_t i = 1; i < 2 * kCapacity; ++i) {
auto key = std::to_string(i);
for (size_t i = 1; i < 2 * kCapacity; ++i) {
std::string key;
if (type == kLRU) {
key = std::to_string(i);
} else {
key = EncodeKey(static_cast<int>(1000 + i));
}
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter));
DumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
key.size() + 5, dumbDeleter));
key.size() + 5, DumbDeleter));
}
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage());
@ -447,7 +476,7 @@ TEST_P(CacheTest, EvictionPolicy) {
Insert(200, 201);
// Frequently used entry must be kept around
for (int i = 0; i < kCacheSize * 2; i++) {
for (int i = 0; i < 2 * kCacheSize; i++) {
Insert(1000+i, 2000+i);
ASSERT_EQ(101, Lookup(100));
}
@ -500,9 +529,7 @@ TEST_P(CacheTest, EvictionPolicyRef) {
Insert(303, 104);
// Insert entries much more than cache capacity.
double load_factor =
std::min(fast_lru_cache::kLoadFactor, clock_cache::kLoadFactor);
for (int i = 0; i < 2 * static_cast<int>(kCacheSize / load_factor); i++) {
for (int i = 0; i < 100 * kCacheSize; i++) {
Insert(1000 + i, 2000 + i);
}
@ -533,31 +560,40 @@ TEST_P(CacheTest, EvictionPolicyRef) {
TEST_P(CacheTest, EvictEmptyCache) {
auto type = GetParam();
if (type == kFast || type == kClock) {
ROCKSDB_GTEST_BYPASS("FastLRUCache and ClockCache require 16-byte keys.");
return;
}
// Insert an item larger than the capacity to trigger eviction on an empty cache.
auto cache = NewCache(1, 0, false);
ASSERT_OK(cache->Insert("foo", nullptr, 10, dumbDeleter));
if (type == kLRU) {
ASSERT_OK(cache->Insert("foo", nullptr, 10, DumbDeleter));
} else {
ASSERT_OK(cache->Insert(EncodeKey(1000), nullptr, 10, DumbDeleter));
}
}
TEST_P(CacheTest, EraseFromDeleter) {
auto type = GetParam();
if (type == kFast || type == kClock) {
ROCKSDB_GTEST_BYPASS("FastLRUCache and ClockCache require 16-byte keys.");
return;
}
// Have deleter which will erase item from cache, which will re-enter
// the cache at that point.
std::shared_ptr<Cache> cache = NewCache(10, 0, false);
ASSERT_OK(cache->Insert("foo", nullptr, 1, dumbDeleter));
ASSERT_OK(cache->Insert("bar", cache.get(), 1, eraseDeleter));
cache->Erase("bar");
ASSERT_EQ(nullptr, cache->Lookup("foo"));
ASSERT_EQ(nullptr, cache->Lookup("bar"));
std::string foo, bar;
Cache::DeleterFn erase_deleter;
if (type == kLRU) {
foo = "foo";
bar = "bar";
erase_deleter = EraseDeleter1;
} else {
foo = EncodeKey(1234);
bar = EncodeKey(5678);
erase_deleter = EraseDeleter2;
}
ASSERT_OK(cache->Insert(foo, nullptr, 1, DumbDeleter));
ASSERT_OK(cache->Insert(bar, cache.get(), 1, erase_deleter));
cache->Erase(bar);
ASSERT_EQ(nullptr, cache->Lookup(foo));
ASSERT_EQ(nullptr, cache->Lookup(bar));
}
TEST_P(CacheTest, ErasedHandleState) {
@ -590,9 +626,9 @@ TEST_P(CacheTest, HeavyEntries) {
const int kHeavy = 10;
int added = 0;
int index = 0;
while (added < 2*kCacheSize) {
while (added < 2 * kCacheSize) {
const int weight = (index & 1) ? kLight : kHeavy;
Insert(index, 1000+index, weight);
Insert(index, 1000 + index, weight);
added += weight;
index++;
}
@ -603,7 +639,7 @@ TEST_P(CacheTest, HeavyEntries) {
int r = Lookup(i);
if (r >= 0) {
cached_weight += weight;
ASSERT_EQ(1000+i, r);
ASSERT_EQ(1000 + i, r);
}
}
ASSERT_LE(cached_weight, kCacheSize + kCacheSize/10);
@ -615,7 +651,6 @@ TEST_P(CacheTest, NewId) {
ASSERT_NE(a, b);
}
class Value {
public:
explicit Value(int v) : v_(v) {}
@ -664,7 +699,8 @@ TEST_P(CacheTest, SetCapacity) {
auto type = GetParam();
if (type == kFast || type == kClock) {
ROCKSDB_GTEST_BYPASS(
"FastLRUCache and ClockCache don't support capacity adjustments.");
"FastLRUCache and ClockCache don't support arbitrary capacity "
"adjustments.");
return;
}
// test1: increase capacity
@ -716,9 +752,9 @@ TEST_P(CacheTest, SetCapacity) {
TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
auto type = GetParam();
if (type == kFast || type == kClock) {
if (type == kFast) {
ROCKSDB_GTEST_BYPASS(
"FastLRUCache and ClockCache don't support an unbounded number of "
"FastLRUCache only supports a limited number of "
"inserts beyond "
"capacity.");
return;
@ -775,9 +811,8 @@ TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
TEST_P(CacheTest, OverCapacity) {
auto type = GetParam();
if (type == kFast || type == kClock) {
ROCKSDB_GTEST_BYPASS(
"FastLRUCache and ClockCache don't support capacity adjustments.");
if (type == kClock) {
ROCKSDB_GTEST_BYPASS("Requires LRU eviction policy.");
return;
}
size_t n = 10;

112
cache/clock_cache.cc vendored

@ -69,10 +69,10 @@ ClockHandle* ClockHandleTable::Lookup(const Slice& key, uint32_t hash) {
// updates where it would be possible to combine into one CAS (more metadata
// under one atomic field) or maybe two atomic updates (one arithmetic, one
// bitwise). Something to think about optimizing.
e->InternalToExternalRef();
e->SetHit();
// The handle is now referenced, so we take it out of clock.
ClockOff(e);
e->InternalToExternalRef();
}
return e;
@ -312,17 +312,20 @@ void ClockHandleTable::ClockRun(size_t charge) {
// hot element, it will be hard to get an exclusive ref.
// Do we need a mechanism to prevent an element from sitting
// for a long time in cache waiting to be evicted?
assert(charge <= capacity_);
autovector<ClockHandle> deleted;
uint32_t max_iterations =
1 + static_cast<uint32_t>(GetTableSize() * kLoadFactor);
ClockHandle::ClockPriority::HIGH *
(1 +
static_cast<uint32_t>(
GetTableSize() *
kLoadFactor)); // It may take up to HIGH passes to evict an element.
size_t usage_local = usage_;
while (usage_local + charge > capacity_ && max_iterations--) {
size_t capacity_local = capacity_;
while (usage_local + charge > capacity_local && max_iterations--) {
uint32_t steps = 1 + static_cast<uint32_t>(1 / kLoadFactor);
uint32_t clock_pointer_local = (clock_pointer_ += steps) - steps;
for (uint32_t i = 0; i < steps; i++) {
ClockHandle* h = &array_[ModTableSize(clock_pointer_local + i)];
if (h->TryExclusiveRef()) {
if (h->WillBeDeleted()) {
Remove(h, &deleted);
@ -335,7 +338,6 @@ void ClockHandleTable::ClockRun(size_t charge) {
// exclusive ref, we know we are in the latter case. This can only
// happen when the last external reference to an element was
// released, and the element was not immediately removed.
ClockOn(h);
}
ClockHandle::ClockPriority priority = h->GetClockPriority();
@ -358,6 +360,7 @@ ClockCacheShard::ClockCacheShard(
size_t capacity, size_t estimated_value_size, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy)
: strict_capacity_limit_(strict_capacity_limit),
detached_usage_(0),
table_(capacity, CalcHashBits(capacity, estimated_value_size,
metadata_charge_policy)) {
set_metadata_charge_policy(metadata_charge_policy);
@ -430,12 +433,16 @@ int ClockCacheShard::CalcHashBits(
return FloorLog2((num_entries << 1) - 1);
}
void ClockCacheShard::SetCapacity(size_t /*capacity*/) {
assert(false); // Not supported.
// Sets a new capacity for this shard. Only decreasing the capacity is
// supported: a request above the table's current capacity hits the assert
// below. Note that the assert is compiled out when NDEBUG is defined, in
// which case the larger value is still stored.
void ClockCacheShard::SetCapacity(size_t capacity) {
if (capacity > table_.GetCapacity()) {
assert(false); // Not supported.
}
// Shrink the upper bound first, then run clock eviction so that the table's
// usage plus the charge of detached handles fits under the new capacity.
// NOTE(review): ClockRun bounds its number of iterations, so the target may
// not be reached if too few elements are evictable.
table_.SetCapacity(capacity);
table_.ClockRun(detached_usage_);
}
void ClockCacheShard::SetStrictCapacityLimit(bool /*strict_capacity_limit*/) {
assert(false); // Not supported.
// Updates the flag that controls whether an insertion is rejected when the
// cache is at full capacity. The flag is read by Insert.
void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
strict_capacity_limit_ = strict_capacity_limit;
}
Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
@ -459,27 +466,32 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
Status s = Status::OK();
// Use a local copy to minimize cache synchronization.
size_t detached_usage = detached_usage_;
// Free space with the clock policy until enough space is freed or there are
// no evictable elements.
table_.ClockRun(tmp.total_charge);
table_.ClockRun(tmp.total_charge + detached_usage);
// occupancy_ and usage_ are contended members across concurrent updates
// on the same shard, so we use a single copy to reduce cache synchronization.
// Use local copies to minimize cache synchronization
// (occupancy_ and usage_ are read and written by all insertions).
uint32_t occupancy_local = table_.GetOccupancy();
size_t usage_local = table_.GetUsage();
assert(occupancy_local <= table_.GetOccupancyLimit());
autovector<ClockHandle> deleted;
if ((usage_local + tmp.total_charge > table_.GetCapacity() &&
(strict_capacity_limit_ || handle == nullptr)) ||
occupancy_local > table_.GetOccupancyLimit()) {
size_t total_usage = table_.GetUsage() + detached_usage;
// TODO: Currently we support strict_capacity_limit == false as long as the
// number of pinned elements is below table_.GetOccupancyLimit(). We can
// always support it as follows: whenever we exceed this limit, we dynamically
// allocate a handle and return it (when the user provides a handle pointer,
// of course). Then, Release checks whether the handle was dynamically
// allocated, or is stored in the table.
if (total_usage + tmp.total_charge > table_.GetCapacity() &&
(strict_capacity_limit_ || handle == nullptr)) {
if (handle == nullptr) {
// Don't insert the entry but still return ok, as if the entry inserted
// into cache and get evicted immediately.
deleted.push_back(tmp);
tmp.FreeData();
} else {
if (occupancy_local > table_.GetOccupancyLimit()) {
if (occupancy_local + 1 > table_.GetOccupancyLimit()) {
// TODO: Consider using a distinct status for this case, but usually
// it will be handled the same way as reaching charge capacity limit
s = Status::MemoryLimit(
@ -491,22 +503,33 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
}
}
} else {
// Insert into the cache. Note that the cache might get larger than its
// capacity if not enough space was freed up.
ClockHandle* h = table_.Insert(&tmp, &deleted, handle != nullptr);
assert(h != nullptr); // The occupancy is way below the table size, so this
// insertion should never fail.
ClockHandle* h;
if (occupancy_local + 1 > table_.GetOccupancyLimit()) {
// Even if the user wishes to overload the cache, we can't insert into
// the hash table. Instead, we dynamically allocate a new handle.
h = new ClockHandle();
*h = tmp;
h->SetDetached();
h->TryExternalRef();
detached_usage_ += h->total_charge;
// TODO: Return special status?
} else {
// Insert into the cache. Note that the cache might get larger than its
// capacity if not enough space was freed up.
autovector<ClockHandle> deleted;
h = table_.Insert(&tmp, &deleted, handle != nullptr);
assert(h != nullptr); // The occupancy is way below the table size, so
// this insertion should never fail.
if (deleted.size() > 0) {
s = Status::OkOverwritten();
}
table_.Free(&deleted);
}
if (handle != nullptr) {
*handle = reinterpret_cast<Cache::Handle*>(h);
}
if (deleted.size() > 0) {
s = Status::OkOverwritten();
}
}
table_.Free(&deleted);
return s;
}
@ -516,7 +539,7 @@ Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
bool ClockCacheShard::Ref(Cache::Handle* h) {
ClockHandle* e = reinterpret_cast<ClockHandle*>(h);
assert(e->HasExternalRefs());
assert(e->ExternalRefs() > 0);
return e->TryExternalRef();
}
@ -530,6 +553,20 @@ bool ClockCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
}
ClockHandle* h = reinterpret_cast<ClockHandle*>(handle);
if (UNLIKELY(h->IsDetached())) {
h->ReleaseExternalRef();
if (h->TryExclusiveRef()) {
// Only the last reference will succeed.
// Don't bother releasing the exclusive ref.
h->FreeData();
detached_usage_ -= h->total_charge;
delete h;
return true;
}
return false;
}
uint32_t refs = h->refs;
bool last_reference = ((refs & ClockHandle::EXTERNAL_REFS) == 1);
bool will_be_deleted = refs & ClockHandle::WILL_BE_DELETED;
@ -570,13 +607,14 @@ size_t ClockCacheShard::GetPinnedUsage() const {
table_.ConstApplyToEntriesRange(
[&clock_usage](ClockHandle* h) {
if (h->HasExternalRefs()) {
if (h->ExternalRefs() > 1) {
// We check > 1 because we are holding an external ref.
clock_usage += h->total_charge;
}
},
0, table_.GetTableSize(), true);
return clock_usage;
return clock_usage + detached_usage_;
}
ClockCache::ClockCache(size_t capacity, size_t estimated_value_size,

@ -9,6 +9,8 @@
#pragma once
#include <sys/types.h>
#include <array>
#include <atomic>
#include <cstdint>
@ -28,6 +30,9 @@ namespace ROCKSDB_NAMESPACE {
namespace clock_cache {
// Forward declaration of friend class.
class ClockCacheTest;
// An experimental alternative to LRUCache, using a lock-free, open-addressed
// hash table and clock eviction.
@ -63,10 +68,10 @@ namespace clock_cache {
// can't be immediately deleted. In these cases, the flag will be later read
// and acted upon by the eviction algorithm. Importantly, WILL_BE_DELETED is
// used not only to defer deletions, but also as a barrier for external
// references: once WILL_BE_DELETED is set, lookups (which are the means to
// acquire new external references) will ignore the handle. For this reason,
// when WILL_BE_DELETED is set, we say the handle is invisible (and
// otherwise, that it's visible).
// references: once WILL_BE_DELETED is set, lookups (which are the most
// common way to acquire new external references) will ignore the handle.
// For this reason, when WILL_BE_DELETED is set, we say the handle is
// invisible (and, otherwise, that it's visible).
//
//
// 3. HASHING AND COLLISION RESOLUTION
@ -192,10 +197,10 @@ struct ClockHandle {
size_t total_charge;
std::array<char, kCacheKeySize> key_data;
static constexpr uint8_t kIsElementOffset = 1;
static constexpr uint8_t kClockPriorityOffset = 2;
static constexpr uint8_t kIsHitOffset = 4;
static constexpr uint8_t kCachePriorityOffset = 5;
static constexpr uint8_t kIsElementOffset = 0;
static constexpr uint8_t kClockPriorityOffset = 1;
static constexpr uint8_t kIsHitOffset = 3;
static constexpr uint8_t kCachePriorityOffset = 4;
enum Flags : uint8_t {
// Whether the slot is in use by an element.
@ -252,9 +257,8 @@ struct ClockHandle {
// Whether a thread has an exclusive reference to the slot.
EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset, // Bit 30
// Whether the handle will be deleted soon. When this bit is set, new
// internal
// or external references to this handle stop being accepted.
// There is an exception: external references can be created from
// internal references to this handle stop being accepted.
// External references may still be granted---they can be created from
// existing external references, or converting from existing internal
// references.
WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset // Bit 31
@ -274,6 +278,9 @@ struct ClockHandle {
std::atomic<uint32_t> refs;
// True iff the handle is allocated separately from hash table.
bool detached;
ClockHandle()
: value(nullptr),
deleter(nullptr),
@ -281,7 +288,8 @@ struct ClockHandle {
total_charge(0),
flags(0),
displacements(0),
refs(0) {
refs(0),
detached(false) {
SetWillBeDeleted(false);
SetIsElement(false);
SetClockPriority(ClockPriority::NONE);
@ -300,6 +308,7 @@ struct ClockHandle {
value = other.value;
deleter = other.deleter;
key_data = other.key_data;
hash = other.hash;
total_charge = other.total_charge;
}
@ -350,13 +359,13 @@ struct ClockHandle {
// flags functions.
bool IsElement() const { return flags & IS_ELEMENT; }
bool IsElement() const { return flags & Flags::IS_ELEMENT; }
void SetIsElement(bool is_element) {
if (is_element) {
flags |= IS_ELEMENT;
flags |= Flags::IS_ELEMENT;
} else {
flags &= static_cast<uint8_t>(~IS_ELEMENT);
flags &= static_cast<uint8_t>(~Flags::IS_ELEMENT);
}
}
@ -400,6 +409,10 @@ struct ClockHandle {
flags |= new_priority;
}
// Returns true iff this handle was marked as detached via SetDetached().
bool IsDetached() { return detached; }
// Marks this handle as detached. There is no way to clear the mark.
void SetDetached() { detached = true; }
inline bool IsEmpty() const {
return !this->IsElement() && this->displacements == 0;
}
@ -424,7 +437,9 @@ struct ClockHandle {
}
}
bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; }
// Returns the number of external references currently recorded in refs:
// the EXTERNAL_REFS bits are masked out and shifted down by
// kExternalRefsOffset so the result is a plain count.
uint32_t ExternalRefs() const {
return (refs & EXTERNAL_REFS) >> kExternalRefsOffset;
}
// Tries to take an internal ref. Returns true iff it succeeds.
inline bool TryInternalRef() {
@ -437,7 +452,7 @@ struct ClockHandle {
// Tries to take an external ref. Returns true iff it succeeds.
inline bool TryExternalRef() {
if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
if (!((refs += kOneExternalRef) & EXCLUSIVE_REF)) {
return true;
}
refs -= kOneExternalRef;
@ -529,8 +544,8 @@ class ClockHandleTable {
// Makes h non-evictable.
void ClockOff(ClockHandle* h);
// Runs the clock eviction algorithm until there is enough space to
// insert an element with the given charge.
// Runs the clock eviction algorithm until usage_ + charge is at most
// capacity_.
void ClockRun(size_t charge);
// Remove h from the hash table. Requires an exclusive ref to h.
@ -548,8 +563,6 @@ class ClockHandleTable {
RemoveAll(key, hash, probe, deleted);
}
void Free(autovector<ClockHandle>* deleted);
// Tries to remove h from the hash table. If the attempt is successful,
// the function hands over an exclusive ref to h.
bool TryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
@ -558,6 +571,11 @@ class ClockHandleTable {
// success. Requires that the caller thread has no shared ref to h.
bool SpinTryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
// Call this function after an Insert, Remove, RemoveAll, TryRemove
// or SpinTryRemove. It frees the deleted values and updates the hash table
// metadata.
void Free(autovector<ClockHandle>* deleted);
template <typename T>
void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end,
bool apply_if_will_be_deleted) {
@ -579,12 +597,15 @@ class ClockHandleTable {
bool apply_if_will_be_deleted) const {
for (uint32_t i = index_begin; i < index_end; i++) {
ClockHandle* h = &array_[i];
if (h->TryExclusiveRef()) {
// We take an external ref because we are handing over control
// to a user-defined function, and because the handle will not be
// modified.
if (h->TryExternalRef()) {
if (h->IsElement() &&
(apply_if_will_be_deleted || !h->WillBeDeleted())) {
func(h);
}
h->ReleaseExclusiveRef();
h->ReleaseExternalRef();
}
}
}
@ -601,6 +622,8 @@ class ClockHandleTable {
size_t GetCapacity() const { return capacity_; }
// Overwrites the capacity bound. This only stores the new value; it does not
// evict anything by itself.
void SetCapacity(size_t capacity) { capacity_ = capacity; }
// Returns x mod 2^{length_bits_}.
uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; }
@ -652,7 +675,7 @@ class ClockHandleTable {
const uint32_t occupancy_limit_;
// Maximum total charge of all elements stored in the table.
const size_t capacity_;
size_t capacity_;
// We partition the following members into different cache lines
// to avoid false sharing among Lookup, Release, Erase and Insert
@ -745,6 +768,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
private:
friend class ClockCache;
friend class ClockCacheTest;
// Free some space following strict clock policy until enough space
// to hold (usage_ + charge) is freed or there are no evictable elements.
@ -763,6 +787,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
// Whether to reject insertion if cache reaches its full capacity.
std::atomic<bool> strict_capacity_limit_;
// Handles allocated separately from the table.
std::atomic<size_t> detached_usage_;
ClockHandleTable table_;
}; // class ClockCacheShard
@ -797,6 +824,7 @@ class ClockCache
private:
ClockCacheShard* shards_ = nullptr;
int num_shards_;
}; // class ClockCache

@ -299,10 +299,12 @@ int LRUCacheShard::CalcHashBits(
}
void LRUCacheShard::SetCapacity(size_t capacity) {
assert(false); // Not supported. TODO(Guido) Support it?
autovector<LRUHandle> last_reference_list;
{
DMutexLock l(mutex_);
if (capacity > capacity_) {
assert(false); // Not supported.
}
capacity_ = capacity;
EvictFromLRU(0, &last_reference_list);
}

@ -207,6 +207,9 @@ TEST_F(LRUCacheTest, EntriesWithPriority) {
ValidateLRUList({"e", "f", "g", "Z", "d"}, 2);
}
// TODO: FastLRUCache and ClockCache use the same tests. We can probably remove
// them from FastLRUCache once ClockCache becomes production-ready, since we
// don't plan to use or maintain FastLRUCache any more.
namespace fast_lru_cache {
// TODO(guido) Replicate LRU policy tests from LRUCache here.
@ -225,11 +228,11 @@ class FastLRUCacheTest : public testing::Test {
void NewCache(size_t capacity) {
DeleteCache();
cache_ = reinterpret_cast<fast_lru_cache::LRUCacheShard*>(
port::cacheline_aligned_alloc(sizeof(fast_lru_cache::LRUCacheShard)));
new (cache_) fast_lru_cache::LRUCacheShard(
capacity, 1 /*estimated_value_size*/, false /*strict_capacity_limit*/,
kDontChargeCacheMetadata);
cache_ = reinterpret_cast<LRUCacheShard*>(
port::cacheline_aligned_alloc(sizeof(LRUCacheShard)));
new (cache_) LRUCacheShard(capacity, 1 /*estimated_value_size*/,
false /*strict_capacity_limit*/,
kDontChargeCacheMetadata);
}
Status Insert(const std::string& key) {
@ -243,25 +246,23 @@ class FastLRUCacheTest : public testing::Test {
size_t CalcEstimatedHandleChargeWrapper(
size_t estimated_value_size,
CacheMetadataChargePolicy metadata_charge_policy) {
return fast_lru_cache::LRUCacheShard::CalcEstimatedHandleCharge(
estimated_value_size, metadata_charge_policy);
return LRUCacheShard::CalcEstimatedHandleCharge(estimated_value_size,
metadata_charge_policy);
}
int CalcHashBitsWrapper(size_t capacity, size_t estimated_value_size,
CacheMetadataChargePolicy metadata_charge_policy) {
return fast_lru_cache::LRUCacheShard::CalcHashBits(
capacity, estimated_value_size, metadata_charge_policy);
return LRUCacheShard::CalcHashBits(capacity, estimated_value_size,
metadata_charge_policy);
}
// Maximum number of items that a shard can hold.
double CalcMaxOccupancy(size_t capacity, size_t estimated_value_size,
CacheMetadataChargePolicy metadata_charge_policy) {
size_t handle_charge =
fast_lru_cache::LRUCacheShard::CalcEstimatedHandleCharge(
estimated_value_size, metadata_charge_policy);
return capacity / (fast_lru_cache::kLoadFactor * handle_charge);
size_t handle_charge = LRUCacheShard::CalcEstimatedHandleCharge(
estimated_value_size, metadata_charge_policy);
return capacity / (kLoadFactor * handle_charge);
}
bool TableSizeIsAppropriate(int hash_bits, double max_occupancy) {
if (hash_bits == 0) {
return max_occupancy <= 1;
@ -272,7 +273,7 @@ class FastLRUCacheTest : public testing::Test {
}
private:
fast_lru_cache::LRUCacheShard* cache_ = nullptr;
LRUCacheShard* cache_ = nullptr;
};
TEST_F(FastLRUCacheTest, ValidateKeySize) {
@ -292,7 +293,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
double max_occupancy;
int hash_bits;
CacheMetadataChargePolicy metadata_charge_policy;
// Vary the cache capacity, fix the element charge.
for (int i = 0; i < 2048; i++) {
capacity = i;
@ -304,7 +304,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
metadata_charge_policy);
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
}
// Fix the cache capacity, vary the element charge.
for (int i = 0; i < 1024; i++) {
capacity = 1024;
@ -316,7 +315,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
metadata_charge_policy);
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
}
// Zero-capacity cache, and only values have charge.
capacity = 0;
estimated_value_size = 1;
@ -324,7 +322,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
metadata_charge_policy);
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
// Zero-capacity cache, and only metadata has charge.
capacity = 0;
estimated_value_size = 0;
@ -332,7 +329,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
metadata_charge_policy);
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
// Small cache, large elements.
capacity = 1024;
estimated_value_size = 8192;
@ -340,7 +336,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
metadata_charge_policy);
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
// Large capacity.
capacity = 31924172;
estimated_value_size = 8192;
@ -402,37 +397,38 @@ class ClockCacheTest : public testing::Test {
void Erase(const std::string& key) { shard_->Erase(key, 0 /*hash*/); }
// void ValidateLRUList(std::vector<std::string> keys,
// size_t num_high_pri_pool_keys = 0) {
// LRUHandle* lru;
// LRUHandle* lru_low_pri;
// cache_->TEST_GetLRUList(&lru, &lru_low_pri);
// LRUHandle* iter = lru;
// bool in_high_pri_pool = false;
// size_t high_pri_pool_keys = 0;
// if (iter == lru_low_pri) {
// in_high_pri_pool = true;
// }
// for (const auto& key : keys) {
// iter = iter->next;
// ASSERT_NE(lru, iter);
// ASSERT_EQ(key, iter->key().ToString());
// ASSERT_EQ(in_high_pri_pool, iter->InHighPriPool());
// if (in_high_pri_pool) {
// high_pri_pool_keys++;
// }
// if (iter == lru_low_pri) {
// ASSERT_FALSE(in_high_pri_pool);
// in_high_pri_pool = true;
// }
// }
// ASSERT_EQ(lru, iter->next);
// ASSERT_TRUE(in_high_pri_pool);
// ASSERT_EQ(num_high_pri_pool_keys, high_pri_pool_keys);
// }
// Test-only forwarder to ClockCacheShard::CalcEstimatedHandleCharge, so that
// test bodies can reach it through the fixture.
size_t CalcEstimatedHandleChargeWrapper(
    size_t estimated_value_size,
    CacheMetadataChargePolicy metadata_charge_policy) {
  const size_t estimated_charge = ClockCacheShard::CalcEstimatedHandleCharge(
      estimated_value_size, metadata_charge_policy);
  return estimated_charge;
}
// Test-side forwarder to ClockCacheShard::CalcHashBits.
//
// Passes `capacity`, `estimated_value_size` and `metadata_charge_policy`
// through unchanged and returns the number of hash bits the shard helper
// picks for that configuration.
int CalcHashBitsWrapper(size_t capacity, size_t estimated_value_size,
                        CacheMetadataChargePolicy metadata_charge_policy) {
  const int table_hash_bits = ClockCacheShard::CalcHashBits(
      capacity, estimated_value_size, metadata_charge_policy);
  return table_hash_bits;
}
// Maximum number of items that a shard can hold.
//
// Computed as capacity / (kLoadFactor * estimated handle charge), where the
// handle charge comes from ClockCacheShard::CalcEstimatedHandleCharge for the
// given value size and metadata charge policy. The result is fractional; the
// callers in this file pass it straight to TableSizeIsAppropriate.
double CalcMaxOccupancy(size_t capacity, size_t estimated_value_size,
                        CacheMetadataChargePolicy metadata_charge_policy) {
  const size_t per_handle_charge = ClockCacheShard::CalcEstimatedHandleCharge(
      estimated_value_size, metadata_charge_policy);
  // Same operand order as a single expression would use: scale the charge by
  // the load factor first, then divide the capacity by that product.
  const auto scaled_handle_charge = kLoadFactor * per_handle_charge;
  return capacity / scaled_handle_charge;
}
// Returns true when a table of 2^hash_bits slots is the right power-of-two
// size for `max_occupancy` elements.
//
// - hash_bits == 0: the single-slot table is appropriate iff at most one
//   element has to fit (max_occupancy <= 1).
// - hash_bits > 0: the table must be large enough (2^hash_bits >=
//   max_occupancy) while a table of half the size must not be strictly larger
//   than needed (2^(hash_bits - 1) <= max_occupancy).
//
// hash_bits is expected to be in [0, 63]; values outside that range are not
// handled.
bool TableSizeIsAppropriate(int hash_bits, double max_occupancy) {
  if (hash_bits == 0) {
    return max_occupancy <= 1;
  }
  // Shift a 64-bit one: shifting a plain `int` overflows once hash_bits
  // reaches 31, which would make the check wrong (or undefined) for very large
  // tables. Powers of two up to 2^63 convert to double exactly.
  const double table_size = static_cast<double>(1ULL << hash_bits);
  const double half_table_size =
      static_cast<double>(1ULL << (hash_bits - 1));
  return table_size >= max_occupancy && half_table_size <= max_occupancy;
}
private:
clock_cache::ClockCacheShard* shard_ = nullptr;
ClockCacheShard* shard_ = nullptr;
};
TEST_F(ClockCacheTest, Validate) {
@ -447,31 +443,89 @@ TEST_F(ClockCacheTest, Validate) {
}
// Exercises the clock-priority accessors of ClockHandle: the initial value,
// explicit SetClockPriority calls, and step-by-step DecreaseClockPriority.
TEST_F(ClockCacheTest, ClockPriorityTest) {
  ClockHandle handle;
  // A default-constructed handle starts without a priority.
  EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::NONE);

  // Each decrement moves one level down: HIGH -> MEDIUM -> LOW.
  handle.SetClockPriority(ClockHandle::ClockPriority::HIGH);
  EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::HIGH);
  handle.DecreaseClockPriority();
  EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::MEDIUM);
  handle.DecreaseClockPriority();
  EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::LOW);

  // Setting a priority overrides whatever was stored before.
  handle.SetClockPriority(ClockHandle::ClockPriority::MEDIUM);
  EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::MEDIUM);
  handle.SetClockPriority(ClockHandle::ClockPriority::NONE);
  EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::NONE);

  // Two decrements starting from MEDIUM end at NONE.
  handle.SetClockPriority(ClockHandle::ClockPriority::MEDIUM);
  EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::MEDIUM);
  handle.DecreaseClockPriority();
  handle.DecreaseClockPriority();
  EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::NONE);
}
// Checks that the number of hash bits chosen by ClockCacheShard::CalcHashBits
// yields an appropriately sized table (see TableSizeIsAppropriate) across a
// sweep of capacities, a sweep of element charges, and a few fixed corner
// cases.
TEST_F(ClockCacheTest, CalcHashBitsTest) {
  // Vary the cache capacity, fix the element charge.
  for (size_t capacity = 0; capacity < 2048; ++capacity) {
    const size_t estimated_value_size = 0;
    const CacheMetadataChargePolicy metadata_charge_policy =
        kFullChargeCacheMetadata;
    const double max_occupancy = CalcMaxOccupancy(
        capacity, estimated_value_size, metadata_charge_policy);
    const int hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
                                              metadata_charge_policy);
    EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
  }

  // Fix the cache capacity, vary the element charge.
  for (size_t estimated_value_size = 0; estimated_value_size < 1024;
       ++estimated_value_size) {
    const size_t capacity = 1024;
    const CacheMetadataChargePolicy metadata_charge_policy =
        kFullChargeCacheMetadata;
    const double max_occupancy = CalcMaxOccupancy(
        capacity, estimated_value_size, metadata_charge_policy);
    const int hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
                                              metadata_charge_policy);
    EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
  }

  // Zero-capacity cache, and only values have charge.
  {
    const size_t capacity = 0;
    const size_t estimated_value_size = 1;
    const CacheMetadataChargePolicy metadata_charge_policy =
        kDontChargeCacheMetadata;
    const int hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
                                              metadata_charge_policy);
    EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
  }

  // Zero-capacity cache, and only metadata has charge.
  {
    const size_t capacity = 0;
    const size_t estimated_value_size = 0;
    const CacheMetadataChargePolicy metadata_charge_policy =
        kFullChargeCacheMetadata;
    const int hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
                                              metadata_charge_policy);
    EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
  }

  // Small cache, large elements.
  {
    const size_t capacity = 1024;
    const size_t estimated_value_size = 8192;
    const CacheMetadataChargePolicy metadata_charge_policy =
        kFullChargeCacheMetadata;
    const int hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
                                              metadata_charge_policy);
    EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
  }

  // Large capacity.
  {
    const size_t capacity = 31924172;
    const size_t estimated_value_size = 8192;
    const CacheMetadataChargePolicy metadata_charge_policy =
        kFullChargeCacheMetadata;
    const double max_occupancy = CalcMaxOccupancy(
        capacity, estimated_value_size, metadata_charge_policy);
    const int hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
                                              metadata_charge_policy);
    EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
  }
}
} // namespace clock_cache

@ -116,8 +116,8 @@ default_params = {
"use_direct_reads": lambda: random.randint(0, 1),
"use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1),
"mock_direct_io": False,
"cache_type": "lru_cache", # fast_lru_cache and clock_cache are currently incompatible
# with stress tests, because they use strict_capacity_limit = false
"cache_type": lambda: random.choice(["lru_cache", "clock_cache"]),
# fast_lru_cache is incompatible with stress tests, because it doesn't support strict_capacity_limit == false.
"use_full_merge_v1": lambda: random.randint(0, 1),
"use_merge": lambda: random.randint(0, 1),
# 999 -> use Bloom API

Loading…
Cancel
Save