diff --git a/cache/cache_test.cc b/cache/cache_test.cc index 0a008a1d9..81a9d412c 100644 --- a/cache/cache_test.cc +++ b/cache/cache_test.cc @@ -23,6 +23,11 @@ #include "util/coding.h" #include "util/string_util.h" +// FastLRUCache and ClockCache only support 16-byte keys, so some of +// the tests originally written for LRUCache do not work on the other caches. +// Those tests were adapted to use 16-byte keys. We kept the original ones. +// TODO: Remove the original tests if they ever become unused. + namespace ROCKSDB_NAMESPACE { namespace { @@ -58,17 +63,22 @@ int DecodeValue(void* v) { return static_cast(reinterpret_cast(v)); } -const std::string kLRU = "lru"; -const std::string kClock = "clock"; -const std::string kFast = "fast"; - -void dumbDeleter(const Slice& /*key*/, void* /*value*/) {} +void DumbDeleter(const Slice& /*key*/, void* /*value*/) {} -void eraseDeleter(const Slice& /*key*/, void* value) { +void EraseDeleter1(const Slice& /*key*/, void* value) { Cache* cache = reinterpret_cast(value); cache->Erase("foo"); } +void EraseDeleter2(const Slice& /*key*/, void* value) { + Cache* cache = reinterpret_cast(value); + cache->Erase(EncodeKey16Bytes(1234)); +} + +const std::string kLRU = "lru"; +const std::string kClock = "clock"; +const std::string kFast = "fast"; + } // anonymous namespace class CacheTest : public testing::TestWithParam { @@ -223,13 +233,9 @@ class LRUCacheTest : public CacheTest {}; TEST_P(CacheTest, UsageTest) { auto type = GetParam(); - if (type == kFast || type == kClock) { - ROCKSDB_GTEST_BYPASS("FastLRUCache and ClockCache require 16-byte keys."); - return; - } // cache is std::shared_ptr and will be automatically cleaned up. - const uint64_t kCapacity = 100000; + const size_t kCapacity = 100000; auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata); ASSERT_EQ(0, cache->GetUsage()); @@ -239,12 +245,17 @@ char value[10] = "abcdef"; // make sure everything will be cached for (int i = 1; i < 100; ++i) { - std::string key(i, 'a'); + std::string key; + if (type == kLRU) { + key = std::string(i, 'a'); + } else { + key = EncodeKey(i); + } auto kv_size = key.size() + 5; ASSERT_OK(cache->Insert(key, reinterpret_cast(value), kv_size, - dumbDeleter)); + DumbDeleter)); ASSERT_OK(precise_cache->Insert(key, reinterpret_cast(value), - kv_size, dumbDeleter)); + kv_size, DumbDeleter)); usage += kv_size; ASSERT_EQ(usage, cache->GetUsage()); ASSERT_LT(usage, precise_cache->GetUsage()); @@ -256,12 +267,17 @@ ASSERT_EQ(0, precise_cache->GetUsage()); // make sure the cache will be overloaded - for (uint64_t i = 1; i < kCapacity; ++i) { - auto key = std::to_string(i); + for (size_t i = 1; i < kCapacity; ++i) { + std::string key; + if (type == kLRU) { + key = std::to_string(i); + } else { + key = EncodeKey(static_cast(1000 + i)); + } ASSERT_OK(cache->Insert(key, reinterpret_cast(value), key.size() + 5, - dumbDeleter)); + DumbDeleter)); ASSERT_OK(precise_cache->Insert(key, reinterpret_cast(value), - key.size() + 5, dumbDeleter)); + key.size() + 5, DumbDeleter)); } // the usage should be close to the capacity @@ -271,15 +287,18 @@ ASSERT_GT(kCapacity, cache->GetUsage()); ASSERT_GT(kCapacity, precise_cache->GetUsage()); ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage()); } +// TODO: This test takes longer than expected on ClockCache. This is +// because the value size estimate at construction is too sloppy. +// Fix this. +// Why is it so slow?
The cache is constructed with an estimate of 1, but +// then the charge is claimed to be 21. This will cause the hash table +// to be extremely sparse, which in turn means clock needs to scan too +// many slots to find victims. TEST_P(CacheTest, PinnedUsageTest) { auto type = GetParam(); - if (type == kFast || type == kClock) { - ROCKSDB_GTEST_BYPASS("FastLRUCache and ClockCache require 16-byte keys."); - return; - } // cache is std::shared_ptr and will be automatically cleaned up. - const uint64_t kCapacity = 200000; + const size_t kCapacity = 200000; auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata); @@ -292,15 +311,20 @@ TEST_P(CacheTest, PinnedUsageTest) { // Add entries. Unpin some of them after insertion. Then, pin some of them // again. Check GetPinnedUsage(). for (int i = 1; i < 100; ++i) { - std::string key(i, 'a'); + std::string key; + if (type == kLRU) { + key = std::string(i, 'a'); + } else { + key = EncodeKey(i); + } auto kv_size = key.size() + 5; Cache::Handle* handle; Cache::Handle* handle_in_precise_cache; ASSERT_OK(cache->Insert(key, reinterpret_cast(value), kv_size, - dumbDeleter, &handle)); + DumbDeleter, &handle)); assert(handle); ASSERT_OK(precise_cache->Insert(key, reinterpret_cast(value), - kv_size, dumbDeleter, + kv_size, DumbDeleter, &handle_in_precise_cache)); assert(handle_in_precise_cache); pinned_usage += kv_size; @@ -334,12 +358,17 @@ TEST_P(CacheTest, PinnedUsageTest) { ASSERT_LT(pinned_usage, precise_cache_pinned_usage); // check that overloading the cache does not change the pinned usage - for (uint64_t i = 1; i < 2 * kCapacity; ++i) { - auto key = std::to_string(i); + for (size_t i = 1; i < 2 * kCapacity; ++i) { + std::string key; + if (type == kLRU) { + key = std::to_string(i); + } else { + key = EncodeKey(static_cast(1000 + i)); + } ASSERT_OK(cache->Insert(key, reinterpret_cast(value), key.size() + 5, - dumbDeleter)); + DumbDeleter)); ASSERT_OK(precise_cache->Insert(key, reinterpret_cast(value), - key.size() + 5, dumbDeleter)); + key.size() + 5, DumbDeleter)); } ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage()); @@ -447,7 +476,7 @@ TEST_P(CacheTest, EvictionPolicy) { Insert(200, 201); // Frequently used entry must be kept around - for (int i = 0; i < kCacheSize * 2; i++) { + for (int i = 0; i < 2 * kCacheSize; i++) { Insert(1000+i, 2000+i); ASSERT_EQ(101, Lookup(100)); } @@ -500,9 +529,7 @@ TEST_P(CacheTest, EvictionPolicyRef) { Insert(303, 104); // Insert entries much more than cache capacity. - double load_factor = - std::min(fast_lru_cache::kLoadFactor, clock_cache::kLoadFactor); - for (int i = 0; i < 2 * static_cast(kCacheSize / load_factor); i++) { + for (int i = 0; i < 100 * kCacheSize; i++) { Insert(1000 + i, 2000 + i); } @@ -533,31 +560,40 @@ TEST_P(CacheTest, EvictionPolicyRef) { TEST_P(CacheTest, EvictEmptyCache) { auto type = GetParam(); - if (type == kFast || type == kClock) { - ROCKSDB_GTEST_BYPASS("FastLRUCache and ClockCache require 16-byte keys."); - return; - } // Insert item large than capacity to trigger eviction on empty cache. 
auto cache = NewCache(1, 0, false); - ASSERT_OK(cache->Insert("foo", nullptr, 10, dumbDeleter)); + if (type == kLRU) { + ASSERT_OK(cache->Insert("foo", nullptr, 10, DumbDeleter)); + } else { + ASSERT_OK(cache->Insert(EncodeKey(1000), nullptr, 10, DumbDeleter)); + } } TEST_P(CacheTest, EraseFromDeleter) { auto type = GetParam(); - if (type == kFast || type == kClock) { - ROCKSDB_GTEST_BYPASS("FastLRUCache and ClockCache require 16-byte keys."); - return; - } // Have deleter which will erase item from cache, which will re-enter // the cache at that point. std::shared_ptr cache = NewCache(10, 0, false); - ASSERT_OK(cache->Insert("foo", nullptr, 1, dumbDeleter)); - ASSERT_OK(cache->Insert("bar", cache.get(), 1, eraseDeleter)); - cache->Erase("bar"); - ASSERT_EQ(nullptr, cache->Lookup("foo")); - ASSERT_EQ(nullptr, cache->Lookup("bar")); + std::string foo, bar; + Cache::DeleterFn erase_deleter; + if (type == kLRU) { + foo = "foo"; + bar = "bar"; + erase_deleter = EraseDeleter1; + } else { + foo = EncodeKey(1234); + bar = EncodeKey(5678); + erase_deleter = EraseDeleter2; + } + + ASSERT_OK(cache->Insert(foo, nullptr, 1, DumbDeleter)); + ASSERT_OK(cache->Insert(bar, cache.get(), 1, erase_deleter)); + + cache->Erase(bar); + ASSERT_EQ(nullptr, cache->Lookup(foo)); + ASSERT_EQ(nullptr, cache->Lookup(bar)); } TEST_P(CacheTest, ErasedHandleState) { @@ -590,9 +626,9 @@ TEST_P(CacheTest, HeavyEntries) { const int kHeavy = 10; int added = 0; int index = 0; - while (added < 2*kCacheSize) { + while (added < 2 * kCacheSize) { const int weight = (index & 1) ? kLight : kHeavy; - Insert(index, 1000+index, weight); + Insert(index, 1000 + index, weight); added += weight; index++; } @@ -603,7 +639,7 @@ TEST_P(CacheTest, HeavyEntries) { int r = Lookup(i); if (r >= 0) { cached_weight += weight; - ASSERT_EQ(1000+i, r); + ASSERT_EQ(1000 + i, r); } } ASSERT_LE(cached_weight, kCacheSize + kCacheSize/10); @@ -615,7 +651,6 @@ TEST_P(CacheTest, NewId) { ASSERT_NE(a, b); } - class Value { public: explicit Value(int v) : v_(v) {} @@ -664,7 +699,8 @@ TEST_P(CacheTest, SetCapacity) { auto type = GetParam(); if (type == kFast || type == kClock) { ROCKSDB_GTEST_BYPASS( - "FastLRUCache and ClockCache don't support capacity adjustments."); + "FastLRUCache and ClockCache don't support arbitrary capacity " + "adjustments."); return; } // test1: increase capacity @@ -716,9 +752,9 @@ TEST_P(CacheTest, SetCapacity) { TEST_P(LRUCacheTest, SetStrictCapacityLimit) { auto type = GetParam(); - if (type == kFast || type == kClock) { + if (type == kFast) { ROCKSDB_GTEST_BYPASS( - "FastLRUCache and ClockCache don't support an unbounded number of " + "FastLRUCache only supports a limited number of " "inserts beyond " "capacity."); return; @@ -775,9 +811,8 @@ TEST_P(LRUCacheTest, SetStrictCapacityLimit) { TEST_P(CacheTest, OverCapacity) { auto type = GetParam(); - if (type == kFast || type == kClock) { - ROCKSDB_GTEST_BYPASS( - "FastLRUCache and ClockCache don't support capacity adjustments."); + if (type == kClock) { + ROCKSDB_GTEST_BYPASS("Requires LRU eviction policy."); return; } size_t n = 10; diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index aa8e10cd6..51984e183 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -69,10 +69,10 @@ ClockHandle* ClockHandleTable::Lookup(const Slice& key, uint32_t hash) { // updates where it would be possible to combine into one CAS (more metadata // under one atomic field) or maybe two atomic updates (one arithmetic, one // bitwise). Something to think about optimizing. 
- e->InternalToExternalRef(); e->SetHit(); // The handle is now referenced, so we take it out of clock. ClockOff(e); + e->InternalToExternalRef(); } return e; @@ -312,17 +312,20 @@ void ClockHandleTable::ClockRun(size_t charge) { // hot element, it will be hard to get an exclusive ref. // Do we need a mechanism to prevent an element from sitting // for a long time in cache waiting to be evicted? - assert(charge <= capacity_); autovector deleted; uint32_t max_iterations = - 1 + static_cast(GetTableSize() * kLoadFactor); + ClockHandle::ClockPriority::HIGH * + (1 + + static_cast( + GetTableSize() * + kLoadFactor)); // It may take up to HIGH passes to evict an element. size_t usage_local = usage_; - while (usage_local + charge > capacity_ && max_iterations--) { + size_t capacity_local = capacity_; + while (usage_local + charge > capacity_local && max_iterations--) { uint32_t steps = 1 + static_cast(1 / kLoadFactor); uint32_t clock_pointer_local = (clock_pointer_ += steps) - steps; for (uint32_t i = 0; i < steps; i++) { ClockHandle* h = &array_[ModTableSize(clock_pointer_local + i)]; - if (h->TryExclusiveRef()) { if (h->WillBeDeleted()) { Remove(h, &deleted); @@ -335,7 +338,6 @@ void ClockHandleTable::ClockRun(size_t charge) { // exclusive ref, we know we are in the latter case. This can only // happen when the last external reference to an element was // released, and the element was not immediately removed. - ClockOn(h); } ClockHandle::ClockPriority priority = h->GetClockPriority(); @@ -358,6 +360,7 @@ ClockCacheShard::ClockCacheShard( size_t capacity, size_t estimated_value_size, bool strict_capacity_limit, CacheMetadataChargePolicy metadata_charge_policy) : strict_capacity_limit_(strict_capacity_limit), + detached_usage_(0), table_(capacity, CalcHashBits(capacity, estimated_value_size, metadata_charge_policy)) { set_metadata_charge_policy(metadata_charge_policy); @@ -430,12 +433,16 @@ int ClockCacheShard::CalcHashBits( return FloorLog2((num_entries << 1) - 1); } -void ClockCacheShard::SetCapacity(size_t /*capacity*/) { - assert(false); // Not supported. +void ClockCacheShard::SetCapacity(size_t capacity) { + if (capacity > table_.GetCapacity()) { + assert(false); // Not supported. + } + table_.SetCapacity(capacity); + table_.ClockRun(detached_usage_); } -void ClockCacheShard::SetStrictCapacityLimit(bool /*strict_capacity_limit*/) { - assert(false); // Not supported. +void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) { + strict_capacity_limit_ = strict_capacity_limit; } Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value, @@ -459,27 +466,32 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value, Status s = Status::OK(); + // Use a local copy to minimize cache synchronization. + size_t detached_usage = detached_usage_; + // Free space with the clock policy until enough space is freed or there are // no evictable elements. - table_.ClockRun(tmp.total_charge); + table_.ClockRun(tmp.total_charge + detached_usage); - // occupancy_ and usage_ are contended members across concurrent updates - // on the same shard, so we use a single copy to reduce cache synchronization. + // Use local copies to minimize cache synchronization + // (occupancy_ and usage_ are read and written by all insertions). 
uint32_t occupancy_local = table_.GetOccupancy(); - size_t usage_local = table_.GetUsage(); - assert(occupancy_local <= table_.GetOccupancyLimit()); - - autovector deleted; - - if ((usage_local + tmp.total_charge > table_.GetCapacity() && - (strict_capacity_limit_ || handle == nullptr)) || - occupancy_local > table_.GetOccupancyLimit()) { + size_t total_usage = table_.GetUsage() + detached_usage; + + // TODO: Currently we support strict_capacity_limit == false as long as the + // number of pinned elements is below table_.GetOccupancyLimit(). We can + // always support it as follows: whenever we exceed this limit, we dynamically + // allocate a handle and return it (when the user provides a handle pointer, + // of course). Then, Release checks whether the handle was dynamically + // allocated, or is stored in the table. + if (total_usage + tmp.total_charge > table_.GetCapacity() && + (strict_capacity_limit_ || handle == nullptr)) { if (handle == nullptr) { // Don't insert the entry but still return ok, as if the entry inserted // into cache and get evicted immediately. - deleted.push_back(tmp); + tmp.FreeData(); } else { - if (occupancy_local > table_.GetOccupancyLimit()) { + if (occupancy_local + 1 > table_.GetOccupancyLimit()) { // TODO: Consider using a distinct status for this case, but usually // it will be handled the same way as reaching charge capacity limit s = Status::MemoryLimit( @@ -491,22 +503,33 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value, } } } else { - // Insert into the cache. Note that the cache might get larger than its - // capacity if not enough space was freed up. - ClockHandle* h = table_.Insert(&tmp, &deleted, handle != nullptr); - assert(h != nullptr); // The occupancy is way below the table size, so this - // insertion should never fail. + ClockHandle* h; + if (occupancy_local + 1 > table_.GetOccupancyLimit()) { + // Even if the user wishes to overload the cache, we can't insert into + // the hash table. Instead, we dynamically allocate a new handle. + h = new ClockHandle(); + *h = tmp; + h->SetDetached(); + h->TryExternalRef(); + detached_usage_ += h->total_charge; + // TODO: Return special status? + } else { + // Insert into the cache. Note that the cache might get larger than its + // capacity if not enough space was freed up. + autovector deleted; + h = table_.Insert(&tmp, &deleted, handle != nullptr); + assert(h != nullptr); // The occupancy is way below the table size, so + // this insertion should never fail. + if (deleted.size() > 0) { + s = Status::OkOverwritten(); + } + table_.Free(&deleted); + } if (handle != nullptr) { *handle = reinterpret_cast(h); } - - if (deleted.size() > 0) { - s = Status::OkOverwritten(); - } } - table_.Free(&deleted); - return s; } @@ -516,7 +539,7 @@ Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) { bool ClockCacheShard::Ref(Cache::Handle* h) { ClockHandle* e = reinterpret_cast(h); - assert(e->HasExternalRefs()); + assert(e->ExternalRefs() > 0); return e->TryExternalRef(); } @@ -530,6 +553,20 @@ bool ClockCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) { } ClockHandle* h = reinterpret_cast(handle); + + if (UNLIKELY(h->IsDetached())) { + h->ReleaseExternalRef(); + if (h->TryExclusiveRef()) { + // Only the last reference will succeed. + // Don't bother releasing the exclusive ref. 
+ h->FreeData(); + detached_usage_ -= h->total_charge; + delete h; + return true; + } + return false; + } + uint32_t refs = h->refs; bool last_reference = ((refs & ClockHandle::EXTERNAL_REFS) == 1); bool will_be_deleted = refs & ClockHandle::WILL_BE_DELETED; @@ -570,13 +607,14 @@ size_t ClockCacheShard::GetPinnedUsage() const { table_.ConstApplyToEntriesRange( [&clock_usage](ClockHandle* h) { - if (h->HasExternalRefs()) { + if (h->ExternalRefs() > 1) { + // We check > 1 because we are holding an external ref. clock_usage += h->total_charge; } }, 0, table_.GetTableSize(), true); - return clock_usage; + return clock_usage + detached_usage_; } ClockCache::ClockCache(size_t capacity, size_t estimated_value_size, diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 4bded9cad..64eeb6acc 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -9,6 +9,8 @@ #pragma once +#include + #include #include #include @@ -28,6 +30,9 @@ namespace ROCKSDB_NAMESPACE { namespace clock_cache { +// Forward declaration of friend class. +class ClockCacheTest; + // An experimental alternative to LRUCache, using a lock-free, open-addressed // hash table and clock eviction. @@ -63,10 +68,10 @@ namespace clock_cache { // can't be immediately deleted. In these cases, the flag will be later read // and acted upon by the eviction algorithm. Importantly, WILL_BE_DELETED is // used not only to defer deletions, but also as a barrier for external -// references: once WILL_BE_DELETED is set, lookups (which are the means to -// acquire new external references) will ignore the handle. For this reason, -// when WILL_BE_DELETED is set, we say the handle is invisible (and -// otherwise, that it's visible). +// references: once WILL_BE_DELETED is set, lookups (which are the most +// common way to acquire new external references) will ignore the handle. +// For this reason, when WILL_BE_DELETED is set, we say the handle is +// invisible (and, otherwise, that it's visible). // // // 3. HASHING AND COLLISION RESOLUTION @@ -192,10 +197,10 @@ struct ClockHandle { size_t total_charge; std::array key_data; - static constexpr uint8_t kIsElementOffset = 1; - static constexpr uint8_t kClockPriorityOffset = 2; - static constexpr uint8_t kIsHitOffset = 4; - static constexpr uint8_t kCachePriorityOffset = 5; + static constexpr uint8_t kIsElementOffset = 0; + static constexpr uint8_t kClockPriorityOffset = 1; + static constexpr uint8_t kIsHitOffset = 3; + static constexpr uint8_t kCachePriorityOffset = 4; enum Flags : uint8_t { // Whether the slot is in use by an element. @@ -252,9 +257,8 @@ struct ClockHandle { // Whether a thread has an exclusive reference to the slot. EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset, // Bit 30 // Whether the handle will be deleted soon. When this bit is set, new - // internal - // or external references to this handle stop being accepted. - // There is an exception: external references can be created from + // internal references to this handle stop being accepted. + // External references may still be granted---they can be created from // existing external references, or converting from existing internal // references. WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset // Bit 31 @@ -274,6 +278,9 @@ struct ClockHandle { std::atomic refs; + // True iff the handle is allocated separately from hash table. 
+ bool detached; + ClockHandle() : value(nullptr), deleter(nullptr), @@ -281,7 +288,8 @@ struct ClockHandle { total_charge(0), flags(0), displacements(0), - refs(0) { + refs(0), + detached(false) { SetWillBeDeleted(false); SetIsElement(false); SetClockPriority(ClockPriority::NONE); @@ -300,6 +308,7 @@ struct ClockHandle { value = other.value; deleter = other.deleter; key_data = other.key_data; + hash = other.hash; total_charge = other.total_charge; } @@ -350,13 +359,13 @@ struct ClockHandle { // flags functions. - bool IsElement() const { return flags & IS_ELEMENT; } + bool IsElement() const { return flags & Flags::IS_ELEMENT; } void SetIsElement(bool is_element) { if (is_element) { - flags |= IS_ELEMENT; + flags |= Flags::IS_ELEMENT; } else { - flags &= static_cast(~IS_ELEMENT); + flags &= static_cast(~Flags::IS_ELEMENT); } } @@ -400,6 +409,10 @@ struct ClockHandle { flags |= new_priority; } + bool IsDetached() { return detached; } + + void SetDetached() { detached = true; } + inline bool IsEmpty() const { return !this->IsElement() && this->displacements == 0; } @@ -424,7 +437,9 @@ struct ClockHandle { } } - bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; } + uint32_t ExternalRefs() const { + return (refs & EXTERNAL_REFS) >> kExternalRefsOffset; + } // Tries to take an internal ref. Returns true iff it succeeds. inline bool TryInternalRef() { @@ -437,7 +452,7 @@ struct ClockHandle { // Tries to take an external ref. Returns true iff it succeeds. inline bool TryExternalRef() { - if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) { + if (!((refs += kOneExternalRef) & EXCLUSIVE_REF)) { return true; } refs -= kOneExternalRef; @@ -529,8 +544,8 @@ class ClockHandleTable { // Makes h non-evictable. void ClockOff(ClockHandle* h); - // Runs the clock eviction algorithm until there is enough space to - // insert an element with the given charge. + // Runs the clock eviction algorithm until usage_ + charge is at most + // capacity_. void ClockRun(size_t charge); // Remove h from the hash table. Requires an exclusive ref to h. @@ -548,8 +563,6 @@ class ClockHandleTable { RemoveAll(key, hash, probe, deleted); } - void Free(autovector* deleted); - // Tries to remove h from the hash table. If the attempt is successful, // the function hands over an exclusive ref to h. bool TryRemove(ClockHandle* h, autovector* deleted); @@ -558,6 +571,11 @@ class ClockHandleTable { // success. Requires that the caller thread has no shared ref to h. bool SpinTryRemove(ClockHandle* h, autovector* deleted); + // Call this function after an Insert, Remove, RemoveAll, TryRemove + // or SpinTryRemove. It frees the deleted values and updates the hash table + // metadata. + void Free(autovector* deleted); + template void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end, bool apply_if_will_be_deleted) { @@ -579,12 +597,15 @@ class ClockHandleTable { bool apply_if_will_be_deleted) const { for (uint32_t i = index_begin; i < index_end; i++) { ClockHandle* h = &array_[i]; - if (h->TryExclusiveRef()) { + // We take an external ref because we are handing over control + // to a user-defined function, and because the handle will not be + // modified. 
+ if (h->TryExternalRef()) { if (h->IsElement() && (apply_if_will_be_deleted || !h->WillBeDeleted())) { func(h); } - h->ReleaseExclusiveRef(); + h->ReleaseExternalRef(); } } } @@ -601,6 +622,8 @@ class ClockHandleTable { size_t GetCapacity() const { return capacity_; } + void SetCapacity(size_t capacity) { capacity_ = capacity; } + // Returns x mod 2^{length_bits_}. uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; } @@ -652,7 +675,7 @@ class ClockHandleTable { const uint32_t occupancy_limit_; // Maximum total charge of all elements stored in the table. - const size_t capacity_; + size_t capacity_; // We partition the following members into different cache lines // to avoid false sharing among Lookup, Release, Erase and Insert @@ -745,6 +768,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard { private: friend class ClockCache; + friend class ClockCacheTest; // Free some space following strict clock policy until enough space // to hold (usage_ + charge) is freed or there are no evictable elements. @@ -763,6 +787,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard { // Whether to reject insertion if cache reaches its full capacity. std::atomic strict_capacity_limit_; + // Handles allocated separately from the table. + std::atomic detached_usage_; + ClockHandleTable table_; }; // class ClockCacheShard @@ -797,6 +824,7 @@ class ClockCache private: ClockCacheShard* shards_ = nullptr; + int num_shards_; }; // class ClockCache diff --git a/cache/fast_lru_cache.cc b/cache/fast_lru_cache.cc index a425204b9..817f3be18 100644 --- a/cache/fast_lru_cache.cc +++ b/cache/fast_lru_cache.cc @@ -299,10 +299,12 @@ int LRUCacheShard::CalcHashBits( } void LRUCacheShard::SetCapacity(size_t capacity) { - assert(false); // Not supported. TODO(Guido) Support it? autovector last_reference_list; { DMutexLock l(mutex_); + if (capacity > capacity_) { + assert(false); // Not supported. + } capacity_ = capacity; EvictFromLRU(0, &last_reference_list); } diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index e76ba64f6..6295ffa1e 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -207,6 +207,9 @@ TEST_F(LRUCacheTest, EntriesWithPriority) { ValidateLRUList({"e", "f", "g", "Z", "d"}, 2); } +// TODO: FastLRUCache and ClockCache use the same tests. We can probably remove +// them from FastLRUCache after ClockCache becomes productive, and we don't plan +// to use or maintain FastLRUCache any more. namespace fast_lru_cache { // TODO(guido) Replicate LRU policy tests from LRUCache here. 
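Aside (not part of the patch): the TryExternalRef()/TryExclusiveRef() calls relied on above, e.g. in ConstApplyToEntriesRange and in the detached-handle path of Release, can be summarized with a minimal, self-contained sketch. The bit layout below is assumed for illustration only and is simpler than ClockHandle's real refs word (internal refs and WILL_BE_DELETED are omitted); it just shows how a counter of external references and an exclusive bit can share one atomic word.

#include <atomic>
#include <cstdint>

// Hypothetical layout for illustration: bits 0..29 count external refs and
// bit 30 is an exclusive-lock bit. The real ClockHandle packs more state
// (internal refs, WILL_BE_DELETED) into the same word; omitted here.
struct RefCountSketch {
  static constexpr uint32_t kOneExternalRef = 1;
  static constexpr uint32_t kExternalRefsMask = (uint32_t{1} << 30) - 1;
  static constexpr uint32_t kExclusiveRef = uint32_t{1} << 30;

  std::atomic<uint32_t> refs{0};

  // Optimistically add an external ref; back off if an exclusive ref is held.
  bool TryExternalRef() {
    if (!((refs += kOneExternalRef) & kExclusiveRef)) {
      return true;
    }
    refs -= kOneExternalRef;
    return false;
  }

  void ReleaseExternalRef() { refs -= kOneExternalRef; }

  // An exclusive ref is granted only when no other refs are held, so a
  // successful TryExclusiveRef() doubles as a "last reference" test.
  bool TryExclusiveRef() {
    uint32_t expected = 0;
    return refs.compare_exchange_strong(expected, kExclusiveRef);
  }

  void ReleaseExclusiveRef() { refs -= kExclusiveRef; }

  uint32_t ExternalRefs() const { return refs.load() & kExternalRefsMask; }
};

Under a scheme like this, a successful exclusive ref acquisition after dropping one's own external ref is exactly how the patched Release() decides that a detached handle has no remaining users and can be freed.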
@@ -225,11 +228,11 @@ class FastLRUCacheTest : public testing::Test { void NewCache(size_t capacity) { DeleteCache(); - cache_ = reinterpret_cast( - port::cacheline_aligned_alloc(sizeof(fast_lru_cache::LRUCacheShard))); - new (cache_) fast_lru_cache::LRUCacheShard( - capacity, 1 /*estimated_value_size*/, false /*strict_capacity_limit*/, - kDontChargeCacheMetadata); + cache_ = reinterpret_cast( + port::cacheline_aligned_alloc(sizeof(LRUCacheShard))); + new (cache_) LRUCacheShard(capacity, 1 /*estimated_value_size*/, + false /*strict_capacity_limit*/, + kDontChargeCacheMetadata); } Status Insert(const std::string& key) { @@ -243,25 +246,23 @@ class FastLRUCacheTest : public testing::Test { size_t CalcEstimatedHandleChargeWrapper( size_t estimated_value_size, CacheMetadataChargePolicy metadata_charge_policy) { - return fast_lru_cache::LRUCacheShard::CalcEstimatedHandleCharge( - estimated_value_size, metadata_charge_policy); + return LRUCacheShard::CalcEstimatedHandleCharge(estimated_value_size, + metadata_charge_policy); } int CalcHashBitsWrapper(size_t capacity, size_t estimated_value_size, CacheMetadataChargePolicy metadata_charge_policy) { - return fast_lru_cache::LRUCacheShard::CalcHashBits( - capacity, estimated_value_size, metadata_charge_policy); + return LRUCacheShard::CalcHashBits(capacity, estimated_value_size, + metadata_charge_policy); } // Maximum number of items that a shard can hold. double CalcMaxOccupancy(size_t capacity, size_t estimated_value_size, CacheMetadataChargePolicy metadata_charge_policy) { - size_t handle_charge = - fast_lru_cache::LRUCacheShard::CalcEstimatedHandleCharge( - estimated_value_size, metadata_charge_policy); - return capacity / (fast_lru_cache::kLoadFactor * handle_charge); + size_t handle_charge = LRUCacheShard::CalcEstimatedHandleCharge( + estimated_value_size, metadata_charge_policy); + return capacity / (kLoadFactor * handle_charge); } - bool TableSizeIsAppropriate(int hash_bits, double max_occupancy) { if (hash_bits == 0) { return max_occupancy <= 1; @@ -272,7 +273,7 @@ class FastLRUCacheTest : public testing::Test { } private: - fast_lru_cache::LRUCacheShard* cache_ = nullptr; + LRUCacheShard* cache_ = nullptr; }; TEST_F(FastLRUCacheTest, ValidateKeySize) { @@ -292,7 +293,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) { double max_occupancy; int hash_bits; CacheMetadataChargePolicy metadata_charge_policy; - // Vary the cache capacity, fix the element charge. for (int i = 0; i < 2048; i++) { capacity = i; @@ -304,7 +304,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) { metadata_charge_policy); EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy)); } - // Fix the cache capacity, vary the element charge. for (int i = 0; i < 1024; i++) { capacity = 1024; @@ -316,7 +315,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) { metadata_charge_policy); EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy)); } - // Zero-capacity cache, and only values have charge. capacity = 0; estimated_value_size = 1; @@ -324,7 +322,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) { hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size, metadata_charge_policy); EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */)); - // Zero-capacity cache, and only metadata has charge. 
capacity = 0; estimated_value_size = 0; @@ -332,7 +329,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) { hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size, metadata_charge_policy); EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */)); - // Small cache, large elements. capacity = 1024; estimated_value_size = 8192; @@ -340,7 +336,6 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) { hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size, metadata_charge_policy); EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */)); - // Large capacity. capacity = 31924172; estimated_value_size = 8192; @@ -402,37 +397,38 @@ class ClockCacheTest : public testing::Test { void Erase(const std::string& key) { shard_->Erase(key, 0 /*hash*/); } - // void ValidateLRUList(std::vector keys, - // size_t num_high_pri_pool_keys = 0) { - // LRUHandle* lru; - // LRUHandle* lru_low_pri; - // cache_->TEST_GetLRUList(&lru, &lru_low_pri); - // LRUHandle* iter = lru; - // bool in_high_pri_pool = false; - // size_t high_pri_pool_keys = 0; - // if (iter == lru_low_pri) { - // in_high_pri_pool = true; - // } - // for (const auto& key : keys) { - // iter = iter->next; - // ASSERT_NE(lru, iter); - // ASSERT_EQ(key, iter->key().ToString()); - // ASSERT_EQ(in_high_pri_pool, iter->InHighPriPool()); - // if (in_high_pri_pool) { - // high_pri_pool_keys++; - // } - // if (iter == lru_low_pri) { - // ASSERT_FALSE(in_high_pri_pool); - // in_high_pri_pool = true; - // } - // } - // ASSERT_EQ(lru, iter->next); - // ASSERT_TRUE(in_high_pri_pool); - // ASSERT_EQ(num_high_pri_pool_keys, high_pri_pool_keys); - // } + size_t CalcEstimatedHandleChargeWrapper( + size_t estimated_value_size, + CacheMetadataChargePolicy metadata_charge_policy) { + return ClockCacheShard::CalcEstimatedHandleCharge(estimated_value_size, + metadata_charge_policy); + } + + int CalcHashBitsWrapper(size_t capacity, size_t estimated_value_size, + CacheMetadataChargePolicy metadata_charge_policy) { + return ClockCacheShard::CalcHashBits(capacity, estimated_value_size, + metadata_charge_policy); + } + + // Maximum number of items that a shard can hold. 
+ double CalcMaxOccupancy(size_t capacity, size_t estimated_value_size, + CacheMetadataChargePolicy metadata_charge_policy) { + size_t handle_charge = ClockCacheShard::CalcEstimatedHandleCharge( + estimated_value_size, metadata_charge_policy); + return capacity / (kLoadFactor * handle_charge); + } + + bool TableSizeIsAppropriate(int hash_bits, double max_occupancy) { + if (hash_bits == 0) { + return max_occupancy <= 1; + } else { + return (1 << hash_bits >= max_occupancy) && + (1 << (hash_bits - 1) <= max_occupancy); + } + } private: - clock_cache::ClockCacheShard* shard_ = nullptr; + ClockCacheShard* shard_ = nullptr; }; TEST_F(ClockCacheTest, Validate) { @@ -447,31 +443,89 @@ TEST_F(ClockCacheTest, Validate) { } TEST_F(ClockCacheTest, ClockPriorityTest) { - clock_cache::ClockHandle handle; - EXPECT_EQ(handle.GetClockPriority(), - clock_cache::ClockHandle::ClockPriority::NONE); - handle.SetClockPriority(clock_cache::ClockHandle::ClockPriority::HIGH); - EXPECT_EQ(handle.GetClockPriority(), - clock_cache::ClockHandle::ClockPriority::HIGH); + ClockHandle handle; + EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::NONE); + handle.SetClockPriority(ClockHandle::ClockPriority::HIGH); + EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::HIGH); handle.DecreaseClockPriority(); - EXPECT_EQ(handle.GetClockPriority(), - clock_cache::ClockHandle::ClockPriority::MEDIUM); + EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::MEDIUM); handle.DecreaseClockPriority(); - EXPECT_EQ(handle.GetClockPriority(), - clock_cache::ClockHandle::ClockPriority::LOW); - handle.SetClockPriority(clock_cache::ClockHandle::ClockPriority::MEDIUM); - EXPECT_EQ(handle.GetClockPriority(), - clock_cache::ClockHandle::ClockPriority::MEDIUM); - handle.SetClockPriority(clock_cache::ClockHandle::ClockPriority::NONE); - EXPECT_EQ(handle.GetClockPriority(), - clock_cache::ClockHandle::ClockPriority::NONE); - handle.SetClockPriority(clock_cache::ClockHandle::ClockPriority::MEDIUM); - EXPECT_EQ(handle.GetClockPriority(), - clock_cache::ClockHandle::ClockPriority::MEDIUM); + EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::LOW); + handle.SetClockPriority(ClockHandle::ClockPriority::MEDIUM); + EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::MEDIUM); + handle.SetClockPriority(ClockHandle::ClockPriority::NONE); + EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::NONE); + handle.SetClockPriority(ClockHandle::ClockPriority::MEDIUM); + EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::MEDIUM); handle.DecreaseClockPriority(); handle.DecreaseClockPriority(); - EXPECT_EQ(handle.GetClockPriority(), - clock_cache::ClockHandle::ClockPriority::NONE); + EXPECT_EQ(handle.GetClockPriority(), ClockHandle::ClockPriority::NONE); +} + +TEST_F(ClockCacheTest, CalcHashBitsTest) { + size_t capacity; + size_t estimated_value_size; + double max_occupancy; + int hash_bits; + CacheMetadataChargePolicy metadata_charge_policy; + + // Vary the cache capacity, fix the element charge. + for (int i = 0; i < 2048; i++) { + capacity = i; + estimated_value_size = 0; + metadata_charge_policy = kFullChargeCacheMetadata; + max_occupancy = CalcMaxOccupancy(capacity, estimated_value_size, + metadata_charge_policy); + hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size, + metadata_charge_policy); + EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy)); + } + + // Fix the cache capacity, vary the element charge. 
+ for (int i = 0; i < 1024; i++) { + capacity = 1024; + estimated_value_size = i; + metadata_charge_policy = kFullChargeCacheMetadata; + max_occupancy = CalcMaxOccupancy(capacity, estimated_value_size, + metadata_charge_policy); + hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size, + metadata_charge_policy); + EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy)); + } + + // Zero-capacity cache, and only values have charge. + capacity = 0; + estimated_value_size = 1; + metadata_charge_policy = kDontChargeCacheMetadata; + hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size, + metadata_charge_policy); + EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */)); + + // Zero-capacity cache, and only metadata has charge. + capacity = 0; + estimated_value_size = 0; + metadata_charge_policy = kFullChargeCacheMetadata; + hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size, + metadata_charge_policy); + EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */)); + + // Small cache, large elements. + capacity = 1024; + estimated_value_size = 8192; + metadata_charge_policy = kFullChargeCacheMetadata; + hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size, + metadata_charge_policy); + EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */)); + + // Large capacity. + capacity = 31924172; + estimated_value_size = 8192; + metadata_charge_policy = kFullChargeCacheMetadata; + max_occupancy = + CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy); + hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size, + metadata_charge_policy); + EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy)); } } // namespace clock_cache diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index dbb8bc9e9..6edcc429a 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -116,8 +116,8 @@ default_params = { "use_direct_reads": lambda: random.randint(0, 1), "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1), "mock_direct_io": False, - "cache_type": "lru_cache", # fast_lru_cache and clock_cache are currently incompatible - # with stress tests, because they use strict_capacity_limit = false + "cache_type": lambda: random.choice(["lru_cache", "clock_cache"]), + # fast_lru_cache is incompatible with stress tests, because it doesn't support strict_capacity_limit == false. "use_full_merge_v1": lambda: random.randint(0, 1), "use_merge": lambda: random.randint(0, 1), # 999 -> use Bloom API
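A rough sketch of the arithmetic exercised by the new ClockCacheTest::CalcHashBitsTest (and the existing FastLRUCacheTest version). The constant and helper names below are placeholders for illustration, not the shards' actual implementation: the shard estimates a per-handle charge, derives the maximum occupancy the capacity allows at the configured load factor, and picks hash_bits so that the table size 2^hash_bits tightly brackets that occupancy.

#include <cassert>
#include <cstddef>

// Placeholder load factor; the real constants are kLoadFactor in
// fast_lru_cache.h and clock_cache.h.
constexpr double kLoadFactorSketch = 0.35;

// Rough bound on the number of handles a shard can hold: each handle is
// charged at least handle_charge, and the table is kept at most
// kLoadFactorSketch full.
double MaxOccupancySketch(size_t capacity, size_t handle_charge) {
  return static_cast<double>(capacity) /
         (kLoadFactorSketch * static_cast<double>(handle_charge));
}

// Smallest hash_bits with 2^hash_bits >= max_occupancy, i.e. the bracketing
// that TableSizeIsAppropriate() asserts:
//   2^(hash_bits - 1) <= max_occupancy <= 2^hash_bits.
int HashBitsSketch(double max_occupancy) {
  int bits = 0;
  while (static_cast<double>(1ull << bits) < max_occupancy) {
    ++bits;
  }
  return bits;
}

int main() {
  // Example (assumed numbers): a 1 MB shard with ~64 bytes charged per handle.
  size_t capacity = size_t{1} << 20;
  size_t handle_charge = 64;
  double max_occupancy = MaxOccupancySketch(capacity, handle_charge);
  int hash_bits = HashBitsSketch(max_occupancy);
  assert(static_cast<double>(1ull << hash_bits) >= max_occupancy);
  assert(hash_bits == 0 ||
         static_cast<double>(1ull << (hash_bits - 1)) <= max_occupancy);
  return 0;
}

TableSizeIsAppropriate() in the tests asserts exactly this bracketing, with a special case for zero or tiny capacities where hash_bits may be 0 and the only requirement is max_occupancy <= 1.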