From c3c84b3397a0eaa6450340ecea3b267c0e6c1f3c Mon Sep 17 00:00:00 2001
From: Peter Dillinger
Date: Wed, 12 Jul 2023 14:05:34 -0700
Subject: [PATCH] Refactor (Hyper)ClockCache code for upcoming changes (#11572)

Summary: Separate out some functionality that will be common to both static
and dynamic HCC into BaseClockTable. Table::InsertState and GrowIfNeeded will
be used by the dynamic HCC, so they don't make much sense right now.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11572

Test Plan: existing tests. No functional changes intended. Performance test
in subsequent PR https://github.com/facebook/rocksdb/issues/11601

Reviewed By: jowlyzhang

Differential Revision: D47110496

Pulled By: pdillinger

fbshipit-source-id: 379bd433322a42ea28c0043b41ec24956d21e7aa
---
 cache/clock_cache.cc | 614 ++++++++++++++++++++++++-------------------
 cache/clock_cache.h  | 230 +++++++++-------
 2 files changed, 483 insertions(+), 361 deletions(-)

diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc
index 06cc27a8e..6b1caabfa 100644
--- a/cache/clock_cache.cc
+++ b/cache/clock_cache.cc
@@ -9,6 +9,7 @@
 #include "cache/clock_cache.h"

+#include
 #include
 #include

@@ -118,74 +119,6 @@ inline bool ClockUpdate(ClockHandle& h) {
   }
 }

-}  // namespace
-
-void ClockHandleBasicData::FreeData(MemoryAllocator* allocator) const {
-  if (helper->del_cb) {
-    helper->del_cb(value, allocator);
-  }
-}
-
-HyperClockTable::HyperClockTable(
-    size_t capacity, bool /*strict_capacity_limit*/,
-    CacheMetadataChargePolicy metadata_charge_policy,
-    MemoryAllocator* allocator,
-    const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed,
-    const Opts& opts)
-    : length_bits_(CalcHashBits(capacity, opts.estimated_value_size,
-                                metadata_charge_policy)),
-      length_bits_mask_((size_t{1} << length_bits_) - 1),
-      occupancy_limit_(static_cast<size_t>((uint64_t{1} << length_bits_) *
-                                           kStrictLoadFactor)),
-      array_(new HandleImpl[size_t{1} << length_bits_]),
-      allocator_(allocator),
-      eviction_callback_(*eviction_callback),
-      hash_seed_(*hash_seed) {
-  if (metadata_charge_policy ==
-      CacheMetadataChargePolicy::kFullChargeCacheMetadata) {
-    usage_ += size_t{GetTableSize()} * sizeof(HandleImpl);
-  }
-
-  static_assert(sizeof(HandleImpl) == 64U,
-                "Expecting size / alignment with common cache line size");
-}
-
-HyperClockTable::~HyperClockTable() {
-  // Assumes there are no references or active operations on any slot/element
-  // in the table.
-  for (size_t i = 0; i < GetTableSize(); i++) {
-    HandleImpl& h = array_[i];
-    switch (h.meta >> ClockHandle::kStateShift) {
-      case ClockHandle::kStateEmpty:
-        // noop
-        break;
-      case ClockHandle::kStateInvisible:  // rare but possible
-      case ClockHandle::kStateVisible:
-        assert(GetRefcount(h.meta) == 0);
-        h.FreeData(allocator_);
-#ifndef NDEBUG
-        Rollback(h.hashed_key, &h);
-        ReclaimEntryUsage(h.GetTotalCharge());
-#endif
-        break;
-      // otherwise
-      default:
-        assert(false);
-        break;
-    }
-  }
-
-#ifndef NDEBUG
-  for (size_t i = 0; i < GetTableSize(); i++) {
-    assert(array_[i].displacements.load() == 0);
-  }
-#endif
-
-  assert(usage_.load() == 0 ||
-         usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl));
-  assert(occupancy_ == 0);
-}
-
 // If an entry doesn't receive clock updates but is repeatedly referenced &
 // released, the acquire and release counters could overflow without some
 // intervention.
This is that intervention, which should be inexpensive @@ -259,8 +192,170 @@ inline void CorrectNearOverflow(uint64_t old_meta, } } -inline Status HyperClockTable::ChargeUsageMaybeEvictStrict( - size_t total_charge, size_t capacity, bool need_evict_for_occupancy) { +inline bool BeginSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h, + uint64_t initial_countdown, bool* already_matches) { + assert(*already_matches == false); + + // Optimistically transition the slot from "empty" to + // "under construction" (no effect on other states) + uint64_t old_meta = h.meta.fetch_or( + uint64_t{ClockHandle::kStateOccupiedBit} << ClockHandle::kStateShift, + std::memory_order_acq_rel); + uint64_t old_state = old_meta >> ClockHandle::kStateShift; + + if (old_state == ClockHandle::kStateEmpty) { + // We've started inserting into an available slot, and taken + // ownership. + return true; + } else if (old_state != ClockHandle::kStateVisible) { + // Slot not usable / touchable now + return false; + } + // Existing, visible entry, which might be a match. + // But first, we need to acquire a ref to read it. In fact, number of + // refs for initial countdown, so that we boost the clock state if + // this is a match. + old_meta = + h.meta.fetch_add(ClockHandle::kAcquireIncrement * initial_countdown, + std::memory_order_acq_rel); + // Like Lookup + if ((old_meta >> ClockHandle::kStateShift) == ClockHandle::kStateVisible) { + // Acquired a read reference + if (h.hashed_key == proto.hashed_key) { + // Match. Release in a way that boosts the clock state + old_meta = + h.meta.fetch_add(ClockHandle::kReleaseIncrement * initial_countdown, + std::memory_order_acq_rel); + // Correct for possible (but rare) overflow + CorrectNearOverflow(old_meta, h.meta); + // Insert detached instead (only if return handle needed) + *already_matches = true; + return false; + } else { + // Mismatch. Pretend we never took the reference + old_meta = + h.meta.fetch_sub(ClockHandle::kAcquireIncrement * initial_countdown, + std::memory_order_acq_rel); + } + } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) == + ClockHandle::kStateInvisible)) { + // Pretend we never took the reference + // WART/FIXME?: there's a tiny chance we release last ref to invisible + // entry here. If that happens, we let eviction take care of it. + old_meta = + h.meta.fetch_sub(ClockHandle::kAcquireIncrement * initial_countdown, + std::memory_order_acq_rel); + } else { + // For other states, incrementing the acquire counter has no effect + // so we don't need to undo it. + // Slot not usable / touchable now. 
+  }
+  (void)old_meta;
+  return false;
+}
+
+inline void FinishSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h,
+                             uint64_t initial_countdown, bool keep_ref) {
+  // Save data fields
+  ClockHandleBasicData* h_alias = &h;
+  *h_alias = proto;
+
+  // Transition from "under construction" state to "visible" state
+  uint64_t new_meta = uint64_t{ClockHandle::kStateVisible}
+                      << ClockHandle::kStateShift;
+
+  // Maybe with an outstanding reference
+  new_meta |= initial_countdown << ClockHandle::kAcquireCounterShift;
+  new_meta |= (initial_countdown - keep_ref)
+              << ClockHandle::kReleaseCounterShift;
+
+#ifndef NDEBUG
+  // Save the state transition, with assertion
+  uint64_t old_meta = h.meta.exchange(new_meta, std::memory_order_release);
+  assert(old_meta >> ClockHandle::kStateShift ==
+         ClockHandle::kStateConstruction);
+#else
+  // Save the state transition
+  h.meta.store(new_meta, std::memory_order_release);
+#endif
+}
+
+bool TryInsert(const ClockHandleBasicData& proto, ClockHandle& h,
+               uint64_t initial_countdown, bool keep_ref,
+               bool* already_matches) {
+  bool b = BeginSlotInsert(proto, h, initial_countdown, already_matches);
+  if (b) {
+    FinishSlotInsert(proto, h, initial_countdown, keep_ref);
+  }
+  return b;
+}
+
+}  // namespace
+
+void ClockHandleBasicData::FreeData(MemoryAllocator* allocator) const {
+  if (helper->del_cb) {
+    helper->del_cb(value, allocator);
+  }
+}
+
+template <class HandleImpl>
+HandleImpl* BaseClockTable::StandaloneInsert(
+    const ClockHandleBasicData& proto) {
+  // Heap allocated separate from table
+  HandleImpl* h = new HandleImpl();
+  ClockHandleBasicData* h_alias = h;
+  *h_alias = proto;
+  h->SetStandalone();
+  // Single reference (standalone entries only created if returning a refed
+  // Handle back to user)
+  uint64_t meta = uint64_t{ClockHandle::kStateInvisible}
+                  << ClockHandle::kStateShift;
+  meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift;
+  h->meta.store(meta, std::memory_order_release);
+  // Keep track of how much of usage is standalone
+  standalone_usage_.fetch_add(proto.GetTotalCharge(),
+                              std::memory_order_relaxed);
+  return h;
+}
+
+template <class Table>
+typename Table::HandleImpl* BaseClockTable::CreateStandalone(
+    ClockHandleBasicData& proto, size_t capacity, bool strict_capacity_limit,
+    bool allow_uncharged) {
+  Table& derived = static_cast<Table&>(*this);
+  typename Table::InsertState state;
+  derived.StartInsert(state);
+
+  const size_t total_charge = proto.GetTotalCharge();
+  if (strict_capacity_limit) {
+    Status s = ChargeUsageMaybeEvictStrict<Table>(
+        total_charge, capacity,
+        /*need_evict_for_occupancy=*/false, state);
+    if (!s.ok()) {
+      if (allow_uncharged) {
+        proto.total_charge = 0;
+      } else {
+        return nullptr;
+      }
+    }
+  } else {
+    // Case strict_capacity_limit == false
+    bool success = ChargeUsageMaybeEvictNonStrict<Table>(
+        total_charge, capacity,
+        /*need_evict_for_occupancy=*/false, state);
+    if (!success) {
+      // Force the issue
+      usage_.fetch_add(total_charge, std::memory_order_relaxed);
+    }
+  }
+
+  return StandaloneInsert<typename Table::HandleImpl>(proto);
+}
+
+template <class Table>
+Status BaseClockTable::ChargeUsageMaybeEvictStrict(
+    size_t total_charge, size_t capacity, bool need_evict_for_occupancy,
+    typename Table::InsertState& state) {
   if (total_charge > capacity) {
     return Status::MemoryLimit(
         "Cache entry too large for a single cache shard: " +
@@ -287,7 +382,8 @@ inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
   if (request_evict_charge > 0) {
     size_t evicted_charge = 0;
     size_t evicted_count = 0;
-    Evict(request_evict_charge, &evicted_charge, &evicted_count);
+    static_cast<Table*>(this)->Evict(request_evict_charge, &evicted_charge,
+                                     &evicted_count, state);
     occupancy_.fetch_sub(evicted_count, std::memory_order_release);
     if (LIKELY(evicted_charge > need_evict_charge)) {
       assert(evicted_count > 0);
@@ -316,8 +412,10 @@ inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
   return Status::OK();
 }

-inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
-    size_t total_charge, size_t capacity, bool need_evict_for_occupancy) {
+template <class Table>
+inline bool BaseClockTable::ChargeUsageMaybeEvictNonStrict(
+    size_t total_charge, size_t capacity, bool need_evict_for_occupancy,
+    typename Table::InsertState& state) {
   // For simplicity, we consider that either the cache can accept the insert
   // with no evictions, or we must evict enough to make (at least) enough
   // space. It could lead to unnecessary failures or excessive evictions in
@@ -354,7 +452,8 @@ inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
   size_t evicted_charge = 0;
   size_t evicted_count = 0;
   if (need_evict_charge > 0) {
-    Evict(need_evict_charge, &evicted_charge, &evicted_count);
+    static_cast<Table*>(this)->Evict(need_evict_charge, &evicted_charge,
+                                     &evicted_count, state);
     // Deal with potential occupancy deficit
     if (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0) {
       assert(evicted_charge == 0);
@@ -373,28 +472,17 @@ inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
   return true;
 }

-inline HyperClockTable::HandleImpl* HyperClockTable::StandaloneInsert(
-    const ClockHandleBasicData& proto) {
-  // Heap allocated separate from table
-  HandleImpl* h = new HandleImpl();
-  ClockHandleBasicData* h_alias = h;
-  *h_alias = proto;
-  h->SetStandalone();
-  // Single reference (standalone entries only created if returning a refed
-  // Handle back to user)
-  uint64_t meta = uint64_t{ClockHandle::kStateInvisible}
-                  << ClockHandle::kStateShift;
-  meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift;
-  h->meta.store(meta, std::memory_order_release);
-  // Keep track of how much of usage is standalone
-  standalone_usage_.fetch_add(proto.GetTotalCharge(),
-                              std::memory_order_relaxed);
-  return h;
-}
+template <class Table>
+Status BaseClockTable::Insert(const ClockHandleBasicData& proto,
+                              typename Table::HandleImpl** handle,
+                              Cache::Priority priority, size_t capacity,
+                              bool strict_capacity_limit) {
+  using HandleImpl = typename Table::HandleImpl;
+  Table& derived = static_cast<Table&>(*this);
+
+  typename Table::InsertState state;
+  derived.StartInsert(state);

-Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
-                               HandleImpl** handle, Cache::Priority priority,
-                               size_t capacity, bool strict_capacity_limit) {
   // Do we have the available occupancy? Optimistically assume we do
   // and deal with it if we don't.
   size_t old_occupancy = occupancy_.fetch_add(1, std::memory_order_acquire);
@@ -402,23 +490,24 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
     occupancy_.fetch_sub(1, std::memory_order_relaxed);
   };
   // Whether we over-committed and need an eviction to make up for it
-  bool need_evict_for_occupancy = old_occupancy >= occupancy_limit_;
+  bool need_evict_for_occupancy =
+      !derived.GrowIfNeeded(old_occupancy + 1, state);

   // Usage/capacity handling is somewhat different depending on
   // strict_capacity_limit, but mostly pessimistic.
   bool use_standalone_insert = false;
   const size_t total_charge = proto.GetTotalCharge();
   if (strict_capacity_limit) {
-    Status s = ChargeUsageMaybeEvictStrict(total_charge, capacity,
-                                           need_evict_for_occupancy);
+    Status s = ChargeUsageMaybeEvictStrict<Table>(
+        total_charge, capacity, need_evict_for_occupancy, state);
     if (!s.ok()) {
       revert_occupancy_fn();
       return s;
     }
   } else {
     // Case strict_capacity_limit == false
-    bool success = ChargeUsageMaybeEvictNonStrict(total_charge, capacity,
-                                                  need_evict_for_occupancy);
+    bool success = ChargeUsageMaybeEvictNonStrict<Table>
( + total_charge, capacity, need_evict_for_occupancy, state); if (!success) { revert_occupancy_fn(); if (handle == nullptr) { @@ -451,115 +540,17 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto, uint64_t initial_countdown = GetInitialCountdown(priority); assert(initial_countdown > 0); - size_t probe = 0; - HandleImpl* e = FindSlot( - proto.hashed_key, - [&](HandleImpl* h) { - // Optimistically transition the slot from "empty" to - // "under construction" (no effect on other states) - uint64_t old_meta = - h->meta.fetch_or(uint64_t{ClockHandle::kStateOccupiedBit} - << ClockHandle::kStateShift, - std::memory_order_acq_rel); - uint64_t old_state = old_meta >> ClockHandle::kStateShift; - - if (old_state == ClockHandle::kStateEmpty) { - // We've started inserting into an available slot, and taken - // ownership Save data fields - ClockHandleBasicData* h_alias = h; - *h_alias = proto; - - // Transition from "under construction" state to "visible" state - uint64_t new_meta = uint64_t{ClockHandle::kStateVisible} - << ClockHandle::kStateShift; - - // Maybe with an outstanding reference - new_meta |= initial_countdown << ClockHandle::kAcquireCounterShift; - new_meta |= (initial_countdown - (handle != nullptr)) - << ClockHandle::kReleaseCounterShift; + HandleImpl* e = + derived.DoInsert(proto, initial_countdown, handle != nullptr, state); -#ifndef NDEBUG - // Save the state transition, with assertion - old_meta = h->meta.exchange(new_meta, std::memory_order_release); - assert(old_meta >> ClockHandle::kStateShift == - ClockHandle::kStateConstruction); -#else - // Save the state transition - h->meta.store(new_meta, std::memory_order_release); -#endif - return true; - } else if (old_state != ClockHandle::kStateVisible) { - // Slot not usable / touchable now - return false; - } - // Existing, visible entry, which might be a match. - // But first, we need to acquire a ref to read it. In fact, number of - // refs for initial countdown, so that we boost the clock state if - // this is a match. - old_meta = h->meta.fetch_add( - ClockHandle::kAcquireIncrement * initial_countdown, - std::memory_order_acq_rel); - // Like Lookup - if ((old_meta >> ClockHandle::kStateShift) == - ClockHandle::kStateVisible) { - // Acquired a read reference - if (h->hashed_key == proto.hashed_key) { - // Match. Release in a way that boosts the clock state - old_meta = h->meta.fetch_add( - ClockHandle::kReleaseIncrement * initial_countdown, - std::memory_order_acq_rel); - // Correct for possible (but rare) overflow - CorrectNearOverflow(old_meta, h->meta); - // Insert standalone instead (only if return handle needed) - use_standalone_insert = true; - return true; - } else { - // Mismatch. Pretend we never took the reference - old_meta = h->meta.fetch_sub( - ClockHandle::kAcquireIncrement * initial_countdown, - std::memory_order_acq_rel); - } - } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) == - ClockHandle::kStateInvisible)) { - // Pretend we never took the reference - // WART: there's a tiny chance we release last ref to invisible - // entry here. If that happens, we let eviction take care of it. - old_meta = h->meta.fetch_sub( - ClockHandle::kAcquireIncrement * initial_countdown, - std::memory_order_acq_rel); - } else { - // For other states, incrementing the acquire counter has no effect - // so we don't need to undo it. - // Slot not usable / touchable now. 
-      }
-      (void)old_meta;
-      return false;
-    },
-    [&](HandleImpl* /*h*/) { return false; },
-    [&](HandleImpl* h) {
-      h->displacements.fetch_add(1, std::memory_order_relaxed);
-    },
-    probe);
-  if (e == nullptr) {
-    // Occupancy check and never abort FindSlot above should generally
-    // prevent this, except it's theoretically possible for other threads
-    // to evict and replace entries in the right order to hit every slot
-    // when it is populated. Assuming random hashing, the chance of that
-    // should be no higher than pow(kStrictLoadFactor, n) for n slots.
-    // That should be infeasible for roughly n >= 256, so if this assertion
-    // fails, that suggests something is going wrong.
-    assert(GetTableSize() < 256);
-    use_standalone_insert = true;
-  }
-  if (!use_standalone_insert) {
+  if (e) {
     // Successfully inserted
     if (handle) {
       *handle = e;
     }
     return Status::OK();
   }
-  // Roll back table insertion
-  Rollback(proto.hashed_key, e);
+  // Not inserted
   revert_occupancy_fn();
   // Maybe fall back on standalone insert
   if (handle == nullptr) {
@@ -568,12 +559,14 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
     proto.FreeData(allocator_);
     return Status::OK();
   }
+
+  use_standalone_insert = true;
 }

 // Run standalone insert
 assert(use_standalone_insert);
-  *handle = StandaloneInsert(proto);
+  *handle = StandaloneInsert<HandleImpl>(proto);

 // The OkOverwritten status is used to count "redundant" insertions into
 // block cache. This implementation doesn't strictly check for redundant
@@ -583,32 +576,146 @@
 return Status::OkOverwritten();
 }

-HyperClockTable::HandleImpl* HyperClockTable::CreateStandalone(
-    ClockHandleBasicData& proto, size_t capacity, bool strict_capacity_limit,
-    bool allow_uncharged) {
-  const size_t total_charge = proto.GetTotalCharge();
-  if (strict_capacity_limit) {
-    Status s = ChargeUsageMaybeEvictStrict(total_charge, capacity,
-                                           /*need_evict_for_occupancy=*/false);
-    if (!s.ok()) {
-      if (allow_uncharged) {
-        proto.total_charge = 0;
-      } else {
-        return nullptr;
-      }
-    }
-  } else {
-    // Case strict_capacity_limit == false
-    bool success =
-        ChargeUsageMaybeEvictNonStrict(total_charge, capacity,
-                                       /*need_evict_for_occupancy=*/false);
-    if (!success) {
-      // Force the issue
-      usage_.fetch_add(total_charge, std::memory_order_relaxed);
+void BaseClockTable::Ref(ClockHandle& h) {
+  // Increment acquire counter
+  uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement,
+                                       std::memory_order_acquire);
+
+  assert((old_meta >> ClockHandle::kStateShift) &
+         ClockHandle::kStateShareableBit);
+  // Must have already had a reference
+  assert(GetRefcount(old_meta) > 0);
+  (void)old_meta;
+}
+
+#ifndef NDEBUG
+void BaseClockTable::TEST_RefN(ClockHandle& h, size_t n) {
+  // Increment acquire counter
+  uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement,
+                                       std::memory_order_acquire);
+
+  assert((old_meta >> ClockHandle::kStateShift) &
+         ClockHandle::kStateShareableBit);
+  (void)old_meta;
+}
+
+void BaseClockTable::TEST_ReleaseNMinus1(ClockHandle* h, size_t n) {
+  assert(n > 0);
+
+  // Like n-1 Releases, but assumes one more will happen in the caller to take
+  // care of anything like erasing an unreferenced, invisible entry.
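// (Counter arithmetic assumed here, consistent with GetRefcount elsewhere in
// this file: a handle's reference count is the acquire counter minus the
// release counter packed into meta, so adding (n - 1) * kReleaseIncrement
// drops the refcount by n - 1 while leaving any zero-refcount cleanup to the
// caller's one remaining Release.)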
+  uint64_t old_meta = h->meta.fetch_add(
+      (n - 1) * ClockHandle::kReleaseIncrement, std::memory_order_acquire);
+  assert((old_meta >> ClockHandle::kStateShift) &
+         ClockHandle::kStateShareableBit);
+  (void)old_meta;
+}
+#endif
+
+HyperClockTable::HyperClockTable(
+    size_t capacity, bool /*strict_capacity_limit*/,
+    CacheMetadataChargePolicy metadata_charge_policy,
+    MemoryAllocator* allocator,
+    const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed,
+    const Opts& opts)
+    : BaseClockTable(metadata_charge_policy, allocator, eviction_callback,
+                     hash_seed),
+      length_bits_(CalcHashBits(capacity, opts.estimated_value_size,
+                                metadata_charge_policy)),
+      length_bits_mask_((size_t{1} << length_bits_) - 1),
+      occupancy_limit_(static_cast<size_t>((uint64_t{1} << length_bits_) *
+                                           kStrictLoadFactor)),
+      array_(new HandleImpl[size_t{1} << length_bits_]) {
+  if (metadata_charge_policy ==
+      CacheMetadataChargePolicy::kFullChargeCacheMetadata) {
+    usage_ += size_t{GetTableSize()} * sizeof(HandleImpl);
+  }
+
+  static_assert(sizeof(HandleImpl) == 64U,
+                "Expecting size / alignment with common cache line size");
+}
+
+HyperClockTable::~HyperClockTable() {
+  // Assumes there are no references or active operations on any slot/element
+  // in the table.
+  for (size_t i = 0; i < GetTableSize(); i++) {
+    HandleImpl& h = array_[i];
+    switch (h.meta >> ClockHandle::kStateShift) {
+      case ClockHandle::kStateEmpty:
+        // noop
+        break;
+      case ClockHandle::kStateInvisible:  // rare but possible
+      case ClockHandle::kStateVisible:
+        assert(GetRefcount(h.meta) == 0);
+        h.FreeData(allocator_);
+#ifndef NDEBUG
+        Rollback(h.hashed_key, &h);
+        ReclaimEntryUsage(h.GetTotalCharge());
+#endif
+        break;
+      // otherwise
+      default:
+        assert(false);
+        break;
     }
   }
-  return StandaloneInsert(proto);

+#ifndef NDEBUG
+  for (size_t i = 0; i < GetTableSize(); i++) {
+    assert(array_[i].displacements.load() == 0);
+  }
+#endif
+
+  assert(usage_.load() == 0 ||
+         usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl));
+  assert(occupancy_ == 0);
+}
+
+void HyperClockTable::StartInsert(InsertState&) {}
+
+bool HyperClockTable::GrowIfNeeded(size_t new_occupancy, InsertState&) {
+  return new_occupancy <= occupancy_limit_;
+}
+
+HyperClockTable::HandleImpl* HyperClockTable::DoInsert(
+    const ClockHandleBasicData& proto, uint64_t initial_countdown,
+    bool keep_ref, InsertState&) {
+  size_t probe = 0;
+  bool already_matches = false;
+  HandleImpl* e = FindSlot(
+      proto.hashed_key,
+      [&](HandleImpl* h) {
+        // FIXME: simplify and handle in abort_fn below?
+        bool inserted =
+            TryInsert(proto, *h, initial_countdown, keep_ref, &already_matches);
+        return inserted || already_matches;
+      },
+      [&](HandleImpl* /*h*/) { return false; },
+      [&](HandleImpl* h) {
+        h->displacements.fetch_add(1, std::memory_order_relaxed);
+      },
+      probe);
+  if (e == nullptr) {
+    // Occupancy check and never abort FindSlot above should generally
+    // prevent this, except it's theoretically possible for other threads
+    // to evict and replace entries in the right order to hit every slot
+    // when it is populated. Assuming random hashing, the chance of that
+    // should be no higher than pow(kStrictLoadFactor, n) for n slots.
+    // That should be infeasible for roughly n >= 256, so if this assertion
+    // fails, that suggests something is going wrong.
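// (For scale, assuming kStrictLoadFactor is 0.7 as defined in clock_cache.h:
// pow(0.7, 256) = exp(256 * ln 0.7) ~= exp(-91.3) ~= 2e-40, so hitting a
// populated slot at every probe is effectively impossible under random
// hashing.)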
+ assert(GetTableSize() < 256); + // WART/FIXME: need to roll back every slot + already_matches = true; + } + if (!already_matches) { + // Successfully inserted + assert(e); + return e; + } + // Roll back displacements from failed table insertion + Rollback(proto.hashed_key, e); + // Insertion skipped + return nullptr; } HyperClockTable::HandleImpl* HyperClockTable::Lookup( @@ -753,40 +860,17 @@ bool HyperClockTable::Release(HandleImpl* h, bool useful, } } -void HyperClockTable::Ref(HandleImpl& h) { - // Increment acquire counter - uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement, - std::memory_order_acquire); - - assert((old_meta >> ClockHandle::kStateShift) & - ClockHandle::kStateShareableBit); - // Must have already had a reference - assert(GetRefcount(old_meta) > 0); - (void)old_meta; -} - -void HyperClockTable::TEST_RefN(HandleImpl& h, size_t n) { - // Increment acquire counter - uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement, - std::memory_order_acquire); - - assert((old_meta >> ClockHandle::kStateShift) & - ClockHandle::kStateShareableBit); - (void)old_meta; -} - +#ifndef NDEBUG void HyperClockTable::TEST_ReleaseN(HandleImpl* h, size_t n) { if (n > 0) { - // Split into n - 1 and 1 steps. - uint64_t old_meta = h->meta.fetch_add( - (n - 1) * ClockHandle::kReleaseIncrement, std::memory_order_acquire); - assert((old_meta >> ClockHandle::kStateShift) & - ClockHandle::kStateShareableBit); - (void)old_meta; + // Do n-1 simple releases first + TEST_ReleaseNMinus1(h, n); + // Then the last release might be more involved Release(h, /*useful*/ true, /*erase_if_last_ref*/ false); } } +#endif void HyperClockTable::Erase(const UniqueId64x2& hashed_key) { size_t probe = 0; @@ -978,7 +1062,8 @@ inline void HyperClockTable::ReclaimEntryUsage(size_t total_charge) { } inline void HyperClockTable::Evict(size_t requested_charge, - size_t* freed_charge, size_t* freed_count) { + size_t* freed_charge, size_t* freed_count, + InsertState&) { // precondition assert(requested_charge > 0); @@ -1146,18 +1231,15 @@ Status ClockCacheShard
<Table>::Insert(const Slice& key,
   proto.value = value;
   proto.helper = helper;
   proto.total_charge = charge;
-  return table_.Insert(proto, handle, priority,
-                       capacity_.load(std::memory_order_relaxed),
-                       strict_capacity_limit_.load(std::memory_order_relaxed));
+  return table_.template Insert<Table>(
+      proto, handle, priority, capacity_.load(std::memory_order_relaxed),
+      strict_capacity_limit_.load(std::memory_order_relaxed));
 }

 template <class Table>
-typename ClockCacheShard<Table>::HandleImpl*
-ClockCacheShard<Table>::CreateStandalone(const Slice& key,
-                                         const UniqueId64x2& hashed_key,
-                                         Cache::ObjectPtr obj,
-                                         const Cache::CacheItemHelper* helper,
-                                         size_t charge, bool allow_uncharged) {
+typename Table::HandleImpl* ClockCacheShard<Table>::CreateStandalone(
+    const Slice& key, const UniqueId64x2& hashed_key, Cache::ObjectPtr obj,
+    const Cache::CacheItemHelper* helper, size_t charge, bool allow_uncharged) {
   if (UNLIKELY(key.size() != kCacheKeySize)) {
     return nullptr;
   }
@@ -1166,7 +1248,7 @@ ClockCacheShard<Table>::CreateStandalone(const Slice& key,
   proto.value = obj;
   proto.helper = helper;
   proto.total_charge = charge;
-  return table_.CreateStandalone(
+  return table_.template CreateStandalone<Table>(
       proto, capacity_.load(std::memory_order_relaxed),
       strict_capacity_limit_.load(std::memory_order_relaxed), allow_uncharged);
 }
@@ -1198,6 +1280,7 @@ bool ClockCacheShard<Table>::Release(HandleImpl* handle, bool useful,
   return table_.Release(handle, useful, erase_if_last_ref);
 }

+#ifndef NDEBUG
 template <class Table>
 void ClockCacheShard<Table>::TEST_RefN(HandleImpl* h, size_t n) {
   table_.TEST_RefN(*h, n);
 }
@@ -1207,6 +1290,7 @@ template <class Table>
 void ClockCacheShard<Table>::TEST_ReleaseN(HandleImpl* h, size_t n) {
   table_.TEST_ReleaseN(h, n);
 }
+#endif

 template <class Table>
 bool ClockCacheShard<Table>
::Release(HandleImpl* handle,

diff --git a/cache/clock_cache.h b/cache/clock_cache.h
index b2578b467..fff3ef43d 100644
--- a/cache/clock_cache.h
+++ b/cache/clock_cache.h
@@ -374,11 +374,123 @@ struct ClockHandle : public ClockHandleBasicData {
   // See above
   std::atomic<uint64_t> meta{};

-  // Anticipating use for SecondaryCache support
-  void* reserved_for_future_use = nullptr;
+  // Whether this is a "detached" handle that is independently allocated
+  // with `new` (so must be deleted with `delete`).
+  // TODO: ideally this would be packed into some other data field, such
+  // as upper bits of total_charge, but that incurs a measurable performance
+  // regression.
+  bool standalone = false;
+
+  inline bool IsStandalone() const { return standalone; }
+
+  inline void SetStandalone() { standalone = true; }
 };  // struct ClockHandle

-class HyperClockTable {
+class BaseClockTable {
+ public:
+  BaseClockTable(CacheMetadataChargePolicy metadata_charge_policy,
+                 MemoryAllocator* allocator,
+                 const Cache::EvictionCallback* eviction_callback,
+                 const uint32_t* hash_seed)
+      : metadata_charge_policy_(metadata_charge_policy),
+        allocator_(allocator),
+        eviction_callback_(*eviction_callback),
+        hash_seed_(*hash_seed) {}
+
+  // Creates a "standalone" handle for returning from an Insert operation that
+  // cannot be completed by actually inserting into the table.
+  // Updates `standalone_usage_` but not `usage_` nor `occupancy_`.
+  template <class HandleImpl>
+  HandleImpl* StandaloneInsert(const ClockHandleBasicData& proto);
+
+  template <class Table>
+  typename Table::HandleImpl* CreateStandalone(ClockHandleBasicData& proto,
+                                               size_t capacity,
+                                               bool strict_capacity_limit,
+                                               bool allow_uncharged);
+
+  // Helper for updating `usage_` for new entry with given `total_charge`
+  // and evicting if needed under strict_capacity_limit=true rules. This
+  // means the operation might fail with Status::MemoryLimit. If
+  // `need_evict_for_occupancy`, then eviction of at least one entry is
+  // required, and the operation should fail if not possible.
+  // NOTE: Otherwise, occupancy_ is not managed in this function
+  template <class Table>
+  Status ChargeUsageMaybeEvictStrict(size_t total_charge, size_t capacity,
+                                     bool need_evict_for_occupancy,
+                                     typename Table::InsertState& state);
+
+  // Helper for updating `usage_` for new entry with given `total_charge`
+  // and evicting if needed under strict_capacity_limit=false rules. This
+  // means that updating `usage_` always succeeds even if forced to exceed
+  // capacity. If `need_evict_for_occupancy`, then eviction of at least one
+  // entry is required, and the operation should return false if such eviction
+  // is not possible. `usage_` is not updated in that case. Otherwise, returns
+  // true, indicating success.
+  // NOTE: occupancy_ is not managed in this function
+  template <class Table>
+  bool ChargeUsageMaybeEvictNonStrict(size_t total_charge, size_t capacity,
+                                      bool need_evict_for_occupancy,
+                                      typename Table::InsertState& state);
+
+  template <class Table>
+  Status Insert(const ClockHandleBasicData& proto,
+                typename Table::HandleImpl** handle, Cache::Priority priority,
+                size_t capacity, bool strict_capacity_limit);
+
+  void Ref(ClockHandle& handle);
+
+  size_t GetOccupancy() const {
+    return occupancy_.load(std::memory_order_relaxed);
+  }
+
+  size_t GetUsage() const { return usage_.load(std::memory_order_relaxed); }
+
+  size_t GetStandaloneUsage() const {
+    return standalone_usage_.load(std::memory_order_relaxed);
+  }
+
+  uint32_t GetHashSeed() const { return hash_seed_; }
+
+#ifndef NDEBUG
+  // Acquire N references
+  void TEST_RefN(ClockHandle& handle, size_t n);
+  // Helper for TEST_ReleaseN
+  void TEST_ReleaseNMinus1(ClockHandle* handle, size_t n);
+#endif
+
+ protected:
+  // We partition the following members into different cache lines
+  // to avoid false sharing among Lookup, Release, Erase and Insert
+  // operations in ClockCacheShard.
+
+  // Clock algorithm sweep pointer.
+  std::atomic<uint64_t> clock_pointer_{};
+
+  ALIGN_AS(CACHE_LINE_SIZE)
+  // Number of elements in the table.
+  std::atomic<size_t> occupancy_{};
+
+  // Memory usage by entries tracked by the cache (including standalone)
+  std::atomic<size_t> usage_{};
+
+  // Part of usage by standalone entries (not in table)
+  std::atomic<size_t> standalone_usage_{};
+
+  ALIGN_AS(CACHE_LINE_SIZE)
+  const CacheMetadataChargePolicy metadata_charge_policy_;
+
+  // From Cache, for deleter
+  MemoryAllocator* const allocator_;
+
+  // A reference to Cache::eviction_callback_
+  const Cache::EvictionCallback& eviction_callback_;
+
+  // A reference to ShardedCacheBase::hash_seed_
+  const uint32_t& hash_seed_;
+};
+
+class HyperClockTable : public BaseClockTable {
  public:
   // Target size to be exactly a common cache line size (see static_assert in
   // clock_cache.cc)
@@ -387,16 +499,6 @@ class HyperClockTable {
     // up in this slot or a higher one.
     std::atomic<uint32_t> displacements{};

-    // Whether this is a "deteched" handle that is independently allocated
-    // with `new` (so must be deleted with `delete`).
-    // TODO: ideally this would be packed into some other data field, such
-    // as upper bits of total_charge, but that incurs a measurable performance
-    // regression.
-    bool standalone = false;
-
-    inline bool IsStandalone() const { return standalone; }
-
-    inline void SetStandalone() { standalone = true; }
   };  // struct HandleImpl

   struct Opts {
@@ -410,20 +512,28 @@ class HyperClockTable {
                   const uint32_t* hash_seed, const Opts& opts);
   ~HyperClockTable();

-  Status Insert(const ClockHandleBasicData& proto, HandleImpl** handle,
-                Cache::Priority priority, size_t capacity,
-                bool strict_capacity_limit);
+  // For BaseClockTable::Insert
+  struct InsertState {};
+
+  void StartInsert(InsertState& state);

-  HandleImpl* CreateStandalone(ClockHandleBasicData& proto, size_t capacity,
-                               bool strict_capacity_limit,
-                               bool allow_uncharged);
+  // Returns true iff there is room for the proposed number of entries.
+  bool GrowIfNeeded(size_t new_occupancy, InsertState& state);
+
+  HandleImpl* DoInsert(const ClockHandleBasicData& proto,
+                       uint64_t initial_countdown, bool take_ref,
+                       InsertState& state);
+
+  // Runs the clock eviction algorithm trying to reclaim at least
+  // requested_charge. Returns how much is evicted, which could be less
+  // if it appears impossible to evict the requested amount without blocking.
+  void Evict(size_t requested_charge, size_t* freed_charge, size_t* freed_count,
+             InsertState& state);

   HandleImpl* Lookup(const UniqueId64x2& hashed_key);

   bool Release(HandleImpl* handle, bool useful, bool erase_if_last_ref);

-  void Ref(HandleImpl& handle);
-
   void Erase(const UniqueId64x2& hashed_key);

   void ConstApplyToEntriesRange(std::function<void(const HandleImpl&)> func,
                                 size_t index_begin, size_t index_end,
                                 bool apply_if_will_be_deleted) const;
@@ -436,23 +546,11 @@ class HyperClockTable {
   int GetLengthBits() const { return length_bits_; }

-  size_t GetOccupancy() const {
-    return occupancy_.load(std::memory_order_relaxed);
-  }
-
   size_t GetOccupancyLimit() const { return occupancy_limit_; }

-  size_t GetUsage() const { return usage_.load(std::memory_order_relaxed); }
-
-  size_t GetStandaloneUsage() const {
-    return standalone_usage_.load(std::memory_order_relaxed);
-  }
-
-  uint32_t GetHashSeed() const { return hash_seed_; }
-
-  // Acquire/release N references
-  void TEST_RefN(HandleImpl& handle, size_t n);
-  void TEST_ReleaseN(HandleImpl* handle, size_t n);
+#ifndef NDEBUG
+  void TEST_ReleaseN(HandleImpl* h, size_t n);
+#endif

  private:  // functions
   // Returns x mod 2^{length_bits_}.
   inline size_t ModTableSize(uint64_t x) {
     return static_cast<size_t>(x) & length_bits_mask_;
   }

-  // Runs the clock eviction algorithm trying to reclaim at least
-  // requested_charge. Returns how much is evicted, which could be less
-  // if it appears impossible to evict the requested amount without blocking.
-  inline void Evict(size_t requested_charge, size_t* freed_charge,
-                    size_t* freed_count);
-
   // Returns the first slot in the probe sequence, starting from the given
   // probe number, with a handle e such that match(e) is true. At every
   // step, the function first tests whether match(e) holds. If this is false,
@@ -494,33 +586,6 @@ class HyperClockTable {
   // before releasing it so that it can be provided to this function.
   inline void ReclaimEntryUsage(size_t total_charge);

-  // Helper for updating `usage_` for new entry with given `total_charge`
-  // and evicting if needed under strict_capacity_limit=true rules. This
-  // means the operation might fail with Status::MemoryLimit. If
-  // `need_evict_for_occupancy`, then eviction of at least one entry is
-  // required, and the operation should fail if not possible.
-  // NOTE: Otherwise, occupancy_ is not managed in this function
-  inline Status ChargeUsageMaybeEvictStrict(size_t total_charge,
-                                            size_t capacity,
-                                            bool need_evict_for_occupancy);
-
-  // Helper for updating `usage_` for new entry with given `total_charge`
-  // and evicting if needed under strict_capacity_limit=false rules. This
-  // means that updating `usage_` always succeeds even if forced to exceed
-  // capacity. If `need_evict_for_occupancy`, then eviction of at least one
-  // entry is required, and the operation should return false if such eviction
-  // is not possible. `usage_` is not updated in that case. Otherwise, returns
-  // true, indicating success.
-  // NOTE: occupancy_ is not managed in this function
-  inline bool ChargeUsageMaybeEvictNonStrict(size_t total_charge,
-                                             size_t capacity,
-                                             bool need_evict_for_occupancy);
-
-  // Creates a "standalone" handle for returning from an Insert operation that
-  // cannot be completed by actually inserting into the table.
-  // Updates `standalone_usage_` but not `usage_` nor `occupancy_`.
-  inline HandleImpl* StandaloneInsert(const ClockHandleBasicData& proto);
-
   MemoryAllocator* GetAllocator() const { return allocator_; }

   // Returns the number of bits used to hash an element in the hash
   // table.
@@ -541,33 +606,6 @@ class HyperClockTable {
   // Array of slots comprising the hash table.
   const std::unique_ptr<HandleImpl[]> array_;
-
-  // From Cache, for deleter
-  MemoryAllocator* const allocator_;
-
-  // A reference to Cache::eviction_callback_
-  const Cache::EvictionCallback& eviction_callback_;
-
-  // A reference to ShardedCacheBase::hash_seed_
-  const uint32_t& hash_seed_;
-
-  // We partition the following members into different cache lines
-  // to avoid false sharing among Lookup, Release, Erase and Insert
-  // operations in ClockCacheShard.
-
-  ALIGN_AS(CACHE_LINE_SIZE)
-  // Clock algorithm sweep pointer.
-  std::atomic<uint64_t> clock_pointer_{};
-
-  ALIGN_AS(CACHE_LINE_SIZE)
-  // Number of elements in the table.
-  std::atomic<size_t> occupancy_{};
-
-  // Memory usage by entries tracked by the cache (including standalone)
-  std::atomic<size_t> usage_{};
-
-  // Part of usage by standalone entries (not in table)
-  std::atomic<size_t> standalone_usage_{};
 };  // class HyperClockTable

 // A single shard of sharded cache.
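A note on the dispatch pattern introduced by this patch: BaseClockTable is a plain (non-template) base class whose member function templates cast *this to the concrete table type supplied by the caller, so the shared insert/charge/evict logic is reused across table implementations without virtual dispatch on the hot path. Below is a minimal, self-contained sketch of the same idiom under simplified assumptions; BaseTable, FixedTable, and the toy Insert logic are hypothetical stand-ins, not the RocksDB API.

#include <cstddef>
#include <iostream>

// Non-template base: shared logic lives in member function templates that
// cast *this down to the concrete table type named by the caller.
class BaseTable {
 public:
  template <class Table>
  void Insert(std::size_t charge) {
    Table& derived = static_cast<Table&>(*this);
    typename Table::InsertState state;  // table-specific scratch state
    derived.StartInsert(state);
    if (!derived.GrowIfNeeded(/*new_occupancy=*/1, state)) {
      // No room for another entry: evict before charging usage.
      derived.Evict(charge, state);
    }
    // ... shared usage/occupancy accounting would continue here ...
  }
};

class FixedTable : public BaseTable {
 public:
  struct InsertState {};                         // nothing to carry (yet)
  void StartInsert(InsertState&) {}              // no-op for a fixed table
  bool GrowIfNeeded(std::size_t, InsertState&) { return false; }  // can't grow
  void Evict(std::size_t charge, InsertState&) {
    std::cout << "evict at least " << charge << " units\n";
  }
};

int main() {
  FixedTable table;
  // The caller names the concrete type, as ClockCacheShard<Table> does with
  // table_.template Insert<Table>(...).
  table.Insert<FixedTable>(64);
  return 0;
}

This only works because the shard always knows the concrete Table type at compile time; the base class is never used polymorphically, which is what lets these member templates compile down to direct calls.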