diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index bd6f4ec28..1ddb8d8ca 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -22,40 +22,129 @@ namespace ROCKSDB_NAMESPACE { -namespace hyper_clock_cache { +namespace clock_cache { +namespace { inline uint64_t GetRefcount(uint64_t meta) { return ((meta >> ClockHandle::kAcquireCounterShift) - (meta >> ClockHandle::kReleaseCounterShift)) & ClockHandle::kCounterMask; } +inline uint64_t GetInitialCountdown(Cache::Priority priority) { + // Set initial clock data from priority + // TODO: configuration parameters for priority handling and clock cycle + // count? + switch (priority) { + case Cache::Priority::HIGH: + return ClockHandle::kHighCountdown; + default: + assert(false); + FALLTHROUGH_INTENDED; + case Cache::Priority::LOW: + return ClockHandle::kLowCountdown; + case Cache::Priority::BOTTOM: + return ClockHandle::kBottomCountdown; + } +} + +inline void FreeDataMarkEmpty(ClockHandle& h) { + // NOTE: in theory there's more room for parallelism if we copy the handle + // data and delay actions like this until after marking the entry as empty, + // but performance tests only show a regression by copying the few words + // of data. + h.FreeData(); + +#ifndef NDEBUG + // Mark slot as empty, with assertion + uint64_t meta = h.meta.exchange(0, std::memory_order_release); + assert(meta >> ClockHandle::kStateShift == ClockHandle::kStateConstruction); +#else + // Mark slot as empty + h.meta.store(0, std::memory_order_release); +#endif +} + +inline bool ClockUpdate(ClockHandle& h) { + uint64_t meta = h.meta.load(std::memory_order_relaxed); + + uint64_t acquire_count = + (meta >> ClockHandle::kAcquireCounterShift) & ClockHandle::kCounterMask; + uint64_t release_count = + (meta >> ClockHandle::kReleaseCounterShift) & ClockHandle::kCounterMask; + // fprintf(stderr, "ClockUpdate @ %p: %lu %lu %u\n", &h, acquire_count, + // release_count, (unsigned)(meta >> ClockHandle::kStateShift)); + if (acquire_count != release_count) { + // Only clock update entries with no outstanding refs + return false; + } + if (!((meta >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit)) { + // Only clock update Shareable entries + return false; + } + if ((meta >> ClockHandle::kStateShift == ClockHandle::kStateVisible) && + acquire_count > 0) { + // Decrement clock + uint64_t new_count = + std::min(acquire_count - 1, uint64_t{ClockHandle::kMaxCountdown} - 1); + // Compare-exchange in the decremented clock info, but + // not aggressively + uint64_t new_meta = + (uint64_t{ClockHandle::kStateVisible} << ClockHandle::kStateShift) | + (new_count << ClockHandle::kReleaseCounterShift) | + (new_count << ClockHandle::kAcquireCounterShift); + h.meta.compare_exchange_strong(meta, new_meta, std::memory_order_relaxed); + return false; + } + // Otherwise, remove entry (either unreferenced invisible or + // unreferenced and expired visible). + if (h.meta.compare_exchange_strong( + meta, + uint64_t{ClockHandle::kStateConstruction} << ClockHandle::kStateShift, + std::memory_order_acquire)) { + // Took ownership. + return true; + } else { + // Compare-exchange failing probably + // indicates the entry was used, so skip it in that case. 
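+    // (Contract, as used by Evict() below: a false return means this entry
+    // simply survives the current clock pass and the scan moves on; a true
+    // return means the caller now owns the slot in the "under construction"
+    // state and must free its data, roll back displacements, and mark the
+    // slot empty again.)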
+ return false; + } +} + +} // namespace + void ClockHandleBasicData::FreeData() const { if (deleter) { UniqueId64x2 unhashed; - (*deleter)(ClockCacheShard::ReverseHash(hashed_key, &unhashed), value); + (*deleter)( + ClockCacheShard::ReverseHash(hashed_key, &unhashed), + value); } } -static_assert(sizeof(ClockHandle) == 64U, - "Expecting size / alignment with common cache line size"); - -ClockHandleTable::ClockHandleTable(int hash_bits, bool initial_charge_metadata) - : length_bits_(hash_bits), +HyperClockTable::HyperClockTable( + size_t capacity, bool /*strict_capacity_limit*/, + CacheMetadataChargePolicy metadata_charge_policy, const Opts& opts) + : length_bits_(CalcHashBits(capacity, opts.estimated_value_size, + metadata_charge_policy)), length_bits_mask_((size_t{1} << length_bits_) - 1), occupancy_limit_(static_cast((uint64_t{1} << length_bits_) * kStrictLoadFactor)), - array_(new ClockHandle[size_t{1} << length_bits_]) { - if (initial_charge_metadata) { - usage_ += size_t{GetTableSize()} * sizeof(ClockHandle); + array_(new HandleImpl[size_t{1} << length_bits_]) { + if (metadata_charge_policy == + CacheMetadataChargePolicy::kFullChargeCacheMetadata) { + usage_ += size_t{GetTableSize()} * sizeof(HandleImpl); } + + static_assert(sizeof(HandleImpl) == 64U, + "Expecting size / alignment with common cache line size"); } -ClockHandleTable::~ClockHandleTable() { +HyperClockTable::~HyperClockTable() { // Assumes there are no references or active operations on any slot/element // in the table. for (size_t i = 0; i < GetTableSize(); i++) { - ClockHandle& h = array_[i]; + HandleImpl& h = array_[i]; switch (h.meta >> ClockHandle::kStateShift) { case ClockHandle::kStateEmpty: // noop @@ -66,8 +155,7 @@ ClockHandleTable::~ClockHandleTable() { h.FreeData(); #ifndef NDEBUG Rollback(h.hashed_key, &h); - usage_.fetch_sub(h.total_charge, std::memory_order_relaxed); - occupancy_.fetch_sub(1U, std::memory_order_relaxed); + ReclaimEntryUsage(h.GetTotalCharge()); #endif break; // otherwise @@ -84,7 +172,7 @@ ClockHandleTable::~ClockHandleTable() { #endif assert(usage_.load() == 0 || - usage_.load() == size_t{GetTableSize()} * sizeof(ClockHandle)); + usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl)); assert(occupancy_ == 0); } @@ -161,9 +249,141 @@ inline void CorrectNearOverflow(uint64_t old_meta, } } -Status ClockHandleTable::Insert(const ClockHandleBasicData& proto, - ClockHandle** handle, Cache::Priority priority, - size_t capacity, bool strict_capacity_limit) { +inline Status HyperClockTable::ChargeUsageMaybeEvictStrict( + size_t total_charge, size_t capacity, bool need_evict_for_occupancy) { + if (total_charge > capacity) { + return Status::MemoryLimit( + "Cache entry too large for a single cache shard: " + + std::to_string(total_charge) + " > " + std::to_string(capacity)); + } + // Grab any available capacity, and free up any more required. + size_t old_usage = usage_.load(std::memory_order_relaxed); + size_t new_usage; + if (LIKELY(old_usage != capacity)) { + do { + new_usage = std::min(capacity, old_usage + total_charge); + } while (!usage_.compare_exchange_weak(old_usage, new_usage, + std::memory_order_relaxed)); + } else { + new_usage = old_usage; + } + // How much do we need to evict then? + size_t need_evict_charge = old_usage + total_charge - new_usage; + size_t request_evict_charge = need_evict_charge; + if (UNLIKELY(need_evict_for_occupancy) && request_evict_charge == 0) { + // Require at least 1 eviction. 
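+    // (Usage accounting alone may not demand an eviction at this point, but
+    // the caller has already determined that the table is at its occupancy
+    // limit, so at least one slot has to be reclaimed for this insert to
+    // have somewhere to land.)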
+ request_evict_charge = 1; + } + if (request_evict_charge > 0) { + size_t evicted_charge = 0; + size_t evicted_count = 0; + Evict(request_evict_charge, &evicted_charge, &evicted_count); + occupancy_.fetch_sub(evicted_count, std::memory_order_release); + if (LIKELY(evicted_charge > need_evict_charge)) { + assert(evicted_count > 0); + // Evicted more than enough + usage_.fetch_sub(evicted_charge - need_evict_charge, + std::memory_order_relaxed); + } else if (evicted_charge < need_evict_charge || + (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0)) { + // Roll back to old usage minus evicted + usage_.fetch_sub(evicted_charge + (new_usage - old_usage), + std::memory_order_relaxed); + if (evicted_charge < need_evict_charge) { + return Status::MemoryLimit( + "Insert failed because unable to evict entries to stay within " + "capacity limit."); + } else { + return Status::MemoryLimit( + "Insert failed because unable to evict entries to stay within " + "table occupancy limit."); + } + } + // If we needed to evict something and we are proceeding, we must have + // evicted something. + assert(evicted_count > 0); + } + return Status::OK(); +} + +inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict( + size_t total_charge, size_t capacity, bool need_evict_for_occupancy) { + // For simplicity, we consider that either the cache can accept the insert + // with no evictions, or we must evict enough to make (at least) enough + // space. It could lead to unnecessary failures or excessive evictions in + // some extreme cases, but allows a fast, simple protocol. If we allow a + // race to get us over capacity, then we might never get back to capacity + // limit if the sizes of entries allow each insertion to evict the minimum + // charge. Thus, we should evict some extra if it's not a signifcant + // portion of the shard capacity. This can have the side benefit of + // involving fewer threads in eviction. + size_t old_usage = usage_.load(std::memory_order_relaxed); + size_t need_evict_charge; + // NOTE: if total_charge > old_usage, there isn't yet enough to evict + // `total_charge` amount. Even if we only try to evict `old_usage` amount, + // there's likely something referenced and we would eat CPU looking for + // enough to evict. + if (old_usage + total_charge <= capacity || total_charge > old_usage) { + // Good enough for me (might run over with a race) + need_evict_charge = 0; + } else { + // Try to evict enough space, and maybe some extra + need_evict_charge = total_charge; + if (old_usage > capacity) { + // Not too much to avoid thundering herd while avoiding strict + // synchronization, such as the compare_exchange used with strict + // capacity limit. 
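+      // (Illustrative numbers: for a shard with capacity 1 GiB that is
+      // already over capacity, this asks for an extra
+      // std::min(capacity / 1024, total_charge) + 1, i.e. up to roughly
+      // 1 MiB beyond total_charge itself.)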
+ need_evict_charge += std::min(capacity / 1024, total_charge) + 1; + } + } + if (UNLIKELY(need_evict_for_occupancy) && need_evict_charge == 0) { + // Special case: require at least 1 eviction if we only have to + // deal with occupancy + need_evict_charge = 1; + } + size_t evicted_charge = 0; + size_t evicted_count = 0; + if (need_evict_charge > 0) { + Evict(need_evict_charge, &evicted_charge, &evicted_count); + // Deal with potential occupancy deficit + if (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0) { + assert(evicted_charge == 0); + // Can't meet occupancy requirement + return false; + } else { + // Update occupancy for evictions + occupancy_.fetch_sub(evicted_count, std::memory_order_release); + } + } + // Track new usage even if we weren't able to evict enough + usage_.fetch_add(total_charge - evicted_charge, std::memory_order_relaxed); + // No underflow + assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2); + // Success + return true; +} + +inline HyperClockTable::HandleImpl* HyperClockTable::DetachedInsert( + const ClockHandleBasicData& proto) { + // Heap allocated separate from table + HandleImpl* h = new HandleImpl(); + ClockHandleBasicData* h_alias = h; + *h_alias = proto; + h->SetDetached(); + // Single reference (detached entries only created if returning a refed + // Handle back to user) + uint64_t meta = uint64_t{ClockHandle::kStateInvisible} + << ClockHandle::kStateShift; + meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift; + h->meta.store(meta, std::memory_order_release); + // Keep track of how much of usage is detached + detached_usage_.fetch_add(proto.GetTotalCharge(), std::memory_order_relaxed); + return h; +} + +Status HyperClockTable::Insert(const ClockHandleBasicData& proto, + HandleImpl** handle, Cache::Priority priority, + size_t capacity, bool strict_capacity_limit) { // Do we have the available occupancy? Optimistically assume we do // and deal with it if we don't. size_t old_occupancy = occupancy_.fetch_add(1, std::memory_order_acquire); @@ -176,124 +396,31 @@ Status ClockHandleTable::Insert(const ClockHandleBasicData& proto, // Usage/capacity handling is somewhat different depending on // strict_capacity_limit, but mostly pessimistic. bool use_detached_insert = false; - const size_t total_charge = proto.total_charge; + const size_t total_charge = proto.GetTotalCharge(); if (strict_capacity_limit) { - if (total_charge > capacity) { - assert(!use_detached_insert); + Status s = ChargeUsageMaybeEvictStrict(total_charge, capacity, + need_evict_for_occupancy); + if (!s.ok()) { revert_occupancy_fn(); - return Status::MemoryLimit( - "Cache entry too large for a single cache shard: " + - std::to_string(total_charge) + " > " + std::to_string(capacity)); - } - // Grab any available capacity, and free up any more required. - size_t old_usage = usage_.load(std::memory_order_relaxed); - size_t new_usage; - if (LIKELY(old_usage != capacity)) { - do { - new_usage = std::min(capacity, old_usage + total_charge); - } while (!usage_.compare_exchange_weak(old_usage, new_usage, - std::memory_order_relaxed)); - } else { - new_usage = old_usage; - } - // How much do we need to evict then? - size_t need_evict_charge = old_usage + total_charge - new_usage; - size_t request_evict_charge = need_evict_charge; - if (UNLIKELY(need_evict_for_occupancy) && request_evict_charge == 0) { - // Require at least 1 eviction. 
- request_evict_charge = 1; - } - if (request_evict_charge > 0) { - size_t evicted_charge = 0; - size_t evicted_count = 0; - Evict(request_evict_charge, &evicted_charge, &evicted_count); - occupancy_.fetch_sub(evicted_count, std::memory_order_release); - if (LIKELY(evicted_charge > need_evict_charge)) { - assert(evicted_count > 0); - // Evicted more than enough - usage_.fetch_sub(evicted_charge - need_evict_charge, - std::memory_order_relaxed); - } else if (evicted_charge < need_evict_charge || - (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0)) { - // Roll back to old usage minus evicted - usage_.fetch_sub(evicted_charge + (new_usage - old_usage), - std::memory_order_relaxed); - assert(!use_detached_insert); - revert_occupancy_fn(); - if (evicted_charge < need_evict_charge) { - return Status::MemoryLimit( - "Insert failed because unable to evict entries to stay within " - "capacity limit."); - } else { - return Status::MemoryLimit( - "Insert failed because unable to evict entries to stay within " - "table occupancy limit."); - } - } - // If we needed to evict something and we are proceeding, we must have - // evicted something. - assert(evicted_count > 0); + return s; } } else { // Case strict_capacity_limit == false - - // For simplicity, we consider that either the cache can accept the insert - // with no evictions, or we must evict enough to make (at least) enough - // space. It could lead to unnecessary failures or excessive evictions in - // some extreme cases, but allows a fast, simple protocol. If we allow a - // race to get us over capacity, then we might never get back to capacity - // limit if the sizes of entries allow each insertion to evict the minimum - // charge. Thus, we should evict some extra if it's not a signifcant - // portion of the shard capacity. This can have the side benefit of - // involving fewer threads in eviction. - size_t old_usage = usage_.load(std::memory_order_relaxed); - size_t need_evict_charge; - // NOTE: if total_charge > old_usage, there isn't yet enough to evict - // `total_charge` amount. Even if we only try to evict `old_usage` amount, - // there's likely something referenced and we would eat CPU looking for - // enough to evict. - if (old_usage + total_charge <= capacity || total_charge > old_usage) { - // Good enough for me (might run over with a race) - need_evict_charge = 0; - } else { - // Try to evict enough space, and maybe some extra - need_evict_charge = total_charge; - if (old_usage > capacity) { - // Not too much to avoid thundering herd while avoiding strict - // synchronization - need_evict_charge += std::min(capacity / 1024, total_charge) + 1; - } - } - if (UNLIKELY(need_evict_for_occupancy) && need_evict_charge == 0) { - // Special case: require at least 1 eviction if we only have to - // deal with occupancy - need_evict_charge = 1; - } - size_t evicted_charge = 0; - size_t evicted_count = 0; - if (need_evict_charge > 0) { - Evict(need_evict_charge, &evicted_charge, &evicted_count); - // Deal with potential occupancy deficit - if (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0) { - assert(evicted_charge == 0); - revert_occupancy_fn(); - if (handle == nullptr) { - // Don't insert the entry but still return ok, as if the entry - // inserted into cache and evicted immediately. 
- proto.FreeData(); - return Status::OK(); - } else { - use_detached_insert = true; - } + bool success = ChargeUsageMaybeEvictNonStrict(total_charge, capacity, + need_evict_for_occupancy); + if (!success) { + revert_occupancy_fn(); + if (handle == nullptr) { + // Don't insert the entry but still return ok, as if the entry + // inserted into cache and evicted immediately. + proto.FreeData(); + return Status::OK(); } else { - // Update occupancy for evictions - occupancy_.fetch_sub(evicted_count, std::memory_order_release); + // Need to track usage of fallback detached insert + usage_.fetch_add(total_charge, std::memory_order_relaxed); + use_detached_insert = true; } } - // Track new usage even if we weren't able to evict enough - usage_.fetch_add(total_charge - evicted_charge, std::memory_order_relaxed); - // No underflow - assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2); } auto revert_usage_fn = [&]() { usage_.fetch_sub(total_charge, std::memory_order_relaxed); @@ -310,30 +437,13 @@ Status ClockHandleTable::Insert(const ClockHandleBasicData& proto, // * Have to insert into a suboptimal location (more probes) so that the // old entry can be kept around as well. - // Set initial clock data from priority - // TODO: configuration parameters for priority handling and clock cycle - // count? - uint64_t initial_countdown; - switch (priority) { - case Cache::Priority::HIGH: - initial_countdown = ClockHandle::kHighCountdown; - break; - default: - assert(false); - FALLTHROUGH_INTENDED; - case Cache::Priority::LOW: - initial_countdown = ClockHandle::kLowCountdown; - break; - case Cache::Priority::BOTTOM: - initial_countdown = ClockHandle::kBottomCountdown; - break; - } + uint64_t initial_countdown = GetInitialCountdown(priority); assert(initial_countdown > 0); size_t probe = 0; - ClockHandle* e = FindSlot( + HandleImpl* e = FindSlot( proto.hashed_key, - [&](ClockHandle* h) { + [&](HandleImpl* h) { // Optimistically transition the slot from "empty" to // "under construction" (no effect on other states) uint64_t old_meta = @@ -414,8 +524,8 @@ Status ClockHandleTable::Insert(const ClockHandleBasicData& proto, (void)old_meta; return false; }, - [&](ClockHandle* /*h*/) { return false; }, - [&](ClockHandle* h) { + [&](HandleImpl* /*h*/) { return false; }, + [&](HandleImpl* h) { h->displacements.fetch_add(1, std::memory_order_relaxed); }, probe); @@ -452,20 +562,8 @@ Status ClockHandleTable::Insert(const ClockHandleBasicData& proto, // Run detached insert assert(use_detached_insert); - ClockHandle* h = new ClockHandle(); - ClockHandleBasicData* h_alias = h; - *h_alias = proto; - h->detached = true; - // Single reference (detached entries only created if returning a refed - // Handle back to user) - uint64_t meta = uint64_t{ClockHandle::kStateInvisible} - << ClockHandle::kStateShift; - meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift; - h->meta.store(meta, std::memory_order_release); - // Keep track of usage - detached_usage_.fetch_add(total_charge, std::memory_order_relaxed); + *handle = DetachedInsert(proto); - *handle = h; // The OkOverwritten status is used to count "redundant" insertions into // block cache. 
This implementation doesn't strictly check for redundant // insertions, but we instead are probably interested in how many insertions @@ -474,11 +572,12 @@ Status ClockHandleTable::Insert(const ClockHandleBasicData& proto, return Status::OkOverwritten(); } -ClockHandle* ClockHandleTable::Lookup(const UniqueId64x2& hashed_key) { +HyperClockTable::HandleImpl* HyperClockTable::Lookup( + const UniqueId64x2& hashed_key) { size_t probe = 0; - ClockHandle* e = FindSlot( + HandleImpl* e = FindSlot( hashed_key, - [&](ClockHandle* h) { + [&](HandleImpl* h) { // Mostly branch-free version (similar performance) /* uint64_t old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement, @@ -532,16 +631,16 @@ ClockHandle* ClockHandleTable::Lookup(const UniqueId64x2& hashed_key) { (void)old_meta; return false; }, - [&](ClockHandle* h) { + [&](HandleImpl* h) { return h->displacements.load(std::memory_order_relaxed) == 0; }, - [&](ClockHandle* /*h*/) {}, probe); + [&](HandleImpl* /*h*/) {}, probe); return e; } -bool ClockHandleTable::Release(ClockHandle* h, bool useful, - bool erase_if_last_ref) { +bool HyperClockTable::Release(HandleImpl* h, bool useful, + bool erase_if_last_ref) { // In contrast with LRUCache's Release, this function won't delete the handle // when the cache is above capacity and the reference is the last one. Space // is only freed up by EvictFromClock (called by Insert when space is needed) @@ -595,29 +694,18 @@ bool ClockHandleTable::Release(ClockHandle* h, bool useful, uint64_t{ClockHandle::kStateConstruction} << ClockHandle::kStateShift, std::memory_order_acquire)); // Took ownership - // TODO? Delay freeing? - h->FreeData(); - size_t total_charge = h->total_charge; - if (UNLIKELY(h->detached)) { + size_t total_charge = h->GetTotalCharge(); + if (UNLIKELY(h->IsDetached())) { + h->FreeData(); // Delete detached handle delete h; detached_usage_.fetch_sub(total_charge, std::memory_order_relaxed); + usage_.fetch_sub(total_charge, std::memory_order_relaxed); } else { - UniqueId64x2 hashed_key = h->hashed_key; -#ifndef NDEBUG - // Mark slot as empty, with assertion - old_meta = h->meta.exchange(0, std::memory_order_release); - assert(old_meta >> ClockHandle::kStateShift == - ClockHandle::kStateConstruction); -#else - // Mark slot as empty - h->meta.store(0, std::memory_order_release); -#endif - occupancy_.fetch_sub(1U, std::memory_order_release); - Rollback(hashed_key, h); + Rollback(h->hashed_key, h); + FreeDataMarkEmpty(*h); + ReclaimEntryUsage(total_charge); } - usage_.fetch_sub(total_charge, std::memory_order_relaxed); - assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2); return true; } else { // Correct for possible (but rare) overflow @@ -626,7 +714,7 @@ bool ClockHandleTable::Release(ClockHandle* h, bool useful, } } -void ClockHandleTable::Ref(ClockHandle& h) { +void HyperClockTable::Ref(HandleImpl& h) { // Increment acquire counter uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement, std::memory_order_acquire); @@ -638,7 +726,7 @@ void ClockHandleTable::Ref(ClockHandle& h) { (void)old_meta; } -void ClockHandleTable::TEST_RefN(ClockHandle& h, size_t n) { +void HyperClockTable::TEST_RefN(HandleImpl& h, size_t n) { // Increment acquire counter uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement, std::memory_order_acquire); @@ -648,7 +736,7 @@ void ClockHandleTable::TEST_RefN(ClockHandle& h, size_t n) { (void)old_meta; } -void ClockHandleTable::TEST_ReleaseN(ClockHandle* h, size_t n) { +void HyperClockTable::TEST_ReleaseN(HandleImpl* h, 
size_t n) { if (n > 0) { // Split into n - 1 and 1 steps. uint64_t old_meta = h->meta.fetch_add( @@ -661,11 +749,11 @@ void ClockHandleTable::TEST_ReleaseN(ClockHandle* h, size_t n) { } } -void ClockHandleTable::Erase(const UniqueId64x2& hashed_key) { +void HyperClockTable::Erase(const UniqueId64x2& hashed_key) { size_t probe = 0; (void)FindSlot( hashed_key, - [&](ClockHandle* h) { + [&](HandleImpl* h) { // Could be multiple entries in rare cases. Erase them all. // Optimistically increment acquire counter uint64_t old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement, @@ -699,20 +787,11 @@ void ClockHandleTable::Erase(const UniqueId64x2& hashed_key) { std::memory_order_acq_rel)) { // Took ownership assert(hashed_key == h->hashed_key); - // TODO? Delay freeing? - h->FreeData(); - usage_.fetch_sub(h->total_charge, std::memory_order_relaxed); - assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2); -#ifndef NDEBUG - // Mark slot as empty, with assertion - old_meta = h->meta.exchange(0, std::memory_order_release); - assert(old_meta >> ClockHandle::kStateShift == - ClockHandle::kStateConstruction); -#else - // Mark slot as empty - h->meta.store(0, std::memory_order_release); -#endif - occupancy_.fetch_sub(1U, std::memory_order_release); + size_t total_charge = h->GetTotalCharge(); + FreeDataMarkEmpty(*h); + ReclaimEntryUsage(total_charge); + // We already have a copy of hashed_key in this case, so OK to + // delay Rollback until after releasing the entry Rollback(hashed_key, h); break; } @@ -735,14 +814,14 @@ void ClockHandleTable::Erase(const UniqueId64x2& hashed_key) { } return false; }, - [&](ClockHandle* h) { + [&](HandleImpl* h) { return h->displacements.load(std::memory_order_relaxed) == 0; }, - [&](ClockHandle* /*h*/) {}, probe); + [&](HandleImpl* /*h*/) {}, probe); } -void ClockHandleTable::ConstApplyToEntriesRange( - std::function func, size_t index_begin, +void HyperClockTable::ConstApplyToEntriesRange( + std::function func, size_t index_begin, size_t index_end, bool apply_if_will_be_deleted) const { uint64_t check_state_mask = ClockHandle::kStateShareableBit; if (!apply_if_will_be_deleted) { @@ -750,7 +829,7 @@ void ClockHandleTable::ConstApplyToEntriesRange( } for (size_t i = index_begin; i < index_end; i++) { - ClockHandle& h = array_[i]; + HandleImpl& h = array_[i]; // Note: to avoid using compare_exchange, we have to be extra careful. 
uint64_t old_meta = h.meta.load(std::memory_order_relaxed); @@ -782,9 +861,9 @@ void ClockHandleTable::ConstApplyToEntriesRange( } } -void ClockHandleTable::EraseUnRefEntries() { +void HyperClockTable::EraseUnRefEntries() { for (size_t i = 0; i <= this->length_bits_mask_; i++) { - ClockHandle& h = array_[i]; + HandleImpl& h = array_[i]; uint64_t old_meta = h.meta.load(std::memory_order_relaxed); if (old_meta & (uint64_t{ClockHandle::kStateShareableBit} @@ -795,28 +874,18 @@ void ClockHandleTable::EraseUnRefEntries() { << ClockHandle::kStateShift, std::memory_order_acquire)) { // Took ownership - UniqueId64x2 hashed_key = h.hashed_key; - h.FreeData(); - usage_.fetch_sub(h.total_charge, std::memory_order_relaxed); -#ifndef NDEBUG - // Mark slot as empty, with assertion - old_meta = h.meta.exchange(0, std::memory_order_release); - assert(old_meta >> ClockHandle::kStateShift == - ClockHandle::kStateConstruction); -#else - // Mark slot as empty - h.meta.store(0, std::memory_order_release); -#endif - occupancy_.fetch_sub(1U, std::memory_order_release); - Rollback(hashed_key, &h); + size_t total_charge = h.GetTotalCharge(); + Rollback(h.hashed_key, &h); + FreeDataMarkEmpty(h); + ReclaimEntryUsage(total_charge); } } } -ClockHandle* ClockHandleTable::FindSlot( - const UniqueId64x2& hashed_key, std::function match_fn, - std::function abort_fn, - std::function update_fn, size_t& probe) { +inline HyperClockTable::HandleImpl* HyperClockTable::FindSlot( + const UniqueId64x2& hashed_key, std::function match_fn, + std::function abort_fn, + std::function update_fn, size_t& probe) { // NOTE: upper 32 bits of hashed_key[0] is used for sharding // // We use double-hashing probing. Every probe in the sequence is a @@ -832,7 +901,7 @@ ClockHandle* ClockHandleTable::FindSlot( size_t increment = static_cast(hashed_key[0]) | 1U; size_t current = ModTableSize(base + probe * increment); while (probe <= length_bits_mask_) { - ClockHandle* h = &array_[current]; + HandleImpl* h = &array_[current]; if (match_fn(h)) { probe++; return h; @@ -848,8 +917,8 @@ ClockHandle* ClockHandleTable::FindSlot( return nullptr; } -void ClockHandleTable::Rollback(const UniqueId64x2& hashed_key, - const ClockHandle* h) { +inline void HyperClockTable::Rollback(const UniqueId64x2& hashed_key, + const HandleImpl* h) { size_t current = ModTableSize(hashed_key[1]); size_t increment = static_cast(hashed_key[0]) | 1U; while (&array_[current] != h) { @@ -858,8 +927,19 @@ void ClockHandleTable::Rollback(const UniqueId64x2& hashed_key, } } -void ClockHandleTable::Evict(size_t requested_charge, size_t* freed_charge, - size_t* freed_count) { +inline void HyperClockTable::ReclaimEntryUsage(size_t total_charge) { + auto old_occupancy = occupancy_.fetch_sub(1U, std::memory_order_release); + (void)old_occupancy; + // No underflow + assert(old_occupancy > 0); + auto old_usage = usage_.fetch_sub(total_charge, std::memory_order_relaxed); + (void)old_usage; + // No underflow + assert(old_usage >= total_charge); +} + +inline void HyperClockTable::Evict(size_t requested_charge, + size_t* freed_charge, size_t* freed_count) { // precondition assert(requested_charge > 0); @@ -880,64 +960,13 @@ void ClockHandleTable::Evict(size_t requested_charge, size_t* freed_charge, for (;;) { for (size_t i = 0; i < step_size; i++) { - ClockHandle& h = array_[ModTableSize(Lower32of64(old_clock_pointer + i))]; - uint64_t meta = h.meta.load(std::memory_order_relaxed); - - uint64_t acquire_count = (meta >> ClockHandle::kAcquireCounterShift) & - ClockHandle::kCounterMask; - 
uint64_t release_count = (meta >> ClockHandle::kReleaseCounterShift) & - ClockHandle::kCounterMask; - if (acquire_count != release_count) { - // Only clock update entries with no outstanding refs - continue; - } - if (!((meta >> ClockHandle::kStateShift) & - ClockHandle::kStateShareableBit)) { - // Only clock update Shareable entries - continue; - } - if ((meta >> ClockHandle::kStateShift == ClockHandle::kStateVisible) && - acquire_count > 0) { - // Decrement clock - uint64_t new_count = std::min(acquire_count - 1, - uint64_t{ClockHandle::kMaxCountdown} - 1); - // Compare-exchange in the decremented clock info, but - // not aggressively - uint64_t new_meta = - (uint64_t{ClockHandle::kStateVisible} << ClockHandle::kStateShift) | - (new_count << ClockHandle::kReleaseCounterShift) | - (new_count << ClockHandle::kAcquireCounterShift); - h.meta.compare_exchange_strong(meta, new_meta, - std::memory_order_relaxed); - continue; - } - // Otherwise, remove entry (either unreferenced invisible or - // unreferenced and expired visible). Compare-exchange failing probably - // indicates the entry was used, so skip it in that case. - if (h.meta.compare_exchange_strong( - meta, - uint64_t{ClockHandle::kStateConstruction} - << ClockHandle::kStateShift, - std::memory_order_acquire)) { - // Took ownership. - // Save info about h to minimize dependences between atomic updates - // (e.g. fully relaxed Rollback after h released by marking empty) - const UniqueId64x2 h_hashed_key = h.hashed_key; - size_t h_total_charge = h.total_charge; - // TODO? Delay freeing? - h.FreeData(); -#ifndef NDEBUG - // Mark slot as empty, with assertion - meta = h.meta.exchange(0, std::memory_order_release); - assert(meta >> ClockHandle::kStateShift == - ClockHandle::kStateConstruction); -#else - // Mark slot as empty - h.meta.store(0, std::memory_order_release); -#endif + HandleImpl& h = array_[ModTableSize(Lower32of64(old_clock_pointer + i))]; + bool evicting = ClockUpdate(h); + if (evicting) { + Rollback(h.hashed_key, &h); + *freed_charge += h.GetTotalCharge(); *freed_count += 1; - *freed_charge += h_total_charge; - Rollback(h_hashed_key, &h); + FreeDataMarkEmpty(h); } } @@ -955,23 +984,26 @@ void ClockHandleTable::Evict(size_t requested_charge, size_t* freed_charge, } } -ClockCacheShard::ClockCacheShard( - size_t capacity, size_t estimated_value_size, bool strict_capacity_limit, - CacheMetadataChargePolicy metadata_charge_policy) +template +ClockCacheShard::ClockCacheShard( + size_t capacity, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy, + const typename Table::Opts& opts) : CacheShardBase(metadata_charge_policy), - table_( - CalcHashBits(capacity, estimated_value_size, metadata_charge_policy), - /*initial_charge_metadata*/ metadata_charge_policy == - kFullChargeCacheMetadata), + table_(capacity, strict_capacity_limit, metadata_charge_policy, opts), capacity_(capacity), strict_capacity_limit_(strict_capacity_limit) { // Initial charge metadata should not exceed capacity - assert(table_.GetUsage() <= capacity_ || capacity_ < sizeof(ClockHandle)); + assert(table_.GetUsage() <= capacity_ || capacity_ < sizeof(HandleImpl)); } -void ClockCacheShard::EraseUnRefEntries() { table_.EraseUnRefEntries(); } +template +void ClockCacheShard
<Table>::EraseUnRefEntries() { + table_.EraseUnRefEntries(); +} -void ClockCacheShard::ApplyToSomeEntries( +template <class Table> +void ClockCacheShard
<Table>::ApplyToSomeEntries( const std::function<void(const Slice& key, void* value, size_t charge, DeleterFn deleter)>& callback, size_t average_entries_per_lock, size_t* state) { @@ -997,20 +1029,20 @@ void ClockCacheShard::ApplyToSomeEntries( } table_.ConstApplyToEntriesRange( - [callback](const ClockHandle& h) { + [callback](const HandleImpl& h) { UniqueId64x2 unhashed; - callback(ReverseHash(h.hashed_key, &unhashed), h.value, h.total_charge, - h.deleter); + callback(ReverseHash(h.hashed_key, &unhashed), h.value, + h.GetTotalCharge(), h.deleter); }, index_begin, index_end, false); } -int ClockCacheShard::CalcHashBits( +int HyperClockTable::CalcHashBits( size_t capacity, size_t estimated_value_size, CacheMetadataChargePolicy metadata_charge_policy) { double average_slot_charge = estimated_value_size * kLoadFactor; if (metadata_charge_policy == kFullChargeCacheMetadata) { - average_slot_charge += sizeof(ClockHandle); + average_slot_charge += sizeof(HandleImpl); } assert(average_slot_charge > 0.0); uint64_t num_slots = @@ -1020,28 +1052,34 @@ int ClockCacheShard::CalcHashBits( if (metadata_charge_policy == kFullChargeCacheMetadata) { // For very small estimated value sizes, it's possible to overshoot while (hash_bits > 0 && - uint64_t{sizeof(ClockHandle)} << hash_bits > capacity) { + uint64_t{sizeof(HandleImpl)} << hash_bits > capacity) { hash_bits--; } } return hash_bits; } -void ClockCacheShard::SetCapacity(size_t capacity) { +template <class Table> +void ClockCacheShard
<Table>::SetCapacity(size_t capacity) { capacity_.store(capacity, std::memory_order_relaxed); // next Insert will take care of any necessary evictions } -void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) { +template <class Table> +void ClockCacheShard
<Table>::SetStrictCapacityLimit( + bool strict_capacity_limit) { strict_capacity_limit_.store(strict_capacity_limit, std::memory_order_relaxed); // next Insert will take care of any necessary evictions } -Status ClockCacheShard::Insert(const Slice& key, const UniqueId64x2& hashed_key, - void* value, size_t charge, - Cache::DeleterFn deleter, ClockHandle** handle, - Cache::Priority priority) { +template <class Table> +Status ClockCacheShard
<Table>::Insert(const Slice& key, + const UniqueId64x2& hashed_key, + void* value, size_t charge, + Cache::DeleterFn deleter, + HandleImpl** handle, + Cache::Priority priority) { if (UNLIKELY(key.size() != kCacheKeySize)) { return Status::NotSupported("ClockCache only supports key size " + std::to_string(kCacheKeySize) + "B"); @@ -1051,22 +1089,23 @@ Status ClockCacheShard::Insert(const Slice& key, const UniqueId64x2& hashed_key, proto.value = value; proto.deleter = deleter; proto.total_charge = charge; - Status s = - table_.Insert(proto, reinterpret_cast<ClockHandle**>(handle), priority, - capacity_.load(std::memory_order_relaxed), - strict_capacity_limit_.load(std::memory_order_relaxed)); + Status s = table_.Insert( proto, handle, priority, capacity_.load(std::memory_order_relaxed), strict_capacity_limit_.load(std::memory_order_relaxed)); return s; } -ClockHandle* ClockCacheShard::Lookup(const Slice& key, - const UniqueId64x2& hashed_key) { +template <class Table> +typename ClockCacheShard
<Table>::HandleImpl* ClockCacheShard
<Table>::Lookup( + const Slice& key, const UniqueId64x2& hashed_key) { if (UNLIKELY(key.size() != kCacheKeySize)) { return nullptr; } return table_.Lookup(hashed_key); } -bool ClockCacheShard::Ref(ClockHandle* h) { +template <class Table> +bool ClockCacheShard
<Table>::Ref(HandleImpl* h) { if (h == nullptr) { return false; } @@ -1074,36 +1113,47 @@ bool ClockCacheShard::Ref(ClockHandle* h) { return true; } -bool ClockCacheShard::Release(ClockHandle* handle, bool useful, - bool erase_if_last_ref) { +template <class Table> +bool ClockCacheShard
<Table>::Release(HandleImpl* handle, bool useful, + bool erase_if_last_ref) { if (handle == nullptr) { return false; } return table_.Release(handle, useful, erase_if_last_ref); } -void ClockCacheShard::TEST_RefN(ClockHandle* h, size_t n) { +template <class Table> +void ClockCacheShard
<Table>::TEST_RefN(HandleImpl* h, size_t n) { table_.TEST_RefN(*h, n); } -void ClockCacheShard::TEST_ReleaseN(ClockHandle* h, size_t n) { +template <class Table> +void ClockCacheShard
<Table>::TEST_ReleaseN(HandleImpl* h, size_t n) { table_.TEST_ReleaseN(h, n); } -bool ClockCacheShard::Release(ClockHandle* handle, bool erase_if_last_ref) { +template <class Table> +bool ClockCacheShard
<Table>::Release(HandleImpl* handle, + bool erase_if_last_ref) { return Release(handle, /*useful=*/true, erase_if_last_ref); } -void ClockCacheShard::Erase(const Slice& key, const UniqueId64x2& hashed_key) { +template <class Table> +void ClockCacheShard
<Table>::Erase(const Slice& key, + const UniqueId64x2& hashed_key) { if (UNLIKELY(key.size() != kCacheKeySize)) { return; } table_.Erase(hashed_key); } -size_t ClockCacheShard::GetUsage() const { return table_.GetUsage(); } +template <class Table> +size_t ClockCacheShard
<Table>::GetUsage() const { + return table_.GetUsage(); +} -size_t ClockCacheShard::GetPinnedUsage() const { +template <class Table> +size_t ClockCacheShard
<Table>::GetPinnedUsage() const { // Computes the pinned usage by scanning the whole hash table. This // is slow, but avoids keeping an exact counter on the clock usage, // i.e., the number of not externally referenced elements. @@ -1114,15 +1164,15 @@ size_t ClockCacheShard::GetPinnedUsage() const { const bool charge_metadata = metadata_charge_policy_ == kFullChargeCacheMetadata; table_.ConstApplyToEntriesRange( - [&table_pinned_usage, charge_metadata](const ClockHandle& h) { + [&table_pinned_usage, charge_metadata](const HandleImpl& h) { uint64_t meta = h.meta.load(std::memory_order_relaxed); uint64_t refcount = GetRefcount(meta); // Holding one ref for ConstApplyToEntriesRange assert(refcount > 0); if (refcount > 1) { - table_pinned_usage += h.total_charge; + table_pinned_usage += h.GetTotalCharge(); if (charge_metadata) { - table_pinned_usage += sizeof(ClockHandle); + table_pinned_usage += sizeof(HandleImpl); } } }, @@ -1131,14 +1181,19 @@ size_t ClockCacheShard::GetPinnedUsage() const { return table_pinned_usage + table_.GetDetachedUsage(); } -size_t ClockCacheShard::GetOccupancyCount() const { +template <class Table> +size_t ClockCacheShard
<Table>::GetOccupancyCount() const { return table_.GetOccupancy(); } -size_t ClockCacheShard::GetTableAddressCount() const { +template <class Table> +size_t ClockCacheShard
::GetTableAddressCount() const { return table_.GetTableSize(); } +// Explicit instantiation +template class ClockCacheShard; + HyperClockCache::HyperClockCache( size_t capacity, size_t estimated_value_size, int num_shard_bits, bool strict_capacity_limit, @@ -1151,26 +1206,28 @@ HyperClockCache::HyperClockCache( // TODO: should not need to go through two levels of pointer indirection to // get to table entries size_t per_shard = GetPerShardCapacity(); - InitShards([=](ClockCacheShard* cs) { - new (cs) ClockCacheShard(per_shard, estimated_value_size, - strict_capacity_limit, metadata_charge_policy); + InitShards([=](Shard* cs) { + HyperClockTable::Opts opts; + opts.estimated_value_size = estimated_value_size; + new (cs) + Shard(per_shard, strict_capacity_limit, metadata_charge_policy, opts); }); } void* HyperClockCache::Value(Handle* handle) { - return reinterpret_cast(handle)->value; + return reinterpret_cast(handle)->value; } size_t HyperClockCache::GetCharge(Handle* handle) const { - return reinterpret_cast(handle)->total_charge; + return reinterpret_cast(handle)->GetTotalCharge(); } Cache::DeleterFn HyperClockCache::GetDeleter(Handle* handle) const { - auto h = reinterpret_cast(handle); + auto h = reinterpret_cast(handle); return h->deleter; } -} // namespace hyper_clock_cache +} // namespace clock_cache // DEPRECATED (see public API) std::shared_ptr NewClockCache( @@ -1193,7 +1250,7 @@ std::shared_ptr HyperClockCacheOptions::MakeSharedCache() const { constexpr size_t min_shard_size = 32U * 1024U * 1024U; my_num_shard_bits = GetDefaultCacheShardBits(capacity, min_shard_size); } - return std::make_shared( + return std::make_shared( capacity, estimated_entry_charge, my_num_shard_bits, strict_capacity_limit, metadata_charge_policy, memory_allocator); } diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 53a9de5f0..21a598ac4 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -27,7 +27,7 @@ namespace ROCKSDB_NAMESPACE { -namespace hyper_clock_cache { +namespace clock_cache { // Forward declaration of friend class. class ClockCacheTest; @@ -311,6 +311,14 @@ struct ClockHandleBasicData { UniqueId64x2 hashed_key = kNullUniqueId64x2; size_t total_charge = 0; + // For total_charge_and_flags + // "Detached" means the handle is allocated separately from hash table. + static constexpr uint64_t kFlagDetached = uint64_t{1} << 63; + // Extract just the total charge + static constexpr uint64_t kTotalChargeMask = kFlagDetached - 1; + + inline size_t GetTotalCharge() const { return total_charge; } + // Calls deleter (if non-null) on cache key and value void FreeData() const; @@ -318,9 +326,7 @@ struct ClockHandleBasicData { const UniqueId64x2& GetHash() const { return hashed_key; } }; -// Target size to be exactly a common cache line size (see static_assert in -// clock_cache.cc) -struct ALIGN_AS(64U) ClockHandle : public ClockHandleBasicData { +struct ClockHandle : public ClockHandleBasicData { // Constants for handling the atomic `meta` word, which tracks most of the // state of the handle. The meta word looks like this: // low bits high bits @@ -372,32 +378,54 @@ struct ALIGN_AS(64U) ClockHandle : public ClockHandleBasicData { // See above std::atomic meta{}; - // The number of elements that hash to this slot or a lower one, but wind - // up in this slot or a higher one. - std::atomic displacements{}; - // True iff the handle is allocated separately from hash table. 
- bool detached = false; + // Anticipating use for SecondaryCache support + void* reserved_for_future_use = nullptr; }; // struct ClockHandle -class ClockHandleTable { +class HyperClockTable { public: - explicit ClockHandleTable(int hash_bits, bool initial_charge_metadata); - ~ClockHandleTable(); + // Target size to be exactly a common cache line size (see static_assert in + // clock_cache.cc) + struct ALIGN_AS(64U) HandleImpl : public ClockHandle { + // The number of elements that hash to this slot or a lower one, but wind + // up in this slot or a higher one. + std::atomic displacements{}; + + // Whether this is a "deteched" handle that is independently allocated + // with `new` (so must be deleted with `delete`). + // TODO: ideally this would be packed into some other data field, such + // as upper bits of total_charge, but that incurs a measurable performance + // regression. + bool detached = false; + + inline bool IsDetached() const { return detached; } + + inline void SetDetached() { detached = true; } + }; // struct HandleImpl + + struct Opts { + size_t estimated_value_size; + }; + + HyperClockTable(size_t capacity, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy, + const Opts& opts); + ~HyperClockTable(); - Status Insert(const ClockHandleBasicData& proto, ClockHandle** handle, + Status Insert(const ClockHandleBasicData& proto, HandleImpl** handle, Cache::Priority priority, size_t capacity, bool strict_capacity_limit); - ClockHandle* Lookup(const UniqueId64x2& hashed_key); + HandleImpl* Lookup(const UniqueId64x2& hashed_key); - bool Release(ClockHandle* handle, bool useful, bool erase_if_last_ref); + bool Release(HandleImpl* handle, bool useful, bool erase_if_last_ref); - void Ref(ClockHandle& handle); + void Ref(HandleImpl& handle); void Erase(const UniqueId64x2& hashed_key); - void ConstApplyToEntriesRange(std::function func, + void ConstApplyToEntriesRange(std::function func, size_t index_begin, size_t index_end, bool apply_if_will_be_deleted) const; @@ -407,8 +435,6 @@ class ClockHandleTable { int GetLengthBits() const { return length_bits_; } - size_t GetOccupancyLimit() const { return occupancy_limit_; } - size_t GetOccupancy() const { return occupancy_.load(std::memory_order_relaxed); } @@ -420,8 +446,8 @@ class ClockHandleTable { } // Acquire/release N references - void TEST_RefN(ClockHandle& handle, size_t n); - void TEST_ReleaseN(ClockHandle* handle, size_t n); + void TEST_RefN(HandleImpl& handle, size_t n); + void TEST_ReleaseN(HandleImpl* handle, size_t n); private: // functions // Returns x mod 2^{length_bits_}. @@ -432,8 +458,8 @@ class ClockHandleTable { // Runs the clock eviction algorithm trying to reclaim at least // requested_charge. Returns how much is evicted, which could be less // if it appears impossible to evict the requested amount without blocking. - void Evict(size_t requested_charge, size_t* freed_charge, - size_t* freed_count); + inline void Evict(size_t requested_charge, size_t* freed_charge, + size_t* freed_count); // Returns the first slot in the probe sequence, starting from the given // probe number, with a handle e such that match(e) is true. At every @@ -446,15 +472,54 @@ class ClockHandleTable { // value of probe is one more than the last non-aborting probe during the // call. This is so that that the variable can be used to keep track of // progress across consecutive calls to FindSlot. 
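  // (Probing details, from the definitions in clock_cache.cc: the sequence is
  // double hashing over the two 64-bit halves of the hashed key, with
  // base = ModTableSize(hashed_key[1]) and
  // increment = static_cast<size_t>(hashed_key[0]) | 1U, so the i-th probe is
  // ModTableSize(base + i * increment). Forcing the increment odd keeps it
  // coprime with the power-of-two table size, so a full cycle visits every
  // slot.)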
- inline ClockHandle* FindSlot(const UniqueId64x2& hashed_key, - std::function match, - std::function stop, - std::function update, - size_t& probe); + inline HandleImpl* FindSlot(const UniqueId64x2& hashed_key, + std::function match, + std::function stop, + std::function update, + size_t& probe); // Re-decrement all displacements in probe path starting from beginning // until (not including) the given handle - void Rollback(const UniqueId64x2& hashed_key, const ClockHandle* h); + inline void Rollback(const UniqueId64x2& hashed_key, const HandleImpl* h); + + // Subtracts `total_charge` from `usage_` and 1 from `occupancy_`. + // Ideally this comes after releasing the entry itself so that we + // actually have the available occupancy/usage that is claimed. + // However, that means total_charge has to be saved from the handle + // before releasing it so that it can be provided to this function. + inline void ReclaimEntryUsage(size_t total_charge); + + // Helper for updating `usage_` for new entry with given `total_charge` + // and evicting if needed under strict_capacity_limit=true rules. This + // means the operation might fail with Status::MemoryLimit. If + // `need_evict_for_occupancy`, then eviction of at least one entry is + // required, and the operation should fail if not possible. + // NOTE: Otherwise, occupancy_ is not managed in this function + inline Status ChargeUsageMaybeEvictStrict(size_t total_charge, + size_t capacity, + bool need_evict_for_occupancy); + + // Helper for updating `usage_` for new entry with given `total_charge` + // and evicting if needed under strict_capacity_limit=false rules. This + // means that updating `usage_` always succeeds even if forced to exceed + // capacity. If `need_evict_for_occupancy`, then eviction of at least one + // entry is required, and the operation should return false if such eviction + // is not possible. `usage_` is not updated in that case. Otherwise, returns + // true, indicating success. + // NOTE: occupancy_ is not managed in this function + inline bool ChargeUsageMaybeEvictNonStrict(size_t total_charge, + size_t capacity, + bool need_evict_for_occupancy); + + // Creates a "detached" handle for returning from an Insert operation that + // cannot be completed by actually inserting into the table. + // Updates `detached_usage_` but not `usage_` nor `occupancy_`. + inline HandleImpl* DetachedInsert(const ClockHandleBasicData& proto); + + // Returns the number of bits used to hash an element in the hash + // table. + static int CalcHashBits(size_t capacity, size_t estimated_value_size, + CacheMetadataChargePolicy metadata_charge_policy); private: // data // Number of hash bits used for table index. @@ -468,7 +533,7 @@ class ClockHandleTable { const size_t occupancy_limit_; // Array of slots comprising the hash table. - const std::unique_ptr array_; + const std::unique_ptr array_; // We partition the following members into different cache lines // to avoid false sharing among Lookup, Release, Erase and Insert @@ -487,17 +552,18 @@ class ClockHandleTable { // Part of usage by detached entries (not in table) std::atomic detached_usage_{}; -}; // class ClockHandleTable +}; // class HyperClockTable // A single shard of sharded cache. 
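 // (The shard is templated on the table implementation and instantiated in
 // clock_cache.cc as ClockCacheShard<HyperClockTable>, so the shard-level
 // handling of capacity, strict_capacity_limit, and key checks could be reused
 // if an alternative table/eviction implementation is added later.)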
+template class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase { public: - ClockCacheShard(size_t capacity, size_t estimated_value_size, - bool strict_capacity_limit, - CacheMetadataChargePolicy metadata_charge_policy); + ClockCacheShard(size_t capacity, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy, + const typename Table::Opts& opts); // For CacheShard concept - using HandleImpl = ClockHandle; + using HandleImpl = typename Table::HandleImpl; // Hash is lossless hash of 128-bit key using HashVal = UniqueId64x2; using HashCref = const HashVal&; @@ -532,16 +598,16 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase { void SetStrictCapacityLimit(bool strict_capacity_limit); Status Insert(const Slice& key, const UniqueId64x2& hashed_key, void* value, - size_t charge, Cache::DeleterFn deleter, ClockHandle** handle, + size_t charge, Cache::DeleterFn deleter, HandleImpl** handle, Cache::Priority priority); - ClockHandle* Lookup(const Slice& key, const UniqueId64x2& hashed_key); + HandleImpl* Lookup(const Slice& key, const UniqueId64x2& hashed_key); - bool Release(ClockHandle* handle, bool useful, bool erase_if_last_ref); + bool Release(HandleImpl* handle, bool useful, bool erase_if_last_ref); - bool Release(ClockHandle* handle, bool erase_if_last_ref = false); + bool Release(HandleImpl* handle, bool erase_if_last_ref = false); - bool Ref(ClockHandle* handle); + bool Ref(HandleImpl* handle); void Erase(const Slice& key, const UniqueId64x2& hashed_key); @@ -565,40 +631,29 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase { // SecondaryCache not yet supported Status Insert(const Slice& key, const UniqueId64x2& hashed_key, void* value, const Cache::CacheItemHelper* helper, size_t charge, - ClockHandle** handle, Cache::Priority priority) { + HandleImpl** handle, Cache::Priority priority) { return Insert(key, hashed_key, value, charge, helper->del_cb, handle, priority); } - ClockHandle* Lookup(const Slice& key, const UniqueId64x2& hashed_key, - const Cache::CacheItemHelper* /*helper*/, - const Cache::CreateCallback& /*create_cb*/, - Cache::Priority /*priority*/, bool /*wait*/, - Statistics* /*stats*/) { + HandleImpl* Lookup(const Slice& key, const UniqueId64x2& hashed_key, + const Cache::CacheItemHelper* /*helper*/, + const Cache::CreateCallback& /*create_cb*/, + Cache::Priority /*priority*/, bool /*wait*/, + Statistics* /*stats*/) { return Lookup(key, hashed_key); } - bool IsReady(ClockHandle* /*handle*/) { return true; } + bool IsReady(HandleImpl* /*handle*/) { return true; } - void Wait(ClockHandle* /*handle*/) {} + void Wait(HandleImpl* /*handle*/) {} // Acquire/release N references - void TEST_RefN(ClockHandle* handle, size_t n); - void TEST_ReleaseN(ClockHandle* handle, size_t n); - - private: // functions - friend class ClockCache; - friend class ClockCacheTest; - - ClockHandle* DetachedInsert(const ClockHandleBasicData& h); - - // Returns the number of bits used to hash an element in the hash - // table. - static int CalcHashBits(size_t capacity, size_t estimated_value_size, - CacheMetadataChargePolicy metadata_charge_policy); + void TEST_RefN(HandleImpl* handle, size_t n); + void TEST_ReleaseN(HandleImpl* handle, size_t n); private: // data - ClockHandleTable table_; + Table table_; // Maximum total charge of all elements stored in the table. 
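  // (Relaxed atomic: SetCapacity() only publishes the new value, and the next
  // Insert() performs whatever eviction is needed to get back under it.)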
std::atomic capacity_; @@ -611,8 +666,10 @@ class HyperClockCache #ifdef NDEBUG final #endif - : public ShardedCache { + : public ShardedCache> { public: + using Shard = ClockCacheShard; + HyperClockCache(size_t capacity, size_t estimated_value_size, int num_shard_bits, bool strict_capacity_limit, CacheMetadataChargePolicy metadata_charge_policy, @@ -627,6 +684,6 @@ class HyperClockCache DeleterFn GetDeleter(Handle* handle) const override; }; // class HyperClockCache -} // namespace hyper_clock_cache +} // namespace clock_cache } // namespace ROCKSDB_NAMESPACE diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index fbf336f87..e5039394b 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -506,10 +506,14 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) { } // namespace fast_lru_cache -namespace hyper_clock_cache { +namespace clock_cache { class ClockCacheTest : public testing::Test { public: + using Shard = HyperClockCache::Shard; + using Table = HyperClockTable; + using HandleImpl = Shard::HandleImpl; + ClockCacheTest() {} ~ClockCacheTest() override { DeleteShard(); } @@ -523,10 +527,13 @@ class ClockCacheTest : public testing::Test { void NewShard(size_t capacity, bool strict_capacity_limit = true) { DeleteShard(); - shard_ = reinterpret_cast( - port::cacheline_aligned_alloc(sizeof(ClockCacheShard))); - new (shard_) ClockCacheShard(capacity, 1, strict_capacity_limit, - kDontChargeCacheMetadata); + shard_ = + reinterpret_cast(port::cacheline_aligned_alloc(sizeof(Shard))); + + Table::Opts opts; + opts.estimated_value_size = 1; + new (shard_) + Shard(capacity, strict_capacity_limit, kDontChargeCacheMetadata, opts); } Status Insert(const UniqueId64x2& hashed_key, @@ -580,7 +587,7 @@ class ClockCacheTest : public testing::Test { return {(static_cast(key) << 56) + 1234U, 5678U}; } - ClockCacheShard* shard_ = nullptr; + Shard* shard_ = nullptr; }; TEST_F(ClockCacheTest, Misc) { @@ -604,7 +611,8 @@ TEST_F(ClockCacheTest, Misc) { } TEST_F(ClockCacheTest, Limits) { - NewShard(3, false /*strict_capacity_limit*/); + constexpr size_t kCapacity = 3; + NewShard(kCapacity, false /*strict_capacity_limit*/); for (bool strict_capacity_limit : {false, true, false}) { SCOPED_TRACE("strict_capacity_limit = " + std::to_string(strict_capacity_limit)); @@ -628,7 +636,7 @@ TEST_F(ClockCacheTest, Limits) { // Single entry fills capacity { - ClockHandle* h; + HandleImpl* h; ASSERT_OK(shard_->Insert(TestKey(hkey), hkey, nullptr /*value*/, 3 /*charge*/, nullptr /*deleter*/, &h, Cache::Priority::LOW)); @@ -644,15 +652,17 @@ TEST_F(ClockCacheTest, Limits) { shard_->Release(h, false /*useful*/, false /*erase_if_last_ref*/); } - // Insert more than table size can handle (cleverly using zero-charge - // entries) to exceed occupancy limit. + // Insert more than table size can handle to exceed occupancy limit. + // (Cleverly using mostly zero-charge entries, but some non-zero to + // verify usage tracking on detached entries.) { size_t n = shard_->GetTableAddressCount() + 1; - std::unique_ptr ha { new ClockHandle* [n] {} }; + std::unique_ptr ha { new HandleImpl* [n] {} }; Status s; for (size_t i = 0; i < n && s.ok(); ++i) { hkey[1] = i; - s = shard_->Insert(TestKey(hkey), hkey, nullptr /*value*/, 0 /*charge*/, + s = shard_->Insert(TestKey(hkey), hkey, nullptr /*value*/, + (i + kCapacity < n) ? 
0 : 1 /*charge*/, nullptr /*deleter*/, &ha[i], Cache::Priority::LOW); if (i == 0) { EXPECT_OK(s); @@ -798,7 +808,7 @@ void IncrementIntDeleter(const Slice& /*key*/, void* value) { // Testing calls to CorrectNearOverflow in Release TEST_F(ClockCacheTest, ClockCounterOverflowTest) { NewShard(6, /*strict_capacity_limit*/ false); - ClockHandle* h; + HandleImpl* h; int deleted = 0; UniqueId64x2 hkey = TestHashedKey('x'); ASSERT_OK(shard_->Insert(TestKey(hkey), hkey, &deleted, 1, @@ -840,18 +850,18 @@ TEST_F(ClockCacheTest, CollidingInsertEraseTest) { Slice key2 = TestKey(hkey2); UniqueId64x2 hkey3 = TestHashedKey('z'); Slice key3 = TestKey(hkey3); - ClockHandle* h1; + HandleImpl* h1; ASSERT_OK(shard_->Insert(key1, hkey1, &deleted, 1, IncrementIntDeleter, &h1, Cache::Priority::HIGH)); - ClockHandle* h2; + HandleImpl* h2; ASSERT_OK(shard_->Insert(key2, hkey2, &deleted, 1, IncrementIntDeleter, &h2, Cache::Priority::HIGH)); - ClockHandle* h3; + HandleImpl* h3; ASSERT_OK(shard_->Insert(key3, hkey3, &deleted, 1, IncrementIntDeleter, &h3, Cache::Priority::HIGH)); // Can repeatedly lookup+release despite the hash collision - ClockHandle* tmp_h; + HandleImpl* tmp_h; for (bool erase_if_last_ref : {true, false}) { // but not last ref tmp_h = shard_->Lookup(key1, hkey1); ASSERT_EQ(h1, tmp_h); @@ -999,7 +1009,7 @@ TEST_F(ClockCacheTest, TableSizesTest) { } } -} // namespace hyper_clock_cache +} // namespace clock_cache class TestSecondaryCache : public SecondaryCache { public: