Refactor (Hyper)ClockCache code for upcoming changes (#11572)

Summary:
Separate out some functionality that will be common to both static and dynamic HCC into BaseClockTable. Table::InsertState and GrowIfNeeded exist for the sake of the upcoming dynamic HCC, so they don't do much yet: the static table's InsertState is empty and its GrowIfNeeded only checks the occupancy limit. A simplified sketch of the dispatch pattern follows.
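
To make the dispatch pattern concrete, here is a minimal sketch with simplified stand-in names (`BaseTable`, `FixedTable`, and `kOccupancyLimit` are illustrative, not the actual RocksDB identifiers): shared insert logic lives in a base-class method template that `static_cast`s to the concrete table type, so the hot path pays no virtual-call overhead, and each table supplies its own `InsertState`.

```cpp
#include <cstddef>

// Sketch only: simplified stand-ins for BaseClockTable / HyperClockTable.
class BaseTable {
 public:
  // Shared insert logic, parameterized by the concrete table type.
  // Callers must pass the dynamic type of *this.
  template <class Table>
  bool Insert(size_t new_occupancy) {
    Table& derived = static_cast<Table&>(*this);  // static dispatch, no vtable
    typename Table::InsertState state;
    derived.StartInsert(state);
    if (!derived.GrowIfNeeded(new_occupancy, state)) {
      return false;  // fixed-size table is full; a dynamic table would grow
    }
    return derived.DoInsert(state);
  }
};

class FixedTable : public BaseTable {
 public:
  struct InsertState {};  // nothing to carry through a static-table insert
  void StartInsert(InsertState&) {}
  bool GrowIfNeeded(size_t new_occupancy, InsertState&) {
    return new_occupancy <= kOccupancyLimit;  // room check only, never grows
  }
  bool DoInsert(InsertState&) { return true; }

 private:
  static constexpr size_t kOccupancyLimit = 100;
};

int main() {
  FixedTable table;
  return table.Insert<FixedTable>(/*new_occupancy=*/1) ? 0 : 1;
}
```

In the diff, `ClockCacheShard<Table>` makes the corresponding call as `table_.template Insert<Table>(...)`, and `HyperClockTable` supplies the trivial pieces: an empty `InsertState`, a no-op `StartInsert`, and a `GrowIfNeeded` that only checks `occupancy_limit_`.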

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11572

Test Plan:
Existing tests; no functional changes intended.

Performance test in subsequent PR https://github.com/facebook/rocksdb/issues/11601

Reviewed By: jowlyzhang

Differential Revision: D47110496

Pulled By: pdillinger

fbshipit-source-id: 379bd433322a42ea28c0043b41ec24956d21e7aa
Peter Dillinger, committed by Facebook GitHub Bot
parent 854eb76a8c, commit c3c84b3397

Changed files:
  1. cache/clock_cache.cc (614 lines changed)
  2. cache/clock_cache.h (230 lines changed)

diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc

@@ -9,6 +9,7 @@
 #include "cache/clock_cache.h"

+#include <cassert>
 #include <functional>
 #include <numeric>
@@ -118,74 +119,6 @@ inline bool ClockUpdate(ClockHandle& h) {
   }
 }

-}  // namespace
-
-void ClockHandleBasicData::FreeData(MemoryAllocator* allocator) const {
-  if (helper->del_cb) {
-    helper->del_cb(value, allocator);
-  }
-}
-
-HyperClockTable::HyperClockTable(
-    size_t capacity, bool /*strict_capacity_limit*/,
-    CacheMetadataChargePolicy metadata_charge_policy,
-    MemoryAllocator* allocator,
-    const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed,
-    const Opts& opts)
-    : length_bits_(CalcHashBits(capacity, opts.estimated_value_size,
-                                metadata_charge_policy)),
-      length_bits_mask_((size_t{1} << length_bits_) - 1),
-      occupancy_limit_(static_cast<size_t>((uint64_t{1} << length_bits_) *
-                                           kStrictLoadFactor)),
-      array_(new HandleImpl[size_t{1} << length_bits_]),
-      allocator_(allocator),
-      eviction_callback_(*eviction_callback),
-      hash_seed_(*hash_seed) {
-  if (metadata_charge_policy ==
-      CacheMetadataChargePolicy::kFullChargeCacheMetadata) {
-    usage_ += size_t{GetTableSize()} * sizeof(HandleImpl);
-  }
-  static_assert(sizeof(HandleImpl) == 64U,
-                "Expecting size / alignment with common cache line size");
-}
-
-HyperClockTable::~HyperClockTable() {
-  // Assumes there are no references or active operations on any slot/element
-  // in the table.
-  for (size_t i = 0; i < GetTableSize(); i++) {
-    HandleImpl& h = array_[i];
-    switch (h.meta >> ClockHandle::kStateShift) {
-      case ClockHandle::kStateEmpty:
-        // noop
-        break;
-      case ClockHandle::kStateInvisible:  // rare but possible
-      case ClockHandle::kStateVisible:
-        assert(GetRefcount(h.meta) == 0);
-        h.FreeData(allocator_);
-#ifndef NDEBUG
-        Rollback(h.hashed_key, &h);
-        ReclaimEntryUsage(h.GetTotalCharge());
-#endif
-        break;
-      // otherwise
-      default:
-        assert(false);
-        break;
-    }
-  }
-
-#ifndef NDEBUG
-  for (size_t i = 0; i < GetTableSize(); i++) {
-    assert(array_[i].displacements.load() == 0);
-  }
-#endif
-
-  assert(usage_.load() == 0 ||
-         usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl));
-  assert(occupancy_ == 0);
-}
-
 // If an entry doesn't receive clock updates but is repeatedly referenced &
 // released, the acquire and release counters could overflow without some
 // intervention. This is that intervention, which should be inexpensive
@@ -259,8 +192,170 @@ inline void CorrectNearOverflow(uint64_t old_meta,
   }
 }

-inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
-    size_t total_charge, size_t capacity, bool need_evict_for_occupancy) {
+inline bool BeginSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h,
+                            uint64_t initial_countdown, bool* already_matches) {
+  assert(*already_matches == false);
+  // Optimistically transition the slot from "empty" to
+  // "under construction" (no effect on other states)
+  uint64_t old_meta = h.meta.fetch_or(
+      uint64_t{ClockHandle::kStateOccupiedBit} << ClockHandle::kStateShift,
+      std::memory_order_acq_rel);
+  uint64_t old_state = old_meta >> ClockHandle::kStateShift;
+  if (old_state == ClockHandle::kStateEmpty) {
+    // We've started inserting into an available slot, and taken
+    // ownership.
+    return true;
+  } else if (old_state != ClockHandle::kStateVisible) {
+    // Slot not usable / touchable now
+    return false;
+  }
+  // Existing, visible entry, which might be a match.
+  // But first, we need to acquire a ref to read it. In fact, number of
+  // refs for initial countdown, so that we boost the clock state if
+  // this is a match.
+  old_meta =
+      h.meta.fetch_add(ClockHandle::kAcquireIncrement * initial_countdown,
+                       std::memory_order_acq_rel);
+  // Like Lookup
+  if ((old_meta >> ClockHandle::kStateShift) == ClockHandle::kStateVisible) {
+    // Acquired a read reference
+    if (h.hashed_key == proto.hashed_key) {
+      // Match. Release in a way that boosts the clock state
+      old_meta =
+          h.meta.fetch_add(ClockHandle::kReleaseIncrement * initial_countdown,
+                           std::memory_order_acq_rel);
+      // Correct for possible (but rare) overflow
+      CorrectNearOverflow(old_meta, h.meta);
+      // Insert detached instead (only if return handle needed)
+      *already_matches = true;
+      return false;
+    } else {
+      // Mismatch. Pretend we never took the reference
+      old_meta =
+          h.meta.fetch_sub(ClockHandle::kAcquireIncrement * initial_countdown,
+                           std::memory_order_acq_rel);
+    }
+  } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) ==
+                      ClockHandle::kStateInvisible)) {
+    // Pretend we never took the reference
+    // WART/FIXME?: there's a tiny chance we release last ref to invisible
+    // entry here. If that happens, we let eviction take care of it.
+    old_meta =
+        h.meta.fetch_sub(ClockHandle::kAcquireIncrement * initial_countdown,
+                         std::memory_order_acq_rel);
+  } else {
+    // For other states, incrementing the acquire counter has no effect
+    // so we don't need to undo it.
+    // Slot not usable / touchable now.
+  }
+  (void)old_meta;
+  return false;
+}
+
+inline void FinishSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h,
+                             uint64_t initial_countdown, bool keep_ref) {
+  // Save data fields
+  ClockHandleBasicData* h_alias = &h;
+  *h_alias = proto;
+
+  // Transition from "under construction" state to "visible" state
+  uint64_t new_meta = uint64_t{ClockHandle::kStateVisible}
+                      << ClockHandle::kStateShift;
+
+  // Maybe with an outstanding reference
+  new_meta |= initial_countdown << ClockHandle::kAcquireCounterShift;
+  new_meta |= (initial_countdown - keep_ref)
+              << ClockHandle::kReleaseCounterShift;
+
+#ifndef NDEBUG
+  // Save the state transition, with assertion
+  uint64_t old_meta = h.meta.exchange(new_meta, std::memory_order_release);
+  assert(old_meta >> ClockHandle::kStateShift ==
+         ClockHandle::kStateConstruction);
+#else
+  // Save the state transition
+  h.meta.store(new_meta, std::memory_order_release);
+#endif
+}
+
+bool TryInsert(const ClockHandleBasicData& proto, ClockHandle& h,
+               uint64_t initial_countdown, bool keep_ref,
+               bool* already_matches) {
+  bool b = BeginSlotInsert(proto, h, initial_countdown, already_matches);
+  if (b) {
+    FinishSlotInsert(proto, h, initial_countdown, keep_ref);
+  }
+  return b;
+}
+
+}  // namespace
+
+void ClockHandleBasicData::FreeData(MemoryAllocator* allocator) const {
+  if (helper->del_cb) {
+    helper->del_cb(value, allocator);
+  }
+}
+
+template <class HandleImpl>
+HandleImpl* BaseClockTable::StandaloneInsert(
+    const ClockHandleBasicData& proto) {
+  // Heap allocated separate from table
+  HandleImpl* h = new HandleImpl();
+  ClockHandleBasicData* h_alias = h;
+  *h_alias = proto;
+  h->SetStandalone();
+  // Single reference (standalone entries only created if returning a refed
+  // Handle back to user)
+  uint64_t meta = uint64_t{ClockHandle::kStateInvisible}
+                  << ClockHandle::kStateShift;
+  meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift;
+  h->meta.store(meta, std::memory_order_release);
+  // Keep track of how much of usage is standalone
+  standalone_usage_.fetch_add(proto.GetTotalCharge(),
+                              std::memory_order_relaxed);
+  return h;
+}
+
+template <class Table>
+typename Table::HandleImpl* BaseClockTable::CreateStandalone(
+    ClockHandleBasicData& proto, size_t capacity, bool strict_capacity_limit,
+    bool allow_uncharged) {
+  Table& derived = static_cast<Table&>(*this);
+  typename Table::InsertState state;
+  derived.StartInsert(state);
+
+  const size_t total_charge = proto.GetTotalCharge();
+  if (strict_capacity_limit) {
+    Status s = ChargeUsageMaybeEvictStrict<Table>(
+        total_charge, capacity,
+        /*need_evict_for_occupancy=*/false, state);
+    if (!s.ok()) {
+      if (allow_uncharged) {
+        proto.total_charge = 0;
+      } else {
+        return nullptr;
+      }
+    }
+  } else {
+    // Case strict_capacity_limit == false
+    bool success = ChargeUsageMaybeEvictNonStrict<Table>(
+        total_charge, capacity,
+        /*need_evict_for_occupancy=*/false, state);
+    if (!success) {
+      // Force the issue
+      usage_.fetch_add(total_charge, std::memory_order_relaxed);
+    }
+  }
+
+  return StandaloneInsert<typename Table::HandleImpl>(proto);
+}
+
+template <class Table>
+Status BaseClockTable::ChargeUsageMaybeEvictStrict(
+    size_t total_charge, size_t capacity, bool need_evict_for_occupancy,
+    typename Table::InsertState& state) {
   if (total_charge > capacity) {
     return Status::MemoryLimit(
         "Cache entry too large for a single cache shard: " +
@@ -287,7 +382,8 @@ inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
   if (request_evict_charge > 0) {
     size_t evicted_charge = 0;
     size_t evicted_count = 0;
-    Evict(request_evict_charge, &evicted_charge, &evicted_count);
+    static_cast<Table*>(this)->Evict(request_evict_charge, &evicted_charge,
+                                     &evicted_count, state);
     occupancy_.fetch_sub(evicted_count, std::memory_order_release);
     if (LIKELY(evicted_charge > need_evict_charge)) {
       assert(evicted_count > 0);
@@ -316,8 +412,10 @@ inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
   return Status::OK();
 }

-inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
-    size_t total_charge, size_t capacity, bool need_evict_for_occupancy) {
+template <class Table>
+inline bool BaseClockTable::ChargeUsageMaybeEvictNonStrict(
+    size_t total_charge, size_t capacity, bool need_evict_for_occupancy,
+    typename Table::InsertState& state) {
   // For simplicity, we consider that either the cache can accept the insert
   // with no evictions, or we must evict enough to make (at least) enough
   // space. It could lead to unnecessary failures or excessive evictions in
@@ -354,7 +452,8 @@ inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
   size_t evicted_charge = 0;
   size_t evicted_count = 0;
   if (need_evict_charge > 0) {
-    Evict(need_evict_charge, &evicted_charge, &evicted_count);
+    static_cast<Table*>(this)->Evict(need_evict_charge, &evicted_charge,
+                                     &evicted_count, state);
     // Deal with potential occupancy deficit
     if (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0) {
       assert(evicted_charge == 0);
@@ -373,28 +472,17 @@ inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
   return true;
 }

-inline HyperClockTable::HandleImpl* HyperClockTable::StandaloneInsert(
-    const ClockHandleBasicData& proto) {
-  // Heap allocated separate from table
-  HandleImpl* h = new HandleImpl();
-  ClockHandleBasicData* h_alias = h;
-  *h_alias = proto;
-  h->SetStandalone();
-  // Single reference (standalone entries only created if returning a refed
-  // Handle back to user)
-  uint64_t meta = uint64_t{ClockHandle::kStateInvisible}
-                  << ClockHandle::kStateShift;
-  meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift;
-  h->meta.store(meta, std::memory_order_release);
-  // Keep track of how much of usage is standalone
-  standalone_usage_.fetch_add(proto.GetTotalCharge(),
-                              std::memory_order_relaxed);
-  return h;
-}
-
-Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
-                               HandleImpl** handle, Cache::Priority priority,
-                               size_t capacity, bool strict_capacity_limit) {
+template <class Table>
+Status BaseClockTable::Insert(const ClockHandleBasicData& proto,
+                              typename Table::HandleImpl** handle,
+                              Cache::Priority priority, size_t capacity,
+                              bool strict_capacity_limit) {
+  using HandleImpl = typename Table::HandleImpl;
+  Table& derived = static_cast<Table&>(*this);
+
+  typename Table::InsertState state;
+  derived.StartInsert(state);
+
   // Do we have the available occupancy? Optimistically assume we do
   // and deal with it if we don't.
   size_t old_occupancy = occupancy_.fetch_add(1, std::memory_order_acquire);
@@ -402,23 +490,24 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
     occupancy_.fetch_sub(1, std::memory_order_relaxed);
   };
   // Whether we over-committed and need an eviction to make up for it
-  bool need_evict_for_occupancy = old_occupancy >= occupancy_limit_;
+  bool need_evict_for_occupancy =
+      !derived.GrowIfNeeded(old_occupancy + 1, state);

   // Usage/capacity handling is somewhat different depending on
   // strict_capacity_limit, but mostly pessimistic.
   bool use_standalone_insert = false;
   const size_t total_charge = proto.GetTotalCharge();
   if (strict_capacity_limit) {
-    Status s = ChargeUsageMaybeEvictStrict(total_charge, capacity,
-                                           need_evict_for_occupancy);
+    Status s = ChargeUsageMaybeEvictStrict<Table>(
+        total_charge, capacity, need_evict_for_occupancy, state);
     if (!s.ok()) {
       revert_occupancy_fn();
       return s;
     }
   } else {
     // Case strict_capacity_limit == false
-    bool success = ChargeUsageMaybeEvictNonStrict(total_charge, capacity,
-                                                  need_evict_for_occupancy);
+    bool success = ChargeUsageMaybeEvictNonStrict<Table>(
+        total_charge, capacity, need_evict_for_occupancy, state);
     if (!success) {
       revert_occupancy_fn();
       if (handle == nullptr) {
@@ -451,115 +540,17 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
     uint64_t initial_countdown = GetInitialCountdown(priority);
     assert(initial_countdown > 0);

-    size_t probe = 0;
-    HandleImpl* e = FindSlot(
-        proto.hashed_key,
-        [&](HandleImpl* h) {
-          // Optimistically transition the slot from "empty" to
-          // "under construction" (no effect on other states)
-          uint64_t old_meta =
-              h->meta.fetch_or(uint64_t{ClockHandle::kStateOccupiedBit}
-                                   << ClockHandle::kStateShift,
-                               std::memory_order_acq_rel);
-          uint64_t old_state = old_meta >> ClockHandle::kStateShift;
-          if (old_state == ClockHandle::kStateEmpty) {
-            // We've started inserting into an available slot, and taken
-            // ownership. Save data fields
-            ClockHandleBasicData* h_alias = h;
-            *h_alias = proto;
-            // Transition from "under construction" state to "visible" state
-            uint64_t new_meta = uint64_t{ClockHandle::kStateVisible}
-                                << ClockHandle::kStateShift;
-            // Maybe with an outstanding reference
-            new_meta |= initial_countdown << ClockHandle::kAcquireCounterShift;
-            new_meta |= (initial_countdown - (handle != nullptr))
-                        << ClockHandle::kReleaseCounterShift;
-#ifndef NDEBUG
-            // Save the state transition, with assertion
-            old_meta = h->meta.exchange(new_meta, std::memory_order_release);
-            assert(old_meta >> ClockHandle::kStateShift ==
-                   ClockHandle::kStateConstruction);
-#else
-            // Save the state transition
-            h->meta.store(new_meta, std::memory_order_release);
-#endif
-            return true;
-          } else if (old_state != ClockHandle::kStateVisible) {
-            // Slot not usable / touchable now
-            return false;
-          }
-          // Existing, visible entry, which might be a match.
-          // But first, we need to acquire a ref to read it. In fact, number of
-          // refs for initial countdown, so that we boost the clock state if
-          // this is a match.
-          old_meta = h->meta.fetch_add(
-              ClockHandle::kAcquireIncrement * initial_countdown,
-              std::memory_order_acq_rel);
-          // Like Lookup
-          if ((old_meta >> ClockHandle::kStateShift) ==
-              ClockHandle::kStateVisible) {
-            // Acquired a read reference
-            if (h->hashed_key == proto.hashed_key) {
-              // Match. Release in a way that boosts the clock state
-              old_meta = h->meta.fetch_add(
-                  ClockHandle::kReleaseIncrement * initial_countdown,
-                  std::memory_order_acq_rel);
-              // Correct for possible (but rare) overflow
-              CorrectNearOverflow(old_meta, h->meta);
-              // Insert standalone instead (only if return handle needed)
-              use_standalone_insert = true;
-              return true;
-            } else {
-              // Mismatch. Pretend we never took the reference
-              old_meta = h->meta.fetch_sub(
-                  ClockHandle::kAcquireIncrement * initial_countdown,
-                  std::memory_order_acq_rel);
-            }
-          } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) ==
-                              ClockHandle::kStateInvisible)) {
-            // Pretend we never took the reference
-            // WART: there's a tiny chance we release last ref to invisible
-            // entry here. If that happens, we let eviction take care of it.
-            old_meta = h->meta.fetch_sub(
-                ClockHandle::kAcquireIncrement * initial_countdown,
-                std::memory_order_acq_rel);
-          } else {
-            // For other states, incrementing the acquire counter has no effect
-            // so we don't need to undo it.
-            // Slot not usable / touchable now.
-          }
-          (void)old_meta;
-          return false;
-        },
-        [&](HandleImpl* /*h*/) { return false; },
-        [&](HandleImpl* h) {
-          h->displacements.fetch_add(1, std::memory_order_relaxed);
-        },
-        probe);
-    if (e == nullptr) {
-      // Occupancy check and never abort FindSlot above should generally
-      // prevent this, except it's theoretically possible for other threads
-      // to evict and replace entries in the right order to hit every slot
-      // when it is populated. Assuming random hashing, the chance of that
-      // should be no higher than pow(kStrictLoadFactor, n) for n slots.
-      // That should be infeasible for roughly n >= 256, so if this assertion
-      // fails, that suggests something is going wrong.
-      assert(GetTableSize() < 256);
-      use_standalone_insert = true;
-    }
-    if (!use_standalone_insert) {
+    HandleImpl* e =
+        derived.DoInsert(proto, initial_countdown, handle != nullptr, state);
+
+    if (e) {
       // Successfully inserted
       if (handle) {
         *handle = e;
       }
       return Status::OK();
     }
-    // Roll back table insertion
-    Rollback(proto.hashed_key, e);
+    // Not inserted
     revert_occupancy_fn();
     // Maybe fall back on standalone insert
     if (handle == nullptr) {
@@ -568,12 +559,14 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
       proto.FreeData(allocator_);
       return Status::OK();
     }
+
+    use_standalone_insert = true;
   }

   // Run standalone insert
   assert(use_standalone_insert);
-  *handle = StandaloneInsert(proto);
+  *handle = StandaloneInsert<HandleImpl>(proto);

   // The OkOverwritten status is used to count "redundant" insertions into
   // block cache. This implementation doesn't strictly check for redundant
@@ -583,32 +576,146 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
   return Status::OkOverwritten();
 }

-HyperClockTable::HandleImpl* HyperClockTable::CreateStandalone(
-    ClockHandleBasicData& proto, size_t capacity, bool strict_capacity_limit,
-    bool allow_uncharged) {
-  const size_t total_charge = proto.GetTotalCharge();
-  if (strict_capacity_limit) {
-    Status s = ChargeUsageMaybeEvictStrict(total_charge, capacity,
-                                           /*need_evict_for_occupancy=*/false);
-    if (!s.ok()) {
-      if (allow_uncharged) {
-        proto.total_charge = 0;
-      } else {
-        return nullptr;
-      }
-    }
-  } else {
-    // Case strict_capacity_limit == false
-    bool success =
-        ChargeUsageMaybeEvictNonStrict(total_charge, capacity,
-                                       /*need_evict_for_occupancy=*/false);
-    if (!success) {
-      // Force the issue
-      usage_.fetch_add(total_charge, std::memory_order_relaxed);
-    }
-  }
-
-  return StandaloneInsert(proto);
-}
+void BaseClockTable::Ref(ClockHandle& h) {
+  // Increment acquire counter
+  uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement,
+                                       std::memory_order_acquire);
+
+  assert((old_meta >> ClockHandle::kStateShift) &
+         ClockHandle::kStateShareableBit);
+  // Must have already had a reference
+  assert(GetRefcount(old_meta) > 0);
+  (void)old_meta;
+}
+
+#ifndef NDEBUG
+void BaseClockTable::TEST_RefN(ClockHandle& h, size_t n) {
+  // Increment acquire counter
+  uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement,
+                                       std::memory_order_acquire);
+
+  assert((old_meta >> ClockHandle::kStateShift) &
+         ClockHandle::kStateShareableBit);
+  (void)old_meta;
+}
+
+void BaseClockTable::TEST_ReleaseNMinus1(ClockHandle* h, size_t n) {
+  assert(n > 0);
+
+  // Like n-1 Releases, but assumes one more will happen in the caller to take
+  // care of anything like erasing an unreferenced, invisible entry.
+  uint64_t old_meta = h->meta.fetch_add(
+      (n - 1) * ClockHandle::kReleaseIncrement, std::memory_order_acquire);
+  assert((old_meta >> ClockHandle::kStateShift) &
+         ClockHandle::kStateShareableBit);
+  (void)old_meta;
+}
+#endif
+
+HyperClockTable::HyperClockTable(
+    size_t capacity, bool /*strict_capacity_limit*/,
+    CacheMetadataChargePolicy metadata_charge_policy,
+    MemoryAllocator* allocator,
+    const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed,
+    const Opts& opts)
+    : BaseClockTable(metadata_charge_policy, allocator, eviction_callback,
+                     hash_seed),
+      length_bits_(CalcHashBits(capacity, opts.estimated_value_size,
+                                metadata_charge_policy)),
+      length_bits_mask_((size_t{1} << length_bits_) - 1),
+      occupancy_limit_(static_cast<size_t>((uint64_t{1} << length_bits_) *
+                                           kStrictLoadFactor)),
+      array_(new HandleImpl[size_t{1} << length_bits_]) {
+  if (metadata_charge_policy ==
+      CacheMetadataChargePolicy::kFullChargeCacheMetadata) {
+    usage_ += size_t{GetTableSize()} * sizeof(HandleImpl);
+  }
+  static_assert(sizeof(HandleImpl) == 64U,
+                "Expecting size / alignment with common cache line size");
+}
+
+HyperClockTable::~HyperClockTable() {
+  // Assumes there are no references or active operations on any slot/element
+  // in the table.
+  for (size_t i = 0; i < GetTableSize(); i++) {
+    HandleImpl& h = array_[i];
+    switch (h.meta >> ClockHandle::kStateShift) {
+      case ClockHandle::kStateEmpty:
+        // noop
+        break;
+      case ClockHandle::kStateInvisible:  // rare but possible
+      case ClockHandle::kStateVisible:
+        assert(GetRefcount(h.meta) == 0);
+        h.FreeData(allocator_);
+#ifndef NDEBUG
+        Rollback(h.hashed_key, &h);
+        ReclaimEntryUsage(h.GetTotalCharge());
+#endif
+        break;
+      // otherwise
+      default:
+        assert(false);
+        break;
+    }
+  }
+
+#ifndef NDEBUG
+  for (size_t i = 0; i < GetTableSize(); i++) {
+    assert(array_[i].displacements.load() == 0);
+  }
+#endif
+
+  assert(usage_.load() == 0 ||
+         usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl));
+  assert(occupancy_ == 0);
+}
+
+void HyperClockTable::StartInsert(InsertState&) {}
+
+bool HyperClockTable::GrowIfNeeded(size_t new_occupancy, InsertState&) {
+  return new_occupancy <= occupancy_limit_;
+}
+
+HyperClockTable::HandleImpl* HyperClockTable::DoInsert(
+    const ClockHandleBasicData& proto, uint64_t initial_countdown,
+    bool keep_ref, InsertState&) {
+  size_t probe = 0;
+  bool already_matches = false;
+  HandleImpl* e = FindSlot(
+      proto.hashed_key,
+      [&](HandleImpl* h) {
+        // FIXME: simplify and handle in abort_fn below?
+        bool inserted =
+            TryInsert(proto, *h, initial_countdown, keep_ref, &already_matches);
+        return inserted || already_matches;
+      },
+      [&](HandleImpl* /*h*/) { return false; },
+      [&](HandleImpl* h) {
+        h->displacements.fetch_add(1, std::memory_order_relaxed);
+      },
+      probe);
+  if (e == nullptr) {
+    // Occupancy check and never abort FindSlot above should generally
+    // prevent this, except it's theoretically possible for other threads
+    // to evict and replace entries in the right order to hit every slot
+    // when it is populated. Assuming random hashing, the chance of that
+    // should be no higher than pow(kStrictLoadFactor, n) for n slots.
+    // That should be infeasible for roughly n >= 256, so if this assertion
+    // fails, that suggests something is going wrong.
+    assert(GetTableSize() < 256);
+    // WART/FIXME: need to roll back every slot
+    already_matches = true;
+  }
+  if (!already_matches) {
+    // Successfully inserted
+    assert(e);
+    return e;
+  }
+  // Roll back displacements from failed table insertion
+  Rollback(proto.hashed_key, e);
+  // Insertion skipped
+  return nullptr;
+}

 HyperClockTable::HandleImpl* HyperClockTable::Lookup(
@@ -753,40 +860,17 @@ bool HyperClockTable::Release(HandleImpl* h, bool useful,
   }
 }

-void HyperClockTable::Ref(HandleImpl& h) {
-  // Increment acquire counter
-  uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement,
-                                       std::memory_order_acquire);
-  assert((old_meta >> ClockHandle::kStateShift) &
-         ClockHandle::kStateShareableBit);
-  // Must have already had a reference
-  assert(GetRefcount(old_meta) > 0);
-  (void)old_meta;
-}
-
-void HyperClockTable::TEST_RefN(HandleImpl& h, size_t n) {
-  // Increment acquire counter
-  uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement,
-                                       std::memory_order_acquire);
-  assert((old_meta >> ClockHandle::kStateShift) &
-         ClockHandle::kStateShareableBit);
-  (void)old_meta;
-}
-
+#ifndef NDEBUG
 void HyperClockTable::TEST_ReleaseN(HandleImpl* h, size_t n) {
   if (n > 0) {
-    // Split into n - 1 and 1 steps.
-    uint64_t old_meta = h->meta.fetch_add(
-        (n - 1) * ClockHandle::kReleaseIncrement, std::memory_order_acquire);
-    assert((old_meta >> ClockHandle::kStateShift) &
-           ClockHandle::kStateShareableBit);
-    (void)old_meta;
+    // Do n-1 simple releases first
+    TEST_ReleaseNMinus1(h, n);

     // Then the last release might be more involved
     Release(h, /*useful*/ true, /*erase_if_last_ref*/ false);
   }
 }
+#endif

 void HyperClockTable::Erase(const UniqueId64x2& hashed_key) {
   size_t probe = 0;
@@ -978,7 +1062,8 @@ inline void HyperClockTable::ReclaimEntryUsage(size_t total_charge) {
 }

 inline void HyperClockTable::Evict(size_t requested_charge,
-                                   size_t* freed_charge, size_t* freed_count) {
+                                   size_t* freed_charge, size_t* freed_count,
+                                   InsertState&) {
   // precondition
   assert(requested_charge > 0);
@@ -1146,18 +1231,15 @@ Status ClockCacheShard<Table>::Insert(const Slice& key,
   proto.value = value;
   proto.helper = helper;
   proto.total_charge = charge;
-  return table_.Insert(proto, handle, priority,
-                       capacity_.load(std::memory_order_relaxed),
-                       strict_capacity_limit_.load(std::memory_order_relaxed));
+  return table_.template Insert<Table>(
+      proto, handle, priority, capacity_.load(std::memory_order_relaxed),
+      strict_capacity_limit_.load(std::memory_order_relaxed));
 }

 template <class Table>
-typename ClockCacheShard<Table>::HandleImpl*
-ClockCacheShard<Table>::CreateStandalone(const Slice& key,
-                                         const UniqueId64x2& hashed_key,
-                                         Cache::ObjectPtr obj,
-                                         const Cache::CacheItemHelper* helper,
-                                         size_t charge, bool allow_uncharged) {
+typename Table::HandleImpl* ClockCacheShard<Table>::CreateStandalone(
+    const Slice& key, const UniqueId64x2& hashed_key, Cache::ObjectPtr obj,
+    const Cache::CacheItemHelper* helper, size_t charge, bool allow_uncharged) {
   if (UNLIKELY(key.size() != kCacheKeySize)) {
     return nullptr;
   }
@@ -1166,7 +1248,7 @@ ClockCacheShard<Table>::CreateStandalone(const Slice& key,
   proto.value = obj;
   proto.helper = helper;
   proto.total_charge = charge;
-  return table_.CreateStandalone(
+  return table_.template CreateStandalone<Table>(
       proto, capacity_.load(std::memory_order_relaxed),
       strict_capacity_limit_.load(std::memory_order_relaxed), allow_uncharged);
 }
@@ -1198,6 +1280,7 @@ bool ClockCacheShard<Table>::Release(HandleImpl* handle, bool useful,
   return table_.Release(handle, useful, erase_if_last_ref);
 }

+#ifndef NDEBUG
 template <class Table>
 void ClockCacheShard<Table>::TEST_RefN(HandleImpl* h, size_t n) {
   table_.TEST_RefN(*h, n);
@@ -1207,6 +1290,7 @@ template <class Table>
 void ClockCacheShard<Table>::TEST_ReleaseN(HandleImpl* h, size_t n) {
   table_.TEST_ReleaseN(h, n);
 }
+#endif

 template <class Table>
 bool ClockCacheShard<Table>::Release(HandleImpl* handle,

diff --git a/cache/clock_cache.h b/cache/clock_cache.h

@@ -374,11 +374,123 @@ struct ClockHandle : public ClockHandleBasicData {
   // See above
   std::atomic<uint64_t> meta{};

-  // Anticipating use for SecondaryCache support
-  void* reserved_for_future_use = nullptr;
+  // Whether this is a "deteched" handle that is independently allocated
+  // with `new` (so must be deleted with `delete`).
+  // TODO: ideally this would be packed into some other data field, such
+  // as upper bits of total_charge, but that incurs a measurable performance
+  // regression.
+  bool standalone = false;
+
+  inline bool IsStandalone() const { return standalone; }
+
+  inline void SetStandalone() { standalone = true; }
 };  // struct ClockHandle

-class HyperClockTable {
+class BaseClockTable {
+ public:
+  BaseClockTable(CacheMetadataChargePolicy metadata_charge_policy,
+                 MemoryAllocator* allocator,
+                 const Cache::EvictionCallback* eviction_callback,
+                 const uint32_t* hash_seed)
+      : metadata_charge_policy_(metadata_charge_policy),
+        allocator_(allocator),
+        eviction_callback_(*eviction_callback),
+        hash_seed_(*hash_seed) {}
+
+  // Creates a "standalone" handle for returning from an Insert operation that
+  // cannot be completed by actually inserting into the table.
+  // Updates `standalone_usage_` but not `usage_` nor `occupancy_`.
+  template <class HandleImpl>
+  HandleImpl* StandaloneInsert(const ClockHandleBasicData& proto);
+
+  template <class Table>
+  typename Table::HandleImpl* CreateStandalone(ClockHandleBasicData& proto,
+                                               size_t capacity,
+                                               bool strict_capacity_limit,
+                                               bool allow_uncharged);
+
+  // Helper for updating `usage_` for new entry with given `total_charge`
+  // and evicting if needed under strict_capacity_limit=true rules. This
+  // means the operation might fail with Status::MemoryLimit. If
+  // `need_evict_for_occupancy`, then eviction of at least one entry is
+  // required, and the operation should fail if not possible.
+  // NOTE: Otherwise, occupancy_ is not managed in this function
+  template <class Table>
+  Status ChargeUsageMaybeEvictStrict(size_t total_charge, size_t capacity,
+                                     bool need_evict_for_occupancy,
+                                     typename Table::InsertState& state);
+
+  // Helper for updating `usage_` for new entry with given `total_charge`
+  // and evicting if needed under strict_capacity_limit=false rules. This
+  // means that updating `usage_` always succeeds even if forced to exceed
+  // capacity. If `need_evict_for_occupancy`, then eviction of at least one
+  // entry is required, and the operation should return false if such eviction
+  // is not possible. `usage_` is not updated in that case. Otherwise, returns
+  // true, indicating success.
+  // NOTE: occupancy_ is not managed in this function
+  template <class Table>
+  bool ChargeUsageMaybeEvictNonStrict(size_t total_charge, size_t capacity,
+                                      bool need_evict_for_occupancy,
+                                      typename Table::InsertState& state);
+
+  template <class Table>
+  Status Insert(const ClockHandleBasicData& proto,
+                typename Table::HandleImpl** handle, Cache::Priority priority,
+                size_t capacity, bool strict_capacity_limit);
+
+  void Ref(ClockHandle& handle);
+
+  size_t GetOccupancy() const {
+    return occupancy_.load(std::memory_order_relaxed);
+  }
+
+  size_t GetUsage() const { return usage_.load(std::memory_order_relaxed); }
+
+  size_t GetStandaloneUsage() const {
+    return standalone_usage_.load(std::memory_order_relaxed);
+  }
+
+  uint32_t GetHashSeed() const { return hash_seed_; }
+
+#ifndef NDEBUG
+  // Acquire N references
+  void TEST_RefN(ClockHandle& handle, size_t n);
+  // Helper for TEST_ReleaseN
+  void TEST_ReleaseNMinus1(ClockHandle* handle, size_t n);
+#endif
+
+ protected:
+  // We partition the following members into different cache lines
+  // to avoid false sharing among Lookup, Release, Erase and Insert
+  // operations in ClockCacheShard.
+
+  // Clock algorithm sweep pointer.
+  std::atomic<uint64_t> clock_pointer_{};
+
+  ALIGN_AS(CACHE_LINE_SIZE)
+  // Number of elements in the table.
+  std::atomic<size_t> occupancy_{};
+
+  // Memory usage by entries tracked by the cache (including standalone)
+  std::atomic<size_t> usage_{};
+
+  // Part of usage by standalone entries (not in table)
+  std::atomic<size_t> standalone_usage_{};
+
+  ALIGN_AS(CACHE_LINE_SIZE)
+  const CacheMetadataChargePolicy metadata_charge_policy_;
+
+  // From Cache, for deleter
+  MemoryAllocator* const allocator_;
+
+  // A reference to Cache::eviction_callback_
+  const Cache::EvictionCallback& eviction_callback_;
+
+  // A reference to ShardedCacheBase::hash_seed_
+  const uint32_t& hash_seed_;
+};
+
+class HyperClockTable : public BaseClockTable {
  public:
   // Target size to be exactly a common cache line size (see static_assert in
   // clock_cache.cc)
@@ -387,16 +499,6 @@ class HyperClockTable {
     // up in this slot or a higher one.
     std::atomic<uint32_t> displacements{};

-    // Whether this is a "deteched" handle that is independently allocated
-    // with `new` (so must be deleted with `delete`).
-    // TODO: ideally this would be packed into some other data field, such
-    // as upper bits of total_charge, but that incurs a measurable performance
-    // regression.
-    bool standalone = false;
-
-    inline bool IsStandalone() const { return standalone; }
-
-    inline void SetStandalone() { standalone = true; }
   };  // struct HandleImpl

   struct Opts {
@@ -410,20 +512,28 @@ class HyperClockTable {
                  const uint32_t* hash_seed, const Opts& opts);
   ~HyperClockTable();

-  Status Insert(const ClockHandleBasicData& proto, HandleImpl** handle,
-                Cache::Priority priority, size_t capacity,
-                bool strict_capacity_limit);
+  // For BaseClockTable::Insert
+  struct InsertState {};
+
+  void StartInsert(InsertState& state);

-  HandleImpl* CreateStandalone(ClockHandleBasicData& proto, size_t capacity,
-                               bool strict_capacity_limit,
-                               bool allow_uncharged);
+  // Returns true iff there is room for the proposed number of entries.
+  bool GrowIfNeeded(size_t new_occupancy, InsertState& state);
+
+  HandleImpl* DoInsert(const ClockHandleBasicData& proto,
+                       uint64_t initial_countdown, bool take_ref,
+                       InsertState& state);
+
+  // Runs the clock eviction algorithm trying to reclaim at least
+  // requested_charge. Returns how much is evicted, which could be less
+  // if it appears impossible to evict the requested amount without blocking.
+  void Evict(size_t requested_charge, size_t* freed_charge, size_t* freed_count,
+             InsertState& state);

   HandleImpl* Lookup(const UniqueId64x2& hashed_key);

   bool Release(HandleImpl* handle, bool useful, bool erase_if_last_ref);

-  void Ref(HandleImpl& handle);
-
   void Erase(const UniqueId64x2& hashed_key);

   void ConstApplyToEntriesRange(std::function<void(const HandleImpl&)> func,
@@ -436,23 +546,11 @@ class HyperClockTable {
   int GetLengthBits() const { return length_bits_; }

-  size_t GetOccupancy() const {
-    return occupancy_.load(std::memory_order_relaxed);
-  }
-
   size_t GetOccupancyLimit() const { return occupancy_limit_; }

-  size_t GetUsage() const { return usage_.load(std::memory_order_relaxed); }
-
-  size_t GetStandaloneUsage() const {
-    return standalone_usage_.load(std::memory_order_relaxed);
-  }
-
-  uint32_t GetHashSeed() const { return hash_seed_; }
-
-  // Acquire/release N references
-  void TEST_RefN(HandleImpl& handle, size_t n);
-  void TEST_ReleaseN(HandleImpl* handle, size_t n);
+#ifndef NDEBUG
+  void TEST_ReleaseN(HandleImpl* h, size_t n);
+#endif

  private:  // functions
   // Returns x mod 2^{length_bits_}.
@@ -460,12 +558,6 @@ class HyperClockTable {
     return static_cast<size_t>(x) & length_bits_mask_;
   }

-  // Runs the clock eviction algorithm trying to reclaim at least
-  // requested_charge. Returns how much is evicted, which could be less
-  // if it appears impossible to evict the requested amount without blocking.
-  inline void Evict(size_t requested_charge, size_t* freed_charge,
-                    size_t* freed_count);
-
   // Returns the first slot in the probe sequence, starting from the given
   // probe number, with a handle e such that match(e) is true. At every
   // step, the function first tests whether match(e) holds. If this is false,
@@ -494,33 +586,6 @@ class HyperClockTable {
   // before releasing it so that it can be provided to this function.
   inline void ReclaimEntryUsage(size_t total_charge);

-  // Helper for updating `usage_` for new entry with given `total_charge`
-  // and evicting if needed under strict_capacity_limit=true rules. This
-  // means the operation might fail with Status::MemoryLimit. If
-  // `need_evict_for_occupancy`, then eviction of at least one entry is
-  // required, and the operation should fail if not possible.
-  // NOTE: Otherwise, occupancy_ is not managed in this function
-  inline Status ChargeUsageMaybeEvictStrict(size_t total_charge,
-                                            size_t capacity,
-                                            bool need_evict_for_occupancy);
-
-  // Helper for updating `usage_` for new entry with given `total_charge`
-  // and evicting if needed under strict_capacity_limit=false rules. This
-  // means that updating `usage_` always succeeds even if forced to exceed
-  // capacity. If `need_evict_for_occupancy`, then eviction of at least one
-  // entry is required, and the operation should return false if such eviction
-  // is not possible. `usage_` is not updated in that case. Otherwise, returns
-  // true, indicating success.
-  // NOTE: occupancy_ is not managed in this function
-  inline bool ChargeUsageMaybeEvictNonStrict(size_t total_charge,
-                                             size_t capacity,
-                                             bool need_evict_for_occupancy);
-
-  // Creates a "standalone" handle for returning from an Insert operation that
-  // cannot be completed by actually inserting into the table.
-  // Updates `standalone_usage_` but not `usage_` nor `occupancy_`.
-  inline HandleImpl* StandaloneInsert(const ClockHandleBasicData& proto);
-
   MemoryAllocator* GetAllocator() const { return allocator_; }

   // Returns the number of bits used to hash an element in the hash
@@ -541,33 +606,6 @@ class HyperClockTable {
   // Array of slots comprising the hash table.
   const std::unique_ptr<HandleImpl[]> array_;

-  // From Cache, for deleter
-  MemoryAllocator* const allocator_;
-
-  // A reference to Cache::eviction_callback_
-  const Cache::EvictionCallback& eviction_callback_;
-
-  // A reference to ShardedCacheBase::hash_seed_
-  const uint32_t& hash_seed_;
-
-  // We partition the following members into different cache lines
-  // to avoid false sharing among Lookup, Release, Erase and Insert
-  // operations in ClockCacheShard.
-  ALIGN_AS(CACHE_LINE_SIZE)
-  // Clock algorithm sweep pointer.
-  std::atomic<uint64_t> clock_pointer_{};
-
-  ALIGN_AS(CACHE_LINE_SIZE)
-  // Number of elements in the table.
-  std::atomic<size_t> occupancy_{};
-
-  // Memory usage by entries tracked by the cache (including standalone)
-  std::atomic<size_t> usage_{};
-
-  // Part of usage by standalone entries (not in table)
-  std::atomic<size_t> standalone_usage_{};
-
 };  // class HyperClockTable

 // A single shard of sharded cache.
