From c3c84b3397a0eaa6450340ecea3b267c0e6c1f3c Mon Sep 17 00:00:00 2001
From: Peter Dillinger
Date: Wed, 12 Jul 2023 14:05:34 -0700
Subject: [PATCH] Refactor (Hyper)ClockCache code for upcoming changes (#11572)

Summary: Separate out some functionality that will be common to both static
and dynamic HCC into BaseClockTable. Table::InsertState and GrowIfNeeded will
be used by the dynamic HCC, so they don't make much sense right now.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11572

Test Plan: existing tests. No functional changes intended. Performance test
in subsequent PR https://github.com/facebook/rocksdb/issues/11601

Reviewed By: jowlyzhang

Differential Revision: D47110496

Pulled By: pdillinger

fbshipit-source-id: 379bd433322a42ea28c0043b41ec24956d21e7aa
---
 cache/clock_cache.cc | 614 ++++++++++++++++++++++++-------------------
 cache/clock_cache.h  | 230 +++++++++-------
 2 files changed, 483 insertions(+), 361 deletions(-)

diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc
index 06cc27a8e..6b1caabfa 100644
--- a/cache/clock_cache.cc
+++ b/cache/clock_cache.cc
@@ -9,6 +9,7 @@
 #include "cache/clock_cache.h"

+#include
 #include
 #include

@@ -118,74 +119,6 @@ inline bool ClockUpdate(ClockHandle& h) {
   }
 }

-}  // namespace
-
-void ClockHandleBasicData::FreeData(MemoryAllocator* allocator) const {
-  if (helper->del_cb) {
-    helper->del_cb(value, allocator);
-  }
-}
-
-HyperClockTable::HyperClockTable(
-    size_t capacity, bool /*strict_capacity_limit*/,
-    CacheMetadataChargePolicy metadata_charge_policy,
-    MemoryAllocator* allocator,
-    const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed,
-    const Opts& opts)
-    : length_bits_(CalcHashBits(capacity, opts.estimated_value_size,
-                                metadata_charge_policy)),
-      length_bits_mask_((size_t{1} << length_bits_) - 1),
-      occupancy_limit_(static_cast<size_t>((uint64_t{1} << length_bits_) *
-                                           kStrictLoadFactor)),
-      array_(new HandleImpl[size_t{1} << length_bits_]),
-      allocator_(allocator),
-      eviction_callback_(*eviction_callback),
-      hash_seed_(*hash_seed) {
-  if (metadata_charge_policy ==
-      CacheMetadataChargePolicy::kFullChargeCacheMetadata) {
-    usage_ += size_t{GetTableSize()} * sizeof(HandleImpl);
-  }
-
-  static_assert(sizeof(HandleImpl) == 64U,
-                "Expecting size / alignment with common cache line size");
-}
-
-HyperClockTable::~HyperClockTable() {
-  // Assumes there are no references or active operations on any slot/element
-  // in the table.
-  for (size_t i = 0; i < GetTableSize(); i++) {
-    HandleImpl& h = array_[i];
-    switch (h.meta >> ClockHandle::kStateShift) {
-      case ClockHandle::kStateEmpty:
-        // noop
-        break;
-      case ClockHandle::kStateInvisible:  // rare but possible
-      case ClockHandle::kStateVisible:
-        assert(GetRefcount(h.meta) == 0);
-        h.FreeData(allocator_);
-#ifndef NDEBUG
-        Rollback(h.hashed_key, &h);
-        ReclaimEntryUsage(h.GetTotalCharge());
-#endif
-        break;
-      // otherwise
-      default:
-        assert(false);
-        break;
-    }
-  }
-
-#ifndef NDEBUG
-  for (size_t i = 0; i < GetTableSize(); i++) {
-    assert(array_[i].displacements.load() == 0);
-  }
-#endif
-
-  assert(usage_.load() == 0 ||
-         usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl));
-  assert(occupancy_ == 0);
-}
-
 // If an entry doesn't receive clock updates but is repeatedly referenced &
 // released, the acquire and release counters could overflow without some
 // intervention.
This is that intervention, which should be inexpensive @@ -259,8 +192,170 @@ inline void CorrectNearOverflow(uint64_t old_meta, } } -inline Status HyperClockTable::ChargeUsageMaybeEvictStrict( - size_t total_charge, size_t capacity, bool need_evict_for_occupancy) { +inline bool BeginSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h, + uint64_t initial_countdown, bool* already_matches) { + assert(*already_matches == false); + + // Optimistically transition the slot from "empty" to + // "under construction" (no effect on other states) + uint64_t old_meta = h.meta.fetch_or( + uint64_t{ClockHandle::kStateOccupiedBit} << ClockHandle::kStateShift, + std::memory_order_acq_rel); + uint64_t old_state = old_meta >> ClockHandle::kStateShift; + + if (old_state == ClockHandle::kStateEmpty) { + // We've started inserting into an available slot, and taken + // ownership. + return true; + } else if (old_state != ClockHandle::kStateVisible) { + // Slot not usable / touchable now + return false; + } + // Existing, visible entry, which might be a match. + // But first, we need to acquire a ref to read it. In fact, number of + // refs for initial countdown, so that we boost the clock state if + // this is a match. + old_meta = + h.meta.fetch_add(ClockHandle::kAcquireIncrement * initial_countdown, + std::memory_order_acq_rel); + // Like Lookup + if ((old_meta >> ClockHandle::kStateShift) == ClockHandle::kStateVisible) { + // Acquired a read reference + if (h.hashed_key == proto.hashed_key) { + // Match. Release in a way that boosts the clock state + old_meta = + h.meta.fetch_add(ClockHandle::kReleaseIncrement * initial_countdown, + std::memory_order_acq_rel); + // Correct for possible (but rare) overflow + CorrectNearOverflow(old_meta, h.meta); + // Insert detached instead (only if return handle needed) + *already_matches = true; + return false; + } else { + // Mismatch. Pretend we never took the reference + old_meta = + h.meta.fetch_sub(ClockHandle::kAcquireIncrement * initial_countdown, + std::memory_order_acq_rel); + } + } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) == + ClockHandle::kStateInvisible)) { + // Pretend we never took the reference + // WART/FIXME?: there's a tiny chance we release last ref to invisible + // entry here. If that happens, we let eviction take care of it. + old_meta = + h.meta.fetch_sub(ClockHandle::kAcquireIncrement * initial_countdown, + std::memory_order_acq_rel); + } else { + // For other states, incrementing the acquire counter has no effect + // so we don't need to undo it. + // Slot not usable / touchable now. 
+  }
+  (void)old_meta;
+  return false;
+}
+
+inline void FinishSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h,
+                             uint64_t initial_countdown, bool keep_ref) {
+  // Save data fields
+  ClockHandleBasicData* h_alias = &h;
+  *h_alias = proto;
+
+  // Transition from "under construction" state to "visible" state
+  uint64_t new_meta = uint64_t{ClockHandle::kStateVisible}
+                      << ClockHandle::kStateShift;
+
+  // Maybe with an outstanding reference
+  new_meta |= initial_countdown << ClockHandle::kAcquireCounterShift;
+  new_meta |= (initial_countdown - keep_ref)
+              << ClockHandle::kReleaseCounterShift;
+
+#ifndef NDEBUG
+  // Save the state transition, with assertion
+  uint64_t old_meta = h.meta.exchange(new_meta, std::memory_order_release);
+  assert(old_meta >> ClockHandle::kStateShift ==
+         ClockHandle::kStateConstruction);
+#else
+  // Save the state transition
+  h.meta.store(new_meta, std::memory_order_release);
+#endif
+}
+
+bool TryInsert(const ClockHandleBasicData& proto, ClockHandle& h,
+               uint64_t initial_countdown, bool keep_ref,
+               bool* already_matches) {
+  bool b = BeginSlotInsert(proto, h, initial_countdown, already_matches);
+  if (b) {
+    FinishSlotInsert(proto, h, initial_countdown, keep_ref);
+  }
+  return b;
+}
+
+}  // namespace
+
+void ClockHandleBasicData::FreeData(MemoryAllocator* allocator) const {
+  if (helper->del_cb) {
+    helper->del_cb(value, allocator);
+  }
+}
+
+template <class HandleImpl>
+HandleImpl* BaseClockTable::StandaloneInsert(
+    const ClockHandleBasicData& proto) {
+  // Heap allocated separate from table
+  HandleImpl* h = new HandleImpl();
+  ClockHandleBasicData* h_alias = h;
+  *h_alias = proto;
+  h->SetStandalone();
+  // Single reference (standalone entries only created if returning a refed
+  // Handle back to user)
+  uint64_t meta = uint64_t{ClockHandle::kStateInvisible}
+                  << ClockHandle::kStateShift;
+  meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift;
+  h->meta.store(meta, std::memory_order_release);
+  // Keep track of how much of usage is standalone
+  standalone_usage_.fetch_add(proto.GetTotalCharge(),
+                              std::memory_order_relaxed);
+  return h;
+}
+
+template <class Table>
+typename Table::HandleImpl* BaseClockTable::CreateStandalone(
+    ClockHandleBasicData& proto, size_t capacity, bool strict_capacity_limit,
+    bool allow_uncharged) {
+  Table& derived = static_cast<Table&>(*this);
+  typename Table::InsertState state;
+  derived.StartInsert(state);
+
+  const size_t total_charge = proto.GetTotalCharge();
+  if (strict_capacity_limit) {
+    Status s = ChargeUsageMaybeEvictStrict<Table>(
+        total_charge, capacity,
+        /*need_evict_for_occupancy=*/false, state);
+    if (!s.ok()) {
+      if (allow_uncharged) {
+        proto.total_charge = 0;
+      } else {
+        return nullptr;
+      }
+    }
+  } else {
+    // Case strict_capacity_limit == false
+    bool success = ChargeUsageMaybeEvictNonStrict<Table>(
+        total_charge, capacity,
+        /*need_evict_for_occupancy=*/false, state);
+    if (!success) {
+      // Force the issue
+      usage_.fetch_add(total_charge, std::memory_order_relaxed);
+    }
+  }
+
+  return StandaloneInsert<typename Table::HandleImpl>(proto);
+}
+
+template <class Table>
+Status BaseClockTable::ChargeUsageMaybeEvictStrict(
+    size_t total_charge, size_t capacity, bool need_evict_for_occupancy,
+    typename Table::InsertState& state) {
   if (total_charge > capacity) {
     return Status::MemoryLimit(
         "Cache entry too large for a single cache shard: " +
@@ -287,7 +382,8 @@ inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
   if (request_evict_charge > 0) {
     size_t evicted_charge = 0;
     size_t evicted_count = 0;
-    Evict(request_evict_charge, &evicted_charge, &evicted_count);
+    static_cast<Table*>(this)->Evict(request_evict_charge, &evicted_charge,
+                                     &evicted_count, state);
     occupancy_.fetch_sub(evicted_count, std::memory_order_release);
     if (LIKELY(evicted_charge > need_evict_charge)) {
       assert(evicted_count > 0);
@@ -316,8 +412,10 @@ inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
   return Status::OK();
 }

-inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
-    size_t total_charge, size_t capacity, bool need_evict_for_occupancy) {
+template <class Table>
+inline bool BaseClockTable::ChargeUsageMaybeEvictNonStrict(
+    size_t total_charge, size_t capacity, bool need_evict_for_occupancy,
+    typename Table::InsertState& state) {
   // For simplicity, we consider that either the cache can accept the insert
   // with no evictions, or we must evict enough to make (at least) enough
   // space. It could lead to unnecessary failures or excessive evictions in
@@ -354,7 +452,8 @@ inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
   size_t evicted_charge = 0;
   size_t evicted_count = 0;
   if (need_evict_charge > 0) {
-    Evict(need_evict_charge, &evicted_charge, &evicted_count);
+    static_cast<Table*>(this)->Evict(need_evict_charge, &evicted_charge,
+                                     &evicted_count, state);
     // Deal with potential occupancy deficit
     if (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0) {
       assert(evicted_charge == 0);
@@ -373,28 +472,17 @@ inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
   return true;
 }

-inline HyperClockTable::HandleImpl* HyperClockTable::StandaloneInsert(
-    const ClockHandleBasicData& proto) {
-  // Heap allocated separate from table
-  HandleImpl* h = new HandleImpl();
-  ClockHandleBasicData* h_alias = h;
-  *h_alias = proto;
-  h->SetStandalone();
-  // Single reference (standalone entries only created if returning a refed
-  // Handle back to user)
-  uint64_t meta = uint64_t{ClockHandle::kStateInvisible}
-                  << ClockHandle::kStateShift;
-  meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift;
-  h->meta.store(meta, std::memory_order_release);
-  // Keep track of how much of usage is standalone
-  standalone_usage_.fetch_add(proto.GetTotalCharge(),
-                              std::memory_order_relaxed);
-  return h;
-}
+template <class Table>
+Status BaseClockTable::Insert(const ClockHandleBasicData& proto,
+                              typename Table::HandleImpl** handle,
+                              Cache::Priority priority, size_t capacity,
+                              bool strict_capacity_limit) {
+  using HandleImpl = typename Table::HandleImpl;
+  Table& derived = static_cast<Table&>(*this);
+
+  typename Table::InsertState state;
+  derived.StartInsert(state);

-Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
-                               HandleImpl** handle, Cache::Priority priority,
-                               size_t capacity, bool strict_capacity_limit) {
   // Do we have the available occupancy? Optimistically assume we do
   // and deal with it if we don't.
   size_t old_occupancy = occupancy_.fetch_add(1, std::memory_order_acquire);
@@ -402,23 +490,24 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
     occupancy_.fetch_sub(1, std::memory_order_relaxed);
   };
   // Whether we over-committed and need an eviction to make up for it
-  bool need_evict_for_occupancy = old_occupancy >= occupancy_limit_;
+  bool need_evict_for_occupancy =
+      !derived.GrowIfNeeded(old_occupancy + 1, state);

   // Usage/capacity handling is somewhat different depending on
   // strict_capacity_limit, but mostly pessimistic.
   bool use_standalone_insert = false;
   const size_t total_charge = proto.GetTotalCharge();
   if (strict_capacity_limit) {
-    Status s = ChargeUsageMaybeEvictStrict(total_charge, capacity,
-                                           need_evict_for_occupancy);
+    Status s = ChargeUsageMaybeEvictStrict<Table>(
+        total_charge, capacity, need_evict_for_occupancy, state);
     if (!s.ok()) {
       revert_occupancy_fn();
       return s;
     }
   } else {
     // Case strict_capacity_limit == false
-    bool success = ChargeUsageMaybeEvictNonStrict(total_charge, capacity,
-                                                  need_evict_for_occupancy);
+    bool success = ChargeUsageMaybeEvictNonStrict<Table>
( + total_charge, capacity, need_evict_for_occupancy, state); if (!success) { revert_occupancy_fn(); if (handle == nullptr) { @@ -451,115 +540,17 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto, uint64_t initial_countdown = GetInitialCountdown(priority); assert(initial_countdown > 0); - size_t probe = 0; - HandleImpl* e = FindSlot( - proto.hashed_key, - [&](HandleImpl* h) { - // Optimistically transition the slot from "empty" to - // "under construction" (no effect on other states) - uint64_t old_meta = - h->meta.fetch_or(uint64_t{ClockHandle::kStateOccupiedBit} - << ClockHandle::kStateShift, - std::memory_order_acq_rel); - uint64_t old_state = old_meta >> ClockHandle::kStateShift; - - if (old_state == ClockHandle::kStateEmpty) { - // We've started inserting into an available slot, and taken - // ownership Save data fields - ClockHandleBasicData* h_alias = h; - *h_alias = proto; - - // Transition from "under construction" state to "visible" state - uint64_t new_meta = uint64_t{ClockHandle::kStateVisible} - << ClockHandle::kStateShift; - - // Maybe with an outstanding reference - new_meta |= initial_countdown << ClockHandle::kAcquireCounterShift; - new_meta |= (initial_countdown - (handle != nullptr)) - << ClockHandle::kReleaseCounterShift; + HandleImpl* e = + derived.DoInsert(proto, initial_countdown, handle != nullptr, state); -#ifndef NDEBUG - // Save the state transition, with assertion - old_meta = h->meta.exchange(new_meta, std::memory_order_release); - assert(old_meta >> ClockHandle::kStateShift == - ClockHandle::kStateConstruction); -#else - // Save the state transition - h->meta.store(new_meta, std::memory_order_release); -#endif - return true; - } else if (old_state != ClockHandle::kStateVisible) { - // Slot not usable / touchable now - return false; - } - // Existing, visible entry, which might be a match. - // But first, we need to acquire a ref to read it. In fact, number of - // refs for initial countdown, so that we boost the clock state if - // this is a match. - old_meta = h->meta.fetch_add( - ClockHandle::kAcquireIncrement * initial_countdown, - std::memory_order_acq_rel); - // Like Lookup - if ((old_meta >> ClockHandle::kStateShift) == - ClockHandle::kStateVisible) { - // Acquired a read reference - if (h->hashed_key == proto.hashed_key) { - // Match. Release in a way that boosts the clock state - old_meta = h->meta.fetch_add( - ClockHandle::kReleaseIncrement * initial_countdown, - std::memory_order_acq_rel); - // Correct for possible (but rare) overflow - CorrectNearOverflow(old_meta, h->meta); - // Insert standalone instead (only if return handle needed) - use_standalone_insert = true; - return true; - } else { - // Mismatch. Pretend we never took the reference - old_meta = h->meta.fetch_sub( - ClockHandle::kAcquireIncrement * initial_countdown, - std::memory_order_acq_rel); - } - } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) == - ClockHandle::kStateInvisible)) { - // Pretend we never took the reference - // WART: there's a tiny chance we release last ref to invisible - // entry here. If that happens, we let eviction take care of it. - old_meta = h->meta.fetch_sub( - ClockHandle::kAcquireIncrement * initial_countdown, - std::memory_order_acq_rel); - } else { - // For other states, incrementing the acquire counter has no effect - // so we don't need to undo it. - // Slot not usable / touchable now. 
-      }
-      (void)old_meta;
-      return false;
-    },
-    [&](HandleImpl* /*h*/) { return false; },
-    [&](HandleImpl* h) {
-      h->displacements.fetch_add(1, std::memory_order_relaxed);
-    },
-    probe);
-  if (e == nullptr) {
-    // Occupancy check and never abort FindSlot above should generally
-    // prevent this, except it's theoretically possible for other threads
-    // to evict and replace entries in the right order to hit every slot
-    // when it is populated. Assuming random hashing, the chance of that
-    // should be no higher than pow(kStrictLoadFactor, n) for n slots.
-    // That should be infeasible for roughly n >= 256, so if this assertion
-    // fails, that suggests something is going wrong.
-    assert(GetTableSize() < 256);
-    use_standalone_insert = true;
-  }
-  if (!use_standalone_insert) {
+  if (e) {
     // Successfully inserted
     if (handle) {
       *handle = e;
     }
     return Status::OK();
   }
-  // Roll back table insertion
-  Rollback(proto.hashed_key, e);
+  // Not inserted
   revert_occupancy_fn();
   // Maybe fall back on standalone insert
   if (handle == nullptr) {
@@ -568,12 +559,14 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
     proto.FreeData(allocator_);
     return Status::OK();
   }
+
+  use_standalone_insert = true;
 }

 // Run standalone insert
 assert(use_standalone_insert);
-  *handle = StandaloneInsert(proto);
+  *handle = StandaloneInsert<HandleImpl>(proto);

 // The OkOverwritten status is used to count "redundant" insertions into
 // block cache. This implementation doesn't strictly check for redundant
@@ -583,32 +576,146 @@
 return Status::OkOverwritten();
 }

-HyperClockTable::HandleImpl* HyperClockTable::CreateStandalone(
-    ClockHandleBasicData& proto, size_t capacity, bool strict_capacity_limit,
-    bool allow_uncharged) {
-  const size_t total_charge = proto.GetTotalCharge();
-  if (strict_capacity_limit) {
-    Status s = ChargeUsageMaybeEvictStrict(total_charge, capacity,
-                                           /*need_evict_for_occupancy=*/false);
-    if (!s.ok()) {
-      if (allow_uncharged) {
-        proto.total_charge = 0;
-      } else {
-        return nullptr;
-      }
-    }
-  } else {
-    // Case strict_capacity_limit == false
-    bool success =
-        ChargeUsageMaybeEvictNonStrict(total_charge, capacity,
-                                       /*need_evict_for_occupancy=*/false);
-    if (!success) {
-      // Force the issue
-      usage_.fetch_add(total_charge, std::memory_order_relaxed);
+void BaseClockTable::Ref(ClockHandle& h) {
+  // Increment acquire counter
+  uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement,
+                                       std::memory_order_acquire);
+
+  assert((old_meta >> ClockHandle::kStateShift) &
+         ClockHandle::kStateShareableBit);
+  // Must have already had a reference
+  assert(GetRefcount(old_meta) > 0);
+  (void)old_meta;
+}
+
+#ifndef NDEBUG
+void BaseClockTable::TEST_RefN(ClockHandle& h, size_t n) {
+  // Increment acquire counter
+  uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement,
+                                       std::memory_order_acquire);
+
+  assert((old_meta >> ClockHandle::kStateShift) &
+         ClockHandle::kStateShareableBit);
+  (void)old_meta;
+}
+
+void BaseClockTable::TEST_ReleaseNMinus1(ClockHandle* h, size_t n) {
+  assert(n > 0);
+
+  // Like n-1 Releases, but assumes one more will happen in the caller to take
+  // care of anything like erasing an unreferenced, invisible entry.
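// (Counter arithmetic assumed here, consistent with GetRefcount elsewhere in
// this file: a handle's reference count is the acquire counter minus the
// release counter packed into meta, so adding (n - 1) * kReleaseIncrement
// drops the refcount by n - 1 while leaving any zero-refcount cleanup to the
// caller's one remaining Release.)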
+  uint64_t old_meta = h->meta.fetch_add(
+      (n - 1) * ClockHandle::kReleaseIncrement, std::memory_order_acquire);
+  assert((old_meta >> ClockHandle::kStateShift) &
+         ClockHandle::kStateShareableBit);
+  (void)old_meta;
+}
+#endif
+
+HyperClockTable::HyperClockTable(
+    size_t capacity, bool /*strict_capacity_limit*/,
+    CacheMetadataChargePolicy metadata_charge_policy,
+    MemoryAllocator* allocator,
+    const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed,
+    const Opts& opts)
+    : BaseClockTable(metadata_charge_policy, allocator, eviction_callback,
+                     hash_seed),
+      length_bits_(CalcHashBits(capacity, opts.estimated_value_size,
+                                metadata_charge_policy)),
+      length_bits_mask_((size_t{1} << length_bits_) - 1),
+      occupancy_limit_(static_cast<size_t>((uint64_t{1} << length_bits_) *
+                                           kStrictLoadFactor)),
+      array_(new HandleImpl[size_t{1} << length_bits_]) {
+  if (metadata_charge_policy ==
+      CacheMetadataChargePolicy::kFullChargeCacheMetadata) {
+    usage_ += size_t{GetTableSize()} * sizeof(HandleImpl);
+  }
+
+  static_assert(sizeof(HandleImpl) == 64U,
+                "Expecting size / alignment with common cache line size");
+}
+
+HyperClockTable::~HyperClockTable() {
+  // Assumes there are no references or active operations on any slot/element
+  // in the table.
+  for (size_t i = 0; i < GetTableSize(); i++) {
+    HandleImpl& h = array_[i];
+    switch (h.meta >> ClockHandle::kStateShift) {
+      case ClockHandle::kStateEmpty:
+        // noop
+        break;
+      case ClockHandle::kStateInvisible:  // rare but possible
+      case ClockHandle::kStateVisible:
+        assert(GetRefcount(h.meta) == 0);
+        h.FreeData(allocator_);
+#ifndef NDEBUG
+        Rollback(h.hashed_key, &h);
+        ReclaimEntryUsage(h.GetTotalCharge());
+#endif
+        break;
+      // otherwise
+      default:
+        assert(false);
+        break;
     }
   }
-  return StandaloneInsert(proto);

+#ifndef NDEBUG
+  for (size_t i = 0; i < GetTableSize(); i++) {
+    assert(array_[i].displacements.load() == 0);
+  }
+#endif
+
+  assert(usage_.load() == 0 ||
+         usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl));
+  assert(occupancy_ == 0);
+}
+
+void HyperClockTable::StartInsert(InsertState&) {}
+
+bool HyperClockTable::GrowIfNeeded(size_t new_occupancy, InsertState&) {
+  return new_occupancy <= occupancy_limit_;
+}
+
+HyperClockTable::HandleImpl* HyperClockTable::DoInsert(
+    const ClockHandleBasicData& proto, uint64_t initial_countdown,
+    bool keep_ref, InsertState&) {
+  size_t probe = 0;
+  bool already_matches = false;
+  HandleImpl* e = FindSlot(
+      proto.hashed_key,
+      [&](HandleImpl* h) {
+        // FIXME: simplify and handle in abort_fn below?
+        bool inserted =
+            TryInsert(proto, *h, initial_countdown, keep_ref, &already_matches);
+        return inserted || already_matches;
+      },
+      [&](HandleImpl* /*h*/) { return false; },
+      [&](HandleImpl* h) {
+        h->displacements.fetch_add(1, std::memory_order_relaxed);
+      },
+      probe);
+  if (e == nullptr) {
+    // Occupancy check and never abort FindSlot above should generally
+    // prevent this, except it's theoretically possible for other threads
+    // to evict and replace entries in the right order to hit every slot
+    // when it is populated. Assuming random hashing, the chance of that
+    // should be no higher than pow(kStrictLoadFactor, n) for n slots.
+    // That should be infeasible for roughly n >= 256, so if this assertion
+    // fails, that suggests something is going wrong.
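// (For scale, assuming kStrictLoadFactor is 0.7 as defined in clock_cache.h:
// pow(0.7, 256) = exp(256 * ln 0.7) ~= exp(-91.3) ~= 2e-40, so hitting a
// populated slot at every probe is effectively impossible under random
// hashing.)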
+ assert(GetTableSize() < 256); + // WART/FIXME: need to roll back every slot + already_matches = true; + } + if (!already_matches) { + // Successfully inserted + assert(e); + return e; + } + // Roll back displacements from failed table insertion + Rollback(proto.hashed_key, e); + // Insertion skipped + return nullptr; } HyperClockTable::HandleImpl* HyperClockTable::Lookup( @@ -753,40 +860,17 @@ bool HyperClockTable::Release(HandleImpl* h, bool useful, } } -void HyperClockTable::Ref(HandleImpl& h) { - // Increment acquire counter - uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement, - std::memory_order_acquire); - - assert((old_meta >> ClockHandle::kStateShift) & - ClockHandle::kStateShareableBit); - // Must have already had a reference - assert(GetRefcount(old_meta) > 0); - (void)old_meta; -} - -void HyperClockTable::TEST_RefN(HandleImpl& h, size_t n) { - // Increment acquire counter - uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement, - std::memory_order_acquire); - - assert((old_meta >> ClockHandle::kStateShift) & - ClockHandle::kStateShareableBit); - (void)old_meta; -} - +#ifndef NDEBUG void HyperClockTable::TEST_ReleaseN(HandleImpl* h, size_t n) { if (n > 0) { - // Split into n - 1 and 1 steps. - uint64_t old_meta = h->meta.fetch_add( - (n - 1) * ClockHandle::kReleaseIncrement, std::memory_order_acquire); - assert((old_meta >> ClockHandle::kStateShift) & - ClockHandle::kStateShareableBit); - (void)old_meta; + // Do n-1 simple releases first + TEST_ReleaseNMinus1(h, n); + // Then the last release might be more involved Release(h, /*useful*/ true, /*erase_if_last_ref*/ false); } } +#endif void HyperClockTable::Erase(const UniqueId64x2& hashed_key) { size_t probe = 0; @@ -978,7 +1062,8 @@ inline void HyperClockTable::ReclaimEntryUsage(size_t total_charge) { } inline void HyperClockTable::Evict(size_t requested_charge, - size_t* freed_charge, size_t* freed_count) { + size_t* freed_charge, size_t* freed_count, + InsertState&) { // precondition assert(requested_charge > 0); @@ -1146,18 +1231,15 @@ Status ClockCacheShard
<Table>::Insert(const Slice& key,
   proto.value = value;
   proto.helper = helper;
   proto.total_charge = charge;
-  return table_.Insert(proto, handle, priority,
-                       capacity_.load(std::memory_order_relaxed),
-                       strict_capacity_limit_.load(std::memory_order_relaxed));
+  return table_.template Insert<Table>(
+      proto, handle, priority, capacity_.load(std::memory_order_relaxed),
+      strict_capacity_limit_.load(std::memory_order_relaxed));
 }

 template <class Table>
-typename ClockCacheShard<Table>::HandleImpl*
-ClockCacheShard<Table>::CreateStandalone(const Slice& key,
-                                         const UniqueId64x2& hashed_key,
-                                         Cache::ObjectPtr obj,
-                                         const Cache::CacheItemHelper* helper,
-                                         size_t charge, bool allow_uncharged) {
+typename Table::HandleImpl* ClockCacheShard<Table>::CreateStandalone(
+    const Slice& key, const UniqueId64x2& hashed_key, Cache::ObjectPtr obj,
+    const Cache::CacheItemHelper* helper, size_t charge, bool allow_uncharged) {
   if (UNLIKELY(key.size() != kCacheKeySize)) {
     return nullptr;
   }
@@ -1166,7 +1248,7 @@ ClockCacheShard<Table>::CreateStandalone(const Slice& key,
   proto.value = obj;
   proto.helper = helper;
   proto.total_charge = charge;
-  return table_.CreateStandalone(
+  return table_.template CreateStandalone<Table>(
       proto, capacity_.load(std::memory_order_relaxed),
       strict_capacity_limit_.load(std::memory_order_relaxed), allow_uncharged);
 }
@@ -1198,6 +1280,7 @@ bool ClockCacheShard<Table>::Release(HandleImpl* handle, bool useful,
   return table_.Release(handle, useful, erase_if_last_ref);
 }

+#ifndef NDEBUG
 template <class Table>
 void ClockCacheShard<Table>::TEST_RefN(HandleImpl* h, size_t n) {
   table_.TEST_RefN(*h, n);
 }
@@ -1207,6 +1290,7 @@ template <class Table>
 void ClockCacheShard<Table>::TEST_ReleaseN(HandleImpl* h, size_t n) {
   table_.TEST_ReleaseN(h, n);
 }
+#endif

 template <class Table>
 bool ClockCacheShard<Table>
::Release(HandleImpl* handle,

diff --git a/cache/clock_cache.h b/cache/clock_cache.h
index b2578b467..fff3ef43d 100644
--- a/cache/clock_cache.h
+++ b/cache/clock_cache.h
@@ -374,11 +374,123 @@ struct ClockHandle : public ClockHandleBasicData {
   // See above
   std::atomic<uint64_t> meta{};

-  // Anticipating use for SecondaryCache support
-  void* reserved_for_future_use = nullptr;
+  // Whether this is a "detached" handle that is independently allocated
+  // with `new` (so must be deleted with `delete`).
+  // TODO: ideally this would be packed into some other data field, such
+  // as upper bits of total_charge, but that incurs a measurable performance
+  // regression.
+  bool standalone = false;
+
+  inline bool IsStandalone() const { return standalone; }
+
+  inline void SetStandalone() { standalone = true; }
 };  // struct ClockHandle

-class HyperClockTable {
+class BaseClockTable {
+ public:
+  BaseClockTable(CacheMetadataChargePolicy metadata_charge_policy,
+                 MemoryAllocator* allocator,
+                 const Cache::EvictionCallback* eviction_callback,
+                 const uint32_t* hash_seed)
+      : metadata_charge_policy_(metadata_charge_policy),
+        allocator_(allocator),
+        eviction_callback_(*eviction_callback),
+        hash_seed_(*hash_seed) {}
+
+  // Creates a "standalone" handle for returning from an Insert operation that
+  // cannot be completed by actually inserting into the table.
+  // Updates `standalone_usage_` but not `usage_` nor `occupancy_`.
+  template <class HandleImpl>
+  HandleImpl* StandaloneInsert(const ClockHandleBasicData& proto);
+
+  template <class Table>
+  typename Table::HandleImpl* CreateStandalone(ClockHandleBasicData& proto,
+                                               size_t capacity,
+                                               bool strict_capacity_limit,
+                                               bool allow_uncharged);
+
+  // Helper for updating `usage_` for new entry with given `total_charge`
+  // and evicting if needed under strict_capacity_limit=true rules. This
+  // means the operation might fail with Status::MemoryLimit. If
+  // `need_evict_for_occupancy`, then eviction of at least one entry is
+  // required, and the operation should fail if not possible.
+  // NOTE: Otherwise, occupancy_ is not managed in this function
+  template <class Table>
+  Status ChargeUsageMaybeEvictStrict(size_t total_charge, size_t capacity,
+                                     bool need_evict_for_occupancy,
+                                     typename Table::InsertState& state);
+
+  // Helper for updating `usage_` for new entry with given `total_charge`
+  // and evicting if needed under strict_capacity_limit=false rules. This
+  // means that updating `usage_` always succeeds even if forced to exceed
+  // capacity. If `need_evict_for_occupancy`, then eviction of at least one
+  // entry is required, and the operation should return false if such eviction
+  // is not possible. `usage_` is not updated in that case. Otherwise, returns
+  // true, indicating success.
+  // NOTE: occupancy_ is not managed in this function
+  template <class Table>
+  bool ChargeUsageMaybeEvictNonStrict(size_t total_charge, size_t capacity,
+                                      bool need_evict_for_occupancy,
+                                      typename Table::InsertState& state);
+
+  template <class Table>
+  Status Insert(const ClockHandleBasicData& proto,
+                typename Table::HandleImpl** handle, Cache::Priority priority,
+                size_t capacity, bool strict_capacity_limit);
+
+  void Ref(ClockHandle& handle);
+
+  size_t GetOccupancy() const {
+    return occupancy_.load(std::memory_order_relaxed);
+  }
+
+  size_t GetUsage() const { return usage_.load(std::memory_order_relaxed); }
+
+  size_t GetStandaloneUsage() const {
+    return standalone_usage_.load(std::memory_order_relaxed);
+  }
+
+  uint32_t GetHashSeed() const { return hash_seed_; }
+
+#ifndef NDEBUG
+  // Acquire N references
+  void TEST_RefN(ClockHandle& handle, size_t n);
+  // Helper for TEST_ReleaseN
+  void TEST_ReleaseNMinus1(ClockHandle* handle, size_t n);
+#endif
+
+ protected:
+  // We partition the following members into different cache lines
+  // to avoid false sharing among Lookup, Release, Erase and Insert
+  // operations in ClockCacheShard.
+
+  // Clock algorithm sweep pointer.
+  std::atomic<uint64_t> clock_pointer_{};
+
+  ALIGN_AS(CACHE_LINE_SIZE)
+  // Number of elements in the table.
+  std::atomic<size_t> occupancy_{};
+
+  // Memory usage by entries tracked by the cache (including standalone)
+  std::atomic<size_t> usage_{};
+
+  // Part of usage by standalone entries (not in table)
+  std::atomic<size_t> standalone_usage_{};
+
+  ALIGN_AS(CACHE_LINE_SIZE)
+  const CacheMetadataChargePolicy metadata_charge_policy_;
+
+  // From Cache, for deleter
+  MemoryAllocator* const allocator_;
+
+  // A reference to Cache::eviction_callback_
+  const Cache::EvictionCallback& eviction_callback_;
+
+  // A reference to ShardedCacheBase::hash_seed_
+  const uint32_t& hash_seed_;
+};
+
+class HyperClockTable : public BaseClockTable {
  public:
   // Target size to be exactly a common cache line size (see static_assert in
   // clock_cache.cc)
@@ -387,16 +499,6 @@ class HyperClockTable {
     // up in this slot or a higher one.
     std::atomic<uint32_t> displacements{};

-    // Whether this is a "deteched" handle that is independently allocated
-    // with `new` (so must be deleted with `delete`).
-    // TODO: ideally this would be packed into some other data field, such
-    // as upper bits of total_charge, but that incurs a measurable performance
-    // regression.
-    bool standalone = false;
-
-    inline bool IsStandalone() const { return standalone; }
-
-    inline void SetStandalone() { standalone = true; }
   };  // struct HandleImpl

   struct Opts {
@@ -410,20 +512,28 @@ class HyperClockTable {
                   const uint32_t* hash_seed, const Opts& opts);
   ~HyperClockTable();

-  Status Insert(const ClockHandleBasicData& proto, HandleImpl** handle,
-                Cache::Priority priority, size_t capacity,
-                bool strict_capacity_limit);
+  // For BaseClockTable::Insert
+  struct InsertState {};
+
+  void StartInsert(InsertState& state);

-  HandleImpl* CreateStandalone(ClockHandleBasicData& proto, size_t capacity,
-                               bool strict_capacity_limit,
-                               bool allow_uncharged);
+  // Returns true iff there is room for the proposed number of entries.
+  bool GrowIfNeeded(size_t new_occupancy, InsertState& state);
+
+  HandleImpl* DoInsert(const ClockHandleBasicData& proto,
+                       uint64_t initial_countdown, bool take_ref,
+                       InsertState& state);
+
+  // Runs the clock eviction algorithm trying to reclaim at least
+  // requested_charge. Returns how much is evicted, which could be less
+  // if it appears impossible to evict the requested amount without blocking.
+  void Evict(size_t requested_charge, size_t* freed_charge, size_t* freed_count,
+             InsertState& state);

   HandleImpl* Lookup(const UniqueId64x2& hashed_key);

   bool Release(HandleImpl* handle, bool useful, bool erase_if_last_ref);

-  void Ref(HandleImpl& handle);
-
   void Erase(const UniqueId64x2& hashed_key);

   void ConstApplyToEntriesRange(std::function<void(const HandleImpl&)> func,
                                 size_t index_begin, size_t index_end,
                                 bool apply_if_will_be_deleted) const;
@@ -436,23 +546,11 @@ class HyperClockTable {
   int GetLengthBits() const { return length_bits_; }

-  size_t GetOccupancy() const {
-    return occupancy_.load(std::memory_order_relaxed);
-  }
-
   size_t GetOccupancyLimit() const { return occupancy_limit_; }

-  size_t GetUsage() const { return usage_.load(std::memory_order_relaxed); }
-
-  size_t GetStandaloneUsage() const {
-    return standalone_usage_.load(std::memory_order_relaxed);
-  }
-
-  uint32_t GetHashSeed() const { return hash_seed_; }
-
-  // Acquire/release N references
-  void TEST_RefN(HandleImpl& handle, size_t n);
-  void TEST_ReleaseN(HandleImpl* handle, size_t n);
+#ifndef NDEBUG
+  void TEST_ReleaseN(HandleImpl* h, size_t n);
+#endif

  private:  // functions
   // Returns x mod 2^{length_bits_}.
   inline size_t ModTableSize(uint64_t x) {
     return static_cast<size_t>(x) & length_bits_mask_;
   }

-  // Runs the clock eviction algorithm trying to reclaim at least
-  // requested_charge. Returns how much is evicted, which could be less
-  // if it appears impossible to evict the requested amount without blocking.
-  inline void Evict(size_t requested_charge, size_t* freed_charge,
-                    size_t* freed_count);
-
   // Returns the first slot in the probe sequence, starting from the given
   // probe number, with a handle e such that match(e) is true. At every
   // step, the function first tests whether match(e) holds. If this is false,
@@ -494,33 +586,6 @@ class HyperClockTable {
   // before releasing it so that it can be provided to this function.
   inline void ReclaimEntryUsage(size_t total_charge);

-  // Helper for updating `usage_` for new entry with given `total_charge`
-  // and evicting if needed under strict_capacity_limit=true rules. This
-  // means the operation might fail with Status::MemoryLimit. If
-  // `need_evict_for_occupancy`, then eviction of at least one entry is
-  // required, and the operation should fail if not possible.
-  // NOTE: Otherwise, occupancy_ is not managed in this function
-  inline Status ChargeUsageMaybeEvictStrict(size_t total_charge,
-                                            size_t capacity,
-                                            bool need_evict_for_occupancy);
-
-  // Helper for updating `usage_` for new entry with given `total_charge`
-  // and evicting if needed under strict_capacity_limit=false rules. This
-  // means that updating `usage_` always succeeds even if forced to exceed
-  // capacity. If `need_evict_for_occupancy`, then eviction of at least one
-  // entry is required, and the operation should return false if such eviction
-  // is not possible. `usage_` is not updated in that case. Otherwise, returns
-  // true, indicating success.
-  // NOTE: occupancy_ is not managed in this function
-  inline bool ChargeUsageMaybeEvictNonStrict(size_t total_charge,
-                                             size_t capacity,
-                                             bool need_evict_for_occupancy);
-
-  // Creates a "standalone" handle for returning from an Insert operation that
-  // cannot be completed by actually inserting into the table.
-  // Updates `standalone_usage_` but not `usage_` nor `occupancy_`.
-  inline HandleImpl* StandaloneInsert(const ClockHandleBasicData& proto);
-
   MemoryAllocator* GetAllocator() const { return allocator_; }

   // Returns the number of bits used to hash an element in the hash
   // table.
@@ -541,33 +606,6 @@ class HyperClockTable {
   // Array of slots comprising the hash table.
   const std::unique_ptr<HandleImpl[]> array_;
-
-  // From Cache, for deleter
-  MemoryAllocator* const allocator_;
-
-  // A reference to Cache::eviction_callback_
-  const Cache::EvictionCallback& eviction_callback_;
-
-  // A reference to ShardedCacheBase::hash_seed_
-  const uint32_t& hash_seed_;
-
-  // We partition the following members into different cache lines
-  // to avoid false sharing among Lookup, Release, Erase and Insert
-  // operations in ClockCacheShard.
-
-  ALIGN_AS(CACHE_LINE_SIZE)
-  // Clock algorithm sweep pointer.
-  std::atomic<uint64_t> clock_pointer_{};
-
-  ALIGN_AS(CACHE_LINE_SIZE)
-  // Number of elements in the table.
-  std::atomic<size_t> occupancy_{};
-
-  // Memory usage by entries tracked by the cache (including standalone)
-  std::atomic<size_t> usage_{};
-
-  // Part of usage by standalone entries (not in table)
-  std::atomic<size_t> standalone_usage_{};
 };  // class HyperClockTable

 // A single shard of sharded cache.
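A note on the dispatch pattern introduced by this patch: BaseClockTable is a plain (non-template) base class whose member function templates cast *this to the concrete table type supplied by the caller, so the shared insert/charge/evict logic is reused across table implementations without virtual dispatch on the hot path. Below is a minimal, self-contained sketch of the same idiom under simplified assumptions; BaseTable, FixedTable, and the toy Insert logic are hypothetical stand-ins, not the RocksDB API.

#include <cstddef>
#include <iostream>

// Non-template base: shared logic lives in member function templates that
// cast *this down to the concrete table type named by the caller.
class BaseTable {
 public:
  template <class Table>
  void Insert(std::size_t charge) {
    Table& derived = static_cast<Table&>(*this);
    typename Table::InsertState state;  // table-specific scratch state
    derived.StartInsert(state);
    if (!derived.GrowIfNeeded(/*new_occupancy=*/1, state)) {
      // No room for another entry: evict before charging usage.
      derived.Evict(charge, state);
    }
    // ... shared usage/occupancy accounting would continue here ...
  }
};

class FixedTable : public BaseTable {
 public:
  struct InsertState {};                         // nothing to carry (yet)
  void StartInsert(InsertState&) {}              // no-op for a fixed table
  bool GrowIfNeeded(std::size_t, InsertState&) { return false; }  // can't grow
  void Evict(std::size_t charge, InsertState&) {
    std::cout << "evict at least " << charge << " units\n";
  }
};

int main() {
  FixedTable table;
  // The caller names the concrete type, as ClockCacheShard<Table> does with
  // table_.template Insert<Table>(...).
  table.Insert<FixedTable>(64);
  return 0;
}

This only works because the shard always knows the concrete Table type at compile time; the base class is never used polymorphically, which is what lets these member templates compile down to direct calls.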