diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index 3f55acf6b..ad2c5f4af 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -37,133 +37,191 @@ ClockHandleTable::ClockHandleTable(int hash_bits) } ClockHandleTable::~ClockHandleTable() { - ApplyToEntriesRange([](ClockHandle* h) { h->FreeData(); }, 0, GetTableSize()); + ApplyToEntriesRange([](ClockHandle* h) { h->FreeData(); }, 0, GetTableSize(), + true); } -ClockHandle* ClockHandleTable::Lookup(const Slice& key) { - int probe = 0; - int slot = FindVisibleElement(key, probe, 0); +ClockHandle* ClockHandleTable::Lookup(const Slice& key, uint32_t hash) { + uint32_t probe = 0; + int slot = FindElement(key, hash, probe); return (slot == -1) ? nullptr : &array_[slot]; } ClockHandle* ClockHandleTable::Insert(ClockHandle* h, ClockHandle** old) { - int probe = 0; - int slot = - FindVisibleElementOrAvailableSlot(h->key(), probe, 1 /*displacement*/); + uint32_t probe = 0; + int slot = FindElementOrAvailableSlot(h->key(), h->hash, probe); *old = nullptr; if (slot == -1) { + // The key is not already present, and there's no available slot to place + // the new copy. return nullptr; } - if (array_[slot].IsEmpty() || array_[slot].IsTombstone()) { - bool empty = array_[slot].IsEmpty(); - Assign(slot, h); + if (!array_[slot].IsElement()) { + // The slot is empty or is a tombstone. ClockHandle* new_entry = &array_[slot]; - if (empty) { - // This used to be an empty slot. + new_entry->InternalToExclusiveRef(); + Assign(new_entry, h); + if (new_entry->displacements == 0) { + // The slot was empty. return new_entry; } // It used to be a tombstone, so there may already be a copy of the // key in the table. - slot = FindVisibleElement(h->key(), probe, 0 /*displacement*/); + slot = FindElement(h->key(), h->hash, probe); if (slot == -1) { - // No existing copy of the key. + // Nope, no existing copy of the key. return new_entry; } - *old = &array_[slot]; + ClockHandle* old_entry = &array_[slot]; + old_entry->ReleaseInternalRef(); + *old = old_entry; return new_entry; } else { // There is an existing copy of the key. - *old = &array_[slot]; + ClockHandle* old_entry = &array_[slot]; + old_entry->ReleaseInternalRef(); + *old = old_entry; // Find an available slot for the new element. - array_[slot].displacements++; - slot = FindAvailableSlot(h->key(), probe, 1 /*displacement*/); + old_entry->displacements++; + slot = FindAvailableSlot(h->key(), probe); if (slot == -1) { - // No available slots. Roll back displacements. - probe = 0; - slot = FindVisibleElement(h->key(), probe, -1); - array_[slot].displacements--; - FindAvailableSlot(h->key(), probe, -1); + // No available slots. return nullptr; } - Assign(slot, h); - return &array_[slot]; + ClockHandle* new_entry = &array_[slot]; + new_entry->InternalToExclusiveRef(); + Assign(new_entry, h); + return new_entry; } } void ClockHandleTable::Remove(ClockHandle* h) { - assert(!h->IsInClockList()); // Already off the clock list. - int probe = 0; + assert(!h->IsInClock()); // Already off clock. 
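
The Insert/Lookup/Remove logic above leans on per-slot displacement counters to tell tombstones apart from truly empty slots. The toy table below is a single-threaded sketch of the same bookkeeping under simplifying assumptions: int keys, plain linear probing instead of the double hashing used here, and no handling of duplicate keys (which the real Insert deals with by re-probing after landing on a tombstone).

#include <cstdint>
#include <vector>

// Toy slot: the real ClockHandle packs this state into flags and atomics;
// plain fields are enough for the sketch.
struct ToySlot {
  bool is_element = false;
  uint32_t displacements = 0;
  int key = 0;
};

class ToyTable {
 public:
  explicit ToyTable(uint32_t size) : slots_(size) {}

  // Returns false if no free slot was found. A real implementation would
  // roll the displacement increments back in that case.
  bool Insert(int key) {
    uint32_t i = Hash(key);
    for (uint32_t n = 0; n < slots_.size(); n++, i = Next(i)) {
      if (!slots_[i].is_element) {  // Empty slot or tombstone: take it.
        slots_[i].is_element = true;
        slots_[i].key = key;
        return true;
      }
      // key will land somewhere past this slot, so record the displacement.
      slots_[i].displacements++;
    }
    return false;
  }

  bool Lookup(int key) const {
    uint32_t i = Hash(key);
    for (uint32_t n = 0; n < slots_.size(); n++, i = Next(i)) {
      if (slots_[i].is_element && slots_[i].key == key) {
        return true;
      }
      if (!slots_[i].is_element && slots_[i].displacements == 0) {
        return false;  // Truly empty slot: the probe sequence ends here.
      }
      // Tombstone or unrelated element: keep probing.
    }
    return false;
  }

  // Precondition: key is present in the table.
  void Erase(int key) {
    uint32_t i = Hash(key);
    while (!(slots_[i].is_element && slots_[i].key == key)) {
      slots_[i].displacements--;  // Undo the increment made by Insert.
      i = Next(i);
    }
    // Leaving displacements untouched makes this slot a tombstone when
    // displacements > 0, and a plain empty slot otherwise.
    slots_[i].is_element = false;
  }

 private:
  uint32_t Hash(int key) const {
    return static_cast<uint32_t>(key) % static_cast<uint32_t>(slots_.size());
  }
  uint32_t Next(uint32_t i) const {
    return (i + 1) % static_cast<uint32_t>(slots_.size());
  }

  std::vector<ToySlot> slots_;
};
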
+ uint32_t probe = 0; FindSlot( - h->key(), [&h](ClockHandle* e) { return e == h; }, probe, - -1 /*displacement*/); - h->SetIsVisible(false); + h->key(), [&](ClockHandle* e) { return e == h; }, + [&](ClockHandle* /*e*/) { return false; }, + [&](ClockHandle* e) { e->displacements--; }, probe); + h->SetWillBeDeleted(false); h->SetIsElement(false); occupancy_--; } -void ClockHandleTable::Assign(int slot, ClockHandle* h) { - ClockHandle* dst = &array_[slot]; - uint32_t disp = dst->displacements; - *dst = *h; - dst->displacements = disp; - dst->SetIsVisible(true); +void ClockHandleTable::Assign(ClockHandle* dst, ClockHandle* src) { + // DON'T touch displacements and refs. + dst->value = src->value; + dst->deleter = src->deleter; + dst->hash = src->hash; + dst->total_charge = src->total_charge; + dst->key_data = src->key_data; + dst->flags.store(0); dst->SetIsElement(true); dst->SetClockPriority(ClockHandle::ClockPriority::NONE); + dst->SetCachePriority(src->GetCachePriority()); occupancy_++; } -void ClockHandleTable::Exclude(ClockHandle* h) { h->SetIsVisible(false); } - -int ClockHandleTable::FindVisibleElement(const Slice& key, int& probe, - int displacement) { - return FindSlot( - key, [&](ClockHandle* h) { return h->Matches(key) && h->IsVisible(); }, - probe, displacement); -} - -int ClockHandleTable::FindAvailableSlot(const Slice& key, int& probe, - int displacement) { +int ClockHandleTable::FindElement(const Slice& key, uint32_t hash, + uint32_t& probe) { return FindSlot( - key, [](ClockHandle* h) { return h->IsEmpty() || h->IsTombstone(); }, - probe, displacement); + key, + [&](ClockHandle* h) { + if (h->TryInternalRef()) { + if (h->Matches(key, hash)) { + return true; + } + h->ReleaseInternalRef(); + } + return false; + }, + [&](ClockHandle* h) { return h->displacements == 0; }, + [&](ClockHandle* /*h*/) {}, probe); } -int ClockHandleTable::FindVisibleElementOrAvailableSlot(const Slice& key, - int& probe, - int displacement) { - return FindSlot( +int ClockHandleTable::FindAvailableSlot(const Slice& key, uint32_t& probe) { + int slot = FindSlot( key, [&](ClockHandle* h) { - return h->IsEmpty() || h->IsTombstone() || - (h->Matches(key) && h->IsVisible()); + if (h->TryInternalRef()) { + if (!h->IsElement()) { + return true; + } + h->ReleaseInternalRef(); + } + return false; }, - probe, displacement); + [&](ClockHandle* /*h*/) { return false; }, + [&](ClockHandle* h) { h->displacements++; }, probe); + if (slot == -1) { + Rollback(key, probe); + } + return slot; } -inline int ClockHandleTable::FindSlot(const Slice& key, - std::function cond, - int& probe, int displacement) { +int ClockHandleTable::FindElementOrAvailableSlot(const Slice& key, + uint32_t hash, + uint32_t& probe) { + int slot = FindSlot( + key, + [&](ClockHandle* h) { + if (h->TryInternalRef()) { + if (!h->IsElement() || h->Matches(key, hash)) { + return true; + } + h->ReleaseInternalRef(); + } + return false; + }, + [&](ClockHandle* /*h*/) { return false; }, + [&](ClockHandle* h) { h->displacements++; }, probe); + if (slot == -1) { + Rollback(key, probe); + } + return slot; +} + +int ClockHandleTable::FindSlot(const Slice& key, + std::function match, + std::function abort, + std::function update, + uint32_t& probe) { + // We use double-hashing probing. Every probe in the sequence is a + // pseudorandom integer, computed as a linear function of two random hashes, + // which we call base and increment. Specifically, the i-th probe is base + i + // * increment modulo the table size. 
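
As a quick aside, the "full cycle" property this probing scheme relies on (an odd increment is coprime with a power-of-two table size, so the sequence visits every slot exactly once before repeating) can be checked with a tiny standalone program; the table size, base and increment below are arbitrary.

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const uint32_t size = 1u << 4;  // Any power of two.
  const uint32_t base = 5;        // Any starting slot.
  const uint32_t inc = 7;         // Any odd increment; odd => gcd(inc, size) == 1.
  std::vector<bool> visited(size, false);
  uint32_t current = base % size;
  for (uint32_t i = 0; i < size; i++) {
    assert(!visited[current]);  // No slot is visited twice within one cycle.
    visited[current] = true;
    current = (current + inc) % size;
  }
  for (bool v : visited) assert(v);  // Every slot was visited.
  return 0;
}
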
uint32_t base = ModTableSize(Hash(key.data(), key.size(), kProbingSeed1)); + // We use an odd increment, which is relatively prime with the power-of-two + // table size. This implies that we cycle back to the first probe only + // after probing every slot exactly once. uint32_t increment = ModTableSize((Hash(key.data(), key.size(), kProbingSeed2) << 1) | 1); uint32_t current = ModTableSize(base + probe * increment); while (true) { ClockHandle* h = &array_[current]; - probe++; - if (current == base && probe > 1) { + if (current == base && probe > 0) { // We looped back. return -1; } - if (cond(h)) { + if (match(h)) { + probe++; return current; } - if (h->IsEmpty()) { - // We check emptyness after the condition, because - // the condition may be emptyness. + if (abort(h)) { return -1; } - h->displacements += displacement; + probe++; + update(h); + current = ModTableSize(current + increment); + } +} + +void ClockHandleTable::Rollback(const Slice& key, uint32_t probe) { + uint32_t current = ModTableSize(Hash(key.data(), key.size(), kProbingSeed1)); + uint32_t increment = + ModTableSize((Hash(key.data(), key.size(), kProbingSeed2) << 1) | 1); + for (uint32_t i = 0; i < probe; i++) { + array_[current].displacements--; current = ModTableSize(current + increment); } } @@ -176,8 +234,7 @@ ClockCacheShard::ClockCacheShard( clock_pointer_(0), table_( CalcHashBits(capacity, estimated_value_size, metadata_charge_policy)), - usage_(0), - clock_usage_(0) { + usage_(0) { set_metadata_charge_policy(metadata_charge_policy); } @@ -185,22 +242,16 @@ void ClockCacheShard::EraseUnRefEntries() { autovector last_reference_list; { DMutexLock l(mutex_); - uint32_t slot = 0; - do { - ClockHandle* old = &(table_.array_[slot]); - if (!old->IsInClockList()) { - continue; - } - ClockRemove(old); - table_.Remove(old); - assert(usage_ >= old->total_charge); - usage_ -= old->total_charge; - last_reference_list.push_back(*old); - slot = table_.ModTableSize(slot + 1); - } while (slot != 0); + table_.ApplyToEntriesRange( + [this, &last_reference_list](ClockHandle* h) { + // Externally unreferenced element. + last_reference_list.push_back(*h); + Evict(h); + }, + 0, table_.GetTableSize(), true); } - // Free the entries here outside of mutex for performance reasons. + // Free the entry outside of the mutex for performance reasons. 
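
A note on the constructor above: CalcHashBits, which sizes the table from the capacity and the estimated value size, is not part of this diff. The sketch below is only a guess at the general shape of such a computation, using the load-factor reasoning from clock_cache.h; the real function also takes the metadata charge policy into account and may round differently.

#include <cstddef>
#include <cstdint>

// Assumes estimated_value_size > 0 and a load_factor such as kLoadFactor.
int SketchCalcHashBits(size_t capacity, size_t estimated_value_size,
                       double load_factor) {
  // Expected number of entries if every entry carries roughly
  // estimated_value_size bytes of charge, divided by the target load factor.
  double num_entries =
      static_cast<double>(capacity) / estimated_value_size / load_factor;
  int hash_bits = 0;
  while (static_cast<double>(size_t{1} << hash_bits) < num_entries) {
    hash_bits++;  // Round the table size up to the next power of two.
  }
  return hash_bits;
}
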
for (auto& h : last_reference_list) { h.FreeData(); } @@ -238,45 +289,60 @@ void ClockCacheShard::ApplyToSomeEntries( callback(h->key(), h->value, h->GetCharge(metadata_charge_policy), h->deleter); }, - index_begin, index_end); + index_begin, index_end, false); } -void ClockCacheShard::ClockRemove(ClockHandle* h) { - assert(h->IsInClockList()); +void ClockCacheShard::ClockOff(ClockHandle* h) { h->SetClockPriority(ClockHandle::ClockPriority::NONE); - assert(clock_usage_ >= h->total_charge); - clock_usage_ -= h->total_charge; } -void ClockCacheShard::ClockInsert(ClockHandle* h) { - assert(!h->IsInClockList()); +void ClockCacheShard::ClockOn(ClockHandle* h) { + assert(!h->IsInClock()); bool is_high_priority = h->HasHit() || h->GetCachePriority() == Cache::Priority::HIGH; h->SetClockPriority(static_cast( is_high_priority * ClockHandle::ClockPriority::HIGH + (1 - is_high_priority) * ClockHandle::ClockPriority::MEDIUM)); - clock_usage_ += h->total_charge; +} + +void ClockCacheShard::Evict(ClockHandle* h) { + ClockOff(h); + table_.Remove(h); + assert(usage_ >= h->total_charge); + usage_ -= h->total_charge; } void ClockCacheShard::EvictFromClock(size_t charge, autovector* deleted) { + // TODO(Guido) When an element is in the probe sequence of a + // hot element, it will be hard to get an exclusive ref. + // We may need a mechanism to avoid that an element sits forever + // in cache waiting to be evicted. assert(charge <= capacity_); - while (clock_usage_ > 0 && (usage_ + charge) > capacity_) { - ClockHandle* old = &table_.array_[clock_pointer_]; + uint32_t max_iterations = table_.GetTableSize(); + while (usage_ + charge > capacity_ && max_iterations--) { + ClockHandle* h = &table_.array_[clock_pointer_]; clock_pointer_ = table_.ModTableSize(clock_pointer_ + 1); - // Clock list contains only elements which can be evicted. - if (!old->IsInClockList()) { - continue; - } - if (old->GetClockPriority() == ClockHandle::ClockPriority::LOW) { - ClockRemove(old); - table_.Remove(old); - assert(usage_ >= old->total_charge); - usage_ -= old->total_charge; - deleted->push_back(*old); - return; + + if (h->TryExclusiveRef()) { + if (!h->IsInClock() && h->IsElement()) { + // We adjust the clock priority to make the element evictable again. + // Why? Elements that are not in clock are either currently + // externally referenced or used to be---because we are holding an + // exclusive ref, we know we are in the latter case. This can only + // happen when the last external reference to an element was released, + // and the element was not immediately removed. + ClockOn(h); + } + + if (h->GetClockPriority() == ClockHandle::ClockPriority::LOW) { + deleted->push_back(*h); + Evict(h); + } else if (h->GetClockPriority() > ClockHandle::ClockPriority::LOW) { + h->DecreaseClockPriority(); + } + h->ReleaseExclusiveRef(); } - old->DecreaseClockPriority(); } } @@ -309,13 +375,14 @@ void ClockCacheShard::SetCapacity(size_t capacity) { EvictFromClock(0, &last_reference_list); } - // Free the entries here outside of mutex for performance reasons. + // Free the entry outside of the mutex for performance reasons. for (auto& h : last_reference_list) { h.FreeData(); } } void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) { + assert(false); // Not supported. TODO(Guido) Support it? 
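
Going back to EvictFromClock above: the sweep combines priority aging with eviction and is bounded to one pass over the table. The sketch below models just that control flow, single-threaded and with a toy Slot type; the exclusive-ref handling and the re-enrollment of unreferenced elements are omitted.

#include <cstddef>
#include <cstdint>
#include <vector>

enum class Priority { NONE, LOW, MEDIUM, HIGH };

struct Slot {
  Priority prio = Priority::NONE;
  size_t charge = 0;
};

void SweepClock(std::vector<Slot>& slots, uint32_t& clock_pointer,
                size_t& usage, size_t needed, size_t capacity) {
  uint32_t max_iterations = static_cast<uint32_t>(slots.size());
  while (usage + needed > capacity && max_iterations-- > 0) {
    Slot& s = slots[clock_pointer];
    clock_pointer = static_cast<uint32_t>((clock_pointer + 1) % slots.size());
    if (s.prio == Priority::LOW) {
      usage -= s.charge;  // Evict.
      s.charge = 0;
      s.prio = Priority::NONE;
    } else if (s.prio == Priority::MEDIUM) {
      s.prio = Priority::LOW;  // One step closer to eviction.
    } else if (s.prio == Priority::HIGH) {
      s.prio = Priority::MEDIUM;
    }
    // Priority::NONE slots are skipped: empty or externally referenced.
  }
}
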
DMutexLock l(mutex_); strict_capacity_limit_ = strict_capacity_limit; } @@ -343,9 +410,10 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value, autovector last_reference_list; { DMutexLock l(mutex_); + assert(table_.GetOccupancy() <= table_.GetOccupancyLimit()); // Free the space following strict clock policy until enough space - // is freed or the clock list is empty. + // is freed or there are no evictable elements. EvictFromClock(tmp.total_charge, &last_reference_list); if ((usage_ + tmp.total_charge > capacity_ && (strict_capacity_limit_ || handle == nullptr)) || @@ -376,30 +444,29 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value, usage_ += h->total_charge; if (old != nullptr) { s = Status::OkOverwritten(); - assert(old->IsVisible()); - table_.Exclude(old); - if (!old->HasRefs()) { - // old is in clock because it's in cache and its reference count is 0. - ClockRemove(old); - table_.Remove(old); - assert(usage_ >= old->total_charge); - usage_ -= old->total_charge; + assert(!old->WillBeDeleted()); + old->SetWillBeDeleted(true); + // Try to evict the old copy of the element. + if (old->TryExclusiveRef()) { last_reference_list.push_back(*old); + Evict(old); + old->ReleaseExclusiveRef(); } } if (handle == nullptr) { - ClockInsert(h); + // If the user didn't provide a handle, no reference is taken, + // so we make the element evictable. + ClockOn(h); + h->ReleaseExclusiveRef(); } else { - // If caller already holds a ref, no need to take one here. - if (!h->HasRefs()) { - h->Ref(); - } + // The caller already holds a ref. + h->ExclusiveToExternalRef(); *handle = reinterpret_cast(h); } } } - // Free the entries here outside of mutex for performance reasons. + // Free the entry outside of the mutex for performance reasons. for (auto& h : last_reference_list) { h.FreeData(); } @@ -407,95 +474,102 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value, return s; } -Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t /* hash */) { +Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) { ClockHandle* h = nullptr; - { - DMutexLock l(mutex_); - h = table_.Lookup(key); - if (h != nullptr) { - assert(h->IsVisible()); - if (!h->HasRefs()) { - // The entry is in clock since it's in the hash table and has no - // external references. - ClockRemove(h); - } - h->Ref(); - h->SetHit(); - } + h = table_.Lookup(key, hash); + if (h != nullptr) { + // TODO(Guido) Comment from #10347: Here it looks like we have three atomic + // updates where it would be possible to combine into one CAS (more metadata + // under one atomic field) or maybe two atomic updates (one arithmetic, one + // bitwise). Something to think about optimizing. + h->InternalToExternalRef(); + h->SetHit(); + // The handle is now referenced, so we take it out of clock. + ClockOff(h); } return reinterpret_cast(h); } bool ClockCacheShard::Ref(Cache::Handle* h) { ClockHandle* e = reinterpret_cast(h); - DMutexLock l(mutex_); - // To create another reference - entry must be already externally referenced. - assert(e->HasRefs()); - e->Ref(); - return true; + assert(e->HasExternalRefs()); + return e->TryExternalRef(); } bool ClockCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) { + // In contrast with LRUCache's Release, this function won't delete the handle + // when the reference is the last one and the cache is above capacity. 
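
For context, this is roughly how a caller drives the Insert, Lookup and Release paths above through the generic Cache interface. The cache object is assumed to be constructed elsewhere, and the 16-byte key length is an assumption matching kCacheKeySize; everything else uses the standard rocksdb::Cache API.

#include <string>

#include "rocksdb/cache.h"

void CacheUsageSketch(rocksdb::Cache* cache) {
  const std::string key(16, 'k');  // Must be exactly kCacheKeySize bytes.
  auto deleter = [](const rocksdb::Slice& /*key*/, void* value) {
    delete static_cast<std::string*>(value);
  };

  // Insert without requesting a handle: no reference is taken, so the entry
  // starts out evictable (ClockOn in the code above).
  rocksdb::Status s =
      cache->Insert(key, new std::string("value"), /*charge=*/1024, deleter);
  if (!s.ok()) {
    return;
  }

  // Lookup pins the entry (external ref plus ClockOff), so it must be paired
  // with Release.
  rocksdb::Cache::Handle* h = cache->Lookup(key);
  if (h != nullptr) {
    auto* value = static_cast<std::string*>(cache->Value(h));
    (void)value;
    cache->Release(h);  // A later clock sweep makes the entry evictable again.
  }
}
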
Space + // is only freed up by EvictFromClock (called by Insert when space is needed) + // and Erase. if (handle == nullptr) { return false; } + ClockHandle* h = reinterpret_cast(handle); - ClockHandle copy; - bool last_reference = false; - assert(!h->IsInClockList()); - { - DMutexLock l(mutex_); - last_reference = h->Unref(); - if (last_reference && h->IsVisible()) { - // The item is still in cache, and nobody else holds a reference to it. - if (usage_ > capacity_ || erase_if_last_ref) { - // The clock list must be empty since the cache is full. - assert(clock_usage_ == 0 || erase_if_last_ref); - // Take this opportunity and remove the item. - table_.Remove(h); + uint32_t hash = h->hash; + uint32_t refs = h->ReleaseExternalRef(); + bool last_reference = !(refs & ClockHandle::EXTERNAL_REFS); + bool will_be_deleted = refs & ClockHandle::WILL_BE_DELETED; + + if (last_reference && (will_be_deleted || erase_if_last_ref)) { + // At this point we want to evict the element, so we need to take + // a lock and an exclusive reference. But there's a problem: + // as soon as we released the last reference, an Insert or Erase could've + // replaced this element, and by the time we take the lock and ref + // we could potentially be referencing a different element. + // Thus, before evicting the (potentially different) element, we need to + // re-check that it's unreferenced and marked as WILL_BE_DELETED, so the + // eviction is safe. Additionally, we check that the hash doesn't change, + // which will detect, most of the time, whether the element is a different + // one. The bottomline is that we only guarantee that the input handle will + // be deleted, and occasionally also another handle, but in any case all + // deleted handles are safe to delete. + // TODO(Guido) With lock-free inserts and deletes we may be able to + // "atomically" transition to an exclusive ref, without creating a deadlock. + ClockHandle copy; + { + DMutexLock l(mutex_); + if (h->TrySpinExclusiveRef()) { + will_be_deleted = h->refs & ClockHandle::WILL_BE_DELETED; + // Check that it's still safe to delete. + if (h->IsElement() && (will_be_deleted || erase_if_last_ref) && + h->hash == hash) { + copy = *h; + Evict(h); + } + h->ReleaseExclusiveRef(); } else { - // Put the item back on the clock list, and don't free it. - ClockInsert(h); - last_reference = false; + // An external ref was detected. + return false; } } - // If it was the last reference, then decrement the cache usage. - if (last_reference) { - assert(usage_ >= h->total_charge); - usage_ -= h->total_charge; - copy = *h; - } - } - // Free the entry here outside of mutex for performance reasons. - if (last_reference) { + // Free the entry outside of the mutex for performance reasons. copy.FreeData(); + return true; } - return last_reference; + + return false; } -void ClockCacheShard::Erase(const Slice& key, uint32_t /* hash */) { +void ClockCacheShard::Erase(const Slice& key, uint32_t hash) { ClockHandle copy; bool last_reference = false; { DMutexLock l(mutex_); - ClockHandle* h = table_.Lookup(key); + ClockHandle* h = table_.Lookup(key, hash); if (h != nullptr) { - table_.Exclude(h); - if (!h->HasRefs()) { - // The entry is in Clock since it's in cache and has no external - // references. 
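
The re-validation step that Release performs before evicting can be modeled in isolation. The sketch below uses a hypothetical Slot type with a much simpler ref word than ClockHandle; it only illustrates the "drop the last ref, re-acquire exclusivity, re-check identity" sequence described in the comments above.

#include <atomic>
#include <cstdint>

struct Slot {
  static constexpr uint32_t kExclusive = 1u << 31;
  std::atomic<uint32_t> refs{0};  // External ref count plus the exclusive bit.
  bool is_element = false;        // Only touched under an exclusive ref here.
  bool will_be_deleted = false;
  uint32_t hash = 0;

  bool TryExclusiveRef() {
    uint32_t expected = 0;
    return refs.compare_exchange_strong(expected, kExclusive);
  }
  void ReleaseExclusiveRef() { refs.fetch_and(~kExclusive); }
};

// Called after the caller dropped what it believes was the last external ref.
// By then the slot may have been reused for a different key, so before
// destroying anything we re-acquire exclusivity and re-check identity (here,
// just the hash), mirroring the re-check in Release above.
bool TryEvictAfterLastRelease(Slot& s, uint32_t expected_hash,
                              bool erase_if_last_ref) {
  if (!s.TryExclusiveRef()) {
    return false;  // Someone took a new reference; nothing to do.
  }
  bool evict = s.is_element && (s.will_be_deleted || erase_if_last_ref) &&
               s.hash == expected_hash;
  if (evict) {
    s.is_element = false;  // Stand-in for Evict(h); data is freed by the caller.
  }
  s.ReleaseExclusiveRef();
  return evict;
}
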
-      ClockRemove(h);
-      table_.Remove(h);
-      assert(usage_ >= h->total_charge);
-      usage_ -= h->total_charge;
-      last_reference = true;
+      h->SetWillBeDeleted(true);
+      h->ReleaseInternalRef();
+      if (h->TryExclusiveRef()) { copy = *h;
+        Evict(h);
+        last_reference = true;
+        h->ReleaseExclusiveRef(); } } }
-  // Free the entry here outside of mutex for performance reasons.
-  // last_reference will only be true if e != nullptr.
+  // Free the entry outside of the mutex for performance reasons.
  if (last_reference) { copy.FreeData(); }
@@ -507,9 +581,25 @@ size_t ClockCacheShard::GetUsage() const { }
size_t ClockCacheShard::GetPinnedUsage() const {
+  // Computes the pinned usage by scanning the whole hash table. This
+  // is slow, but avoids keeping an exact counter on the clock usage,
+  // i.e., the total charge of the elements that are not externally referenced.
+  // Why avoid this? Because Lookup removes elements from the clock
+  // list, an exact counter would need to be updated on every lookup,
+  // which creates additional synchronization costs.
  DMutexLock l(mutex_);
-  assert(usage_ >= clock_usage_);
-  return usage_ - clock_usage_;
+
+  size_t clock_usage = 0;
+
+  table_.ConstApplyToEntriesRange(
+      [&clock_usage](ClockHandle* h) {
+        if (h->HasExternalRefs()) {
+          clock_usage += h->total_charge;
+        }
+      },
+      0, table_.GetTableSize(), true);
+
+  return clock_usage;
}
std::string ClockCacheShard::GetPrintableOptions() const {
diff --git a/cache/clock_cache.h b/cache/clock_cache.h
index ca6205b83..8091bab27 100644
--- a/cache/clock_cache.h
+++ b/cache/clock_cache.h
@@ -10,6 +10,8 @@
 #pragma once
 #include
+#include
+#include
 #include
 #include
@@ -27,116 +29,254 @@ namespace ROCKSDB_NAMESPACE { namespace clock_cache {
-// Clock cache implementation. This is based on FastLRUCache's open-addressed
-// hash table. Importantly, it stores elements in an array, and resolves
-// collision using a probing strategy. Visibility and referenceability of
-// elements works as usual. See fast_lru_cache.h for a detailed description.
+// Block cache implementation using a lock-free open-address hash table
+// and clock eviction.
+
+///////////////////////////////////////////////////////////////////////////////
+// Part 1: Handles
+//
+// Every slot in the hash table is a ClockHandle. A handle can be in a few
+// different states that stem from the fact that handles can be externally
+// referenced and, thus, can't always be immediately evicted when a delete
+// operation is executed or when they are replaced by a new version (via an
+// insert of the same key). Concretely, the state of a handle is defined by the
+// following two properties:
+// (R) Externally referenced: A handle can be referenced externally, or not.
+//    Importantly, a handle can be evicted if and only if it's not
+//    referenced. In particular, when a handle becomes referenced, it's
+//    temporarily taken out of clock until all references to it are released.
+// (M) Marked for deletion (or invisible): A handle is marked for deletion
+//    when an operation attempts to delete it, but the handle is externally
+//    referenced, so it can't be immediately deleted. When this mark is placed,
+//    lookups will no longer be able to find it. Consequently, no more external
+//    references will be taken to the handle. When a handle is marked for
+//    deletion, we also say it's invisible.
+// These properties induce 4 different states, with transitions defined as
+// follows:
+// - Not M --> M: When a handle is deleted or replaced by a new version, but
+//   not immediately evicted.
+// - M --> not M: This cannot happen. Once a handle is marked for deletion,
+//   there is no going back.
+// - R --> not R: When all references to a handle are released.
+// - Not R --> R: When an unreferenced handle becomes referenced. This can only
+//   happen if the handle is visible, since references to a handle can only be
+//   created when it's visible.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Part 2: Hash table structure
+//
+// Internally, the cache uses an open-addressed hash table to index the
+// handles. We use tombstone counters to keep track of displacements. Probes
+// are generated with double-hashing (but the code can be easily modified to
+// use other probing schemes, like linear hashing). Because of the tombstones
+// and the two possible visibility states of a handle, the table slots (we use
+// the word "slot" to refer to handles that are not necessarily valid key-value
+// elements) can be in 4 different states:
+// 1. Visible element: The slot contains an element in not M state.
+// 2. To-be-deleted element: The slot contains an element in M state.
+// 3. Tombstone: The slot doesn't contain an element, but there is some other
+//    element that probed this slot during its insertion.
+// 4. Empty: The slot is unused.
+// When an element is removed from the table, its slot either transitions to a
+// tombstone or to an empty slot, depending on the number of displacements of
+// the slot. In any case, the slot becomes available. When a handle is inserted
+// into that slot, it becomes a visible element again.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Part 3: The clock algorithm
+//
+// We maintain a circular buffer with the handles available for eviction,
+// which the clock algorithm traverses (using a "clock pointer") to pick the
+// next victim. We use the hash table array as the circular buffer, and mark
+// the handles that are evictable. For this we use different clock flags,
+// namely NONE, LOW, MEDIUM, HIGH, that represent priorities: LOW, MEDIUM and
+// HIGH represent how close an element is to being evictable, LOW being
+// immediately evictable. NONE means the slot is not evictable. This is due to
+// one of the following reasons:
+// (i) the slot doesn't contain an element, or
+// (ii) the slot contains an element that is in R state, or
+// (iii) the slot contains an element that was in R state but no longer is,
+//    and the clock pointer has not swept through the slot since the element
+//    stopped being referenced.
+//
+// The priority NONE is really only important for case (iii), as in the other
+// two cases there are other metadata fields that already capture the state.
+// When an element stops being referenced (and is not deleted), the clock
+// algorithm must acknowledge this, and assign a non-NONE priority to make
+// the element evictable again.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Part 4: Synchronization
+//
+// We provide the following synchronization guarantees:
+// - Lookup is lock-free.
+// - Release is lock-free, unless (i) no references to the element are left,
+//   and (ii) it was marked for deletion or the user wishes to delete it upon
+//   releasing the last reference.
+// - Insert and Erase still use a per-shard lock.
+//
+// Our hash table is lock-free, in the sense that system-wide progress is
+// guaranteed, i.e., some thread is always able to make progress.
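
The reference scheme described in Part 4 behaves like a reader-writer lock packed into a single atomic word. The model below is a simplification with one reader count instead of the separate external and internal counts that ClockHandle defines further down; the constants are illustrative only.

#include <atomic>
#include <cstdint>

struct RefWord {
  static constexpr uint32_t kOneRef = 1;                // Bits 0..29: readers.
  static constexpr uint32_t kExclusive = 1u << 30;      // Writer bit.
  static constexpr uint32_t kWillBeDeleted = 1u << 31;  // Deletion mark.
  std::atomic<uint32_t> bits{0};

  // Optimistically add a reader; back off if a writer holds the slot or the
  // entry is marked for deletion.
  bool TryRef() {
    if (!(bits.fetch_add(kOneRef) & (kExclusive | kWillBeDeleted))) {
      return true;
    }
    bits.fetch_sub(kOneRef);
    return false;
  }
  void Unref() { bits.fetch_sub(kOneRef); }

  // Writers only succeed when there are no readers; the deletion mark is
  // preserved across the exchange.
  bool TryExclusive() {
    uint32_t will_be_deleted = bits.load() & kWillBeDeleted;
    uint32_t expected = will_be_deleted;
    return bits.compare_exchange_strong(expected, kExclusive | will_be_deleted);
  }
  void ReleaseExclusive() { bits.fetch_and(~kExclusive); }
};
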
//
-// The main difference with FastLRUCache is, not surprisingly, the eviction
-// algorithm
-// ---instead of an LRU list, we maintain a circular list with the elements
-// available for eviction, which the clock algorithm traverses to pick the next
-// victim. The clock list is represented using the array of handles, and we
-// simply mark those elements that are present in the list. This is done using
-// different clock flags, namely NONE, LOW, MEDIUM, HIGH, that represent
-// priorities: NONE means that the element is not part of the clock list, and
-// LOW to HIGH represent how close an element is from being evictable (LOW being
-// immediately evictable). When the clock pointer steps on an element that is
-// not immediately evictable, it decreases its priority.
-
-constexpr double kLoadFactor = 0.35;  // See fast_lru_cache.h.
-
-constexpr double kStrictLoadFactor = 0.7;  // See fast_lru_cache.h.
+///////////////////////////////////////////////////////////////////////////////
+
+// The load factor p is a real number in (0, 1) such that at all
+// times at most a fraction p of all slots, without counting tombstones,
+// are occupied by elements. This means that the probability that a
+// random probe hits an empty slot is at least 1 - p, and thus at most
+// 1 / (1 - p) probes are required on average. For example, p = 35% implies
+// that between 1 and 2 probes are needed on average (bear in mind that this
+// reasoning doesn't consider the effects of clustering over time).
+// Because the size of the hash table is always rounded up to the next
+// power of 2, p is really an upper bound on the actual load factor---the
+// actual load factor is anywhere between p/2 and p. This is a bit wasteful,
+// but bear in mind that slots only hold metadata, not actual values.
+// Since space cost is dominated by the values (the LSM blocks),
+// overprovisioning the table with metadata only increases the total cache
+// space usage by a tiny fraction.
+constexpr double kLoadFactor = 0.35;
+
+// The user can exceed kLoadFactor if the sizes of the inserted values don't
+// match estimated_value_size, or if strict_capacity_limit == false. To
+// keep performance from plunging, we set a strict upper bound on the load
+// factor.
+constexpr double kStrictLoadFactor = 0.7;
 // Arbitrary seeds.
 constexpr uint32_t kProbingSeed1 = 0xbc9f1d34;
 constexpr uint32_t kProbingSeed2 = 0x7a2bb9d5;
-// An experimental (under development!) alternative to LRUCache
+// An experimental (under development!) alternative to LRUCache.
 struct ClockHandle {
  void* value;
  Cache::DeleterFn deleter;
  uint32_t hash;
-  size_t total_charge;  // TODO(opt): Only allow uint32_t?
-  // The number of external refs to this entry.
-  uint32_t refs;
+  size_t total_charge;
+  std::array<char, kCacheKeySize> key_data;
+
+  static constexpr uint8_t kExternalRefsOffset = 0;
+  static constexpr uint8_t kSharedRefsOffset = 15;
+  static constexpr uint8_t kExclusiveRefOffset = 30;
+  static constexpr uint8_t kWillBeDeletedOffset = 31;
+
+  enum Refs : uint32_t {
+    // Number of external references to the slot.
+    EXTERNAL_REFS = ((uint32_t{1} << 15) - 1)
+                        << kExternalRefsOffset,  // Bits 0, ..., 14
+    // Number of internal references plus external references to the slot.
+    SHARED_REFS = ((uint32_t{1} << 15) - 1)
+                      << kSharedRefsOffset,  // Bits 15, ..., 29
+    // Whether a thread has an exclusive reference to the slot.
+    EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset,  // Bit 30
+    // Whether the handle will be deleted soon.
When this bit is set, new + // internal + // or external references to this handle stop being accepted. + // There is an exception: external references can be created from + // existing external references, or converting from existing internal + // references. + WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset // Bit 31 + + // Shared references (i.e., external and internal references) and exclusive + // references are our custom implementation of RW locks---external and + // internal references are read locks, and exclusive references are write + // locks. We prioritize readers, which never block; in fact, they don't even + // use compare-and-swap operations. Using our own implementation of RW locks + // allows us to save many atomic operations by packing data more carefully. + // In particular: + // - Combining EXTERNAL_REFS and SHARED_REFS allows us to convert an + // internal + // reference into an external reference in a single atomic arithmetic + // operation. + // - Combining SHARED_REFS and WILL_BE_DELETED allows us to attempt to take + // a shared reference and check whether the entry is marked for deletion + // in a single atomic arithmetic operation. + }; + + static constexpr uint32_t kOneInternalRef = 0x8000; + static constexpr uint32_t kOneExternalRef = 0x8001; + + std::atomic refs; - static constexpr int kIsVisibleOffset = 0; - static constexpr int kIsElementOffset = 1; - static constexpr int kClockPriorityOffset = 2; - static constexpr int kIsHitOffset = 4; - static constexpr int kCachePriorityOffset = 5; + static constexpr uint8_t kIsElementOffset = 1; + static constexpr uint8_t kClockPriorityOffset = 2; + static constexpr uint8_t kIsHitOffset = 4; + static constexpr uint8_t kCachePriorityOffset = 5; enum Flags : uint8_t { - // Whether the handle is visible to Lookups. - IS_VISIBLE = (1 << kIsVisibleOffset), // Whether the slot is in use by an element. - IS_ELEMENT = (1 << kIsElementOffset), - // Clock priorities. Represents how close a handle is from - // being evictable. - CLOCK_PRIORITY = (3 << kClockPriorityOffset), + IS_ELEMENT = 1 << kIsElementOffset, + // Clock priorities. Represents how close a handle is from being evictable. + CLOCK_PRIORITY = 3 << kClockPriorityOffset, // Whether the handle has been looked up after its insertion. - HAS_HIT = (1 << kIsHitOffset), - CACHE_PRIORITY = (1 << kCachePriorityOffset), + HAS_HIT = 1 << kIsHitOffset, + // The value of Cache::Priority for the handle. + CACHE_PRIORITY = 1 << kCachePriorityOffset, }; - uint8_t flags; + + std::atomic flags; enum ClockPriority : uint8_t { - NONE = (0 << kClockPriorityOffset), // Not an element in the eyes of clock. - LOW = (1 << kClockPriorityOffset), // Immediately evictable. + NONE = (0 << kClockPriorityOffset), + LOW = (1 << kClockPriorityOffset), MEDIUM = (2 << kClockPriorityOffset), HIGH = (3 << kClockPriorityOffset) - // Priority is NONE if and only if - // (i) the handle is not an element, or - // (ii) the handle is an element but it is being referenced. }; - // The number of elements that hash to this slot or a lower one, - // but wind up in a higher slot. - uint32_t displacements; - - std::array key_data; - - ClockHandle() { - value = nullptr; - deleter = nullptr; - hash = 0; - total_charge = 0; - refs = 0; - flags = 0; - SetIsVisible(false); + // The number of elements that hash to this slot or a lower one, but wind + // up in this slot or a higher one. 
+ std::atomic displacements; + + // Synchronization rules: + // - Use a shared reference when we want the handle's identity + // members (key_data, hash, value and IS_ELEMENT flag) to + // remain untouched, but not modify them. The only updates + // that a shared reference allows are: + // * set CLOCK_PRIORITY to NONE; + // * set the HAS_HIT bit. + // Notice that these two types of updates are idempotent, so + // they don't require synchronization across shared references. + // - Use an exclusive reference when we want identity members + // to remain untouched, as well as modify any identity member + // or flag. + // - displacements can be modified without holding a reference. + // - refs is only modified through appropriate functions to + // take or release references. + + ClockHandle() + : value(nullptr), + deleter(nullptr), + hash(0), + total_charge(0), + refs(0), + flags(0), + displacements(0) { + SetWillBeDeleted(false); SetIsElement(false); SetClockPriority(ClockPriority::NONE); SetCachePriority(Cache::Priority::LOW); - displacements = 0; key_data.fill(0); } - Slice key() const { return Slice(key_data.data(), kCacheKeySize); } - - // Increase the reference count by 1. - void Ref() { refs++; } - - // Just reduce the reference count by 1. Return true if it was last reference. - bool Unref() { - assert(refs > 0); - refs--; - return refs == 0; + ClockHandle(const ClockHandle& other) { *this = other; } + + void operator=(const ClockHandle& other) { + value = other.value; + deleter = other.deleter; + hash = other.hash; + total_charge = other.total_charge; + refs.store(other.refs); + key_data = other.key_data; + flags.store(other.flags); + SetWillBeDeleted(other.WillBeDeleted()); + SetIsElement(other.IsElement()); + SetClockPriority(other.GetClockPriority()); + SetCachePriority(other.GetCachePriority()); + displacements.store(other.displacements); } - // Return true if there are external refs, false otherwise. 
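
The user-defined copy constructor and operator= above are needed because the std::atomic members delete the implicitly generated ones, while the cache still copies handles by value (for example into last_reference_list, so that data can be freed outside the mutex). A minimal illustration of the same pattern:

#include <atomic>
#include <cstdint>

struct Counter {
  std::atomic<uint32_t> refs{0};
  Counter() = default;
  Counter(const Counter& other) { *this = other; }
  Counter& operator=(const Counter& other) {
    refs.store(other.refs.load());  // Copy the value, not the atomic object.
    return *this;
  }
};

// Without the user-defined copy operations, "Counter b = a;" would not compile.
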
- bool HasRefs() const { return refs > 0; } + Slice key() const { return Slice(key_data.data(), kCacheKeySize); } - bool IsVisible() const { return flags & IS_VISIBLE; } - - void SetIsVisible(bool is_visible) { - if (is_visible) { - flags |= IS_VISIBLE; - } else { - flags &= ~IS_VISIBLE; - } - } + bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; } bool IsElement() const { return flags & IS_ELEMENT; } @@ -144,7 +284,7 @@ struct ClockHandle { if (is_element) { flags |= IS_ELEMENT; } else { - flags &= ~IS_ELEMENT; + flags &= static_cast(~IS_ELEMENT); } } @@ -152,7 +292,7 @@ struct ClockHandle { void SetHit() { flags |= HAS_HIT; } - bool IsInClockList() const { + bool IsInClock() const { return GetClockPriority() != ClockHandle::ClockPriority::NONE; } @@ -164,7 +304,7 @@ struct ClockHandle { if (priority == Cache::Priority::HIGH) { flags |= Flags::CACHE_PRIORITY; } else { - flags &= ~Flags::CACHE_PRIORITY; + flags &= static_cast(~Flags::CACHE_PRIORITY); } } @@ -173,7 +313,7 @@ struct ClockHandle { } void SetClockPriority(ClockPriority priority) { - flags &= ~Flags::CLOCK_PRIORITY; + flags &= static_cast(~Flags::CLOCK_PRIORITY); flags |= priority; } @@ -182,14 +322,13 @@ struct ClockHandle { kClockPriorityOffset; assert(p > 0); p--; - flags &= ~Flags::CLOCK_PRIORITY; + flags &= static_cast(~Flags::CLOCK_PRIORITY); ClockPriority new_priority = static_cast(p << kClockPriorityOffset); flags |= new_priority; } void FreeData() { - assert(refs == 0); if (deleter) { (*deleter)(key(), value); } @@ -232,17 +371,131 @@ struct ClockHandle { return total_charge - meta_charge; } - inline bool IsEmpty() { + inline bool IsEmpty() const { return !this->IsElement() && this->displacements == 0; } - inline bool IsTombstone() { + inline bool IsTombstone() const { return !this->IsElement() && this->displacements > 0; } - inline bool Matches(const Slice& some_key) { - return this->IsElement() && this->key() == some_key; + inline bool Matches(const Slice& some_key, uint32_t some_hash) const { + return this->IsElement() && this->hash == some_hash && + this->key() == some_key; + } + + bool WillBeDeleted() const { return refs & WILL_BE_DELETED; } + + void SetWillBeDeleted(bool will_be_deleted) { + if (will_be_deleted) { + refs |= WILL_BE_DELETED; + } else { + refs &= ~WILL_BE_DELETED; + } + } + + // The following functions are for taking and releasing refs. + + // Tries to take an external ref. Returns true iff it succeeds. + inline bool TryExternalRef() { + if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) { + return true; + } + refs -= kOneExternalRef; + return false; + } + + // Releases an external ref. Returns the new value (this is useful to + // avoid an extra atomic read). + inline uint32_t ReleaseExternalRef() { return refs -= kOneExternalRef; } + + // Take an external ref, assuming there is already one external ref + // to the handle. + void Ref() { + // TODO(Guido) Is it okay to assume that the existing external reference + // survives until this function returns? + refs += kOneExternalRef; + } + + // Tries to take an internal ref. Returns true iff it succeeds. + inline bool TryInternalRef() { + if (!((refs += kOneInternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) { + return true; + } + refs -= kOneInternalRef; + return false; + } + + inline void ReleaseInternalRef() { refs -= kOneInternalRef; } + + // Tries to take an exclusive ref. Returns true iff it succeeds. 
+ inline bool TryExclusiveRef() { + uint32_t will_be_deleted = refs & WILL_BE_DELETED; + uint32_t expected = will_be_deleted; + return refs.compare_exchange_strong(expected, + EXCLUSIVE_REF | will_be_deleted); } + + // Repeatedly tries to take an exclusive reference, but stops as soon + // as an external reference is detected (in this case the wait would + // presumably be too long). + inline bool TrySpinExclusiveRef() { + uint32_t expected = 0; + uint32_t will_be_deleted = 0; + while (!refs.compare_exchange_strong(expected, + EXCLUSIVE_REF | will_be_deleted)) { + if (expected & EXTERNAL_REFS) { + return false; + } + will_be_deleted = expected & WILL_BE_DELETED; + expected = will_be_deleted; + } + return true; + } + + inline void ReleaseExclusiveRef() { refs.fetch_and(~EXCLUSIVE_REF); } + + // The following functions are for upgrading and downgrading refs. + // They guarantee atomicity, i.e., no exclusive refs to the handle + // can be taken by a different thread during the conversion. + + inline void ExclusiveToInternalRef() { + refs += kOneInternalRef; + ReleaseExclusiveRef(); + } + + inline void ExclusiveToExternalRef() { + refs += kOneExternalRef; + ReleaseExclusiveRef(); + } + + // TODO(Guido) Do we want to bound the loop and prepare the + // algorithms to react to a failure? + inline void InternalToExclusiveRef() { + uint32_t expected = kOneInternalRef; + uint32_t will_be_deleted = 0; + while (!refs.compare_exchange_strong(expected, + EXCLUSIVE_REF | will_be_deleted)) { + will_be_deleted = expected & WILL_BE_DELETED; + expected = kOneInternalRef | will_be_deleted; + } + } + + inline void InternalToExternalRef() { + refs += kOneExternalRef - kOneInternalRef; + } + + // TODO(Guido) Same concern. + inline void ExternalToExclusiveRef() { + uint32_t expected = kOneExternalRef; + uint32_t will_be_deleted = 0; + while (!refs.compare_exchange_strong(expected, + EXCLUSIVE_REF | will_be_deleted)) { + will_be_deleted = expected & WILL_BE_DELETED; + expected = kOneExternalRef | will_be_deleted; + } + } + }; // struct ClockHandle class ClockHandleTable { @@ -252,31 +505,54 @@ class ClockHandleTable { // Returns a pointer to a visible element matching the key/hash, or // nullptr if not present. - ClockHandle* Lookup(const Slice& key); + ClockHandle* Lookup(const Slice& key, uint32_t hash); // Inserts a copy of h into the hash table. // Returns a pointer to the inserted handle, or nullptr if no slot // available was found. If an existing visible element matching the // key/hash is already present in the hash table, the argument old - // is set to pointe to it; otherwise, it's set to nullptr. + // is set to point to it; otherwise, it's set to nullptr. + // Returns an exclusive reference to h, and no references to old. ClockHandle* Insert(ClockHandle* h, ClockHandle** old); - // Removes h from the hash table. The handle must already be off - // the clock list. + // Removes h from the hash table. The handle must already be off clock. void Remove(ClockHandle* h); - // Turns a visible element h into a ghost (i.e., not visible). - void Exclude(ClockHandle* h); + // Extracts the element information from a handle (src), and assigns it + // to a hash table slot (dst). Doesn't touch displacements and refs, + // which are maintained by the hash table algorithm. + void Assign(ClockHandle* dst, ClockHandle* src); - // Assigns a copy of h to the given slot. 
- void Assign(int slot, ClockHandle* h); + template + void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end, + bool apply_if_will_be_deleted) { + for (uint32_t i = index_begin; i < index_end; i++) { + ClockHandle* h = &array_[i]; + if (h->TryExclusiveRef()) { + if (h->IsElement() && + (apply_if_will_be_deleted || !h->WillBeDeleted())) { + // Hand the internal ref over to func, which is now responsible + // to release it. + func(h); + } else { + h->ReleaseExclusiveRef(); + } + } + } + } template - void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) { + void ConstApplyToEntriesRange(T func, uint32_t index_begin, + uint32_t index_end, + bool apply_if_will_be_deleted) const { for (uint32_t i = index_begin; i < index_end; i++) { ClockHandle* h = &array_[i]; - if (h->IsVisible()) { - func(h); + if (h->TryExclusiveRef()) { + if (h->IsElement() && + (apply_if_will_be_deleted || !h->WillBeDeleted())) { + func(h); + } + h->ReleaseExclusiveRef(); } } } @@ -295,28 +571,38 @@ class ClockHandleTable { private: friend class ClockCacheShard; - int FindVisibleElement(const Slice& key, int& probe, int displacement); + int FindElement(const Slice& key, uint32_t hash, uint32_t& probe); - int FindAvailableSlot(const Slice& key, int& probe, int displacement); + int FindAvailableSlot(const Slice& key, uint32_t& probe); - int FindVisibleElementOrAvailableSlot(const Slice& key, int& probe, - int displacement); + int FindElementOrAvailableSlot(const Slice& key, uint32_t hash, + uint32_t& probe); // Returns the index of the first slot probed (hashing with - // the given key) with a handle e such that cond(e) is true. - // Otherwise, if no match is found, returns -1. - // For every handle e probed except the final slot, updates - // e->displacements += displacement. - // The argument probe is modified such that consecutive calls - // to FindSlot continue probing right after where the previous - // call left. - int FindSlot(const Slice& key, std::function cond, - int& probe, int displacement); + // the given key) with a handle e such that match(e) is true. + // At every step, the function first tests whether match(e) holds. + // If it's false, it evaluates abort(e) to decide whether the + // search should be aborted, and in the affirmative returns -1. + // For every handle e probed except the last one, the function runs + // update(e). We say a probe to a handle e is aborting if match(e) is + // false and abort(e) is true. The argument probe is one more than the + // last non-aborting probe during the call. This is so that that the + // variable can be used to keep track of progress across consecutive + // calls to FindSlot. + inline int FindSlot(const Slice& key, std::function match, + std::function stop, + std::function update, + uint32_t& probe); + + // After a failed FindSlot call (i.e., with answer -1), this function + // decrements all displacements, starting from the 0-th probe. + void Rollback(const Slice& key, uint32_t probe); // Number of hash bits used for table index. // The size of the table is 1 << length_bits_. int length_bits_; + // For faster computation of ModTableSize. const uint32_t length_bits_mask_; // Number of elements in the table. @@ -345,10 +631,10 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard { void SetStrictCapacityLimit(bool strict_capacity_limit) override; // Like Cache methods, but with an extra "hash" parameter. - // Insert an item into the hash table and, if handle is null, insert into - // the clock list. 
Older items are evicted as necessary. If the cache is full - // and free_handle_on_fail is true, the item is deleted and handle is set to - // nullptr. + // Insert an item into the hash table and, if handle is null, make it + // evictable by the clock algorithm. Older items are evicted as necessary. + // If the cache is full and free_handle_on_fail is true, the item is deleted + // and handle is set to nullptr. Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge, Cache::DeleterFn deleter, Cache::Handle** handle, Cache::Priority priority) override; @@ -393,13 +679,18 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard { private: friend class ClockCache; - void ClockRemove(ClockHandle* e); - void ClockInsert(ClockHandle* e); + + // Makes an element evictable by clock. + void ClockOn(ClockHandle* h); + + // Makes an element non-evictable. + void ClockOff(ClockHandle* h); + + // Requires an exclusive ref on h. + void Evict(ClockHandle* h); // Free some space following strict clock policy until enough space - // to hold (usage_ + charge) is freed or the clock list is empty - // This function is not thread safe - it needs to be executed while - // holding the mutex_. + // to hold (usage_ + charge) is freed or there are no evictable elements. void EvictFromClock(size_t charge, autovector* deleted); // Returns the charge of a single handle. @@ -436,9 +727,6 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard { // Memory size for entries residing in the cache. size_t usage_; - // Memory size for unpinned entries in the clock list. - size_t clock_usage_; - // mutex_ protects the following state. // We don't count mutex_ as the cache's internal state so semantically we // don't mind mutex_ invoking the non-const actions. diff --git a/cache/fast_lru_cache.cc b/cache/fast_lru_cache.cc index 0152b6fbe..a425204b9 100644 --- a/cache/fast_lru_cache.cc +++ b/cache/fast_lru_cache.cc @@ -52,6 +52,7 @@ LRUHandle* LRUHandleTable::Insert(LRUHandle* h, LRUHandle** old) { 1 /*displacement*/); *old = nullptr; if (slot == -1) { + // TODO(Guido) Don't we need to roll back displacements here? return nullptr; }
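
On the TODO above: it does look like the displacements incremented along the probe path would be leaked when Insert gives up, so an undo pass similar to ClockHandleTable::Rollback seems needed. A self-contained sketch of that idea over a bare array of displacement counters (base and increment stand in for the two hashes of the key):

#include <cstdint>
#include <vector>

void RollbackDisplacements(std::vector<uint32_t>& displacements, uint32_t base,
                           uint32_t increment, uint32_t probes_taken) {
  const uint32_t size = static_cast<uint32_t>(displacements.size());
  uint32_t current = base % size;
  for (uint32_t i = 0; i < probes_taken; i++) {
    displacements[current]--;  // Undo the increment made while probing.
    current = (current + increment) % size;
  }
}
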