From 0f91c72adc977c3895f8320b13ae4ef2b8633756 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 16 Sep 2022 12:47:29 -0700 Subject: [PATCH] Call experimental new clock cache HyperClockCache (#10684) Summary: This change establishes a distinctive name for the experimental new lock-free clock cache (originally developed by guidotag and revamped in PR https://github.com/facebook/rocksdb/issues/10626). A few reasons: * We want to make it clear that this is a fundamentally different implementation vs. the old clock cache, to avoid people saying "I already tried clock cache." * We want to highlight the key feature: it's fast (especially under parallel load) * Because it requires an estimated charge per entry, it is not drop-in API compatible with old clock cache. This estimate might always be required for highest performance, and giving it a distinct name should reduce confusion about the distinct API requirements. * We might develop a variant requiring the same estimate parameter but with LRU eviction. In that case, using the name HyperLRUCache should make things more clear. (FastLRUCache is just a prototype that might soon be removed.) Some API detail: * To reduce copy-pasting parameter lists, etc. as in LRUCache construction, I have a `MakeSharedCache()` function on `HyperClockCacheOptions` instead of `NewHyperClockCache()`. * Changes -cache_type=clock_cache to -cache_type=hyper_clock_cache for applicable tools. I think this is more consistent / sustainable for reasons already stated. For performance tests see https://github.com/facebook/rocksdb/pull/10626 Pull Request resolved: https://github.com/facebook/rocksdb/pull/10684 Test Plan: no interesting functional changes; tests updated Reviewed By: anand1976 Differential Revision: D39547800 Pulled By: pdillinger fbshipit-source-id: 5c0fe1b5cf3cb680ab369b928c8569682b9795bf --- HISTORY.md | 1 + cache/cache_bench_tool.cc | 14 ++- cache/cache_test.cc | 65 ++++++------ cache/clock_cache.cc | 46 ++++----- cache/clock_cache.h | 31 +++--- cache/lru_cache_test.cc | 20 ++-- db/db_block_cache_test.cc | 22 ++-- db_stress_tool/db_stress_test_base.cc | 16 ++- include/rocksdb/cache.h | 141 +++++++++++++++++++------- tools/db_bench_tool.cc | 17 ++-- tools/db_crashtest.py | 2 +- 11 files changed, 216 insertions(+), 159 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 9615b07cf..eacde6fd8 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -32,6 +32,7 @@ * RocksDB does internal auto prefetching if it notices 2 sequential reads if readahead_size is not specified. New option `num_file_reads_for_auto_readahead` is added in BlockBasedTableOptions which indicates after how many sequential reads internal auto prefetching should be start (default is 2). * Added new perf context counters `block_cache_standalone_handle_count`, `block_cache_real_handle_count`,`compressed_sec_cache_insert_real_count`, `compressed_sec_cache_insert_dummy_count`, `compressed_sec_cache_uncompressed_bytes`, and `compressed_sec_cache_compressed_bytes`. * Memory for blobs which are to be inserted into the blob cache is now allocated using the cache's allocator (see #10628 and #10647). +* HyperClockCache is an experimental, lock-free Cache alternative for block cache that offers much improved CPU efficiency under high parallel load or high contention, with some caveats. As much as 4.5x higher ops/sec vs. LRUCache has been seen in db_bench under high parallel load. ### Performance Improvements * Iterator performance is improved for `DeleteRange()` users. 
Internally, iterator will skip to the end of a range tombstone when possible, instead of looping through each key and checking individually if a key is range deleted. diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index dd36a5f06..4accf7ba0 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -13,7 +13,6 @@ #include #include -#include "cache/clock_cache.h" #include "cache/fast_lru_cache.h" #include "db/db_impl/db_impl.h" #include "monitoring/histogram.h" @@ -292,13 +291,12 @@ class CacheBench { } if (FLAGS_cache_type == "clock_cache") { - cache_ = ExperimentalNewClockCache( - FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits, - false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy); - if (!cache_) { - fprintf(stderr, "Clock cache not supported.\n"); - exit(1); - } + fprintf(stderr, "Old clock cache implementation has been removed.\n"); + exit(1); + } else if (FLAGS_cache_type == "hyper_clock_cache") { + cache_ = HyperClockCacheOptions(FLAGS_cache_size, FLAGS_value_bytes, + FLAGS_num_shard_bits) + .MakeSharedCache(); } else if (FLAGS_cache_type == "fast_lru_cache") { cache_ = NewFastLRUCache( FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits, diff --git a/cache/cache_test.cc b/cache/cache_test.cc index 14b6e44d9..1a8bae4df 100644 --- a/cache/cache_test.cc +++ b/cache/cache_test.cc @@ -15,7 +15,6 @@ #include #include -#include "cache/clock_cache.h" #include "cache/fast_lru_cache.h" #include "cache/lru_cache.h" #include "port/stack_trace.h" @@ -23,7 +22,7 @@ #include "util/coding.h" #include "util/string_util.h" -// FastLRUCache and ClockCache only support 16-byte keys, so some of +// FastLRUCache and HyperClockCache only support 16-byte keys, so some of // the tests originally written for LRUCache do not work on the other caches. // Those tests were adapted to use 16-byte keys. We kept the original ones. // TODO: Remove the original tests if they ever become unused. 
@@ -76,7 +75,7 @@ void EraseDeleter2(const Slice& /*key*/, void* value) { } const std::string kLRU = "lru"; -const std::string kClock = "clock"; +const std::string kHyperClock = "hyper_clock"; const std::string kFast = "fast"; } // anonymous namespace @@ -87,7 +86,7 @@ class CacheTest : public testing::TestWithParam { static std::string type_; static void Deleter(const Slice& key, void* v) { - if (type_ == kFast || type_ == kClock) { + if (type_ == kFast || type_ == kHyperClock) { current_->deleted_keys_.push_back(DecodeKey16Bytes(key)); } else { current_->deleted_keys_.push_back(DecodeKey32Bits(key)); @@ -122,10 +121,10 @@ class CacheTest : public testing::TestWithParam { if (type == kLRU) { return NewLRUCache(capacity); } - if (type == kClock) { - return ExperimentalNewClockCache( - capacity, estimated_value_size_, -1 /*num_shard_bits*/, - false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy); + if (type == kHyperClock) { + return HyperClockCacheOptions( + capacity, estimated_value_size_ /*estimated_value_size*/) + .MakeSharedCache(); } if (type == kFast) { return NewFastLRUCache( @@ -148,10 +147,11 @@ class CacheTest : public testing::TestWithParam { co.metadata_charge_policy = charge_policy; return NewLRUCache(co); } - if (type == kClock) { - return ExperimentalNewClockCache(capacity, 1 /*estimated_value_size*/, - num_shard_bits, strict_capacity_limit, - charge_policy); + if (type == kHyperClock) { + return HyperClockCacheOptions(capacity, 1 /*estimated_value_size*/, + num_shard_bits, strict_capacity_limit, + nullptr /*allocator*/, charge_policy) + .MakeSharedCache(); } if (type == kFast) { return NewFastLRUCache(capacity, 1 /*estimated_value_size*/, @@ -163,12 +163,11 @@ class CacheTest : public testing::TestWithParam { // These functions encode/decode keys in tests cases that use // int keys. - // Currently, FastLRUCache requires keys to be 16B long, whereas - // LRUCache and ClockCache don't, so the encoding depends on - // the cache type. + // Currently, HyperClockCache requires keys to be 16B long, whereas + // LRUCache doesn't, so the encoding depends on the cache type. 
std::string EncodeKey(int k) { auto type = GetParam(); - if (type == kFast || type == kClock) { + if (type == kFast || type == kHyperClock) { return EncodeKey16Bytes(k); } else { return EncodeKey32Bits(k); @@ -177,7 +176,7 @@ class CacheTest : public testing::TestWithParam { int DecodeKey(const Slice& k) { auto type = GetParam(); - if (type == kFast || type == kClock) { + if (type == kFast || type == kHyperClock) { return DecodeKey16Bytes(k); } else { return DecodeKey32Bits(k); @@ -242,7 +241,7 @@ TEST_P(CacheTest, UsageTest) { auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata); ASSERT_EQ(0, cache->GetUsage()); size_t baseline_meta_usage = precise_cache->GetUsage(); - if (type != kClock) { + if (type != kHyperClock) { ASSERT_EQ(0, baseline_meta_usage); } @@ -263,7 +262,7 @@ TEST_P(CacheTest, UsageTest) { kv_size, DumbDeleter)); usage += kv_size; ASSERT_EQ(usage, cache->GetUsage()); - if (type == kClock) { + if (type == kHyperClock) { ASSERT_EQ(baseline_meta_usage + usage, precise_cache->GetUsage()); } else { ASSERT_LT(usage, precise_cache->GetUsage()); @@ -293,7 +292,7 @@ TEST_P(CacheTest, UsageTest) { ASSERT_GT(kCapacity, cache->GetUsage()); ASSERT_GT(kCapacity, precise_cache->GetUsage()); ASSERT_LT(kCapacity * 0.95, cache->GetUsage()); - if (type != kClock) { + if (type != kHyperClock) { ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage()); } else { // estimated value size of 1 is weird for clock cache, because @@ -319,7 +318,7 @@ TEST_P(CacheTest, PinnedUsageTest) { auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata); size_t baseline_meta_usage = precise_cache->GetUsage(); - if (type != kClock) { + if (type != kHyperClock) { ASSERT_EQ(0, baseline_meta_usage); } @@ -428,7 +427,7 @@ TEST_P(CacheTest, HitAndMiss) { ASSERT_EQ(-1, Lookup(300)); Insert(100, 102); - if (GetParam() == kClock) { + if (GetParam() == kHyperClock) { // ClockCache usually doesn't overwrite on Insert ASSERT_EQ(101, Lookup(100)); } else { @@ -439,7 +438,7 @@ TEST_P(CacheTest, HitAndMiss) { ASSERT_EQ(1U, deleted_keys_.size()); ASSERT_EQ(100, deleted_keys_[0]); - if (GetParam() == kClock) { + if (GetParam() == kHyperClock) { ASSERT_EQ(102, deleted_values_[0]); } else { ASSERT_EQ(101, deleted_values_[0]); @@ -447,7 +446,7 @@ TEST_P(CacheTest, HitAndMiss) { } TEST_P(CacheTest, InsertSameKey) { - if (GetParam() == kClock) { + if (GetParam() == kHyperClock) { ROCKSDB_GTEST_BYPASS( "ClockCache doesn't guarantee Insert overwrite same key."); return; @@ -477,7 +476,7 @@ TEST_P(CacheTest, Erase) { } TEST_P(CacheTest, EntriesArePinned) { - if (GetParam() == kClock) { + if (GetParam() == kHyperClock) { ROCKSDB_GTEST_BYPASS( "ClockCache doesn't guarantee Insert overwrite same key."); return; @@ -543,7 +542,7 @@ TEST_P(CacheTest, ExternalRefPinsEntries) { Insert(1000 + j, 2000 + j); } // Clock cache is even more stateful and needs more churn to evict - if (GetParam() == kClock) { + if (GetParam() == kHyperClock) { for (int j = 0; j < kCacheSize; j++) { Insert(11000 + j, 11000 + j); } @@ -742,9 +741,9 @@ TEST_P(CacheTest, ReleaseWithoutErase) { TEST_P(CacheTest, SetCapacity) { auto type = GetParam(); - if (type == kFast || type == kClock) { + if (type == kFast || type == kHyperClock) { ROCKSDB_GTEST_BYPASS( - "FastLRUCache and ClockCache don't support arbitrary capacity " + "FastLRUCache and HyperClockCache don't support arbitrary capacity " "adjustments."); return; } @@ -883,7 +882,7 @@ TEST_P(CacheTest, 
OverCapacity) { cache->Release(handles[i]); } - if (GetParam() == kClock) { + if (GetParam() == kHyperClock) { // Make sure eviction is triggered. ASSERT_OK(cache->Insert(EncodeKey(-1), nullptr, 1, &deleter, &handles[0])); @@ -1020,7 +1019,8 @@ TEST_P(CacheTest, DefaultShardBits) { // Prevent excessive allocation (to save time & space) estimated_value_size_ = 100000; // Implementations use different minimum shard sizes - size_t min_shard_size = (GetParam() == kClock ? 32U * 1024U : 512U) * 1024U; + size_t min_shard_size = + (GetParam() == kHyperClock ? 32U * 1024U : 512U) * 1024U; std::shared_ptr cache = NewCache(32U * min_shard_size); ShardedCache* sc = dynamic_cast(cache.get()); @@ -1052,11 +1052,8 @@ TEST_P(CacheTest, GetChargeAndDeleter) { cache_->Release(h1); } -std::shared_ptr (*new_clock_cache_func)(size_t, size_t, int, bool, - CacheMetadataChargePolicy) = - ExperimentalNewClockCache; INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, - testing::Values(kLRU, kClock, kFast)); + testing::Values(kLRU, kHyperClock, kFast)); INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest, testing::Values(kLRU, kFast)); diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index 3bff5feee..0b07542c4 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -21,7 +21,7 @@ namespace ROCKSDB_NAMESPACE { -namespace clock_cache { +namespace hyper_clock_cache { static_assert(sizeof(ClockHandle) == 64U, "Expecting size / alignment with common cache line size"); @@ -1126,9 +1126,10 @@ size_t ClockCacheShard::GetTableAddressCount() const { return table_.GetTableSize(); } -ClockCache::ClockCache(size_t capacity, size_t estimated_value_size, - int num_shard_bits, bool strict_capacity_limit, - CacheMetadataChargePolicy metadata_charge_policy) +HyperClockCache::HyperClockCache( + size_t capacity, size_t estimated_value_size, int num_shard_bits, + bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy) : ShardedCache(capacity, num_shard_bits, strict_capacity_limit), num_shards_(1 << num_shard_bits) { assert(estimated_value_size > 0 || @@ -1145,7 +1146,7 @@ ClockCache::ClockCache(size_t capacity, size_t estimated_value_size, } } -ClockCache::~ClockCache() { +HyperClockCache::~HyperClockCache() { if (shards_ != nullptr) { assert(num_shards_ > 0); for (int i = 0; i < num_shards_; i++) { @@ -1155,32 +1156,32 @@ ClockCache::~ClockCache() { } } -CacheShard* ClockCache::GetShard(uint32_t shard) { +CacheShard* HyperClockCache::GetShard(uint32_t shard) { return reinterpret_cast(&shards_[shard]); } -const CacheShard* ClockCache::GetShard(uint32_t shard) const { +const CacheShard* HyperClockCache::GetShard(uint32_t shard) const { return reinterpret_cast(&shards_[shard]); } -void* ClockCache::Value(Handle* handle) { +void* HyperClockCache::Value(Handle* handle) { return reinterpret_cast(handle)->value; } -size_t ClockCache::GetCharge(Handle* handle) const { +size_t HyperClockCache::GetCharge(Handle* handle) const { return reinterpret_cast(handle)->total_charge; } -Cache::DeleterFn ClockCache::GetDeleter(Handle* handle) const { +Cache::DeleterFn HyperClockCache::GetDeleter(Handle* handle) const { auto h = reinterpret_cast(handle); return h->deleter; } -uint32_t ClockCache::GetHash(Handle* handle) const { +uint32_t HyperClockCache::GetHash(Handle* handle) const { return reinterpret_cast(handle)->hash; } -void ClockCache::DisownData() { +void HyperClockCache::DisownData() { // Leak data only if that won't generate an ASAN/valgrind warning. 
if (!kMustFreeHeapAllocations) { shards_ = nullptr; @@ -1188,8 +1189,9 @@ void ClockCache::DisownData() { } } -} // namespace clock_cache +} // namespace hyper_clock_cache +// DEPRECATED (see public API) std::shared_ptr NewClockCache( size_t capacity, int num_shard_bits, bool strict_capacity_limit, CacheMetadataChargePolicy metadata_charge_policy) { @@ -1199,22 +1201,20 @@ std::shared_ptr NewClockCache( /* low_pri_pool_ratio */ 0.0); } -std::shared_ptr ExperimentalNewClockCache( - size_t capacity, size_t estimated_value_size, int num_shard_bits, - bool strict_capacity_limit, - CacheMetadataChargePolicy metadata_charge_policy) { - if (num_shard_bits >= 20) { +std::shared_ptr HyperClockCacheOptions::MakeSharedCache() const { + auto my_num_shard_bits = num_shard_bits; + if (my_num_shard_bits >= 20) { return nullptr; // The cache cannot be sharded into too many fine pieces. } - if (num_shard_bits < 0) { + if (my_num_shard_bits < 0) { // Use larger shard size to reduce risk of large entries clustering // or skewing individual shards. constexpr size_t min_shard_size = 32U * 1024U * 1024U; - num_shard_bits = GetDefaultCacheShardBits(capacity, min_shard_size); + my_num_shard_bits = GetDefaultCacheShardBits(capacity, min_shard_size); } - return std::make_shared( - capacity, estimated_value_size, num_shard_bits, strict_capacity_limit, - metadata_charge_policy); + return std::make_shared( + capacity, estimated_entry_charge, my_num_shard_bits, + strict_capacity_limit, metadata_charge_policy); } } // namespace ROCKSDB_NAMESPACE diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 8ceb46478..a68514e36 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -27,22 +27,22 @@ namespace ROCKSDB_NAMESPACE { -namespace clock_cache { +namespace hyper_clock_cache { // Forward declaration of friend class. class ClockCacheTest; -// ClockCache is an experimental alternative to LRUCache. +// HyperClockCache is an experimental alternative to LRUCache. // // Benefits // -------- // * Fully lock free (no waits or spins) for efficiency under high concurrency // * Optimized for hot path reads. For concurrency control, most Lookup() and // essentially all Release() are a single atomic add operation. +// * Eviction on insertion is fully parallel and lock-free. // * Uses a generalized + aging variant of CLOCK eviction that might outperform // LRU in some cases. (For background, see // https://en.wikipedia.org/wiki/Page_replacement_algorithm) -// * Eviction on insertion is fully parallel and lock-free. 
// // Costs // ----- @@ -582,20 +582,20 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard { std::atomic strict_capacity_limit_; }; // class ClockCacheShard -class ClockCache +class HyperClockCache #ifdef NDEBUG final #endif : public ShardedCache { public: - ClockCache(size_t capacity, size_t estimated_value_size, int num_shard_bits, - bool strict_capacity_limit, - CacheMetadataChargePolicy metadata_charge_policy = - kDontChargeCacheMetadata); + HyperClockCache(size_t capacity, size_t estimated_value_size, + int num_shard_bits, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy = + kDontChargeCacheMetadata); - ~ClockCache() override; + ~HyperClockCache() override; - const char* Name() const override { return "ClockCache"; } + const char* Name() const override { return "HyperClockCache"; } CacheShard* GetShard(uint32_t shard) override; @@ -615,15 +615,8 @@ class ClockCache ClockCacheShard* shards_ = nullptr; int num_shards_; -}; // class ClockCache - -} // namespace clock_cache +}; // class HyperClockCache -// Only for internal testing, temporarily replacing NewClockCache. -// TODO(Guido) Remove once NewClockCache constructs a ClockCache again. -extern std::shared_ptr ExperimentalNewClockCache( - size_t capacity, size_t estimated_value_size, int num_shard_bits, - bool strict_capacity_limit, - CacheMetadataChargePolicy metadata_charge_policy); +} // namespace hyper_clock_cache } // namespace ROCKSDB_NAMESPACE diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index 1b70bde2d..f42404cce 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -506,7 +506,7 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) { } // namespace fast_lru_cache -namespace clock_cache { +namespace hyper_clock_cache { class ClockCacheTest : public testing::Test { public: @@ -975,9 +975,11 @@ TEST_F(ClockCacheTest, TableSizesTest) { SCOPED_TRACE("est_count = " + std::to_string(est_count)); size_t capacity = static_cast(est_val_size * est_count); // kDontChargeCacheMetadata - auto cache = ExperimentalNewClockCache( - capacity, est_val_size, /*num shard_bits*/ -1, - /*strict_capacity_limit*/ false, kDontChargeCacheMetadata); + auto cache = HyperClockCacheOptions( + capacity, est_val_size, /*num shard_bits*/ -1, + /*strict_capacity_limit*/ false, + /*memory_allocator*/ nullptr, kDontChargeCacheMetadata) + .MakeSharedCache(); // Table sizes are currently only powers of two EXPECT_GE(cache->GetTableAddressCount(), est_count / kLoadFactor); EXPECT_LE(cache->GetTableAddressCount(), est_count / kLoadFactor * 2.0); @@ -989,9 +991,11 @@ TEST_F(ClockCacheTest, TableSizesTest) { // doubling the table size could cut by 90% the space available to // values. Therefore, we omit those weird cases for now. 
if (est_val_size >= 512) { - cache = ExperimentalNewClockCache( - capacity, est_val_size, /*num shard_bits*/ -1, - /*strict_capacity_limit*/ false, kFullChargeCacheMetadata); + cache = HyperClockCacheOptions( + capacity, est_val_size, /*num shard_bits*/ -1, + /*strict_capacity_limit*/ false, + /*memory_allocator*/ nullptr, kFullChargeCacheMetadata) + .MakeSharedCache(); double est_count_after_meta = (capacity - cache->GetUsage()) * 1.0 / est_val_size; EXPECT_GE(cache->GetTableAddressCount(), @@ -1003,7 +1007,7 @@ TEST_F(ClockCacheTest, TableSizesTest) { } } -} // namespace clock_cache +} // namespace hyper_clock_cache class TestSecondaryCache : public SecondaryCache { public: diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index d550c5225..6c335febc 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -13,7 +13,6 @@ #include "cache/cache_entry_roles.h" #include "cache/cache_key.h" -#include "cache/clock_cache.h" #include "cache/fast_lru_cache.h" #include "cache/lru_cache.h" #include "db/column_family.h" @@ -938,16 +937,14 @@ TEST_F(DBBlockCacheTest, AddRedundantStats) { int iterations_tested = 0; for (std::shared_ptr base_cache : {NewLRUCache(capacity, num_shard_bits), - ExperimentalNewClockCache( + HyperClockCacheOptions( capacity, BlockBasedTableOptions().block_size /*estimated_value_size*/, - num_shard_bits, false /*strict_capacity_limit*/, - kDefaultCacheMetadataChargePolicy), - NewFastLRUCache( - capacity, - BlockBasedTableOptions().block_size /*estimated_value_size*/, - num_shard_bits, false /*strict_capacity_limit*/, - kDefaultCacheMetadataChargePolicy)}) { + num_shard_bits) + .MakeSharedCache(), + NewFastLRUCache(capacity, 1 /*estimated_value_size*/, num_shard_bits, + false /*strict_capacity_limit*/, + kDefaultCacheMetadataChargePolicy)}) { if (!base_cache) { // Skip clock cache when not supported continue; @@ -1302,11 +1299,10 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) { for (bool partition : {false, true}) { for (std::shared_ptr cache : {NewLRUCache(capacity), - ExperimentalNewClockCache( + HyperClockCacheOptions( capacity, - BlockBasedTableOptions().block_size /*estimated_value_size*/, - -1 /*num_shard_bits*/, false /*strict_capacity_limit*/, - kDefaultCacheMetadataChargePolicy)}) { + BlockBasedTableOptions().block_size /*estimated_value_size*/) + .MakeSharedCache()}) { if (!cache) { // Skip clock cache when not supported continue; diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 03f7e1a13..1b850d425 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -10,7 +10,6 @@ #include "util/compression.h" #ifdef GFLAGS -#include "cache/clock_cache.h" #include "cache/fast_lru_cache.h" #include "db_stress_tool/db_stress_common.h" #include "db_stress_tool/db_stress_compaction_filter.h" @@ -115,14 +114,13 @@ std::shared_ptr StressTest::NewCache(size_t capacity, } if (FLAGS_cache_type == "clock_cache") { - auto cache = ExperimentalNewClockCache( - static_cast(capacity), FLAGS_block_size, num_shard_bits, - false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy); - if (!cache) { - fprintf(stderr, "Clock cache not supported."); - exit(1); - } - return cache; + fprintf(stderr, "Old clock cache implementation has been removed.\n"); + exit(1); + } else if (FLAGS_cache_type == "hyper_clock_cache") { + return HyperClockCacheOptions(static_cast(capacity), + FLAGS_block_size /*estimated_entry_charge*/, + num_shard_bits) + .MakeSharedCache(); } else if 
(FLAGS_cache_type == "fast_lru_cache") { return NewFastLRUCache(static_cast(capacity), FLAGS_block_size, num_shard_bits, false /*strict_capacity_limit*/, diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 119cf959c..004c30376 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -42,24 +42,64 @@ class SecondaryCache; extern const bool kDefaultToAdaptiveMutex; enum CacheMetadataChargePolicy { + // Only the `charge` of each entry inserted into a Cache counts against + // the `capacity` kDontChargeCacheMetadata, + // In addition to the `charge`, the approximate space overheads in the + // Cache (in bytes) also count against `capacity`. These space overheads + // are for supporting fast Lookup and managing the lifetime of entries. kFullChargeCacheMetadata }; const CacheMetadataChargePolicy kDefaultCacheMetadataChargePolicy = kFullChargeCacheMetadata; -struct LRUCacheOptions { - // Capacity of the cache. +// Options shared between various cache implementations that +// divide the key space into shards using hashing. +struct ShardedCacheOptions { + // Capacity of the cache, in the same units as the `charge` of each entry. + // This is typically measured in bytes, but can be a different unit if using + // kDontChargeCacheMetadata. size_t capacity = 0; // Cache is sharded into 2^num_shard_bits shards, by hash of key. - // Refer to NewLRUCache for further information. + // If < 0, a good default is chosen based on the capacity and the + // implementation. (Mutex-based implementations are much more reliant + // on many shards for parallel scalability.) int num_shard_bits = -1; - // If strict_capacity_limit is set, - // insert to the cache will fail when cache is full. + // If strict_capacity_limit is set, Insert() will fail if there is not + // enough capacity for the new entry along with all the existing referenced + // (pinned) cache entries. (Unreferenced cache entries are evicted as + // needed, sometimes immediately.) If strict_capacity_limit == false + // (default), Insert() never fails. bool strict_capacity_limit = false; + // If non-nullptr, RocksDB will use this allocator instead of system + // allocator when allocating memory for cache blocks. + // + // Caveat: when the cache is used as block cache, the memory allocator is + // ignored when dealing with compression libraries that allocate memory + // internally (currently only XPRESS). + std::shared_ptr memory_allocator; + + // See CacheMetadataChargePolicy + CacheMetadataChargePolicy metadata_charge_policy = + kDefaultCacheMetadataChargePolicy; + + ShardedCacheOptions() {} + ShardedCacheOptions( + size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit, + std::shared_ptr _memory_allocator = nullptr, + CacheMetadataChargePolicy _metadata_charge_policy = + kDefaultCacheMetadataChargePolicy) + : capacity(_capacity), + num_shard_bits(_num_shard_bits), + strict_capacity_limit(_strict_capacity_limit), + memory_allocator(std::move(_memory_allocator)), + metadata_charge_policy(_metadata_charge_policy) {} +}; + +struct LRUCacheOptions : public ShardedCacheOptions { // Percentage of cache reserved for high priority entries. // If greater than zero, the LRU list will be split into a high-pri // list and a low-pri list. High-pri entries will be inserted to the @@ -83,24 +123,12 @@ struct LRUCacheOptions { // See also high_pri_pool_ratio. double low_pri_pool_ratio = 0.0; - // If non-nullptr will use this allocator instead of system allocator when - // allocating memory for cache blocks. 
Call this method before you start using - the cache! - // - // Caveat: when the cache is used as block cache, the memory allocator is - // ignored when dealing with compression libraries that allocate memory - // internally (currently only XPRESS). - std::shared_ptr memory_allocator; - // Whether to use adaptive mutexes for cache shards. Note that adaptive // mutexes need to be supported by the platform in order for this to have any // effect. The default value is true if RocksDB is compiled with // -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false otherwise. bool use_adaptive_mutex = kDefaultToAdaptiveMutex; - CacheMetadataChargePolicy metadata_charge_policy = - kDefaultCacheMetadataChargePolicy; - // A SecondaryCache instance to use as the non-volatile tier. std::shared_ptr secondary_cache; @@ -112,14 +140,12 @@ struct LRUCacheOptions { CacheMetadataChargePolicy _metadata_charge_policy = kDefaultCacheMetadataChargePolicy, double _low_pri_pool_ratio = 0.0) - : capacity(_capacity), - num_shard_bits(_num_shard_bits), - strict_capacity_limit(_strict_capacity_limit), + : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit, + std::move(_memory_allocator), + _metadata_charge_policy), high_pri_pool_ratio(_high_pri_pool_ratio), low_pri_pool_ratio(_low_pri_pool_ratio), - memory_allocator(std::move(_memory_allocator)), - use_adaptive_mutex(_use_adaptive_mutex), - metadata_charge_policy(_metadata_charge_policy) {} + use_adaptive_mutex(_use_adaptive_mutex) {} }; // Create a new cache with a fixed size capacity. The cache is sharded @@ -190,18 +216,65 @@ extern std::shared_ptr NewCompressedSecondaryCache( extern std::shared_ptr NewCompressedSecondaryCache( const CompressedSecondaryCacheOptions& opts); -// EXPERIMENTAL Currently ClockCache is under development, although it's -// already exposed in the public API. To avoid unreliable performance and -// correctness issues, NewClockCache will temporarily return an LRUCache -// constructed with the corresponding arguments. +// HyperClockCache - EXPERIMENTAL // -// TODO(Guido) When ClockCache is complete, roll back to the old text: -// `` -// Similar to NewLRUCache, but create a cache based on clock algorithm with -// better concurrent performance in some cases. See util/clock_cache.cc for -// more detail. -// Return nullptr if it is not supported. -// `` +// A lock-free Cache alternative for RocksDB block cache that offers much +// improved CPU efficiency under high parallel load or high contention, with +// some caveats. +// +// See internal cache/clock_cache.h for full description. +struct HyperClockCacheOptions : public ShardedCacheOptions { + // The estimated average `charge` associated with cache entries. This is a + // critical configuration parameter for good performance from the hyper + // cache, because having a table size that is fixed at creation time greatly + // reduces the required synchronization between threads. + // * If the estimate is substantially too low (e.g. less than half the true + // average) then metadata space overhead will be substantially higher (e.g. + // 200 bytes per entry rather than 100). With kFullChargeCacheMetadata, this + // can slightly reduce cache hit rates, and slightly reduce access times due + // to the larger working memory size. + // * If the estimate is substantially too high (e.g. 25% higher than the true + // average) then there might not be sufficient slots in the hash table for + // both efficient operation and capacity utilization (hit rate). 
The hyper + // cache will evict entries to prevent load factors that could dramatically + // affect lookup times, instead letting the hit rate suffer by not utilizing + // the full capacity. + // + // A reasonable choice is the larger of block_size and metadata_block_size. + // When WriteBufferManager (and similar) charge memory usage to the block + // cache, this can lead to the same effect as estimate being too low, which + // is better than the opposite. Therefore, the general recommendation is to + // assume that other memory charged to block cache could be negligible, and + // ignore it in making the estimate. + // + // The best parameter choice based on a cache in use is given by + // GetUsage() / GetOccupancyCount(), ignoring metadata overheads such as + // with kDontChargeCacheMetadata. More precisely with + // kFullChargeCacheMetadata is (GetUsage() - 64 * GetTableAddressCount()) / + // GetOccupancyCount(). However, when the average value size might vary + // (e.g. balance between metadata and data blocks in cache), it is better + // to estimate toward the lower side than the higher side. + size_t estimated_entry_charge; + + HyperClockCacheOptions( + size_t _capacity, size_t _estimated_entry_charge, + int _num_shard_bits = -1, bool _strict_capacity_limit = false, + std::shared_ptr _memory_allocator = nullptr, + CacheMetadataChargePolicy _metadata_charge_policy = + kDefaultCacheMetadataChargePolicy) + : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit, + std::move(_memory_allocator), + _metadata_charge_policy), + estimated_entry_charge(_estimated_entry_charge) {} + + // Construct an instance of HyperClockCache using these options + std::shared_ptr MakeSharedCache() const; +}; + +// DEPRECATED - The old Clock Cache implementation had an unresolved bug and +// has been removed. The new HyperClockCache requires an additional +// configuration parameter that is not provided by this API. This function +// simply returns a new LRUCache for functional compatibility. 
extern std::shared_ptr NewClockCache( size_t capacity, int num_shard_bits = -1, bool strict_capacity_limit = false, diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 50c143f5f..6c5457a54 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -37,7 +37,6 @@ #include #include -#include "cache/clock_cache.h" #include "cache/fast_lru_cache.h" #include "db/db_impl/db_impl.h" #include "db/malloc_stats.h" @@ -3057,15 +3056,13 @@ class Benchmark { return nullptr; } if (FLAGS_cache_type == "clock_cache") { - auto cache = ExperimentalNewClockCache( - static_cast(capacity), FLAGS_block_size, - FLAGS_cache_numshardbits, false /*strict_capacity_limit*/, - kDefaultCacheMetadataChargePolicy); - if (!cache) { - fprintf(stderr, "Clock cache not supported."); - exit(1); - } - return cache; + fprintf(stderr, "Old clock cache implementation has been removed.\n"); + exit(1); + } else if (FLAGS_cache_type == "hyper_clock_cache") { + return HyperClockCacheOptions(static_cast(capacity), + FLAGS_block_size /*estimated_entry_charge*/, + FLAGS_cache_numshardbits) + .MakeSharedCache(); } else if (FLAGS_cache_type == "fast_lru_cache") { return NewFastLRUCache(static_cast(capacity), FLAGS_block_size, FLAGS_cache_numshardbits, diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 1cdc62b94..2f7af92e3 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -116,7 +116,7 @@ default_params = { "use_direct_reads": lambda: random.randint(0, 1), "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1), "mock_direct_io": False, - "cache_type": lambda: random.choice(["lru_cache", "clock_cache"]), + "cache_type": lambda: random.choice(["lru_cache", "hyper_clock_cache"]), # fast_lru_cache is incompatible with stress tests, because it doesn't support strict_capacity_limit == false. "use_full_merge_v1": lambda: random.randint(0, 1), "use_merge": lambda: random.randint(0, 1),
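
A minimal usage sketch of the new API (not part of the diff): it uses only HyperClockCacheOptions and MakeSharedCache() from the include/rocksdb/cache.h changes above; BlockBasedTableOptions, NewBlockBasedTableFactory, and the default `rocksdb` namespace are pre-existing RocksDB public API, and the capacity/block size values are illustrative assumptions only.

  // Sketch: construct a HyperClockCache via HyperClockCacheOptions and use it
  // as the block cache of a block-based table factory.
  #include <memory>

  #include "rocksdb/cache.h"
  #include "rocksdb/options.h"
  #include "rocksdb/table.h"

  int main() {
    rocksdb::BlockBasedTableOptions table_opts;
    table_opts.block_size = 32 * 1024;  // illustrative value

    // Per the new cache.h comment, a reasonable estimated_entry_charge is the
    // larger of block_size and metadata_block_size; block_size is used here.
    rocksdb::HyperClockCacheOptions cache_opts(
        /*_capacity=*/size_t{1} << 30,  // 1 GiB, illustrative value
        /*_estimated_entry_charge=*/table_opts.block_size);
    // num_shard_bits (-1), strict_capacity_limit (false), memory_allocator
    // (nullptr), and metadata_charge_policy keep their defaults.
    std::shared_ptr<rocksdb::Cache> cache = cache_opts.MakeSharedCache();

    table_opts.block_cache = cache;
    rocksdb::Options options;
    options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_opts));
    // `options` can now be used to open a DB whose block cache is the new
    // HyperClockCache.
    return 0;
  }

For the tools touched by this patch, the equivalent selection is -cache_type=hyper_clock_cache (cache_bench, db_bench, db_stress), which pass their existing block/value size flag as the estimated entry charge.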