Call experimental new clock cache HyperClockCache (#10684)

Summary:
This change establishes a distinctive name for the experimental new lock-free clock cache (originally developed by guidotag and revamped in PR https://github.com/facebook/rocksdb/issues/10626). A few reasons:
* We want to make it clear that this is a fundamentally different implementation from the old clock cache, so that people don't dismiss it with "I already tried clock cache."
* We want to highlight the key feature: it's fast (especially under parallel load).
* Because it requires an estimated charge per entry, it is not drop-in API compatible with the old clock cache. This estimate might always be required for highest performance, and giving it a distinct name should reduce confusion about the distinct API requirements.
* We might develop a variant requiring the same estimate parameter but with LRU eviction. In that case, using the name HyperLRUCache should make things clearer. (FastLRUCache is just a prototype that might soon be removed.)

Some API detail:
* To reduce copy-pasting of parameter lists, etc. as in LRUCache construction, I provide a `MakeSharedCache()` member function on `HyperClockCacheOptions` instead of a free `NewHyperClockCache()` function (see the usage sketch after this list).
* Changes `-cache_type=clock_cache` to `-cache_type=hyper_clock_cache` for applicable tools. I think this is more consistent / sustainable for the reasons already stated.
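
For illustration, a minimal usage sketch of the new construction path (not code from this PR; the capacity and charge values are made up, and installing the cache via `BlockBasedTableOptions` is just standard RocksDB block-cache wiring):

```cpp
#include "rocksdb/cache.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

using namespace ROCKSDB_NAMESPACE;

// Build a HyperClockCache and install it as the block cache.
void UseHyperClockCache(Options& options) {
  // Illustrative values: the estimated_entry_charge should roughly match the
  // average block size expected to be cached.
  HyperClockCacheOptions cache_opts(/*capacity=*/1024 * 1024 * 1024,
                                    /*estimated_entry_charge=*/8 * 1024);
  // num_shard_bits, strict_capacity_limit, memory_allocator, and the
  // metadata charge policy keep their defaults.
  std::shared_ptr<Cache> cache = cache_opts.MakeSharedCache();

  BlockBasedTableOptions table_opts;
  table_opts.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_opts));
}
```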

For performance tests see https://github.com/facebook/rocksdb/pull/10626

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10684

Test Plan: no interesting functional changes; tests updated

Reviewed By: anand1976

Differential Revision: D39547800

Pulled By: pdillinger

fbshipit-source-id: 5c0fe1b5cf3cb680ab369b928c8569682b9795bf
Branch: main
Author: Peter Dillinger (committed by Facebook GitHub Bot)
Commit: 0f91c72adc (parent: 5724348689)

Changed files (changed line counts in parentheses):
  1. HISTORY.md (1)
  2. cache/cache_bench_tool.cc (12)
  3. cache/cache_test.cc (63)
  4. cache/clock_cache.cc (44)
  5. cache/clock_cache.h (27)
  6. cache/lru_cache_test.cc (16)
  7. db/db_block_cache_test.cc (20)
  8. db_stress_tool/db_stress_test_base.cc (14)
  9. include/rocksdb/cache.h (141)
  10. tools/db_bench_tool.cc (15)
  11. tools/db_crashtest.py (2)

@ -32,6 +32,7 @@
* RocksDB does internal auto prefetching if it notices 2 sequential reads and readahead_size is not specified. New option `num_file_reads_for_auto_readahead` is added in BlockBasedTableOptions which indicates after how many sequential reads internal auto prefetching should start (default is 2).
* Added new perf context counters `block_cache_standalone_handle_count`, `block_cache_real_handle_count`,`compressed_sec_cache_insert_real_count`, `compressed_sec_cache_insert_dummy_count`, `compressed_sec_cache_uncompressed_bytes`, and `compressed_sec_cache_compressed_bytes`.
* Memory for blobs which are to be inserted into the blob cache is now allocated using the cache's allocator (see #10628 and #10647).
* HyperClockCache is an experimental, lock-free Cache alternative for block cache that offers much improved CPU efficiency under high parallel load or high contention, with some caveats. As much as 4.5x higher ops/sec vs. LRUCache has been seen in db_bench under high parallel load.
### Performance Improvements
* Iterator performance is improved for `DeleteRange()` users. Internally, iterator will skip to the end of a range tombstone when possible, instead of looping through each key and check individually if a key is range deleted.

@ -13,7 +13,6 @@
#include <set>
#include <sstream>
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "db/db_impl/db_impl.h"
#include "monitoring/histogram.h"
@ -292,13 +291,12 @@ class CacheBench {
}
if (FLAGS_cache_type == "clock_cache") {
cache_ = ExperimentalNewClockCache(
FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits,
false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
if (!cache_) {
fprintf(stderr, "Clock cache not supported.\n");
fprintf(stderr, "Old clock cache implementation has been removed.\n");
exit(1);
}
} else if (FLAGS_cache_type == "hyper_clock_cache") {
cache_ = HyperClockCacheOptions(FLAGS_cache_size, FLAGS_value_bytes,
FLAGS_num_shard_bits)
.MakeSharedCache();
} else if (FLAGS_cache_type == "fast_lru_cache") {
cache_ = NewFastLRUCache(
FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits,

@ -15,7 +15,6 @@
#include <string>
#include <vector>
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "cache/lru_cache.h"
#include "port/stack_trace.h"
@ -23,7 +22,7 @@
#include "util/coding.h"
#include "util/string_util.h"
// FastLRUCache and ClockCache only support 16-byte keys, so some of
// FastLRUCache and HyperClockCache only support 16-byte keys, so some of
// the tests originally written for LRUCache do not work on the other caches.
// Those tests were adapted to use 16-byte keys. We kept the original ones.
// TODO: Remove the original tests if they ever become unused.
@ -76,7 +75,7 @@ void EraseDeleter2(const Slice& /*key*/, void* value) {
}
const std::string kLRU = "lru";
const std::string kClock = "clock";
const std::string kHyperClock = "hyper_clock";
const std::string kFast = "fast";
} // anonymous namespace
@ -87,7 +86,7 @@ class CacheTest : public testing::TestWithParam<std::string> {
static std::string type_;
static void Deleter(const Slice& key, void* v) {
if (type_ == kFast || type_ == kClock) {
if (type_ == kFast || type_ == kHyperClock) {
current_->deleted_keys_.push_back(DecodeKey16Bytes(key));
} else {
current_->deleted_keys_.push_back(DecodeKey32Bits(key));
@ -122,10 +121,10 @@ class CacheTest : public testing::TestWithParam<std::string> {
if (type == kLRU) {
return NewLRUCache(capacity);
}
if (type == kClock) {
return ExperimentalNewClockCache(
capacity, estimated_value_size_, -1 /*num_shard_bits*/,
false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
if (type == kHyperClock) {
return HyperClockCacheOptions(
capacity, estimated_value_size_ /*estimated_value_size*/)
.MakeSharedCache();
}
if (type == kFast) {
return NewFastLRUCache(
@ -148,10 +147,11 @@ class CacheTest : public testing::TestWithParam<std::string> {
co.metadata_charge_policy = charge_policy;
return NewLRUCache(co);
}
if (type == kClock) {
return ExperimentalNewClockCache(capacity, 1 /*estimated_value_size*/,
if (type == kHyperClock) {
return HyperClockCacheOptions(capacity, 1 /*estimated_value_size*/,
num_shard_bits, strict_capacity_limit,
charge_policy);
nullptr /*allocator*/, charge_policy)
.MakeSharedCache();
}
if (type == kFast) {
return NewFastLRUCache(capacity, 1 /*estimated_value_size*/,
@ -163,12 +163,11 @@ class CacheTest : public testing::TestWithParam<std::string> {
// These functions encode/decode keys in test cases that use
// int keys.
// Currently, FastLRUCache requires keys to be 16B long, whereas
// LRUCache and ClockCache don't, so the encoding depends on
// the cache type.
// Currently, HyperClockCache requires keys to be 16B long, whereas
// LRUCache doesn't, so the encoding depends on the cache type.
std::string EncodeKey(int k) {
auto type = GetParam();
if (type == kFast || type == kClock) {
if (type == kFast || type == kHyperClock) {
return EncodeKey16Bytes(k);
} else {
return EncodeKey32Bits(k);
@ -177,7 +176,7 @@ class CacheTest : public testing::TestWithParam<std::string> {
int DecodeKey(const Slice& k) {
auto type = GetParam();
if (type == kFast || type == kClock) {
if (type == kFast || type == kHyperClock) {
return DecodeKey16Bytes(k);
} else {
return DecodeKey32Bits(k);
@ -242,7 +241,7 @@ TEST_P(CacheTest, UsageTest) {
auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata);
ASSERT_EQ(0, cache->GetUsage());
size_t baseline_meta_usage = precise_cache->GetUsage();
if (type != kClock) {
if (type != kHyperClock) {
ASSERT_EQ(0, baseline_meta_usage);
}
@ -263,7 +262,7 @@ TEST_P(CacheTest, UsageTest) {
kv_size, DumbDeleter));
usage += kv_size;
ASSERT_EQ(usage, cache->GetUsage());
if (type == kClock) {
if (type == kHyperClock) {
ASSERT_EQ(baseline_meta_usage + usage, precise_cache->GetUsage());
} else {
ASSERT_LT(usage, precise_cache->GetUsage());
@ -293,7 +292,7 @@ TEST_P(CacheTest, UsageTest) {
ASSERT_GT(kCapacity, cache->GetUsage());
ASSERT_GT(kCapacity, precise_cache->GetUsage());
ASSERT_LT(kCapacity * 0.95, cache->GetUsage());
if (type != kClock) {
if (type != kHyperClock) {
ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage());
} else {
// estimated value size of 1 is weird for clock cache, because
@ -319,7 +318,7 @@ TEST_P(CacheTest, PinnedUsageTest) {
auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata);
size_t baseline_meta_usage = precise_cache->GetUsage();
if (type != kClock) {
if (type != kHyperClock) {
ASSERT_EQ(0, baseline_meta_usage);
}
@ -428,7 +427,7 @@ TEST_P(CacheTest, HitAndMiss) {
ASSERT_EQ(-1, Lookup(300));
Insert(100, 102);
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
// ClockCache usually doesn't overwrite on Insert
ASSERT_EQ(101, Lookup(100));
} else {
@ -439,7 +438,7 @@ TEST_P(CacheTest, HitAndMiss) {
ASSERT_EQ(1U, deleted_keys_.size());
ASSERT_EQ(100, deleted_keys_[0]);
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
ASSERT_EQ(102, deleted_values_[0]);
} else {
ASSERT_EQ(101, deleted_values_[0]);
@ -447,7 +446,7 @@ TEST_P(CacheTest, HitAndMiss) {
}
TEST_P(CacheTest, InsertSameKey) {
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
ROCKSDB_GTEST_BYPASS(
"ClockCache doesn't guarantee Insert overwrite same key.");
return;
@ -477,7 +476,7 @@ TEST_P(CacheTest, Erase) {
}
TEST_P(CacheTest, EntriesArePinned) {
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
ROCKSDB_GTEST_BYPASS(
"ClockCache doesn't guarantee Insert overwrite same key.");
return;
@ -543,7 +542,7 @@ TEST_P(CacheTest, ExternalRefPinsEntries) {
Insert(1000 + j, 2000 + j);
}
// Clock cache is even more stateful and needs more churn to evict
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
for (int j = 0; j < kCacheSize; j++) {
Insert(11000 + j, 11000 + j);
}
@ -742,9 +741,9 @@ TEST_P(CacheTest, ReleaseWithoutErase) {
TEST_P(CacheTest, SetCapacity) {
auto type = GetParam();
if (type == kFast || type == kClock) {
if (type == kFast || type == kHyperClock) {
ROCKSDB_GTEST_BYPASS(
"FastLRUCache and ClockCache don't support arbitrary capacity "
"FastLRUCache and HyperClockCache don't support arbitrary capacity "
"adjustments.");
return;
}
@ -883,7 +882,7 @@ TEST_P(CacheTest, OverCapacity) {
cache->Release(handles[i]);
}
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
// Make sure eviction is triggered.
ASSERT_OK(cache->Insert(EncodeKey(-1), nullptr, 1, &deleter, &handles[0]));
@ -1020,7 +1019,8 @@ TEST_P(CacheTest, DefaultShardBits) {
// Prevent excessive allocation (to save time & space)
estimated_value_size_ = 100000;
// Implementations use different minimum shard sizes
size_t min_shard_size = (GetParam() == kClock ? 32U * 1024U : 512U) * 1024U;
size_t min_shard_size =
(GetParam() == kHyperClock ? 32U * 1024U : 512U) * 1024U;
std::shared_ptr<Cache> cache = NewCache(32U * min_shard_size);
ShardedCache* sc = dynamic_cast<ShardedCache*>(cache.get());
@ -1052,11 +1052,8 @@ TEST_P(CacheTest, GetChargeAndDeleter) {
cache_->Release(h1);
}
std::shared_ptr<Cache> (*new_clock_cache_func)(size_t, size_t, int, bool,
CacheMetadataChargePolicy) =
ExperimentalNewClockCache;
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
testing::Values(kLRU, kClock, kFast));
testing::Values(kLRU, kHyperClock, kFast));
INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest,
testing::Values(kLRU, kFast));

@ -21,7 +21,7 @@
namespace ROCKSDB_NAMESPACE {
namespace clock_cache {
namespace hyper_clock_cache {
static_assert(sizeof(ClockHandle) == 64U,
"Expecting size / alignment with common cache line size");
@ -1126,8 +1126,9 @@ size_t ClockCacheShard::GetTableAddressCount() const {
return table_.GetTableSize();
}
ClockCache::ClockCache(size_t capacity, size_t estimated_value_size,
int num_shard_bits, bool strict_capacity_limit,
HyperClockCache::HyperClockCache(
size_t capacity, size_t estimated_value_size, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit),
num_shards_(1 << num_shard_bits) {
@ -1145,7 +1146,7 @@ ClockCache::ClockCache(size_t capacity, size_t estimated_value_size,
}
}
ClockCache::~ClockCache() {
HyperClockCache::~HyperClockCache() {
if (shards_ != nullptr) {
assert(num_shards_ > 0);
for (int i = 0; i < num_shards_; i++) {
@ -1155,32 +1156,32 @@ ClockCache::~ClockCache() {
}
}
CacheShard* ClockCache::GetShard(uint32_t shard) {
CacheShard* HyperClockCache::GetShard(uint32_t shard) {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
const CacheShard* ClockCache::GetShard(uint32_t shard) const {
const CacheShard* HyperClockCache::GetShard(uint32_t shard) const {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
void* ClockCache::Value(Handle* handle) {
void* HyperClockCache::Value(Handle* handle) {
return reinterpret_cast<const ClockHandle*>(handle)->value;
}
size_t ClockCache::GetCharge(Handle* handle) const {
size_t HyperClockCache::GetCharge(Handle* handle) const {
return reinterpret_cast<const ClockHandle*>(handle)->total_charge;
}
Cache::DeleterFn ClockCache::GetDeleter(Handle* handle) const {
Cache::DeleterFn HyperClockCache::GetDeleter(Handle* handle) const {
auto h = reinterpret_cast<const ClockHandle*>(handle);
return h->deleter;
}
uint32_t ClockCache::GetHash(Handle* handle) const {
uint32_t HyperClockCache::GetHash(Handle* handle) const {
return reinterpret_cast<const ClockHandle*>(handle)->hash;
}
void ClockCache::DisownData() {
void HyperClockCache::DisownData() {
// Leak data only if that won't generate an ASAN/valgrind warning.
if (!kMustFreeHeapAllocations) {
shards_ = nullptr;
@ -1188,8 +1189,9 @@ void ClockCache::DisownData() {
}
}
} // namespace clock_cache
} // namespace hyper_clock_cache
// DEPRECATED (see public API)
std::shared_ptr<Cache> NewClockCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
@ -1199,22 +1201,20 @@ std::shared_ptr<Cache> NewClockCache(
/* low_pri_pool_ratio */ 0.0);
}
std::shared_ptr<Cache> ExperimentalNewClockCache(
size_t capacity, size_t estimated_value_size, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
if (num_shard_bits >= 20) {
std::shared_ptr<Cache> HyperClockCacheOptions::MakeSharedCache() const {
auto my_num_shard_bits = num_shard_bits;
if (my_num_shard_bits >= 20) {
return nullptr; // The cache cannot be sharded into too many fine pieces.
}
if (num_shard_bits < 0) {
if (my_num_shard_bits < 0) {
// Use larger shard size to reduce risk of large entries clustering
// or skewing individual shards.
constexpr size_t min_shard_size = 32U * 1024U * 1024U;
num_shard_bits = GetDefaultCacheShardBits(capacity, min_shard_size);
my_num_shard_bits = GetDefaultCacheShardBits(capacity, min_shard_size);
}
return std::make_shared<clock_cache::ClockCache>(
capacity, estimated_value_size, num_shard_bits, strict_capacity_limit,
metadata_charge_policy);
return std::make_shared<hyper_clock_cache::HyperClockCache>(
capacity, estimated_entry_charge, my_num_shard_bits,
strict_capacity_limit, metadata_charge_policy);
}
} // namespace ROCKSDB_NAMESPACE

@ -27,22 +27,22 @@
namespace ROCKSDB_NAMESPACE {
namespace clock_cache {
namespace hyper_clock_cache {
// Forward declaration of friend class.
class ClockCacheTest;
// ClockCache is an experimental alternative to LRUCache.
// HyperClockCache is an experimental alternative to LRUCache.
//
// Benefits
// --------
// * Fully lock free (no waits or spins) for efficiency under high concurrency
// * Optimized for hot path reads. For concurrency control, most Lookup() and
// essentially all Release() are a single atomic add operation.
// * Eviction on insertion is fully parallel and lock-free.
// * Uses a generalized + aging variant of CLOCK eviction that might outperform
// LRU in some cases. (For background, see
// https://en.wikipedia.org/wiki/Page_replacement_algorithm)
// * Eviction on insertion is fully parallel and lock-free.
//
// Costs
// -----
@ -582,20 +582,20 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
std::atomic<bool> strict_capacity_limit_;
}; // class ClockCacheShard
class ClockCache
class HyperClockCache
#ifdef NDEBUG
final
#endif
: public ShardedCache {
public:
ClockCache(size_t capacity, size_t estimated_value_size, int num_shard_bits,
bool strict_capacity_limit,
HyperClockCache(size_t capacity, size_t estimated_value_size,
int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata);
~ClockCache() override;
~HyperClockCache() override;
const char* Name() const override { return "ClockCache"; }
const char* Name() const override { return "HyperClockCache"; }
CacheShard* GetShard(uint32_t shard) override;
@ -615,15 +615,8 @@ class ClockCache
ClockCacheShard* shards_ = nullptr;
int num_shards_;
}; // class ClockCache
}; // class HyperClockCache
} // namespace clock_cache
// Only for internal testing, temporarily replacing NewClockCache.
// TODO(Guido) Remove once NewClockCache constructs a ClockCache again.
extern std::shared_ptr<Cache> ExperimentalNewClockCache(
size_t capacity, size_t estimated_value_size, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy);
} // namespace hyper_clock_cache
} // namespace ROCKSDB_NAMESPACE

@ -506,7 +506,7 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
} // namespace fast_lru_cache
namespace clock_cache {
namespace hyper_clock_cache {
class ClockCacheTest : public testing::Test {
public:
@ -975,9 +975,11 @@ TEST_F(ClockCacheTest, TableSizesTest) {
SCOPED_TRACE("est_count = " + std::to_string(est_count));
size_t capacity = static_cast<size_t>(est_val_size * est_count);
// kDontChargeCacheMetadata
auto cache = ExperimentalNewClockCache(
auto cache = HyperClockCacheOptions(
capacity, est_val_size, /*num shard_bits*/ -1,
/*strict_capacity_limit*/ false, kDontChargeCacheMetadata);
/*strict_capacity_limit*/ false,
/*memory_allocator*/ nullptr, kDontChargeCacheMetadata)
.MakeSharedCache();
// Table sizes are currently only powers of two
EXPECT_GE(cache->GetTableAddressCount(), est_count / kLoadFactor);
EXPECT_LE(cache->GetTableAddressCount(), est_count / kLoadFactor * 2.0);
@ -989,9 +991,11 @@ TEST_F(ClockCacheTest, TableSizesTest) {
// doubling the table size could cut by 90% the space available to
// values. Therefore, we omit those weird cases for now.
if (est_val_size >= 512) {
cache = ExperimentalNewClockCache(
cache = HyperClockCacheOptions(
capacity, est_val_size, /*num shard_bits*/ -1,
/*strict_capacity_limit*/ false, kFullChargeCacheMetadata);
/*strict_capacity_limit*/ false,
/*memory_allocator*/ nullptr, kFullChargeCacheMetadata)
.MakeSharedCache();
double est_count_after_meta =
(capacity - cache->GetUsage()) * 1.0 / est_val_size;
EXPECT_GE(cache->GetTableAddressCount(),
@ -1003,7 +1007,7 @@ TEST_F(ClockCacheTest, TableSizesTest) {
}
}
} // namespace clock_cache
} // namespace hyper_clock_cache
class TestSecondaryCache : public SecondaryCache {
public:

@ -13,7 +13,6 @@
#include "cache/cache_entry_roles.h"
#include "cache/cache_key.h"
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "cache/lru_cache.h"
#include "db/column_family.h"
@ -938,15 +937,13 @@ TEST_F(DBBlockCacheTest, AddRedundantStats) {
int iterations_tested = 0;
for (std::shared_ptr<Cache> base_cache :
{NewLRUCache(capacity, num_shard_bits),
ExperimentalNewClockCache(
HyperClockCacheOptions(
capacity,
BlockBasedTableOptions().block_size /*estimated_value_size*/,
num_shard_bits, false /*strict_capacity_limit*/,
kDefaultCacheMetadataChargePolicy),
NewFastLRUCache(
capacity,
BlockBasedTableOptions().block_size /*estimated_value_size*/,
num_shard_bits, false /*strict_capacity_limit*/,
num_shard_bits)
.MakeSharedCache(),
NewFastLRUCache(capacity, 1 /*estimated_value_size*/, num_shard_bits,
false /*strict_capacity_limit*/,
kDefaultCacheMetadataChargePolicy)}) {
if (!base_cache) {
// Skip clock cache when not supported
@ -1302,11 +1299,10 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
for (bool partition : {false, true}) {
for (std::shared_ptr<Cache> cache :
{NewLRUCache(capacity),
ExperimentalNewClockCache(
HyperClockCacheOptions(
capacity,
BlockBasedTableOptions().block_size /*estimated_value_size*/,
-1 /*num_shard_bits*/, false /*strict_capacity_limit*/,
kDefaultCacheMetadataChargePolicy)}) {
BlockBasedTableOptions().block_size /*estimated_value_size*/)
.MakeSharedCache()}) {
if (!cache) {
// Skip clock cache when not supported
continue;

@ -10,7 +10,6 @@
#include "util/compression.h"
#ifdef GFLAGS
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "db_stress_tool/db_stress_common.h"
#include "db_stress_tool/db_stress_compaction_filter.h"
@ -115,14 +114,13 @@ std::shared_ptr<Cache> StressTest::NewCache(size_t capacity,
}
if (FLAGS_cache_type == "clock_cache") {
auto cache = ExperimentalNewClockCache(
static_cast<size_t>(capacity), FLAGS_block_size, num_shard_bits,
false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
if (!cache) {
fprintf(stderr, "Clock cache not supported.");
fprintf(stderr, "Old clock cache implementation has been removed.\n");
exit(1);
}
return cache;
} else if (FLAGS_cache_type == "hyper_clock_cache") {
return HyperClockCacheOptions(static_cast<size_t>(capacity),
FLAGS_block_size /*estimated_entry_charge*/,
num_shard_bits)
.MakeSharedCache();
} else if (FLAGS_cache_type == "fast_lru_cache") {
return NewFastLRUCache(static_cast<size_t>(capacity), FLAGS_block_size,
num_shard_bits, false /*strict_capacity_limit*/,

@ -42,24 +42,64 @@ class SecondaryCache;
extern const bool kDefaultToAdaptiveMutex;
enum CacheMetadataChargePolicy {
// Only the `charge` of each entry inserted into a Cache counts against
// the `capacity`
kDontChargeCacheMetadata,
// In addition to the `charge`, the approximate space overheads in the
// Cache (in bytes) also count against `capacity`. These space overheads
// are for supporting fast Lookup and managing the lifetime of entries.
kFullChargeCacheMetadata
};
const CacheMetadataChargePolicy kDefaultCacheMetadataChargePolicy =
kFullChargeCacheMetadata;
struct LRUCacheOptions {
// Capacity of the cache.
// Options shared between various cache implementations that
// divide the key space into shards using hashing.
struct ShardedCacheOptions {
// Capacity of the cache, in the same units as the `charge` of each entry.
// This is typically measured in bytes, but can be a different unit if using
// kDontChargeCacheMetadata.
size_t capacity = 0;
// Cache is sharded into 2^num_shard_bits shards, by hash of key.
// Refer to NewLRUCache for further information.
// If < 0, a good default is chosen based on the capacity and the
// implementation. (Mutex-based implementations are much more reliant
// on many shards for parallel scalability.)
int num_shard_bits = -1;
// If strict_capacity_limit is set,
// insert to the cache will fail when cache is full.
// If strict_capacity_limit is set, Insert() will fail if there is not
// enough capacity for the new entry along with all the existing referenced
// (pinned) cache entries. (Unreferenced cache entries are evicted as
// needed, sometimes immediately.) If strict_capacity_limit == false
// (default), Insert() never fails.
bool strict_capacity_limit = false;
// If non-nullptr, RocksDB will use this allocator instead of system
// allocator when allocating memory for cache blocks.
//
// Caveat: when the cache is used as block cache, the memory allocator is
// ignored when dealing with compression libraries that allocate memory
// internally (currently only XPRESS).
std::shared_ptr<MemoryAllocator> memory_allocator;
// See CacheMetadataChargePolicy
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy;
ShardedCacheOptions() {}
ShardedCacheOptions(
size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
CacheMetadataChargePolicy _metadata_charge_policy =
kDefaultCacheMetadataChargePolicy)
: capacity(_capacity),
num_shard_bits(_num_shard_bits),
strict_capacity_limit(_strict_capacity_limit),
memory_allocator(std::move(_memory_allocator)),
metadata_charge_policy(_metadata_charge_policy) {}
};
struct LRUCacheOptions : public ShardedCacheOptions {
// Percentage of cache reserved for high priority entries.
// If greater than zero, the LRU list will be split into a high-pri
// list and a low-pri list. High-pri entries will be inserted to the
@ -83,24 +123,12 @@ struct LRUCacheOptions {
// See also high_pri_pool_ratio.
double low_pri_pool_ratio = 0.0;
// If non-nullptr will use this allocator instead of system allocator when
// allocating memory for cache blocks. Call this method before you start using
// the cache!
//
// Caveat: when the cache is used as block cache, the memory allocator is
// ignored when dealing with compression libraries that allocate memory
// internally (currently only XPRESS).
std::shared_ptr<MemoryAllocator> memory_allocator;
// Whether to use adaptive mutexes for cache shards. Note that adaptive
// mutexes need to be supported by the platform in order for this to have any
// effect. The default value is true if RocksDB is compiled with
// -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false otherwise.
bool use_adaptive_mutex = kDefaultToAdaptiveMutex;
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy;
// A SecondaryCache instance to use as the non-volatile tier.
std::shared_ptr<SecondaryCache> secondary_cache;
@ -112,14 +140,12 @@ struct LRUCacheOptions {
CacheMetadataChargePolicy _metadata_charge_policy =
kDefaultCacheMetadataChargePolicy,
double _low_pri_pool_ratio = 0.0)
: capacity(_capacity),
num_shard_bits(_num_shard_bits),
strict_capacity_limit(_strict_capacity_limit),
: ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
std::move(_memory_allocator),
_metadata_charge_policy),
high_pri_pool_ratio(_high_pri_pool_ratio),
low_pri_pool_ratio(_low_pri_pool_ratio),
memory_allocator(std::move(_memory_allocator)),
use_adaptive_mutex(_use_adaptive_mutex),
metadata_charge_policy(_metadata_charge_policy) {}
use_adaptive_mutex(_use_adaptive_mutex) {}
};
// Create a new cache with a fixed size capacity. The cache is sharded
@ -190,18 +216,65 @@ extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
const CompressedSecondaryCacheOptions& opts);
// EXPERIMENTAL Currently ClockCache is under development, although it's
// already exposed in the public API. To avoid unreliable performance and
// correctness issues, NewClockCache will temporarily return an LRUCache
// constructed with the corresponding arguments.
// HyperClockCache - EXPERIMENTAL
//
// TODO(Guido) When ClockCache is complete, roll back to the old text:
// ``
// Similar to NewLRUCache, but create a cache based on clock algorithm with
// better concurrent performance in some cases. See util/clock_cache.cc for
// more detail.
// Return nullptr if it is not supported.
// ``
// A lock-free Cache alternative for RocksDB block cache that offers much
// improved CPU efficiency under high parallel load or high contention, with
// some caveats.
//
// See internal cache/clock_cache.h for full description.
struct HyperClockCacheOptions : public ShardedCacheOptions {
// The estimated average `charge` associated with cache entries. This is a
// critical configuration parameter for good performance from the hyper
// cache, because having a table size that is fixed at creation time greatly
// reduces the required synchronization between threads.
// * If the estimate is substantially too low (e.g. less than half the true
// average) then metadata space overhead will be substantially higher (e.g.
// 200 bytes per entry rather than 100). With kFullChargeCacheMetadata, this
// can slightly reduce cache hit rates, and slightly reduce access times due
// to the larger working memory size.
// * If the estimate is substantially too high (e.g. 25% higher than the true
// average) then there might not be sufficient slots in the hash table for
// both efficient operation and capacity utilization (hit rate). The hyper
// cache will evict entries to prevent load factors that could dramatically
// affect lookup times, instead letting the hit rate suffer by not utilizing
// the full capacity.
//
// A reasonable choice is the larger of block_size and metadata_block_size.
// When WriteBufferManager (and similar) charge memory usage to the block
// cache, this can lead to the same effect as estimate being too low, which
// is better than the opposite. Therefore, the general recommendation is to
// assume that other memory charged to block cache could be negligible, and
// ignore it in making the estimate.
//
// The best parameter choice based on a cache in use is given by
// GetUsage() / GetOccupancyCount(), ignoring metadata overheads such as
// with kDontChargeCacheMetadata. More precisely, with
// kFullChargeCacheMetadata it is (GetUsage() - 64 * GetTableAddressCount()) /
// GetOccupancyCount(). However, when the average value size might vary
// (e.g. balance between metadata and data blocks in cache), it is better
// to estimate toward the lower side than the higher side.
size_t estimated_entry_charge;
HyperClockCacheOptions(
size_t _capacity, size_t _estimated_entry_charge,
int _num_shard_bits = -1, bool _strict_capacity_limit = false,
std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
CacheMetadataChargePolicy _metadata_charge_policy =
kDefaultCacheMetadataChargePolicy)
: ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
std::move(_memory_allocator),
_metadata_charge_policy),
estimated_entry_charge(_estimated_entry_charge) {}
// Construct an instance of HyperClockCache using these options
std::shared_ptr<Cache> MakeSharedCache() const;
};
// DEPRECATED - The old Clock Cache implementation had an unresolved bug and
// has been removed. The new HyperClockCache requires an additional
// configuration parameter that is not provided by this API. This function
// simply returns a new LRUCache for functional compatibility.
extern std::shared_ptr<Cache> NewClockCache(
size_t capacity, int num_shard_bits = -1,
bool strict_capacity_limit = false,

@ -37,7 +37,6 @@
#include <thread>
#include <unordered_map>
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "db/db_impl/db_impl.h"
#include "db/malloc_stats.h"
@ -3057,15 +3056,13 @@ class Benchmark {
return nullptr;
}
if (FLAGS_cache_type == "clock_cache") {
auto cache = ExperimentalNewClockCache(
static_cast<size_t>(capacity), FLAGS_block_size,
FLAGS_cache_numshardbits, false /*strict_capacity_limit*/,
kDefaultCacheMetadataChargePolicy);
if (!cache) {
fprintf(stderr, "Clock cache not supported.");
fprintf(stderr, "Old clock cache implementation has been removed.\n");
exit(1);
}
return cache;
} else if (FLAGS_cache_type == "hyper_clock_cache") {
return HyperClockCacheOptions(static_cast<size_t>(capacity),
FLAGS_block_size /*estimated_entry_charge*/,
FLAGS_cache_numshardbits)
.MakeSharedCache();
} else if (FLAGS_cache_type == "fast_lru_cache") {
return NewFastLRUCache(static_cast<size_t>(capacity), FLAGS_block_size,
FLAGS_cache_numshardbits,

@ -116,7 +116,7 @@ default_params = {
"use_direct_reads": lambda: random.randint(0, 1),
"use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1),
"mock_direct_io": False,
"cache_type": lambda: random.choice(["lru_cache", "clock_cache"]),
"cache_type": lambda: random.choice(["lru_cache", "hyper_clock_cache"]),
# fast_lru_cache is incompatible with stress tests, because it doesn't support strict_capacity_limit == false.
"use_full_merge_v1": lambda: random.randint(0, 1),
"use_merge": lambda: random.randint(0, 1),
