From bb87164db308589fdd3a46471bbbb3871962244b Mon Sep 17 00:00:00 2001
From: Peter Dillinger
Date: Tue, 3 May 2022 12:32:02 -0700
Subject: [PATCH] Fork and simplify LRUCache for developing enhancements
 (#9917)

Summary:
To support a project to prototype and evaluate algorithmic enhancements
and alternatives to LRUCache, here I have separated out LRUCache into
internal-only "FastLRUCache" and cut it down to essentials, so that
details like secondary cache handling and priorities do not interfere
with prototyping. These can be re-integrated later as needed, along with
refactoring to minimize code duplication (which would slow down
prototyping for now).

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9917

Test Plan: unit tests updated to ensure basic functionality has (likely)
been preserved

Reviewed By: anand1976

Differential Revision: D35995554

Pulled By: pdillinger

fbshipit-source-id: d67b20b7ada3b5d3bfe56d897a73885894a1d9db
---
 CMakeLists.txt            |   1 +
 TARGETS                   |   2 +
 cache/cache_test.cc       |  18 +-
 cache/fast_lru_cache.cc   | 511 ++++++++++++++++++++++++++++++++++++++
 cache/fast_lru_cache.h    | 299 ++++++++++++++++++++++
 cache/lru_cache.cc        |   3 +
 cache/lru_cache.h         |   7 +
 db/db_block_cache_test.cc |   7 +-
 src.mk                    |   1 +
 9 files changed, 844 insertions(+), 5 deletions(-)
 create mode 100644 cache/fast_lru_cache.cc
 create mode 100644 cache/fast_lru_cache.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 008304cfc..ac9f3a6ab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -596,6 +596,7 @@ set(SOURCES
   cache/cache_reservation_manager.cc
   cache/clock_cache.cc
   cache/compressed_secondary_cache.cc
+  cache/fast_lru_cache.cc
   cache/lru_cache.cc
   cache/sharded_cache.cc
   db/arena_wrapped_db_iter.cc
diff --git a/TARGETS b/TARGETS
index 5b72ea483..0bc5ad0f5 100644
--- a/TARGETS
+++ b/TARGETS
@@ -15,6 +15,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
         "cache/cache_reservation_manager.cc",
         "cache/clock_cache.cc",
         "cache/compressed_secondary_cache.cc",
+        "cache/fast_lru_cache.cc",
         "cache/lru_cache.cc",
         "cache/sharded_cache.cc",
         "db/arena_wrapped_db_iter.cc",
@@ -335,6 +336,7 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
         "cache/cache_reservation_manager.cc",
         "cache/clock_cache.cc",
         "cache/compressed_secondary_cache.cc",
+        "cache/fast_lru_cache.cc",
         "cache/lru_cache.cc",
         "cache/sharded_cache.cc",
         "db/arena_wrapped_db_iter.cc",
diff --git a/cache/cache_test.cc b/cache/cache_test.cc
index 8562490ee..d7efb6652 100644
--- a/cache/cache_test.cc
+++ b/cache/cache_test.cc
@@ -14,7 +14,9 @@
 #include
 #include
 #include
+
 #include "cache/clock_cache.h"
+#include "cache/fast_lru_cache.h"
 #include "cache/lru_cache.h"
 #include "test_util/testharness.h"
 #include "util/coding.h"
@@ -39,6 +41,7 @@ static int DecodeValue(void* v) {
 const std::string kLRU = "lru";
 const std::string kClock = "clock";
+const std::string kFast = "fast";
 
 void dumbDeleter(const Slice& /*key*/, void* /*value*/) {}
@@ -83,6 +86,9 @@ class CacheTest : public testing::TestWithParam<std::string> {
     if (type == kClock) {
       return NewClockCache(capacity);
     }
+    if (type == kFast) {
+      return NewFastLRUCache(capacity);
+    }
     return nullptr;
   }
@@ -103,6 +109,10 @@ class CacheTest : public testing::TestWithParam<std::string> {
       return NewClockCache(capacity, num_shard_bits, strict_capacity_limit,
                            charge_policy);
     }
+    if (type == kFast) {
+      return NewFastLRUCache(capacity, num_shard_bits, strict_capacity_limit,
+                             charge_policy);
+    }
     return nullptr;
   }
@@ -838,11 +848,13 @@ TEST_P(CacheTest, GetChargeAndDeleter) {
 std::shared_ptr<Cache> (*new_clock_cache_func)(
     size_t, int, bool, CacheMetadataChargePolicy) = NewClockCache;
 INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
-                        testing::Values(kLRU, kClock));
+                        testing::Values(kLRU, kClock, kFast));
 #else
-INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, testing::Values(kLRU));
+INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
+                        testing::Values(kLRU, kFast));
 #endif  // SUPPORT_CLOCK_CACHE
-INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest, testing::Values(kLRU));
+INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest,
+                        testing::Values(kLRU, kFast));
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/cache/fast_lru_cache.cc b/cache/fast_lru_cache.cc
new file mode 100644
index 000000000..10ae7367f
--- /dev/null
+++ b/cache/fast_lru_cache.cc
@@ -0,0 +1,511 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "cache/fast_lru_cache.h"
+
+#include
+#include
+#include
+
+#include "monitoring/perf_context_imp.h"
+#include "monitoring/statistics.h"
+#include "port/lang.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace fast_lru_cache {
+
+LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
+    : length_bits_(/* historical starting size*/ 4),
+      list_(new LRUHandle* [size_t{1} << length_bits_] {}),
+      elems_(0),
+      max_length_bits_(max_upper_hash_bits) {}
+
+LRUHandleTable::~LRUHandleTable() {
+  ApplyToEntriesRange(
+      [](LRUHandle* h) {
+        if (!h->HasRefs()) {
+          h->Free();
+        }
+      },
+      0, uint32_t{1} << length_bits_);
+}
+
+LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
+  return *FindPointer(key, hash);
+}
+
+LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
+  LRUHandle** ptr = FindPointer(h->key(), h->hash);
+  LRUHandle* old = *ptr;
+  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
+  *ptr = h;
+  if (old == nullptr) {
+    ++elems_;
+    if ((elems_ >> length_bits_) > 0) {  // elems_ >= length
+      // Since each cache entry is fairly large, we aim for a small
+      // average linked list length (<= 1).
+      Resize();
+    }
+  }
+  return old;
+}
+
+LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
+  LRUHandle** ptr = FindPointer(key, hash);
+  LRUHandle* result = *ptr;
+  if (result != nullptr) {
+    *ptr = result->next_hash;
+    --elems_;
+  }
+  return result;
+}
+
+LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
+  LRUHandle** ptr = &list_[hash >> (32 - length_bits_)];
+  while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
+    ptr = &(*ptr)->next_hash;
+  }
+  return ptr;
+}
+
+void LRUHandleTable::Resize() {
+  if (length_bits_ >= max_length_bits_) {
+    // Due to reaching limit of hash information, if we made the table bigger,
+    // we would allocate more addresses but only the same number would be used.
+    return;
+  }
+  if (length_bits_ >= 31) {
+    // Avoid undefined behavior shifting uint32_t by 32.
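+    // (This branch is reachable only when max_length_bits_ is 32, i.e. a
+    // single-shard cache; sharded caches hit the check above first.)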
+ return; + } + + uint32_t old_length = uint32_t{1} << length_bits_; + int new_length_bits = length_bits_ + 1; + std::unique_ptr new_list { + new LRUHandle* [size_t{1} << new_length_bits] {} + }; + uint32_t count = 0; + for (uint32_t i = 0; i < old_length; i++) { + LRUHandle* h = list_[i]; + while (h != nullptr) { + LRUHandle* next = h->next_hash; + uint32_t hash = h->hash; + LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)]; + h->next_hash = *ptr; + *ptr = h; + h = next; + count++; + } + } + assert(elems_ == count); + list_ = std::move(new_list); + length_bits_ = new_length_bits; +} + +LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy, + int max_upper_hash_bits) + : capacity_(0), + strict_capacity_limit_(strict_capacity_limit), + table_(max_upper_hash_bits), + usage_(0), + lru_usage_(0) { + set_metadata_charge_policy(metadata_charge_policy); + // Make empty circular linked list. + lru_.next = &lru_; + lru_.prev = &lru_; + lru_low_pri_ = &lru_; + SetCapacity(capacity); +} + +void LRUCacheShard::EraseUnRefEntries() { + autovector last_reference_list; + { + MutexLock l(&mutex_); + while (lru_.next != &lru_) { + LRUHandle* old = lru_.next; + // LRU list contains only elements which can be evicted. + assert(old->InCache() && !old->HasRefs()); + LRU_Remove(old); + table_.Remove(old->key(), old->hash); + old->SetInCache(false); + size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_); + assert(usage_ >= total_charge); + usage_ -= total_charge; + last_reference_list.push_back(old); + } + } + + // Free the entries here outside of mutex for performance reasons. + for (auto entry : last_reference_list) { + entry->Free(); + } +} + +void LRUCacheShard::ApplyToSomeEntries( + const std::function& callback, + uint32_t average_entries_per_lock, uint32_t* state) { + // The state is essentially going to be the starting hash, which works + // nicely even if we resize between calls because we use upper-most + // hash bits for table indexes. + MutexLock l(&mutex_); + uint32_t length_bits = table_.GetLengthBits(); + uint32_t length = uint32_t{1} << length_bits; + + assert(average_entries_per_lock > 0); + // Assuming we are called with same average_entries_per_lock repeatedly, + // this simplifies some logic (index_end will not overflow). + assert(average_entries_per_lock < length || *state == 0); + + uint32_t index_begin = *state >> (32 - length_bits); + uint32_t index_end = index_begin + average_entries_per_lock; + if (index_end >= length) { + // Going to end + index_end = length; + *state = UINT32_MAX; + } else { + *state = index_end << (32 - length_bits); + } + + table_.ApplyToEntriesRange( + [callback](LRUHandle* h) { + callback(h->key(), h->value, h->charge, h->deleter); + }, + index_begin, index_end); +} + +void LRUCacheShard::LRU_Remove(LRUHandle* e) { + assert(e->next != nullptr); + assert(e->prev != nullptr); + e->next->prev = e->prev; + e->prev->next = e->next; + e->prev = e->next = nullptr; + size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); + assert(lru_usage_ >= total_charge); + lru_usage_ -= total_charge; +} + +void LRUCacheShard::LRU_Insert(LRUHandle* e) { + assert(e->next == nullptr); + assert(e->prev == nullptr); + size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); + // Inset "e" to head of LRU list. 
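+  // lru_ is the dummy head of a circular doubly-linked list: lru_.prev is the
+  // most recently used entry and lru_.next the least recently used, so
+  // splicing "e" in just before the head makes it the newest entry.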
+ e->next = &lru_; + e->prev = lru_.prev; + e->prev->next = e; + e->next->prev = e; + lru_usage_ += total_charge; +} + +void LRUCacheShard::EvictFromLRU(size_t charge, + autovector* deleted) { + while ((usage_ + charge) > capacity_ && lru_.next != &lru_) { + LRUHandle* old = lru_.next; + // LRU list contains only elements which can be evicted. + assert(old->InCache() && !old->HasRefs()); + LRU_Remove(old); + table_.Remove(old->key(), old->hash); + old->SetInCache(false); + size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_); + assert(usage_ >= old_total_charge); + usage_ -= old_total_charge; + deleted->push_back(old); + } +} + +void LRUCacheShard::SetCapacity(size_t capacity) { + autovector last_reference_list; + { + MutexLock l(&mutex_); + capacity_ = capacity; + EvictFromLRU(0, &last_reference_list); + } + + // Free the entries here outside of mutex for performance reasons. + for (auto entry : last_reference_list) { + entry->Free(); + } +} + +void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) { + MutexLock l(&mutex_); + strict_capacity_limit_ = strict_capacity_limit; +} + +Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle, + bool free_handle_on_fail) { + Status s = Status::OK(); + autovector last_reference_list; + size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); + + { + MutexLock l(&mutex_); + + // Free the space following strict LRU policy until enough space + // is freed or the lru list is empty. + EvictFromLRU(total_charge, &last_reference_list); + + if ((usage_ + total_charge) > capacity_ && + (strict_capacity_limit_ || handle == nullptr)) { + e->SetInCache(false); + if (handle == nullptr) { + // Don't insert the entry but still return ok, as if the entry inserted + // into cache and get evicted immediately. + last_reference_list.push_back(e); + } else { + if (free_handle_on_fail) { + delete[] reinterpret_cast(e); + *handle = nullptr; + } + s = Status::Incomplete("Insert failed due to LRU cache being full."); + } + } else { + // Insert into the cache. Note that the cache might get larger than its + // capacity if not enough space was freed up. + LRUHandle* old = table_.Insert(e); + usage_ += total_charge; + if (old != nullptr) { + s = Status::OkOverwritten(); + assert(old->InCache()); + old->SetInCache(false); + if (!old->HasRefs()) { + // old is on LRU because it's in cache and its reference count is 0. + LRU_Remove(old); + size_t old_total_charge = + old->CalcTotalCharge(metadata_charge_policy_); + assert(usage_ >= old_total_charge); + usage_ -= old_total_charge; + last_reference_list.push_back(old); + } + } + if (handle == nullptr) { + LRU_Insert(e); + } else { + // If caller already holds a ref, no need to take one here. + if (!e->HasRefs()) { + e->Ref(); + } + *handle = reinterpret_cast(e); + } + } + } + + // Free the entries here outside of mutex for performance reasons. 
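+  // (The deleters invoked by Free() can be arbitrarily expensive, which is
+  // why eviction victims are collected under the mutex but freed here.)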
+ for (auto entry : last_reference_list) { + entry->Free(); + } + + return s; +} + +Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) { + LRUHandle* e = nullptr; + { + MutexLock l(&mutex_); + e = table_.Lookup(key, hash); + if (e != nullptr) { + assert(e->InCache()); + if (!e->HasRefs()) { + // The entry is in LRU since it's in hash and has no external references + LRU_Remove(e); + } + e->Ref(); + } + } + return reinterpret_cast(e); +} + +bool LRUCacheShard::Ref(Cache::Handle* h) { + LRUHandle* e = reinterpret_cast(h); + MutexLock l(&mutex_); + // To create another reference - entry must be already externally referenced. + assert(e->HasRefs()); + e->Ref(); + return true; +} + +bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) { + if (handle == nullptr) { + return false; + } + LRUHandle* e = reinterpret_cast(handle); + bool last_reference = false; + { + MutexLock l(&mutex_); + last_reference = e->Unref(); + if (last_reference && e->InCache()) { + // The item is still in cache, and nobody else holds a reference to it. + if (usage_ > capacity_ || erase_if_last_ref) { + // The LRU list must be empty since the cache is full. + assert(lru_.next == &lru_ || erase_if_last_ref); + // Take this opportunity and remove the item. + table_.Remove(e->key(), e->hash); + e->SetInCache(false); + } else { + // Put the item back on the LRU list, and don't free it. + LRU_Insert(e); + last_reference = false; + } + } + // If it was the last reference, then decrement the cache usage. + if (last_reference) { + size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); + assert(usage_ >= total_charge); + usage_ -= total_charge; + } + } + + // Free the entry here outside of mutex for performance reasons. + if (last_reference) { + e->Free(); + } + return last_reference; +} + +Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value, + size_t charge, Cache::DeleterFn deleter, + Cache::Handle** handle, + Cache::Priority /*priority*/) { + // Allocate the memory here outside of the mutex. + // If the cache is full, we'll have to release it. + // It shouldn't happen very often though. + LRUHandle* e = reinterpret_cast( + new char[sizeof(LRUHandle) - 1 + key.size()]); + + e->value = value; + e->flags = 0; + e->deleter = deleter; + e->charge = charge; + e->key_length = key.size(); + e->hash = hash; + e->refs = 0; + e->next = e->prev = nullptr; + e->SetInCache(true); + memcpy(e->key_data, key.data(), key.size()); + + return InsertItem(e, handle, /* free_handle_on_fail */ true); +} + +void LRUCacheShard::Erase(const Slice& key, uint32_t hash) { + LRUHandle* e; + bool last_reference = false; + { + MutexLock l(&mutex_); + e = table_.Remove(key, hash); + if (e != nullptr) { + assert(e->InCache()); + e->SetInCache(false); + if (!e->HasRefs()) { + // The entry is in LRU since it's in hash and has no external references + LRU_Remove(e); + size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); + assert(usage_ >= total_charge); + usage_ -= total_charge; + last_reference = true; + } + } + } + + // Free the entry here outside of mutex for performance reasons. + // last_reference will only be true if e != nullptr. 
+ if (last_reference) { + e->Free(); + } +} + +size_t LRUCacheShard::GetUsage() const { + MutexLock l(&mutex_); + return usage_; +} + +size_t LRUCacheShard::GetPinnedUsage() const { + MutexLock l(&mutex_); + assert(usage_ >= lru_usage_); + return usage_ - lru_usage_; +} + +std::string LRUCacheShard::GetPrintableOptions() const { return std::string{}; } + +LRUCache::LRUCache(size_t capacity, int num_shard_bits, + bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy) + : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) { + num_shards_ = 1 << num_shard_bits; + shards_ = reinterpret_cast( + port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_)); + size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_; + for (int i = 0; i < num_shards_; i++) { + new (&shards_[i]) + LRUCacheShard(per_shard, strict_capacity_limit, metadata_charge_policy, + /* max_upper_hash_bits */ 32 - num_shard_bits); + } +} + +LRUCache::~LRUCache() { + if (shards_ != nullptr) { + assert(num_shards_ > 0); + for (int i = 0; i < num_shards_; i++) { + shards_[i].~LRUCacheShard(); + } + port::cacheline_aligned_free(shards_); + } +} + +CacheShard* LRUCache::GetShard(uint32_t shard) { + return reinterpret_cast(&shards_[shard]); +} + +const CacheShard* LRUCache::GetShard(uint32_t shard) const { + return reinterpret_cast(&shards_[shard]); +} + +void* LRUCache::Value(Handle* handle) { + return reinterpret_cast(handle)->value; +} + +size_t LRUCache::GetCharge(Handle* handle) const { + return reinterpret_cast(handle)->charge; +} + +Cache::DeleterFn LRUCache::GetDeleter(Handle* handle) const { + auto h = reinterpret_cast(handle); + return h->deleter; +} + +uint32_t LRUCache::GetHash(Handle* handle) const { + return reinterpret_cast(handle)->hash; +} + +void LRUCache::DisownData() { + // Leak data only if that won't generate an ASAN/valgrind warning. + if (!kMustFreeHeapAllocations) { + shards_ = nullptr; + num_shards_ = 0; + } +} + +} // namespace fast_lru_cache + +std::shared_ptr NewFastLRUCache( + size_t capacity, int num_shard_bits, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy) { + if (num_shard_bits >= 20) { + return nullptr; // The cache cannot be sharded into too many fine pieces. + } + if (num_shard_bits < 0) { + num_shard_bits = GetDefaultCacheShardBits(capacity); + } + return std::make_shared( + capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/cache/fast_lru_cache.h b/cache/fast_lru_cache.h new file mode 100644 index 000000000..a672afaf7 --- /dev/null +++ b/cache/fast_lru_cache.h @@ -0,0 +1,299 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include +#include + +#include "cache/sharded_cache.h" +#include "port/lang.h" +#include "port/malloc.h" +#include "port/port.h" +#include "rocksdb/secondary_cache.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { +namespace fast_lru_cache { + +// An experimental (under development!) 
alternative to LRUCache + +struct LRUHandle { + void* value; + Cache::DeleterFn deleter; + LRUHandle* next_hash; + LRUHandle* next; + LRUHandle* prev; + size_t charge; // TODO(opt): Only allow uint32_t? + size_t key_length; + // The hash of key(). Used for fast sharding and comparisons. + uint32_t hash; + // The number of external refs to this entry. The cache itself is not counted. + uint32_t refs; + + enum Flags : uint8_t { + // Whether this entry is referenced by the hash table. + IN_CACHE = (1 << 0), + }; + uint8_t flags; + + // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!) + char key_data[1]; + + Slice key() const { return Slice(key_data, key_length); } + + // Increase the reference count by 1. + void Ref() { refs++; } + + // Just reduce the reference count by 1. Return true if it was last reference. + bool Unref() { + assert(refs > 0); + refs--; + return refs == 0; + } + + // Return true if there are external refs, false otherwise. + bool HasRefs() const { return refs > 0; } + + bool InCache() const { return flags & IN_CACHE; } + + void SetInCache(bool in_cache) { + if (in_cache) { + flags |= IN_CACHE; + } else { + flags &= ~IN_CACHE; + } + } + + void Free() { + assert(refs == 0); + if (deleter) { + (*deleter)(key(), value); + } + delete[] reinterpret_cast(this); + } + + // Calculate the memory usage by metadata. + inline size_t CalcTotalCharge( + CacheMetadataChargePolicy metadata_charge_policy) { + size_t meta_charge = 0; + if (metadata_charge_policy == kFullChargeCacheMetadata) { +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + meta_charge += malloc_usable_size(static_cast(this)); +#else + // This is the size that is used when a new handle is created. + meta_charge += sizeof(LRUHandle) - 1 + key_length; +#endif + } + return charge + meta_charge; + } +}; + +// We provide our own simple hash table since it removes a whole bunch +// of porting hacks and is also faster than some of the built-in hash +// table implementations in some of the compiler/runtime combinations +// we have tested. E.g., readrandom speeds up by ~5% over the g++ +// 4.4.3's builtin hashtable. +class LRUHandleTable { + public: + // If the table uses more hash bits than `max_upper_hash_bits`, + // it will eat into the bits used for sharding, which are constant + // for a given LRUHandleTable. + explicit LRUHandleTable(int max_upper_hash_bits); + ~LRUHandleTable(); + + LRUHandle* Lookup(const Slice& key, uint32_t hash); + LRUHandle* Insert(LRUHandle* h); + LRUHandle* Remove(const Slice& key, uint32_t hash); + + template + void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) { + for (uint32_t i = index_begin; i < index_end; i++) { + LRUHandle* h = list_[i]; + while (h != nullptr) { + auto n = h->next_hash; + assert(h->InCache()); + func(h); + h = n; + } + } + } + + int GetLengthBits() const { return length_bits_; } + + private: + // Return a pointer to slot that points to a cache entry that + // matches key/hash. If there is no such cache entry, return a + // pointer to the trailing slot in the corresponding linked list. + LRUHandle** FindPointer(const Slice& key, uint32_t hash); + + void Resize(); + + // Number of hash bits (upper because lower bits used for sharding) + // used for table index. Length == 1 << length_bits_ + int length_bits_; + + // The table consists of an array of buckets where each bucket is + // a linked list of cache entries that hash into the bucket. + std::unique_ptr list_; + + // Number of elements currently in the table. 
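+  // Insert() grows the table via Resize() (doubling the bucket count) once
+  // elems_ reaches the number of buckets, unless capped by max_length_bits_.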
+ uint32_t elems_; + + // Set from max_upper_hash_bits (see constructor). + const int max_length_bits_; +}; + +// A single shard of sharded cache. +class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard { + public: + LRUCacheShard(size_t capacity, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy, + int max_upper_hash_bits); + ~LRUCacheShard() override = default; + + // Separate from constructor so caller can easily make an array of LRUCache + // if current usage is more than new capacity, the function will attempt to + // free the needed space. + void SetCapacity(size_t capacity) override; + + // Set the flag to reject insertion if cache if full. + void SetStrictCapacityLimit(bool strict_capacity_limit) override; + + // Like Cache methods, but with an extra "hash" parameter. + Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge, + Cache::DeleterFn deleter, Cache::Handle** handle, + Cache::Priority priority) override; + + Status Insert(const Slice& key, uint32_t hash, void* value, + const Cache::CacheItemHelper* helper, size_t charge, + Cache::Handle** handle, Cache::Priority priority) override { + return Insert(key, hash, value, charge, helper->del_cb, handle, priority); + } + + Cache::Handle* Lookup(const Slice& key, uint32_t hash, + const Cache::CacheItemHelper* /*helper*/, + const Cache::CreateCallback& /*create_cb*/, + Cache::Priority /*priority*/, bool /*wait*/, + Statistics* /*stats*/) override { + return Lookup(key, hash); + } + Cache::Handle* Lookup(const Slice& key, uint32_t hash) override; + + bool Release(Cache::Handle* handle, bool /*useful*/, + bool erase_if_last_ref) override { + return Release(handle, erase_if_last_ref); + } + bool IsReady(Cache::Handle* /*handle*/) override { return true; } + void Wait(Cache::Handle* /*handle*/) override {} + + bool Ref(Cache::Handle* handle) override; + bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override; + void Erase(const Slice& key, uint32_t hash) override; + + size_t GetUsage() const override; + size_t GetPinnedUsage() const override; + + void ApplyToSomeEntries( + const std::function& callback, + uint32_t average_entries_per_lock, uint32_t* state) override; + + void EraseUnRefEntries() override; + + std::string GetPrintableOptions() const override; + + private: + friend class LRUCache; + // Insert an item into the hash table and, if handle is null, insert into + // the LRU list. Older items are evicted as necessary. If the cache is full + // and free_handle_on_fail is true, the item is deleted and handle is set to + // nullptr. + Status InsertItem(LRUHandle* item, Cache::Handle** handle, + bool free_handle_on_fail); + + void LRU_Remove(LRUHandle* e); + void LRU_Insert(LRUHandle* e); + + // Free some space following strict LRU policy until enough space + // to hold (usage_ + charge) is freed or the lru list is empty + // This function is not thread safe - it needs to be executed while + // holding the mutex_. + void EvictFromLRU(size_t charge, autovector* deleted); + + // Initialized before use. + size_t capacity_; + + // Whether to reject insertion if cache reaches its full capacity. + bool strict_capacity_limit_; + + // Dummy head of LRU list. + // lru.prev is newest entry, lru.next is oldest entry. + // LRU contains items which can be evicted, ie reference only by cache + LRUHandle lru_; + + // Pointer to head of low-pri pool in LRU list. 
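+  // (Priority pools are not used in this simplified cache; the pointer is
+  // kept only for structural parity with LRUCache.)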
+ LRUHandle* lru_low_pri_; + + // ------------^^^^^^^^^^^^^----------- + // Not frequently modified data members + // ------------------------------------ + // + // We separate data members that are updated frequently from the ones that + // are not frequently updated so that they don't share the same cache line + // which will lead into false cache sharing + // + // ------------------------------------ + // Frequently modified data members + // ------------vvvvvvvvvvvvv----------- + LRUHandleTable table_; + + // Memory size for entries residing in the cache. + size_t usage_; + + // Memory size for entries residing only in the LRU list. + size_t lru_usage_; + + // mutex_ protects the following state. + // We don't count mutex_ as the cache's internal state so semantically we + // don't mind mutex_ invoking the non-const actions. + mutable port::Mutex mutex_; +}; + +class LRUCache +#ifdef NDEBUG + final +#endif + : public ShardedCache { + public: + LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy = + kDontChargeCacheMetadata); + ~LRUCache() override; + const char* Name() const override { return "LRUCache"; } + CacheShard* GetShard(uint32_t shard) override; + const CacheShard* GetShard(uint32_t shard) const override; + void* Value(Handle* handle) override; + size_t GetCharge(Handle* handle) const override; + uint32_t GetHash(Handle* handle) const override; + DeleterFn GetDeleter(Handle* handle) const override; + void DisownData() override; + + private: + LRUCacheShard* shards_ = nullptr; + int num_shards_ = 0; +}; +} // namespace fast_lru_cache + +std::shared_ptr NewFastLRUCache( + size_t capacity, int num_shard_bits = -1, + bool strict_capacity_limit = false, + CacheMetadataChargePolicy metadata_charge_policy = + kDefaultCacheMetadataChargePolicy); + +} // namespace ROCKSDB_NAMESPACE diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc index 5765ffb50..a28c2b515 100644 --- a/cache/lru_cache.cc +++ b/cache/lru_cache.cc @@ -19,6 +19,7 @@ #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { +namespace lru_cache { LRUHandleTable::LRUHandleTable(int max_upper_hash_bits) : length_bits_(/* historical starting size*/ 4), @@ -759,6 +760,8 @@ void LRUCache::WaitAll(std::vector& handles) { } } +} // namespace lru_cache + std::shared_ptr NewLRUCache( size_t capacity, int num_shard_bits, bool strict_capacity_limit, double high_pri_pool_ratio, diff --git a/cache/lru_cache.h b/cache/lru_cache.h index 0dd83d890..2da78eb67 100644 --- a/cache/lru_cache.h +++ b/cache/lru_cache.h @@ -19,6 +19,7 @@ #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { +namespace lru_cache { // LRU cache implementation. This class is not thread-safe. 
@@ -479,4 +480,10 @@ class LRUCache
   std::shared_ptr<SecondaryCache> secondary_cache_;
 };
 
+}  // namespace lru_cache
+
+using LRUCache = lru_cache::LRUCache;
+using LRUHandle = lru_cache::LRUHandle;
+using LRUCacheShard = lru_cache::LRUCacheShard;
+
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc
index b6afb8003..518105af2 100644
--- a/db/db_block_cache_test.cc
+++ b/db/db_block_cache_test.cc
@@ -13,6 +13,7 @@
 #include "cache/cache_entry_roles.h"
 #include "cache/cache_key.h"
+#include "cache/fast_lru_cache.h"
 #include "cache/lru_cache.h"
 #include "db/column_family.h"
 #include "db/db_impl/db_impl.h"
@@ -934,7 +935,8 @@ TEST_F(DBBlockCacheTest, AddRedundantStats) {
   int iterations_tested = 0;
   for (std::shared_ptr<Cache> base_cache :
        {NewLRUCache(capacity, num_shard_bits),
-        NewClockCache(capacity, num_shard_bits)}) {
+        NewClockCache(capacity, num_shard_bits),
+        NewFastLRUCache(capacity, num_shard_bits)}) {
     if (!base_cache) {
       // Skip clock cache when not supported
       continue;
@@ -1288,7 +1290,8 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
   int iterations_tested = 0;
   for (bool partition : {false, true}) {
     for (std::shared_ptr<Cache> cache :
-         {NewLRUCache(capacity), NewClockCache(capacity)}) {
+         {NewLRUCache(capacity), NewClockCache(capacity),
+          NewFastLRUCache(capacity)}) {
       if (!cache) {
         // Skip clock cache when not supported
         continue;
diff --git a/src.mk b/src.mk
index 6e39f00e6..747d18d2b 100644
--- a/src.mk
+++ b/src.mk
@@ -5,6 +5,7 @@ LIB_SOURCES = \
   cache/cache_key.cc \
   cache/cache_reservation_manager.cc \
   cache/clock_cache.cc \
+  cache/fast_lru_cache.cc \
   cache/lru_cache.cc \
   cache/compressed_secondary_cache.cc \
   cache/sharded_cache.cc \
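A minimal smoke-test sketch (not part of the diff above) of the new
NewFastLRUCache() entry point, exercised through the generic Cache interface
the way cache_test.cc does; it assumes an in-tree caller, since
cache/fast_lru_cache.h is an internal header, and the key, value, charge, and
deleter below are illustrative placeholders:

    #include <cassert>

    #include "cache/fast_lru_cache.h"
    #include "rocksdb/cache.h"

    namespace rdb = ROCKSDB_NAMESPACE;

    void FastLRUCacheSmokeTest() {
      // Build a 1 MiB cache; -1 lets RocksDB pick the shard count.
      std::shared_ptr<rdb::Cache> cache = rdb::NewFastLRUCache(
          /*capacity=*/1 << 20, /*num_shard_bits=*/-1,
          /*strict_capacity_limit=*/false, rdb::kDontChargeCacheMetadata);
      // Insert a placeholder entry and pin it via the returned handle.
      rdb::Cache::Handle* handle = nullptr;
      rdb::Status s = cache->Insert("key", /*value=*/nullptr, /*charge=*/1,
                                    /*deleter=*/nullptr, &handle);
      if (s.ok() && handle != nullptr) {
        assert(cache->Value(handle) == nullptr);
        cache->Release(handle);
      }
    }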