diff --git a/CMakeLists.txt b/CMakeLists.txt
index 082b13c22..c0c1bf447 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -193,7 +193,6 @@ set(SOURCES
   util/arena.cc
   util/bloom.cc
   util/build_version.cc
-  util/cache.cc
   util/coding.cc
   util/compaction_job_stats_impl.cc
   util/comparator.cc
@@ -214,6 +213,7 @@ set(SOURCES
   util/histogram_windowing.cc
   util/instrumented_mutex.cc
   util/iostats_context.cc
+  util/lru_cache.cc
   tools/ldb_cmd.cc
   tools/ldb_tool.cc
   util/logging.cc
@@ -229,6 +229,7 @@ set(SOURCES
   util/perf_level.cc
   util/random.cc
   util/rate_limiter.cc
+  util/sharded_cache.cc
   util/slice.cc
   util/statistics.cc
   util/status.cc
diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h
index 6a2d43313..53cb6c60f 100644
--- a/include/rocksdb/cache.h
+++ b/include/rocksdb/cache.h
@@ -34,13 +34,9 @@ class Cache;
 // Create a new cache with a fixed size capacity. The cache is sharded
 // to 2^num_shard_bits shards, by hash of the key. The total capacity
 // is divided and evenly assigned to each shard.
-//
-// The parameter num_shard_bits defaults to 4, and strict_capacity_limit
-// defaults to false.
-extern std::shared_ptr<Cache> NewLRUCache(size_t capacity);
-extern std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits);
-extern std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
-                                          bool strict_capacity_limit);
+extern std::shared_ptr<Cache> NewLRUCache(size_t capacity,
+                                          int num_shard_bits = 6,
+                                          bool strict_capacity_limit = false);

 class Cache {
  public:
@@ -112,8 +108,8 @@ class Cache {
   // capacity.
   virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;

-  // Set whether to return error on insertion when cache reaches its full
-  // capacity.
+  // Get the flag for whether to return an error on insertion when the cache
+  // reaches its full capacity.
   virtual bool HasStrictCapacityLimit() const = 0;

   // returns the maximum configured capacity of the cache
@@ -148,10 +144,6 @@ class Cache {
   virtual void EraseUnRefEntries() = 0;

  private:
-  void LRU_Remove(Handle* e);
-  void LRU_Append(Handle* e);
-  void Unref(Handle* e);
-
   // No copying allowed
   Cache(const Cache&);
   Cache& operator=(const Cache&);
diff --git a/include/rocksdb/utilities/sim_cache.h b/include/rocksdb/utilities/sim_cache.h
index 7bd6363e1..cc8a01bec 100644
--- a/include/rocksdb/utilities/sim_cache.h
+++ b/include/rocksdb/utilities/sim_cache.h
@@ -11,7 +11,6 @@
 #include "rocksdb/cache.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/status.h"
-#include "util/lru_cache_handle.h"

 namespace rocksdb {
diff --git a/src.mk b/src.mk
index 4a320f28a..aad71d3d8 100644
--- a/src.mk
+++ b/src.mk
@@ -88,7 +88,6 @@ LIB_SOURCES = \
   util/arena.cc \
   util/bloom.cc \
   util/build_version.cc \
-  util/cache.cc \
   util/coding.cc \
   util/comparator.cc \
   util/compaction_job_stats_impl.cc \
@@ -100,18 +99,20 @@ LIB_SOURCES = \
   util/env_chroot.cc \
   util/env_hdfs.cc \
   util/env_posix.cc \
-  util/io_posix.cc \
-  util/threadpool.cc \
-  util/transaction_test_util.cc \
-  util/sst_file_manager_impl.cc \
   util/file_util.cc \
   util/file_reader_writer.cc \
   util/filter_policy.cc \
   util/hash.cc \
   util/histogram.cc \
-  util/histogram_windowing.cc \
+  util/histogram_windowing.cc \
   util/instrumented_mutex.cc \
   util/iostats_context.cc \
+  util/io_posix.cc \
+  util/lru_cache.cc \
+  util/threadpool.cc \
+  util/transaction_test_util.cc \
+  util/sharded_cache.cc \
+  util/sst_file_manager_impl.cc \
   utilities/backupable/backupable_db.cc \
   utilities/convenience/info_log_finder.cc \
   utilities/checkpoint/checkpoint.cc \
diff --git a/util/hash.h b/util/hash.h
index 5c90e63ec..d0fe35c43 100644
--- a/util/hash.h
+++ b/util/hash.h
@@ -13,6 +13,8 @@
 #include <stddef.h>
 #include <stdint.h>

+#include "rocksdb/slice.h"
+
 namespace rocksdb {

 extern uint32_t Hash(const char* data, size_t n, uint32_t seed);
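The three NewLRUCache overloads above collapse into a single declaration with default arguments, and the default shard count moves from 4 shard bits to 6 (64 shards). A minimal sketch of what callers see, assuming only this header; the capacity values are arbitrary:

    #include "rocksdb/cache.h"

    int main() {
      // Omitted arguments take the new defaults:
      // num_shard_bits = 6, strict_capacity_limit = false.
      std::shared_ptr<rocksdb::Cache> a = rocksdb::NewLRUCache(8 << 20);
      std::shared_ptr<rocksdb::Cache> b = rocksdb::NewLRUCache(8 << 20, 4);
      std::shared_ptr<rocksdb::Cache> c =
          rocksdb::NewLRUCache(8 << 20, 4, /*strict_capacity_limit=*/true);
      return (a && b && c) ? 0 : 1;
    }

Existing call sites with one, two, or three arguments keep compiling unchanged.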
diff --git a/util/cache.cc b/util/lru_cache.cc
similarity index 67%
rename from util/cache.cc
rename to util/lru_cache.cc
index ff2015c73..cdc1c3f27 100644
--- a/util/cache.cc
+++ b/util/lru_cache.cc
@@ -12,11 +12,9 @@
 #include <stdio.h>

 #include "port/port.h"
-#include "rocksdb/cache.h"
 #include "util/autovector.h"
-#include "util/hash.h"
-#include "util/lru_cache_handle.h"
 #include "util/mutexlock.h"
+#include "util/sharded_cache.h"

 namespace rocksdb {

@@ -24,6 +22,60 @@ namespace {

 // LRU cache implementation

+// An entry is a variable length heap-allocated structure.
+// Entries are referenced by the cache and/or by any external entity.
+// The cache keeps all its entries in a hash table. Some elements
+// are also stored on the LRU list.
+//
+// LRUHandle can be in these states:
+// 1. Referenced externally AND in the hash table.
+//    In that case the entry is *not* in the LRU list.
+//    (refs > 1 && in_cache == true)
+// 2. Not referenced externally and in the hash table. In that case the entry
+//    is in the LRU list and can be freed. (refs == 1 && in_cache == true)
+// 3. Referenced externally and not in the hash table. In that case the entry
+//    is not on the LRU list and not in the hash table.
+//    (refs >= 1 && in_cache == false)
+//
+// All newly created LRUHandles are in state 1. If you call
+// LRUCacheShard::Release on an entry in state 1, it will go into state 2.
+// To move from state 1 to state 3, either call LRUCacheShard::Erase or
+// LRUCacheShard::Insert with the same key.
+// To move from state 2 to state 1, use LRUCacheShard::Lookup.
+// Before destruction, make sure that no handles are in state 1. This means
+// that any successful LRUCacheShard::Lookup/LRUCacheShard::Insert has a
+// matching LRUCacheShard::Release (to move into state 2) or
+// LRUCacheShard::Erase (for state 3).
+struct LRUHandle {
+  void* value;
+  void (*deleter)(const Slice&, void* value);
+  LRUHandle* next_hash;
+  LRUHandle* next;
+  LRUHandle* prev;
+  size_t charge;     // TODO(opt): Only allow uint32_t?
+  size_t key_length;
+  uint32_t refs;     // number of refs to this entry;
+                     // the cache itself is counted as 1
+  bool in_cache;     // true if this entry is referenced by the hash table
+  uint32_t hash;     // Hash of key(); used for fast sharding and comparisons
+  char key_data[1];  // Beginning of key
+
+  Slice key() const {
+    // For cheaper lookups, we allow a temporary Handle object
+    // to store a pointer to a key in "value".
+    if (next == this) {
+      return *(reinterpret_cast<Slice*>(value));
+    } else {
+      return Slice(key_data, key_length);
+    }
+  }
+
+  void Free() {
+    assert((refs == 1 && in_cache) || (refs == 0 && !in_cache));
+    (*deleter)(key(), value);
+    delete[] reinterpret_cast<char*>(this);
+  }
+};
+
 // We provide our own simple hash table since it removes a whole bunch
 // of porting hacks and is also faster than some of the built-in hash
 // table implementations in some of the compiler/runtime combinations
@@ -131,46 +183,47 @@ class HandleTable {
 };

 // A single shard of sharded cache.
-class LRUCache {
+class LRUCacheShard : public CacheShard {
  public:
-  LRUCache();
-  ~LRUCache();
+  LRUCacheShard();
+  virtual ~LRUCacheShard();

   // Separate from constructor so caller can easily make an array of LRUCache
   // if current usage is more than new capacity, the function will attempt to
   // free the needed space
-  void SetCapacity(size_t capacity);
+  virtual void SetCapacity(size_t capacity) override;

   // Set the flag to reject insertion if the cache is full.
-  void SetStrictCapacityLimit(bool strict_capacity_limit);
+  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;

   // Like Cache methods, but with an extra "hash" parameter.
-  Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
-                void (*deleter)(const Slice& key, void* value),
-                Cache::Handle** handle);
-  Cache::Handle* Lookup(const Slice& key, uint32_t hash);
-  void Release(Cache::Handle* handle);
-  void Erase(const Slice& key, uint32_t hash);
+  virtual Status Insert(const Slice& key, uint32_t hash, void* value,
+                        size_t charge,
+                        void (*deleter)(const Slice& key, void* value),
+                        Cache::Handle** handle) override;
+  virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
+  virtual void Release(Cache::Handle* handle) override;
+  virtual void Erase(const Slice& key, uint32_t hash) override;

   // Although in some platforms the update of size_t is atomic, to make sure
   // GetUsage() and GetPinnedUsage() work correctly under any platform, we'll
   // protect them with mutex_.
-  size_t GetUsage() const {
+  virtual size_t GetUsage() const override {
     MutexLock l(&mutex_);
     return usage_;
   }

-  size_t GetPinnedUsage() const {
+  virtual size_t GetPinnedUsage() const override {
     MutexLock l(&mutex_);
     assert(usage_ >= lru_usage_);
     return usage_ - lru_usage_;
   }

-  void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
-                              bool thread_safe);
+  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
+                                      bool thread_safe) override;

-  void EraseUnRefEntries();
+  virtual void EraseUnRefEntries() override;

  private:
   void LRU_Remove(LRUHandle* e);
@@ -210,15 +263,15 @@ class LRUCache {
   HandleTable table_;
 };

-LRUCache::LRUCache() : usage_(0), lru_usage_(0) {
+LRUCacheShard::LRUCacheShard() : usage_(0), lru_usage_(0) {
   // Make empty circular linked list
   lru_.next = &lru_;
   lru_.prev = &lru_;
 }

-LRUCache::~LRUCache() {}
+LRUCacheShard::~LRUCacheShard() {}

-bool LRUCache::Unref(LRUHandle* e) {
+bool LRUCacheShard::Unref(LRUHandle* e) {
   assert(e->refs > 0);
   e->refs--;
   return e->refs == 0;
@@ -226,7 +279,7 @@ bool LRUCache::Unref(LRUHandle* e) {
 }

 // Call deleter and free
-void LRUCache::EraseUnRefEntries() {
+void LRUCacheShard::EraseUnRefEntries() {
   autovector<LRUHandle*> last_reference_list;
   {
     MutexLock l(&mutex_);
@@ -249,8 +302,8 @@ void LRUCache::EraseUnRefEntries() {
   }
 }

-void LRUCache::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
-                                      bool thread_safe) {
+void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
+                                           bool thread_safe) {
   if (thread_safe) {
     mutex_.Lock();
   }
@@ -261,7 +314,7 @@ void LRUCache::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
   }
 }

-void LRUCache::LRU_Remove(LRUHandle* e) {
+void LRUCacheShard::LRU_Remove(LRUHandle* e) {
   assert(e->next != nullptr);
   assert(e->prev != nullptr);
   e->next->prev = e->prev;
@@ -270,7 +323,7 @@ void LRUCache::LRU_Remove(LRUHandle* e) {
   lru_usage_ -= e->charge;
 }

-void LRUCache::LRU_Append(LRUHandle* e) {
+void LRUCacheShard::LRU_Append(LRUHandle* e) {
   // Make "e" the newest entry by inserting just before lru_
   assert(e->next == nullptr);
   assert(e->prev == nullptr);
@@ -281,7 +334,8 @@ void LRUCache::LRU_Append(LRUHandle* e) {
   lru_usage_ += e->charge;
 }

-void LRUCache::EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted) {
+void LRUCacheShard::EvictFromLRU(size_t charge,
+                                 autovector<LRUHandle*>* deleted) {
   while (usage_ + charge > capacity_ && lru_.next != &lru_) {
     LRUHandle* old = lru_.next;
     assert(old->in_cache);
@@ -295,7 +349,7 @@ void LRUCache::EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted) {
   }
 }

-void LRUCache::SetCapacity(size_t capacity) {
+void LRUCacheShard::SetCapacity(size_t capacity) {
   autovector<LRUHandle*> last_reference_list;
   {
     MutexLock l(&mutex_);
@@ -309,12 +363,12 @@ void LRUCache::SetCapacity(size_t capacity) {
   }
 }

-void LRUCache::SetStrictCapacityLimit(bool strict_capacity_limit) {
+void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
   MutexLock l(&mutex_);
   strict_capacity_limit_ = strict_capacity_limit;
 }

-Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
+Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
   MutexLock l(&mutex_);
   LRUHandle* e = table_.Lookup(key, hash);
   if (e != nullptr) {
@@ -327,7 +381,7 @@ Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
   return reinterpret_cast<Cache::Handle*>(e);
 }

-void LRUCache::Release(Cache::Handle* handle) {
+void LRUCacheShard::Release(Cache::Handle* handle) {
   if (handle == nullptr) {
     return;
   }
@@ -364,10 +418,10 @@ void LRUCache::Release(Cache::Handle* handle) {
   }
 }

-Status LRUCache::Insert(const Slice& key, uint32_t hash, void* value,
-                        size_t charge,
-                        void (*deleter)(const Slice& key, void* value),
-                        Cache::Handle** handle) {
+Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
+                             size_t charge,
+                             void (*deleter)(const Slice& key, void* value),
+                             Cache::Handle** handle) {
   // Allocate the memory here outside of the mutex
   // If the cache is full, we'll have to release it
   // It shouldn't happen very often though.
@@ -437,7 +491,7 @@ Status LRUCache::Insert(const Slice& key, uint32_t hash, void* value,
   return s;
 }

-void LRUCache::Erase(const Slice& key, uint32_t hash) {
+void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
   LRUHandle* e;
   bool last_reference = false;
   {
@@ -462,150 +516,53 @@ void LRUCache::Erase(const Slice& key, uint32_t hash) {
   }
 }

-static int kNumShardBits = 6;  // default values, can be overridden
-
-class ShardedLRUCache : public Cache {
- private:
-  LRUCache* shards_;
-  port::Mutex id_mutex_;
-  port::Mutex capacity_mutex_;
-  uint64_t last_id_;
-  int num_shard_bits_;
-  size_t capacity_;
-  bool strict_capacity_limit_;
-
-  static inline uint32_t HashSlice(const Slice& s) {
-    return Hash(s.data(), s.size(), 0);
+class LRUCache : public ShardedCache {
+ public:
+  LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit)
+      : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
+    int num_shards = 1 << num_shard_bits;
+    shards_ = new LRUCacheShard[num_shards];
+    SetCapacity(capacity);
+    SetStrictCapacityLimit(strict_capacity_limit);
   }

-  uint32_t Shard(uint32_t hash) {
-    // Note, hash >> 32 yields hash in gcc, not the zero we expect!
-    return (num_shard_bits_ > 0) ? (hash >> (32 - num_shard_bits_)) : 0;
-  }
+  virtual ~LRUCache() { delete[] shards_; }

- public:
-  ShardedLRUCache(size_t capacity, int num_shard_bits,
-                  bool strict_capacity_limit)
-      : last_id_(0),
-        num_shard_bits_(num_shard_bits),
-        capacity_(capacity),
-        strict_capacity_limit_(strict_capacity_limit) {
-    int num_shards = 1 << num_shard_bits_;
-    shards_ = new LRUCache[num_shards];
-    const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
-    for (int s = 0; s < num_shards; s++) {
-      shards_[s].SetCapacity(per_shard);
-      shards_[s].SetStrictCapacityLimit(strict_capacity_limit);
-    }
-  }
-  virtual ~ShardedLRUCache() { delete[] shards_; }
-  virtual void SetCapacity(size_t capacity) override {
-    int num_shards = 1 << num_shard_bits_;
-    const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
-    MutexLock l(&capacity_mutex_);
-    for (int s = 0; s < num_shards; s++) {
-      shards_[s].SetCapacity(per_shard);
-    }
-    capacity_ = capacity;
-  }
-  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override {
-    int num_shards = 1 << num_shard_bits_;
-    for (int s = 0; s < num_shards; s++) {
-      shards_[s].SetStrictCapacityLimit(strict_capacity_limit);
-    }
-    strict_capacity_limit_ = strict_capacity_limit;
-  }
-  virtual Status Insert(const Slice& key, void* value, size_t charge,
-                        void (*deleter)(const Slice& key, void* value),
-                        Handle** handle) override {
-    const uint32_t hash = HashSlice(key);
-    return shards_[Shard(hash)].Insert(key, hash, value, charge, deleter,
-                                       handle);
-  }
-  virtual Handle* Lookup(const Slice& key) override {
-    const uint32_t hash = HashSlice(key);
-    return shards_[Shard(hash)].Lookup(key, hash);
-  }
-  virtual void Release(Handle* handle) override {
-    LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
-    shards_[Shard(h->hash)].Release(handle);
+  virtual CacheShard* GetShard(int shard) override {
+    return reinterpret_cast<CacheShard*>(&shards_[shard]);
   }
-  virtual void Erase(const Slice& key) override {
-    const uint32_t hash = HashSlice(key);
-    shards_[Shard(hash)].Erase(key, hash);
-  }
-  virtual void* Value(Handle* handle) override {
-    return reinterpret_cast<LRUHandle*>(handle)->value;
-  }
-  virtual uint64_t NewId() override {
-    MutexLock l(&id_mutex_);
-    return ++(last_id_);
-  }
-  virtual size_t GetCapacity() const override { return capacity_; }
-  virtual bool HasStrictCapacityLimit() const override {
-    return strict_capacity_limit_;
+  virtual const CacheShard* GetShard(int shard) const override {
+    return reinterpret_cast<const CacheShard*>(&shards_[shard]);
   }
-  virtual size_t GetUsage() const override {
-    // We will not lock the cache when getting the usage from shards.
-    int num_shards = 1 << num_shard_bits_;
-    size_t usage = 0;
-    for (int s = 0; s < num_shards; s++) {
-      usage += shards_[s].GetUsage();
-    }
-    return usage;
+  virtual void* Value(Handle* handle) override {
+    return reinterpret_cast<LRUHandle*>(handle)->value;
   }
-  virtual size_t GetUsage(Handle* handle) const override {
-    return reinterpret_cast<LRUHandle*>(handle)->charge;
+  virtual size_t GetCharge(Handle* handle) const override {
+    return reinterpret_cast<LRUHandle*>(handle)->charge;
   }
-  virtual size_t GetPinnedUsage() const override {
-    // We will not lock the cache when getting the usage from shards.
-    int num_shards = 1 << num_shard_bits_;
-    size_t usage = 0;
-    for (int s = 0; s < num_shards; s++) {
-      usage += shards_[s].GetPinnedUsage();
-    }
-    return usage;
+  virtual uint32_t GetHash(Handle* handle) const override {
+    return reinterpret_cast<LRUHandle*>(handle)->hash;
   }
   virtual void DisownData() override { shards_ = nullptr; }

-  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
-                                      bool thread_safe) override {
-    int num_shards = 1 << num_shard_bits_;
-    for (int s = 0; s < num_shards; s++) {
-      shards_[s].ApplyToAllCacheEntries(callback, thread_safe);
-    }
-  }
-
-  virtual void EraseUnRefEntries() override {
-    int num_shards = 1 << num_shard_bits_;
-    for (int s = 0; s < num_shards; s++) {
-      shards_[s].EraseUnRefEntries();
-    }
-  }
+ private:
+  LRUCacheShard* shards_;
 };

 }  // end anonymous namespace

-std::shared_ptr<Cache> NewLRUCache(size_t capacity) {
-  return NewLRUCache(capacity, kNumShardBits, false);
-}
-
-std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits) {
-  return NewLRUCache(capacity, num_shard_bits, false);
-}
-
 std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
                                    bool strict_capacity_limit) {
   if (num_shard_bits >= 20) {
     return nullptr;  // the cache cannot be sharded into too many fine pieces
   }
-  return std::make_shared<ShardedLRUCache>(capacity, num_shard_bits,
-                                           strict_capacity_limit);
+  return std::make_shared<LRUCache>(capacity, num_shard_bits,
+                                    strict_capacity_limit);
 }

 }  // namespace rocksdb
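The handle state machine documented above is exercised through the public Cache API once the shards are wired up. A hedged sketch of the transitions, using only operations present in this diff; the key and value are placeholders:

    #include <cassert>
    #include "rocksdb/cache.h"

    void handle_state_walkthrough() {
      std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(1 << 20);
      rocksdb::Cache::Handle* h = nullptr;

      // State 1: referenced externally AND in the hash table.
      rocksdb::Status s = cache->Insert(
          "key", new int(42), sizeof(int),
          [](const rocksdb::Slice&, void* v) { delete static_cast<int*>(v); },
          &h);
      assert(s.ok());

      cache->Release(h);         // state 1 -> state 2: eligible for eviction
      h = cache->Lookup("key");  // state 2 -> state 1
      cache->Erase("key");       // state 1 -> state 3: handle keeps entry alive
      cache->Release(h);         // last reference: deleter runs, entry is freed
    }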
diff --git a/util/lru_cache_handle.h b/util/lru_cache_handle.h
deleted file mode 100644
index 1cd755a62..000000000
--- a/util/lru_cache_handle.h
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree. An additional grant
-// of patent rights can be found in the PATENTS file in the same directory.
-//
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
-
-#pragma once
-
-#include <assert.h>
-#include <stdint.h>
-
-#include "port/port.h"
-#include "rocksdb/slice.h"
-
-namespace rocksdb {
-// An entry is a variable length heap-allocated structure.
-// Entries are referenced by the cache and/or by any external entity.
-// The cache keeps all its entries in a hash table. Some elements
-// are also stored on the LRU list.
-//
-// LRUHandle can be in these states:
-// 1. Referenced externally AND in the hash table.
-//    In that case the entry is *not* in the LRU list.
-//    (refs > 1 && in_cache == true)
-// 2. Not referenced externally and in the hash table. In that case the entry
-//    is in the LRU list and can be freed. (refs == 1 && in_cache == true)
-// 3. Referenced externally and not in the hash table. In that case the entry
-//    is not on the LRU list and not in the hash table.
-//    (refs >= 1 && in_cache == false)
-//
-// All newly created LRUHandles are in state 1. If you call LRUCache::Release
-// on an entry in state 1, it will go into state 2. To move from state 1 to
-// state 3, either call LRUCache::Erase or LRUCache::Insert with the same key.
-// To move from state 2 to state 1, use LRUCache::Lookup.
-// Before destruction, make sure that no handles are in state 1. This means
-// that any successful LRUCache::Lookup/LRUCache::Insert has a matching
-// LRUCache::Release (to move into state 2) or LRUCache::Erase (for state 3).
-
-struct LRUHandle {
-  void* value;
-  void (*deleter)(const Slice&, void* value);
-  LRUHandle* next_hash;
-  LRUHandle* next;
-  LRUHandle* prev;
-  size_t charge;     // TODO(opt): Only allow uint32_t?
-  size_t key_length;
-  uint32_t refs;     // number of refs to this entry;
-                     // the cache itself is counted as 1
-  bool in_cache;     // true if this entry is referenced by the hash table
-  uint32_t hash;     // Hash of key(); used for fast sharding and comparisons
-  char key_data[1];  // Beginning of key
-
-  Slice key() const {
-    // For cheaper lookups, we allow a temporary Handle object
-    // to store a pointer to a key in "value".
-    if (next == this) {
-      return *(reinterpret_cast<Slice*>(value));
-    } else {
-      return Slice(key_data, key_length);
-    }
-  }
-
-  void Free() {
-    assert((refs == 1 && in_cache) || (refs == 0 && !in_cache));
-    (*deleter)(key(), value);
-    delete[] reinterpret_cast<char*>(this);
-  }
-};
-
-}  // end namespace rocksdb
diff --git a/util/sharded_cache.cc b/util/sharded_cache.cc
new file mode 100644
index 000000000..030c52749
--- /dev/null
+++ b/util/sharded_cache.cc
@@ -0,0 +1,117 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "util/sharded_cache.h"
+#include "util/mutexlock.h"
+
+namespace rocksdb {
+
+ShardedCache::ShardedCache(size_t capacity, int num_shard_bits,
+                           bool strict_capacity_limit)
+    : num_shard_bits_(num_shard_bits),
+      capacity_(capacity),
+      strict_capacity_limit_(strict_capacity_limit),
+      last_id_(1) {}
+
+void ShardedCache::SetCapacity(size_t capacity) {
+  int num_shards = 1 << num_shard_bits_;
+  const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
+  MutexLock l(&capacity_mutex_);
+  for (int s = 0; s < num_shards; s++) {
+    GetShard(s)->SetCapacity(per_shard);
+  }
+  capacity_ = capacity;
+}
+
+void ShardedCache::SetStrictCapacityLimit(bool strict_capacity_limit) {
+  int num_shards = 1 << num_shard_bits_;
+  MutexLock l(&capacity_mutex_);
+  for (int s = 0; s < num_shards; s++) {
+    GetShard(s)->SetStrictCapacityLimit(strict_capacity_limit);
+  }
+  strict_capacity_limit_ = strict_capacity_limit;
+}
+
+Status ShardedCache::Insert(const Slice& key, void* value, size_t charge,
+                            void (*deleter)(const Slice& key, void* value),
+                            Handle** handle) {
+  uint32_t hash = HashSlice(key);
+  return GetShard(Shard(hash))
+      ->Insert(key, hash, value, charge, deleter, handle);
+}
+
+Cache::Handle* ShardedCache::Lookup(const Slice& key) {
+  uint32_t hash = HashSlice(key);
+  return GetShard(Shard(hash))->Lookup(key, hash);
+}
+
+void ShardedCache::Release(Handle* handle) {
+  uint32_t hash = GetHash(handle);
+  GetShard(Shard(hash))->Release(handle);
+}
+
+void ShardedCache::Erase(const Slice& key) {
+  uint32_t hash = HashSlice(key);
+  GetShard(Shard(hash))->Erase(key, hash);
+}
+
+uint64_t ShardedCache::NewId() {
+  return last_id_.fetch_add(1, std::memory_order_relaxed);
+}
+
+size_t ShardedCache::GetCapacity() const {
+  MutexLock l(&capacity_mutex_);
+  return capacity_;
+}
+
+bool ShardedCache::HasStrictCapacityLimit() const {
+  MutexLock l(&capacity_mutex_);
+  return strict_capacity_limit_;
+}
+
+size_t ShardedCache::GetUsage() const {
+  // We will not lock the cache when getting the usage from shards.
+  int num_shards = 1 << num_shard_bits_;
+  size_t usage = 0;
+  for (int s = 0; s < num_shards; s++) {
+    usage += GetShard(s)->GetUsage();
+  }
+  return usage;
+}
+
+size_t ShardedCache::GetUsage(Handle* handle) const {
+  return GetCharge(handle);
+}
+
+size_t ShardedCache::GetPinnedUsage() const {
+  // We will not lock the cache when getting the usage from shards.
+  int num_shards = 1 << num_shard_bits_;
+  size_t usage = 0;
+  for (int s = 0; s < num_shards; s++) {
+    usage += GetShard(s)->GetPinnedUsage();
+  }
+  return usage;
+}
+
+void ShardedCache::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
+                                          bool thread_safe) {
+  int num_shards = 1 << num_shard_bits_;
+  for (int s = 0; s < num_shards; s++) {
+    GetShard(s)->ApplyToAllCacheEntries(callback, thread_safe);
+  }
+}
+
+void ShardedCache::EraseUnRefEntries() {
+  int num_shards = 1 << num_shard_bits_;
+  for (int s = 0; s < num_shards; s++) {
+    GetShard(s)->EraseUnRefEntries();
+  }
+}
+
+}  // namespace rocksdb
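Note that SetCapacity rounds the per-shard budget up, so the aggregate capacity across shards can slightly exceed the requested total. A worked example with illustrative numbers:

    #include <cstddef>
    #include <cstdio>

    int main() {
      const std::size_t capacity = 100;
      const int num_shards = 1 << 6;  // 64 shards with the new default
      // Same rounding as ShardedCache::SetCapacity:
      const std::size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
      // per_shard == 2, so the shards hold 128 in aggregate, not 100.
      std::printf("%zu per shard, %zu total\n", per_shard,
                  per_shard * num_shards);
      return 0;
    }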
diff --git a/util/sharded_cache.h b/util/sharded_cache.h
new file mode 100644
index 000000000..58658bdc3
--- /dev/null
+++ b/util/sharded_cache.h
@@ -0,0 +1,93 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <atomic>
+
+#include "port/port.h"
+#include "rocksdb/cache.h"
+#include "util/hash.h"
+
+namespace rocksdb {
+
+// Single cache shard interface.
+class CacheShard {
+ public:
+  CacheShard() = default;
+  virtual ~CacheShard() = default;
+
+  virtual Status Insert(const Slice& key, uint32_t hash, void* value,
+                        size_t charge,
+                        void (*deleter)(const Slice& key, void* value),
+                        Cache::Handle** handle) = 0;
+  virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) = 0;
+  virtual void Release(Cache::Handle* handle) = 0;
+  virtual void Erase(const Slice& key, uint32_t hash) = 0;
+  virtual void SetCapacity(size_t capacity) = 0;
+  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
+  virtual size_t GetUsage() const = 0;
+  virtual size_t GetPinnedUsage() const = 0;
+  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
+                                      bool thread_safe) = 0;
+  virtual void EraseUnRefEntries() = 0;
+};
+
+// Generic cache interface which shards cache by hash of keys. 2^num_shard_bits
+// shards will be created, with capacity split evenly to each of the shards.
+// Keys are sharded by the highest num_shard_bits bits of the hash value.
+class ShardedCache : public Cache {
+ public:
+  ShardedCache(size_t capacity, int num_shard_bits,
+               bool strict_capacity_limit);
+  virtual ~ShardedCache() = default;
+
+  virtual CacheShard* GetShard(int shard) = 0;
+  virtual const CacheShard* GetShard(int shard) const = 0;
+  virtual void* Value(Handle* handle) override = 0;
+  virtual size_t GetCharge(Handle* handle) const = 0;
+  virtual uint32_t GetHash(Handle* handle) const = 0;
+  virtual void DisownData() override = 0;
+
+  virtual void SetCapacity(size_t capacity) override;
+  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;
+
+  virtual Status Insert(const Slice& key, void* value, size_t charge,
+                        void (*deleter)(const Slice& key, void* value),
+                        Handle** handle) override;
+  virtual Handle* Lookup(const Slice& key) override;
+  virtual void Release(Handle* handle) override;
+  virtual void Erase(const Slice& key) override;
+  virtual uint64_t NewId() override;
+  virtual size_t GetCapacity() const override;
+  virtual bool HasStrictCapacityLimit() const override;
+  virtual size_t GetUsage() const override;
+  virtual size_t GetUsage(Handle* handle) const override;
+  virtual size_t GetPinnedUsage() const override;
+  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
+                                      bool thread_safe) override;
+  virtual void EraseUnRefEntries() override;
+
+ private:
+  static inline uint32_t HashSlice(const Slice& s) {
+    return Hash(s.data(), s.size(), 0);
+  }
+
+  uint32_t Shard(uint32_t hash) {
+    // Note, hash >> 32 yields hash in gcc, not the zero we expect!
+    return (num_shard_bits_ > 0) ? (hash >> (32 - num_shard_bits_)) : 0;
+  }
+
+  int num_shard_bits_;
+  mutable port::Mutex capacity_mutex_;
+  size_t capacity_;
+  bool strict_capacity_limit_;
+  std::atomic<uint64_t> last_id_;
+};
+
+}  // namespace rocksdb
diff --git a/utilities/simulator_cache/sim_cache.cc b/utilities/simulator_cache/sim_cache.cc
index 4e6b20bc4..e88043d9d 100644
--- a/utilities/simulator_cache/sim_cache.cc
+++ b/utilities/simulator_cache/sim_cache.cc
@@ -65,9 +65,7 @@ class SimCacheImpl : public SimCache {
     key_only_cache_->Erase(key);
   }

-  virtual void* Value(Handle* handle) override {
-    return reinterpret_cast<LRUHandle*>(handle)->value;
-  }
+  virtual void* Value(Handle* handle) override { return cache_->Value(handle); }

   virtual uint64_t NewId() override { return cache_->NewId(); }

@@ -80,7 +78,7 @@ class SimCacheImpl : public SimCache {
   virtual size_t GetUsage() const override { return cache_->GetUsage(); }

   virtual size_t GetUsage(Handle* handle) const override {
-    return reinterpret_cast<LRUHandle*>(handle)->charge;
+    return cache_->GetUsage(handle);
   }

   virtual size_t GetPinnedUsage() const override {
@@ -113,12 +111,21 @@ class SimCacheImpl : public SimCache {
     key_only_cache_->SetCapacity(capacity);
   }

-  virtual uint64_t get_lookup_counter() const override { return lookup_times_; }
-  virtual uint64_t get_hit_counter() const override { return hit_times_; }
+  virtual uint64_t get_lookup_counter() const override {
+    return lookup_times_.load(std::memory_order_relaxed);
+  }
+
+  virtual uint64_t get_hit_counter() const override {
+    return hit_times_.load(std::memory_order_relaxed);
+  }
+
   virtual double get_hit_rate() const override {
-    return hit_times_ * 1.0f / lookup_times_;
+    return get_hit_counter() * 1.0f / get_lookup_counter();
+  }
+
+  virtual void reset_counter() override {
+    lookup_times_.store(0, std::memory_order_relaxed);
+    hit_times_.store(0, std::memory_order_relaxed);
   }
-  virtual void reset_counter() override { hit_times_ = lookup_times_ = 0; }

   virtual std::string ToString() const override {
     std::string res;
@@ -137,8 +144,10 @@ class SimCacheImpl : public SimCache {
   std::shared_ptr<Cache> key_only_cache_;
   std::atomic<uint64_t> lookup_times_;
   std::atomic<uint64_t> hit_times_;
-  void inc_lookup_counter() { lookup_times_++; }
-  void inc_hit_counter() { hit_times_++; }
+  void inc_lookup_counter() {
+    lookup_times_.fetch_add(1, std::memory_order_relaxed);
+  }
+  void inc_hit_counter() { hit_times_.fetch_add(1, std::memory_order_relaxed); }
 };

 }  // end anonymous namespace
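The counter changes in SimCacheImpl replace bare increments on std::atomic with explicit relaxed operations; the counters are pure statistics, so no ordering with other memory is required. A standalone sketch of the same pattern, assuming nothing beyond the standard library:

    #include <atomic>
    #include <cstdint>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main() {
      std::atomic<uint64_t> hits{0};
      std::vector<std::thread> workers;
      for (int t = 0; t < 4; t++) {
        workers.emplace_back([&hits] {
          for (int i = 0; i < 1000; i++) {
            // Relaxed is enough: the counter synchronizes no other data.
            hits.fetch_add(1, std::memory_order_relaxed);
          }
        });
      }
      for (auto& w : workers) {
        w.join();
      }
      // After join(), every completed increment is visible: prints 4000.
      std::printf("%llu\n", static_cast<unsigned long long>(
                                hits.load(std::memory_order_relaxed)));
      return 0;
    }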