LRUCache: try to clean up unreferenced entries first.

Summary:
With this patch, when LRUCache::Insert() is called and the cache is full, it will first try to free up entries whose reference count is 1 (it would become 0 after removing the entry from the cache). We do this in two passes: in the first pass, we only try to release those unreferenced entries. If we cannot free enough space after traversing the first remove_scan_count_limit_ entries, we start from the beginning again and also remove entries that are still referenced.
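
As an illustration only (this code is not part of the patch), here is a minimal standalone sketch of the two-pass eviction idea, using a plain std::list in place of RocksDB's intrusive LRU list and hash table:

#include <cstddef>
#include <iterator>
#include <list>

struct Entry {
  int refs;            // 1 means only the cache itself holds a reference
  std::size_t charge;  // how much capacity this entry consumes
};

// Evict until usage fits within capacity. Pass 1 scans at most scan_limit
// entries from the least-recently-used end and frees only those that are
// unreferenced. Pass 2 falls back to strict LRU order if more space is
// still needed.
void EvictTwoPass(std::list<Entry>& lru, std::size_t& usage,
                  std::size_t capacity, std::size_t scan_limit) {
  auto it = lru.begin();  // begin() is the least-recently-used end here
  for (std::size_t scanned = 0;
       usage > capacity && it != lru.end() && scanned < scan_limit;
       ++scanned) {
    auto next = std::next(it);
    if (it->refs <= 1) {
      usage -= it->charge;
      lru.erase(it);
    }
    it = next;
  }
  while (usage > capacity && !lru.empty()) {
    usage -= lru.front().charge;
    lru.pop_front();
  }
}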

Test Plan: add two unit tests to cover the new code

Reviewers: dhruba, haobo, emayanke

Reviewed By: emayanke

CC: leveldb, emayanke, xjin

Differential Revision: https://reviews.facebook.net/D13377
Branch: main
Author: sdong (committed by Siying Dong, 11 years ago)
Parent: c0ce562c32
Commit: f8509653ba
Changed files:
  1. db/db_bench.cc (5 lines changed)
  2. db/table_cache.cc (5 lines changed)
  3. include/rocksdb/cache.h (16 lines changed)
  4. include/rocksdb/options.h (9 lines changed)
  5. util/cache.cc (47 lines changed)
  6. util/cache_test.cc (170 lines changed)
  7. util/options.cc (3 lines changed)

@@ -196,6 +196,8 @@ static const char* FLAGS_db = nullptr;
// if FLAGS_cache_size is non-negative.
static int FLAGS_cache_numshardbits = -1;
static int FLAGS_cache_remove_scan_count_limit = 32;
// Verify checksum for every block read from storage
static bool FLAGS_verify_checksum = false;
@@ -816,7 +818,8 @@ class Benchmark {
Benchmark()
: cache_(FLAGS_cache_size >= 0 ?
(FLAGS_cache_numshardbits >= 1 ?
NewLRUCache(FLAGS_cache_size, FLAGS_cache_numshardbits) :
NewLRUCache(FLAGS_cache_size, FLAGS_cache_numshardbits,
FLAGS_cache_remove_scan_count_limit) :
NewLRUCache(FLAGS_cache_size)) : nullptr),
filter_policy_(FLAGS_bloom_bits >= 0
? NewBloomFilterPolicy(FLAGS_bloom_bits)

@@ -32,7 +32,10 @@ TableCache::TableCache(const std::string& dbname,
dbname_(dbname),
options_(options),
storage_options_(storage_options),
cache_(NewLRUCache(entries, options->table_cache_numshardbits)) {}
cache_(
NewLRUCache(entries, options->table_cache_numshardbits,
options->table_cache_remove_scan_count_limit)) {
}
TableCache::~TableCache() {
}

@@ -28,10 +28,22 @@ using std::shared_ptr;
class Cache;
// Create a new cache with a fixed size capacity. This implementation
// of Cache uses a least-recently-used eviction policy.
// Create a new cache with a fixed size capacity. The cache is sharded
// to 2^numShardBits shards, by hash of the key. The total capacity
// is divided and evenly assigned to each shard. Inside each shard,
// the eviction is done in two passes: first, try to free space by
// evicting, in least-recently-used order, entries that are among the
// removeScanCountLimit least-recently-used entries and are not
// referenced by anything other than the cache itself. If not enough
// space is freed, further evict entries in least-recently-used order.
//
// The functions without parameter numShardBits and/or removeScanCountLimit
// use default values. removeScanCountLimit's default value is 0, which
// means a strict LRU order inside each shard.
extern shared_ptr<Cache> NewLRUCache(size_t capacity);
extern shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits);
extern shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits,
int removeScanCountLimit);
class Cache {
public:
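
As a usage note (not part of the diff), the three overloads declared above might be called as follows; the capacity, shard bits, and scan limit are made-up example values:

#include <memory>
#include "rocksdb/cache.h"

void CacheExamples() {
  // Defaults: 2^4 shards and removeScanCountLimit = 0 (strict LRU per shard).
  std::shared_ptr<rocksdb::Cache> c1 = rocksdb::NewLRUCache(8 << 20);
  // Explicit shard count, still strict LRU inside each shard.
  std::shared_ptr<rocksdb::Cache> c2 = rocksdb::NewLRUCache(8 << 20, 6);
  // New overload: scan up to 16 of the least-used entries in a shard for
  // unreferenced victims before falling back to strict LRU eviction.
  std::shared_ptr<rocksdb::Cache> c3 = rocksdb::NewLRUCache(8 << 20, 6, 16);
}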

@@ -442,6 +442,15 @@ struct Options {
// Number of shards used for table cache.
int table_cache_numshardbits;
// During data eviction of table's LRU cache, it would be inefficient
// to strictly follow LRU because this piece of memory will not really
// be released unless its refcount falls to zero. Instead, make two
// passes: the first pass will release items with refcount = 1,
// and if not enough space is released after scanning the number of
// elements specified by this parameter, we will remove items in LRU
// order.
int table_cache_remove_scan_count_limit;
// size of one block in arena memory allocation.
// If <= 0, a proper value is automatically calculated (usually 1/10 of
// writer_buffer_size).
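
A hypothetical configuration snippet (not from the patch) showing how the new option could be set; the value 32 mirrors the db_bench flag default above and is only an example, while Options() itself defaults the field to 16:

#include "rocksdb/options.h"

rocksdb::Options MakeOptions() {
  rocksdb::Options options;
  options.table_cache_numshardbits = 4;  // pre-existing option
  // New option added by this patch; a larger value lets each table cache
  // shard scan more entries for unreferenced victims before it starts
  // evicting entries that are still in use.
  options.table_cache_remove_scan_count_limit = 32;
  return options;
}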

@@ -140,6 +140,9 @@ class LRUCache {
// Separate from constructor so caller can easily make an array of LRUCache
void SetCapacity(size_t capacity) { capacity_ = capacity; }
void SetRemoveScanCountLimit(size_t remove_scan_count_limit) {
remove_scan_count_limit_ = remove_scan_count_limit;
}
// Like Cache methods, but with an extra "hash" parameter.
Cache::Handle* Insert(const Slice& key, uint32_t hash,
@@ -160,6 +163,7 @@ class LRUCache {
// Initialized before use.
size_t capacity_;
uint32_t remove_scan_count_limit_;
// mutex_ protects the following state.
port::Mutex mutex_;
@@ -270,6 +274,27 @@ Cache::Handle* LRUCache::Insert(
}
}
if (remove_scan_count_limit_ > 0) {
// Try to free the space by evicting the entries that are only
// referenced by the cache first.
LRUHandle* cur = lru_.next;
for (unsigned int scanCount = 0;
usage_ > capacity_ && cur != &lru_
&& scanCount < remove_scan_count_limit_; scanCount++) {
LRUHandle* next = cur->next;
if (cur->refs <= 1) {
LRU_Remove(cur);
table_.Remove(cur->key(), cur->hash);
if (Unref(cur)) {
last_reference_list.push_back(cur);
}
}
cur = next;
}
}
// Free the space following strict LRU policy until enough space
// is freed.
while (usage_ > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next;
LRU_Remove(old);
@@ -308,6 +333,7 @@ void LRUCache::Erase(const Slice& key, uint32_t hash) {
}
static int kNumShardBits = 4; // default values, can be overridden
static int kRemoveScanCountLimit = 0; // default values, can be overridden
class ShardedLRUCache : public Cache {
private:
@@ -326,7 +352,7 @@ class ShardedLRUCache : public Cache {
return (numShardBits > 0) ? (hash >> (32 - numShardBits)) : 0;
}
void init(size_t capacity, int numbits) {
void init(size_t capacity, int numbits, int removeScanCountLimit) {
numShardBits = numbits;
capacity_ = capacity;
int numShards = 1 << numShardBits;
@@ -334,17 +360,19 @@ class ShardedLRUCache : public Cache {
const size_t per_shard = (capacity + (numShards - 1)) / numShards;
for (int s = 0; s < numShards; s++) {
shard_[s].SetCapacity(per_shard);
shard_[s].SetRemoveScanCountLimit(removeScanCountLimit);
}
}
public:
explicit ShardedLRUCache(size_t capacity)
: last_id_(0) {
init(capacity, kNumShardBits);
init(capacity, kNumShardBits, kRemoveScanCountLimit);
}
ShardedLRUCache(size_t capacity, int numShardBits)
ShardedLRUCache(size_t capacity, int numShardBits,
int removeScanCountLimit)
: last_id_(0) {
init(capacity, numShardBits);
init(capacity, numShardBits, removeScanCountLimit);
}
virtual ~ShardedLRUCache() {
delete[] shard_;
@@ -381,14 +409,21 @@ class ShardedLRUCache : public Cache {
} // end anonymous namespace
shared_ptr<Cache> NewLRUCache(size_t capacity) {
return std::make_shared<ShardedLRUCache>(capacity);
return NewLRUCache(capacity, kNumShardBits);
}
shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits) {
return NewLRUCache(capacity, numShardBits, kRemoveScanCountLimit);
}
shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits,
int removeScanCountLimit) {
if (numShardBits >= 20) {
return nullptr; // the cache cannot be sharded into too many fine pieces
}
return std::make_shared<ShardedLRUCache>(capacity, numShardBits);
return std::make_shared<ShardedLRUCache>(capacity,
numShardBits,
removeScanCountLimit);
}
} // namespace rocksdb

@@ -35,33 +35,69 @@ class CacheTest {
}
static const int kCacheSize = 1000;
static const int kNumShardBits = 4;
static const int kRemoveScanCountLimit = 16;
static const int kCacheSize2 = 100;
static const int kNumShardBits2 = 2;
static const int kRemoveScanCountLimit2 = 200;
std::vector<int> deleted_keys_;
std::vector<int> deleted_values_;
shared_ptr<Cache> cache_;
shared_ptr<Cache> cache2_;
CacheTest() : cache_(NewLRUCache(kCacheSize)) {
CacheTest() :
cache_(NewLRUCache(kCacheSize, kNumShardBits, kRemoveScanCountLimit)),
cache2_(NewLRUCache(kCacheSize2, kNumShardBits2,
kRemoveScanCountLimit2)) {
current_ = this;
}
~CacheTest() {
}
int Lookup(int key) {
Cache::Handle* handle = cache_->Lookup(EncodeKey(key));
const int r = (handle == nullptr) ? -1 : DecodeValue(cache_->Value(handle));
int Lookup(shared_ptr<Cache> cache, int key) {
Cache::Handle* handle = cache->Lookup(EncodeKey(key));
const int r = (handle == nullptr) ? -1 : DecodeValue(cache->Value(handle));
if (handle != nullptr) {
cache_->Release(handle);
cache->Release(handle);
}
return r;
}
void Insert(int key, int value, int charge = 1) {
cache_->Release(cache_->Insert(EncodeKey(key), EncodeValue(value), charge,
void Insert(shared_ptr<Cache> cache, int key, int value, int charge = 1) {
cache->Release(cache->Insert(EncodeKey(key), EncodeValue(value), charge,
&CacheTest::Deleter));
}
void Erase(shared_ptr<Cache> cache, int key) {
cache->Erase(EncodeKey(key));
}
int Lookup(int key) {
return Lookup(cache_, key);
}
void Insert(int key, int value, int charge = 1) {
Insert(cache_, key, value, charge);
}
void Erase(int key) {
cache_->Erase(EncodeKey(key));
Erase(cache_, key);
}
int Lookup2(int key) {
return Lookup(cache2_, key);
}
void Insert2(int key, int value, int charge = 1) {
Insert(cache2_, key, value, charge);
}
void Erase2(int key) {
Erase(cache2_, key);
}
};
CacheTest* CacheTest::current_;
@@ -147,6 +183,124 @@ TEST(CacheTest, EvictionPolicy) {
ASSERT_EQ(-1, Lookup(200));
}
TEST(CacheTest, EvictionPolicyRef) {
Insert(100, 101);
Insert(101, 102);
Insert(102, 103);
Insert(103, 104);
Insert(200, 101);
Insert(201, 102);
Insert(202, 103);
Insert(203, 104);
Cache::Handle* h201 = cache_->Lookup(EncodeKey(200));
Cache::Handle* h202 = cache_->Lookup(EncodeKey(201));
Cache::Handle* h203 = cache_->Lookup(EncodeKey(202));
Cache::Handle* h204 = cache_->Lookup(EncodeKey(203));
Insert(300, 101);
Insert(301, 102);
Insert(302, 103);
Insert(303, 104);
// Insert entries much more than Cache capacity
for (int i = 0; i < kCacheSize + 100; i++) {
Insert(1000 + i, 2000 + i);
}
// Check whether the entries inserted in the beginning
// are evicted. Ones without extra ref are evicted and
// those with are not.
ASSERT_EQ(-1, Lookup(100));
ASSERT_EQ(-1, Lookup(101));
ASSERT_EQ(-1, Lookup(102));
ASSERT_EQ(-1, Lookup(103));
ASSERT_EQ(-1, Lookup(300));
ASSERT_EQ(-1, Lookup(301));
ASSERT_EQ(-1, Lookup(302));
ASSERT_EQ(-1, Lookup(303));
ASSERT_EQ(101, Lookup(200));
ASSERT_EQ(102, Lookup(201));
ASSERT_EQ(103, Lookup(202));
ASSERT_EQ(104, Lookup(203));
// Cleaning up all the handles
cache_->Release(h201);
cache_->Release(h202);
cache_->Release(h203);
cache_->Release(h204);
}
TEST(CacheTest, EvictionPolicyRef2) {
std::vector<Cache::Handle*> handles;
Insert(100, 101);
// Insert entries much more than Cache capacity
for (int i = 0; i < kCacheSize + 100; i++) {
Insert(1000 + i, 2000 + i);
if (i < kCacheSize ) {
handles.push_back(cache_->Lookup(EncodeKey(1000 + i)));
}
}
// Make sure referenced keys are also possible to be deleted
// if there are not sufficient non-referenced keys
for (int i = 0; i < 5; i++) {
ASSERT_EQ(-1, Lookup(1000 + i));
}
for (int i = kCacheSize; i < kCacheSize + 100; i++) {
ASSERT_EQ(2000 + i, Lookup(1000 + i));
}
ASSERT_EQ(-1, Lookup(100));
// Cleaning up all the handles
while (handles.size() > 0) {
cache_->Release(handles.back());
handles.pop_back();
}
}
TEST(CacheTest, EvictionPolicyRefLargeScanLimit) {
std::vector<Cache::Handle*> handles2;
// Cache2 has a cache RemoveScanCountLimit higher than cache size
// so it would trigger a boundary condition.
// Populate the cache with 10 more keys than its size.
// Reference all keys except one close to the end.
for (int i = 0; i < kCacheSize2 + 10; i++) {
Insert2(1000 + i, 2000+i);
if (i != kCacheSize2 ) {
handles2.push_back(cache2_->Lookup(EncodeKey(1000 + i)));
}
}
// Make sure referenced keys are also possible to be deleted
// if there are not sufficient non-referenced keys
for (int i = 0; i < 3; i++) {
ASSERT_EQ(-1, Lookup2(1000 + i));
}
// The non-referenced value is deleted even if it's accessed
// recently.
ASSERT_EQ(-1, Lookup2(1000 + kCacheSize2));
// Other values recently accessed are not deleted since they
// are referenced.
for (int i = kCacheSize2 - 10; i < kCacheSize2 + 10; i++) {
if (i != kCacheSize2) {
ASSERT_EQ(2000 + i, Lookup2(1000 + i));
}
}
// Cleaning up all the handles
while (handles2.size() > 0) {
cache2_->Release(handles2.back());
handles2.pop_back();
}
}
TEST(CacheTest, HeavyEntries) {
// Add a bunch of light and heavy entries and then count the combined
// size of items still in the cache, which must be approximately the

@@ -65,6 +65,7 @@ Options::Options()
max_manifest_file_size(std::numeric_limits<uint64_t>::max()),
no_block_cache(false),
table_cache_numshardbits(4),
table_cache_remove_scan_count_limit(16),
arena_block_size(0),
disable_auto_compactions(false),
WAL_ttl_seconds(0),
@@ -197,6 +198,8 @@ Options::Dump(Logger* log) const
no_block_cache);
Log(log," Options.table_cache_numshardbits: %d",
table_cache_numshardbits);
Log(log," Options.table_cache_remove_scan_count_limit: %d",
table_cache_remove_scan_count_limit);
Log(log," Options.arena_block_size: %ld",
arena_block_size);
Log(log," Options.delete_obsolete_files_period_micros: %ld",
