diff --git a/db/db_bench.cc b/db/db_bench.cc index 523fc7220..496ca3619 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -196,6 +196,8 @@ static const char* FLAGS_db = nullptr; // if FLAGS_cache_size is non-negative. static int FLAGS_cache_numshardbits = -1; +static int FLAGS_cache_remove_scan_count_limit = 32; + // Verify checksum for every block read from storage static bool FLAGS_verify_checksum = false; @@ -816,7 +818,8 @@ class Benchmark { Benchmark() : cache_(FLAGS_cache_size >= 0 ? (FLAGS_cache_numshardbits >= 1 ? - NewLRUCache(FLAGS_cache_size, FLAGS_cache_numshardbits) : + NewLRUCache(FLAGS_cache_size, FLAGS_cache_numshardbits, + FLAGS_cache_remove_scan_count_limit) : NewLRUCache(FLAGS_cache_size)) : nullptr), filter_policy_(FLAGS_bloom_bits >= 0 ? NewBloomFilterPolicy(FLAGS_bloom_bits) diff --git a/db/table_cache.cc b/db/table_cache.cc index cf544fefe..cf284b60a 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -32,7 +32,10 @@ TableCache::TableCache(const std::string& dbname, dbname_(dbname), options_(options), storage_options_(storage_options), - cache_(NewLRUCache(entries, options->table_cache_numshardbits)) {} + cache_( + NewLRUCache(entries, options->table_cache_numshardbits, + options->table_cache_remove_scan_count_limit)) { +} TableCache::~TableCache() { } diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index f9106511d..c912c9ccd 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -28,10 +28,22 @@ using std::shared_ptr; class Cache; -// Create a new cache with a fixed size capacity. This implementation -// of Cache uses a least-recently-used eviction policy. +// Create a new cache with a fixed size capacity. The cache is sharded +// to 2^numShardBits shards, by hash of the key. The total capacity +// is divided and evenly assigned to each shard. 
Inside each shard, +// the eviction is done in two passes: first try to free spaces by +// evicting entries that are among the most least used removeScanCountLimit +// entries and do not have reference other than by the cache itself, in +// the least-used order. If not enough space is freed, further free the +// entries in least used order. +// +// The functions without parameter numShardBits and/or removeScanCountLimit +// use default values. removeScanCountLimit's default value is 0, which +// means a strict LRU order inside each shard. extern shared_ptr<Cache> NewLRUCache(size_t capacity); extern shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits); +extern shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits, + int removeScanCountLimit); class Cache { public: diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index a160acee1..9bb5330bd 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -442,6 +442,15 @@ struct Options { // Number of shards used for table cache. int table_cache_numshardbits; + // During data eviction of table's LRU cache, it would be inefficient + // to strictly follow LRU because this piece of memory will not really + // be released unless its refcount falls to zero. Instead, make two + // passes: the first pass will release items with refcount = 1, + // and if not enough space releases after scanning the number of + // elements specified by this parameter, we will remove items in LRU + // order. + int table_cache_remove_scan_count_limit; + // size of one block in arena memory allocation. // If <= 0, a proper value is automatically calculated (usually 1/10 of // writer_buffer_size). 
diff --git a/util/cache.cc b/util/cache.cc index df8b63fc5..cfca0aed2 100644 --- a/util/cache.cc +++ b/util/cache.cc @@ -140,6 +140,9 @@ class LRUCache { // Separate from constructor so caller can easily make an array of LRUCache void SetCapacity(size_t capacity) { capacity_ = capacity; } + void SetRemoveScanCountLimit(size_t remove_scan_count_limit) { + remove_scan_count_limit_ = remove_scan_count_limit; + } // Like Cache methods, but with an extra "hash" parameter. Cache::Handle* Insert(const Slice& key, uint32_t hash, @@ -160,6 +163,7 @@ class LRUCache { // Initialized before use. size_t capacity_; + uint32_t remove_scan_count_limit_; // mutex_ protects the following state. port::Mutex mutex_; @@ -270,6 +274,27 @@ Cache::Handle* LRUCache::Insert( } } + if (remove_scan_count_limit_ > 0) { + // Try to free the space by evicting the entries that are only + // referenced by the cache first. + LRUHandle* cur = lru_.next; + for (unsigned int scanCount = 0; + usage_ > capacity_ && cur != &lru_ + && scanCount < remove_scan_count_limit_; scanCount++) { + LRUHandle* next = cur->next; + if (cur->refs <= 1) { + LRU_Remove(cur); + table_.Remove(cur->key(), cur->hash); + if (Unref(cur)) { + last_reference_list.push_back(cur); + } + } + cur = next; + } + } + + // Free the space following strict LRU policy until enough space + // is freed. while (usage_ > capacity_ && lru_.next != &lru_) { LRUHandle* old = lru_.next; LRU_Remove(old); @@ -307,7 +332,8 @@ void LRUCache::Erase(const Slice& key, uint32_t hash) { } } -static int kNumShardBits = 4; // default values, can be overridden +static int kNumShardBits = 4; // default values, can be overridden +static int kRemoveScanCountLimit = 0; // default values, can be overridden class ShardedLRUCache : public Cache { private: @@ -326,7 +352,7 @@ class ShardedLRUCache : public Cache { return (numShardBits > 0) ? 
(hash >> (32 - numShardBits)) : 0; } - void init(size_t capacity, int numbits) { + void init(size_t capacity, int numbits, int removeScanCountLimit) { numShardBits = numbits; capacity_ = capacity; int numShards = 1 << numShardBits; @@ -334,17 +360,19 @@ const size_t per_shard = (capacity + (numShards - 1)) / numShards; for (int s = 0; s < numShards; s++) { shard_[s].SetCapacity(per_shard); + shard_[s].SetRemoveScanCountLimit(removeScanCountLimit); } } public: explicit ShardedLRUCache(size_t capacity) : last_id_(0) { - init(capacity, kNumShardBits); + init(capacity, kNumShardBits, kRemoveScanCountLimit); } - ShardedLRUCache(size_t capacity, int numShardBits) + ShardedLRUCache(size_t capacity, int numShardBits, + int removeScanCountLimit) : last_id_(0) { - init(capacity, numShardBits); + init(capacity, numShardBits, removeScanCountLimit); } virtual ~ShardedLRUCache() { delete[] shard_; } @@ -381,14 +409,21 @@ } // end anonymous namespace shared_ptr<Cache> NewLRUCache(size_t capacity) { - return std::make_shared<ShardedLRUCache>(capacity); + return NewLRUCache(capacity, kNumShardBits); } shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits) { + return NewLRUCache(capacity, numShardBits, kRemoveScanCountLimit); +} + +shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits, + int removeScanCountLimit) { if (numShardBits >= 20) { return nullptr; // the cache cannot be sharded into too many fine pieces } - return std::make_shared<ShardedLRUCache>(capacity, numShardBits); + return std::make_shared<ShardedLRUCache>(capacity, + numShardBits, + removeScanCountLimit); } } // namespace rocksdb diff --git a/util/cache_test.cc b/util/cache_test.cc index 00c02589a..fe3bfd68f 100644 --- a/util/cache_test.cc +++ b/util/cache_test.cc @@ -35,33 +35,69 @@ class CacheTest { } static const int kCacheSize = 1000; + static const int kNumShardBits = 4; + static const int kRemoveScanCountLimit = 16; + + static const int kCacheSize2 = 100; + static const int 
kNumShardBits2 = 2; + static const int kRemoveScanCountLimit2 = 200; + std::vector<int> deleted_keys_; std::vector<int> deleted_values_; shared_ptr<Cache> cache_; + shared_ptr<Cache> cache2_; - CacheTest() : cache_(NewLRUCache(kCacheSize)) { + CacheTest() : + cache_(NewLRUCache(kCacheSize, kNumShardBits, kRemoveScanCountLimit)), + cache2_(NewLRUCache(kCacheSize2, kNumShardBits2, + kRemoveScanCountLimit2)) { current_ = this; } ~CacheTest() { } - int Lookup(int key) { - Cache::Handle* handle = cache_->Lookup(EncodeKey(key)); - const int r = (handle == nullptr) ? -1 : DecodeValue(cache_->Value(handle)); + int Lookup(shared_ptr<Cache> cache, int key) { + Cache::Handle* handle = cache->Lookup(EncodeKey(key)); + const int r = (handle == nullptr) ? -1 : DecodeValue(cache->Value(handle)); if (handle != nullptr) { - cache_->Release(handle); + cache->Release(handle); } return r; } + void Insert(shared_ptr<Cache> cache, int key, int value, int charge = 1) { + cache->Release(cache->Insert(EncodeKey(key), EncodeValue(value), charge, + &CacheTest::Deleter)); + } + + void Erase(shared_ptr<Cache> cache, int key) { + cache->Erase(EncodeKey(key)); + } + + + int Lookup(int key) { + return Lookup(cache_, key); + } + void Insert(int key, int value, int charge = 1) { - cache_->Release(cache_->Insert(EncodeKey(key), EncodeValue(value), charge, - &CacheTest::Deleter)); + Insert(cache_, key, value, charge); } void Erase(int key) { - cache_->Erase(EncodeKey(key)); + Erase(cache_, key); + } + + int Lookup2(int key) { + return Lookup(cache2_, key); + } + + void Insert2(int key, int value, int charge = 1) { + Insert(cache2_, key, value, charge); + } + + void Erase2(int key) { + Erase(cache2_, key); } }; CacheTest* CacheTest::current_; @@ -147,6 +183,124 @@ TEST(CacheTest, EvictionPolicy) { ASSERT_EQ(-1, Lookup(200)); } +TEST(CacheTest, EvictionPolicyRef) { + Insert(100, 101); + Insert(101, 102); + Insert(102, 103); + Insert(103, 104); + Insert(200, 101); + Insert(201, 102); + Insert(202, 103); + Insert(203, 104); + Cache::Handle* h201 = 
cache_->Lookup(EncodeKey(200)); + Cache::Handle* h202 = cache_->Lookup(EncodeKey(201)); + Cache::Handle* h203 = cache_->Lookup(EncodeKey(202)); + Cache::Handle* h204 = cache_->Lookup(EncodeKey(203)); + Insert(300, 101); + Insert(301, 102); + Insert(302, 103); + Insert(303, 104); + + // Insert entries much more than Cache capacity + for (int i = 0; i < kCacheSize + 100; i++) { + Insert(1000 + i, 2000 + i); + } + + // Check whether the entries inserted in the beginning + // are evicted. Ones without extra ref are evicted and + // those with are not. + ASSERT_EQ(-1, Lookup(100)); + ASSERT_EQ(-1, Lookup(101)); + ASSERT_EQ(-1, Lookup(102)); + ASSERT_EQ(-1, Lookup(103)); + + ASSERT_EQ(-1, Lookup(300)); + ASSERT_EQ(-1, Lookup(301)); + ASSERT_EQ(-1, Lookup(302)); + ASSERT_EQ(-1, Lookup(303)); + + ASSERT_EQ(101, Lookup(200)); + ASSERT_EQ(102, Lookup(201)); + ASSERT_EQ(103, Lookup(202)); + ASSERT_EQ(104, Lookup(203)); + + // Cleaning up all the handles + cache_->Release(h201); + cache_->Release(h202); + cache_->Release(h203); + cache_->Release(h204); +} + +TEST(CacheTest, EvictionPolicyRef2) { + std::vector<Cache::Handle*> handles; + + Insert(100, 101); + // Insert entries much more than Cache capacity + for (int i = 0; i < kCacheSize + 100; i++) { + Insert(1000 + i, 2000 + i); + if (i < kCacheSize ) { + handles.push_back(cache_->Lookup(EncodeKey(1000 + i))); + } + } + + // Make sure referenced keys are also possible to be deleted + // if there are not sufficient non-referenced keys + for (int i = 0; i < 5; i++) { + ASSERT_EQ(-1, Lookup(1000 + i)); + } + + for (int i = kCacheSize; i < kCacheSize + 100; i++) { + ASSERT_EQ(2000 + i, Lookup(1000 + i)); + } + ASSERT_EQ(-1, Lookup(100)); + + // Cleaning up all the handles + while (handles.size() > 0) { + cache_->Release(handles.back()); + handles.pop_back(); + } +} + +TEST(CacheTest, EvictionPolicyRefLargeScanLimit) { + std::vector<Cache::Handle*> handles2; + + // Cache2 has a cache RemoveScanCountLimit higher than cache size + // so it would trigger a boundary 
condition. + + // Populate the cache with 10 more keys than its size. + // Reference all keys except one close to the end. + for (int i = 0; i < kCacheSize2 + 10; i++) { + Insert2(1000 + i, 2000+i); + if (i != kCacheSize2 ) { + handles2.push_back(cache2_->Lookup(EncodeKey(1000 + i))); + } + } + + // Make sure referenced keys are also possible to be deleted + // if there are not sufficient non-referenced keys + for (int i = 0; i < 3; i++) { + ASSERT_EQ(-1, Lookup2(1000 + i)); + } + // The non-referenced value is deleted even if it's accessed + // recently. + ASSERT_EQ(-1, Lookup2(1000 + kCacheSize2)); + // Other values recently accessed are not deleted since they + // are referenced. + for (int i = kCacheSize2 - 10; i < kCacheSize2 + 10; i++) { + if (i != kCacheSize2) { + ASSERT_EQ(2000 + i, Lookup2(1000 + i)); + } + } + + // Cleaning up all the handles + while (handles2.size() > 0) { + cache2_->Release(handles2.back()); + handles2.pop_back(); + } +} + + + TEST(CacheTest, HeavyEntries) { // Add a bunch of light and heavy entries and then count the combined // size of items still in the cache, which must be approximately the diff --git a/util/options.cc b/util/options.cc index fe7a3689d..c1f0d8b06 100644 --- a/util/options.cc +++ b/util/options.cc @@ -65,6 +65,7 @@ Options::Options() max_manifest_file_size(std::numeric_limits<uint64_t>::max()), no_block_cache(false), table_cache_numshardbits(4), + table_cache_remove_scan_count_limit(16), arena_block_size(0), disable_auto_compactions(false), WAL_ttl_seconds(0), @@ -197,6 +198,8 @@ Options::Dump(Logger* log) const no_block_cache); Log(log," Options.table_cache_numshardbits: %d", table_cache_numshardbits); + Log(log," Options.table_cache_remove_scan_count_limit: %d", + table_cache_remove_scan_count_limit); Log(log," Options.arena_block_size: %ld", arena_block_size); Log(log," Options.delete_obsolete_files_period_micros: %ld",