diff --git a/HISTORY.md b/HISTORY.md
index ad626711f..bdb3325fe 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,8 @@
 # Rocksdb Change Log
 
+### Unreleased Features
+* Changed the LRU caching algorithm so that blocks referenced by iterators are never evicted
+
 ### 3.9.0 (12/8/2014)
 
 ### New Features
diff --git a/util/cache.cc b/util/cache.cc
index b1d8a19c3..d64ab00e2 100644
--- a/util/cache.cc
+++ b/util/cache.cc
@@ -26,8 +26,27 @@ namespace {
 
 // LRU cache implementation
 
-// An entry is a variable length heap-allocated structure. Entries
-// are kept in a circular doubly linked list ordered by access time.
+// An entry is a variable length heap-allocated structure.
+// Entries are referenced by the cache and/or by any external entity.
+// The cache keeps all its entries in the hash table. Some entries
+// are also stored on the LRU list.
+//
+// LRUHandle can be in these states:
+// 1. Referenced externally AND in the hash table.
+//    In that case the entry is *not* in the LRU. (refs > 1 && in_cache == true)
+// 2. Not referenced externally and in the hash table. In that case the entry
+//    is in the LRU and can be freed. (refs == 1 && in_cache == true)
+// 3. Referenced externally and not in the hash table. In that case the entry
+//    is not on the LRU list and not in the table. (refs >= 1 && in_cache == false)
+//
+// All newly created LRUHandles are in state 1. If you call LRUCache::Release
+// on an entry in state 1, it will go into state 2. To move from state 1 to
+// state 3, either call LRUCache::Erase or LRUCache::Insert with the same key.
+// To move from state 2 to state 1, use LRUCache::Lookup.
+// Before destruction, make sure that no handles are in state 1. This means
+// that any successful LRUCache::Lookup/LRUCache::Insert has a matching
+// LRUCache::Release (to move into state 2) or LRUCache::Erase (for state 3).
+
 struct LRUHandle {
   void* value;
   void (*deleter)(const Slice&, void* value);
@@ -36,7 +55,9 @@ struct LRUHandle {
   LRUHandle* prev;
   size_t charge;      // TODO(opt): Only allow uint32_t?
   size_t key_length;
-  uint32_t refs;
+  uint32_t refs;      // number of refs to this entry
+                      // the cache itself is counted as 1
+  bool in_cache;      // true if this entry is referenced by the hash table
   uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
 
   char key_data[1];   // Beginning of key
@@ -49,6 +70,12 @@ struct LRUHandle {
       return Slice(key_data, key_length);
     }
   }
+
+  // Call deleter and free
+  void Free() {
+    assert((refs == 1 && in_cache) || (refs == 0 && !in_cache));
+    (*deleter)(key(), value);
+    free(this);
+  }
 };
 
 // We provide our own simple hash table since it removes a whole bunch
@@ -59,7 +86,28 @@ struct LRUHandle {
 class HandleTable {
  public:
   HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
-  ~HandleTable() { delete[] list_; }
+
+  template <typename T>
+  void ApplyToAllCacheEntries(T func) {
+    for (uint32_t i = 0; i < length_; i++) {
+      LRUHandle* h = list_[i];
+      while (h != nullptr) {
+        auto n = h->next_hash;
+        assert(h->in_cache);
+        func(h);
+        h = n;
+      }
+    }
+  }
+
+  ~HandleTable() {
+    ApplyToAllCacheEntries([](LRUHandle* h) {
+      if (h->refs == 1) {
+        h->Free();
+      }
+    });
+    delete[] list_;
+  }
 
   LRUHandle* Lookup(const Slice& key, uint32_t hash) {
     return *FindPointer(key, hash);
@@ -173,8 +221,6 @@ class LRUCache {
   // Just reduce the reference count by 1.
   // Return true if last reference
   bool Unref(LRUHandle* e);
-  // Call deleter and free
-  void FreeEntry(LRUHandle* e);
 
   // Initialized before use.
   size_t capacity_;
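The three-state lifecycle documented in the new header comment is easiest to follow from the caller's side. The sketch below walks one entry through all three states, assuming the public rocksdb::Cache API of this release (Insert returning a handle, plus Lookup/Release/Erase); the key, value, and deleter are illustrative only:

```cpp
#include <cassert>
#include <string>
#include "rocksdb/cache.h"

static void Deleter(const rocksdb::Slice& /*key*/, void* value) {
  delete static_cast<std::string*>(value);
}

int main() {
  auto cache = rocksdb::NewLRUCache(1024 /* capacity */);

  // State 1: in the hash table and referenced by the returned handle
  // (refs == 2, in_cache == true); the entry is not on the LRU list.
  rocksdb::Cache::Handle* h =
      cache->Insert("key", new std::string("value"), 1, &Deleter);

  // State 2: only the cache references the entry
  // (refs == 1, in_cache == true), so it sits on the LRU list.
  cache->Release(h);

  // Back to state 1: Lookup pins the entry and takes it off the LRU list.
  h = cache->Lookup("key");
  assert(h != nullptr);

  // State 3: erased from the table but kept alive by our handle
  // (refs == 1, in_cache == false).
  cache->Erase("key");

  // Last external reference goes away; Deleter runs here.
  cache->Release(h);
  return 0;
}
```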
@@ -188,6 +234,7 @@ class LRUCache {
 
   // Dummy head of LRU list.
   // lru.prev is newest entry, lru.next is oldest entry.
+  // LRU contains items which can be evicted, i.e. referenced only by the cache
   LRUHandle lru_;
 
   HandleTable table_;
@@ -200,16 +247,7 @@ LRUCache::LRUCache()
   lru_.prev = &lru_;
 }
 
-LRUCache::~LRUCache() {
-  for (LRUHandle* e = lru_.next; e != &lru_; ) {
-    LRUHandle* next = e->next;
-    assert(e->refs == 1);  // Error if caller has an unreleased handle
-    if (Unref(e)) {
-      FreeEntry(e);
-    }
-    e = next;
-  }
-}
+LRUCache::~LRUCache() {}
 
 bool LRUCache::Unref(LRUHandle* e) {
   assert(e->refs > 0);
@@ -217,47 +255,48 @@ bool LRUCache::Unref(LRUHandle* e) {
   return e->refs == 0;
 }
 
-void LRUCache::FreeEntry(LRUHandle* e) {
-  assert(e->refs == 0);
-  (*e->deleter)(e->key(), e->value);
-  free(e);
-}
-
 void LRUCache::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                       bool thread_safe) {
   if (thread_safe) {
     mutex_.Lock();
   }
-  for (auto e = lru_.next; e != &lru_; e = e->next) {
-    callback(e->value, e->charge);
-  }
+  table_.ApplyToAllCacheEntries([callback](LRUHandle* h) {
+    callback(h->value, h->charge);
+  });
   if (thread_safe) {
     mutex_.Unlock();
  }
 }
 
 void LRUCache::LRU_Remove(LRUHandle* e) {
+  assert(e->next != nullptr);
+  assert(e->prev != nullptr);
   e->next->prev = e->prev;
   e->prev->next = e->next;
-  usage_ -= e->charge;
+  e->prev = e->next = nullptr;
 }
 
 void LRUCache::LRU_Append(LRUHandle* e) {
   // Make "e" newest entry by inserting just before lru_
+  assert(e->next == nullptr);
+  assert(e->prev == nullptr);
   e->next = &lru_;
   e->prev = lru_.prev;
   e->prev->next = e;
   e->next->prev = e;
-  usage_ += e->charge;
 }
 
 Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
   MutexLock l(&mutex_);
   LRUHandle* e = table_.Lookup(key, hash);
   if (e != nullptr) {
+    assert(e->in_cache);
+    if (e->refs == 1) {
+      LRU_Remove(e);
+    }
     e->refs++;
-    LRU_Remove(e);
-    LRU_Append(e);
   }
   return reinterpret_cast<Cache::Handle*>(e);
 }
 
@@ -268,9 +307,31 @@ void LRUCache::Release(Cache::Handle* handle) {
   {
     MutexLock l(&mutex_);
     last_reference = Unref(e);
+    if (last_reference) {
+      usage_ -= e->charge;
+    }
+    if (e->refs == 1 && e->in_cache) {
+      // The item is still in cache, and nobody else holds a reference to it
+      if (usage_ > capacity_) {
+        // the cache is full; the LRU list must be empty in that case
+        assert(lru_.next == &lru_);
+        // take this opportunity and remove the item
+        table_.Remove(e->key(), e->hash);
+        e->in_cache = false;
+        Unref(e);
+        usage_ -= e->charge;
+        last_reference = true;
+      } else {
+        // put the item on the list to be potentially freed
+        LRU_Append(e);
+      }
+    }
   }
+
+  // free outside of mutex
   if (last_reference) {
-    FreeEntry(e);
+    e->Free();
   }
 }
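Note the locking discipline shared by Release() above and Insert() below: reference-count bookkeeping happens under mutex_, while user-supplied deleters run only after the lock is dropped. A generic sketch of the pattern, using toy types rather than RocksDB internals:

```cpp
#include <mutex>

struct Entry {
  int refs;
  void (*deleter)(Entry*);  // may be arbitrarily slow user code
};

std::mutex mu;

void ReleaseEntry(Entry* e) {
  bool last_reference = false;
  {
    std::lock_guard<std::mutex> l(mu);  // cheap bookkeeping under the lock
    if (--e->refs == 0) {
      last_reference = true;
    }
  }
  // Run the potentially expensive deleter without holding the mutex,
  // so concurrent cache operations are not blocked behind it.
  if (last_reference) {
    e->deleter(e);
  }
}
```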
@@ -278,8 +339,11 @@ Cache::Handle* LRUCache::Insert(
     const Slice& key, uint32_t hash, void* value, size_t charge,
     void (*deleter)(const Slice& key, void* value)) {
 
-  LRUHandle* e = reinterpret_cast<LRUHandle*>(
-      malloc(sizeof(LRUHandle)-1 + key.size()));
+  // Allocate the memory here outside of the mutex.
+  // If the cache is full, we'll have to release it.
+  // It shouldn't happen very often though.
+  LRUHandle* e =
+      reinterpret_cast<LRUHandle*>(malloc(sizeof(LRUHandle) - 1 + key.size()));
   autovector<LRUHandle*> last_reference_list;
 
   e->value = value;
@@ -288,47 +352,40 @@ Cache::Handle* LRUCache::Insert(
   e->key_length = key.size();
   e->hash = hash;
   e->refs = 2;  // One from LRUCache, one for the returned handle
+  e->next = e->prev = nullptr;
+  e->in_cache = true;
   memcpy(e->key_data, key.data(), key.size());
 
   {
     MutexLock l(&mutex_);
 
-    LRU_Append(e);
-
-    LRUHandle* old = table_.Insert(e);
-    if (old != nullptr) {
-      LRU_Remove(old);
-      if (Unref(old)) {
-        last_reference_list.push_back(old);
-      }
-    }
-
-    if (remove_scan_count_limit_ > 0) {
-      // Try to free the space by evicting the entries that are only
-      // referenced by the cache first.
-      LRUHandle* cur = lru_.next;
-      for (unsigned int scanCount = 0;
-           usage_ > capacity_ && cur != &lru_
-           && scanCount < remove_scan_count_limit_; scanCount++) {
-        LRUHandle* next = cur->next;
-        if (cur->refs <= 1) {
-          LRU_Remove(cur);
-          table_.Remove(cur->key(), cur->hash);
-          if (Unref(cur)) {
-            last_reference_list.push_back(cur);
-          }
-        }
-        cur = next;
-      }
-    }
-
     // Free the space following strict LRU policy until enough space
-    // is freed.
-    while (usage_ > capacity_ && lru_.next != &lru_) {
-      old = lru_.next;
+    // is freed or the LRU list is empty
+    while (usage_ + charge > capacity_ && lru_.next != &lru_) {
+      LRUHandle* old = lru_.next;
+      assert(old->in_cache);
+      assert(old->refs == 1);  // LRU list contains elements which may be evicted
       LRU_Remove(old);
       table_.Remove(old->key(), old->hash);
+      old->in_cache = false;
+      Unref(old);
+      usage_ -= old->charge;
+      last_reference_list.push_back(old);
+    }
+
+    // insert into the cache
+    // note that the cache might get larger than its capacity if not enough
+    // space was freed
+    LRUHandle* old = table_.Insert(e);
+    usage_ += e->charge;
+    if (old != nullptr) {
+      old->in_cache = false;
       if (Unref(old)) {
+        usage_ -= old->charge;
+        // old is on the LRU list because it's in the cache and its
+        // reference count was just 1 (Unref returned true)
+        LRU_Remove(old);
         last_reference_list.push_back(old);
       }
     }
@@ -337,7 +394,7 @@ Cache::Handle* LRUCache::Insert(
   // we free the entries here outside of mutex for
   // performance reasons
   for (auto entry : last_reference_list) {
-    FreeEntry(entry);
+    entry->Free();
   }
 
   return reinterpret_cast<Cache::Handle*>(e);
@@ -350,14 +407,21 @@ void LRUCache::Erase(const Slice& key, uint32_t hash) {
     MutexLock l(&mutex_);
     e = table_.Remove(key, hash);
     if (e != nullptr) {
-      LRU_Remove(e);
       last_reference = Unref(e);
+      if (last_reference) {
+        usage_ -= e->charge;
+      }
+      if (last_reference && e->in_cache) {
+        LRU_Remove(e);
+      }
+      e->in_cache = false;
     }
   }
+
+  // mutex not held here
   // last_reference will only be true if e != nullptr
   if (last_reference) {
-    FreeEntry(e);
+    e->Free();
   }
 }
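Taken together, the new Insert() and Release() give the pinning guarantee from the changelog entry: entries whose handles are still held are charged against capacity but never evicted, so usage may temporarily exceed capacity. A hedged sketch against the public API (the two-argument NewLRUCache overload selecting a single shard, and GetUsage(), are assumed to be available as in the tests below):

```cpp
#include <cassert>
#include <vector>
#include "rocksdb/cache.h"

static void NoopDeleter(const rocksdb::Slice& /*key*/, void* /*value*/) {}

int main() {
  // Capacity of 2, single shard (0 shard bits) for determinism.
  auto cache = rocksdb::NewLRUCache(2, 0);

  std::vector<rocksdb::Cache::Handle*> pinned;
  // Hold handles to three unit-charge entries: none of them can be
  // evicted, so usage grows past the configured capacity.
  pinned.push_back(cache->Insert("a", nullptr, 1, &NoopDeleter));
  pinned.push_back(cache->Insert("b", nullptr, 1, &NoopDeleter));
  pinned.push_back(cache->Insert("c", nullptr, 1, &NoopDeleter));
  assert(cache->GetUsage() == 3);  // over capacity, nothing evictable

  // Releasing the handles makes the entries evictable again; Release()
  // trims the cache back toward capacity as references are dropped.
  for (auto* h : pinned) {
    cache->Release(h);
  }
  assert(cache->GetUsage() <= 2);
  return 0;
}
```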
diff --git a/util/cache_test.cc b/util/cache_test.cc
index 2fed4d867..e40317fd5 100644
--- a/util/cache_test.cc
+++ b/util/cache_test.cc
@@ -190,25 +190,30 @@ TEST(CacheTest, EntriesArePinned) {
   Insert(100, 101);
   Cache::Handle* h1 = cache_->Lookup(EncodeKey(100));
   ASSERT_EQ(101, DecodeValue(cache_->Value(h1)));
+  ASSERT_EQ(1, cache_->GetUsage());
 
   Insert(100, 102);
   Cache::Handle* h2 = cache_->Lookup(EncodeKey(100));
   ASSERT_EQ(102, DecodeValue(cache_->Value(h2)));
   ASSERT_EQ(0U, deleted_keys_.size());
+  ASSERT_EQ(2, cache_->GetUsage());
 
   cache_->Release(h1);
   ASSERT_EQ(1U, deleted_keys_.size());
   ASSERT_EQ(100, deleted_keys_[0]);
   ASSERT_EQ(101, deleted_values_[0]);
+  ASSERT_EQ(1, cache_->GetUsage());
 
   Erase(100);
   ASSERT_EQ(-1, Lookup(100));
   ASSERT_EQ(1U, deleted_keys_.size());
+  ASSERT_EQ(1, cache_->GetUsage());
 
   cache_->Release(h2);
   ASSERT_EQ(2U, deleted_keys_.size());
   ASSERT_EQ(100, deleted_keys_[1]);
   ASSERT_EQ(102, deleted_values_[1]);
+  ASSERT_EQ(0, cache_->GetUsage());
 }
 
 TEST(CacheTest, EvictionPolicy) {
@@ -273,76 +278,28 @@ TEST(CacheTest, EvictionPolicyRef) {
   cache_->Release(h204);
 }
 
-TEST(CacheTest, EvictionPolicyRef2) {
-  std::vector<Cache::Handle*> handles;
-
-  Insert(100, 101);
-  // Insert entries much more than Cache capacity
-  for (int i = 0; i < kCacheSize + 100; i++) {
-    Insert(1000 + i, 2000 + i);
-    if (i < kCacheSize) {
-      handles.push_back(cache_->Lookup(EncodeKey(1000 + i)));
-    }
-  }
-
-  // Make sure referenced keys are also possible to be deleted
-  // if there are not sufficient non-referenced keys
-  for (int i = 0; i < 5; i++) {
-    ASSERT_EQ(-1, Lookup(1000 + i));
-  }
+TEST(CacheTest, ErasedHandleState) {
+  // insert a key and get two handles
+  Insert(100, 1000);
+  Cache::Handle* h1 = cache_->Lookup(EncodeKey(100));
+  Cache::Handle* h2 = cache_->Lookup(EncodeKey(100));
+  ASSERT_EQ(h1, h2);
+  ASSERT_EQ(DecodeValue(cache_->Value(h1)), 1000);
+  ASSERT_EQ(DecodeValue(cache_->Value(h2)), 1000);
 
-  for (int i = kCacheSize; i < kCacheSize + 100; i++) {
-    ASSERT_EQ(2000 + i, Lookup(1000 + i));
-  }
+  // delete the key from the cache
+  Erase(100);
+  // can no longer find in the cache
   ASSERT_EQ(-1, Lookup(100));
 
-  // Cleaning up all the handles
-  while (handles.size() > 0) {
-    cache_->Release(handles.back());
-    handles.pop_back();
-  }
-}
-
-TEST(CacheTest, EvictionPolicyRefLargeScanLimit) {
-  std::vector<Cache::Handle*> handles2;
-
-  // Cache2 has a cache RemoveScanCountLimit higher than cache size
-  // so it would trigger a boundary condition.
-
-  // Populate the cache with 10 more keys than its size.
-  // Reference all keys except one close to the end.
-  for (int i = 0; i < kCacheSize2 + 10; i++) {
-    Insert2(1000 + i, 2000 + i);
-    if (i != kCacheSize2) {
-      handles2.push_back(cache2_->Lookup(EncodeKey(1000 + i)));
-    }
-  }
-
-  // Make sure referenced keys are also possible to be deleted
-  // if there are not sufficient non-referenced keys
-  for (int i = 0; i < 3; i++) {
-    ASSERT_EQ(-1, Lookup2(1000 + i));
-  }
-  // The non-referenced value is deleted even if it's accessed
-  // recently.
-  ASSERT_EQ(-1, Lookup2(1000 + kCacheSize2));
-  // Other values recently accessed are not deleted since they
-  // are referenced.
-  for (int i = kCacheSize2 - 10; i < kCacheSize2 + 10; i++) {
-    if (i != kCacheSize2) {
-      ASSERT_EQ(2000 + i, Lookup2(1000 + i));
-    }
-  }
+  // release one handle
+  cache_->Release(h1);
+  // still can't find in cache
+  ASSERT_EQ(-1, Lookup(100));
 
-  // Cleaning up all the handles
-  while (handles2.size() > 0) {
-    cache2_->Release(handles2.back());
-    handles2.pop_back();
-  }
+  cache_->Release(h2);
 }
 
-
 TEST(CacheTest, HeavyEntries) {
   // Add a bunch of light and heavy entries and then count the combined
   // size of items still in the cache, which must be approximately the
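The updated EntriesArePinned assertions encode the accounting rule: a duplicate Insert detaches the old entry from the hash table, but its charge stays in GetUsage() until the handle pinning it is released. A small sketch of the same sequence against the public API (plain string keys instead of the test's EncodeKey/DecodeValue helpers):

```cpp
#include <cassert>
#include "rocksdb/cache.h"

static void NoopDeleter(const rocksdb::Slice& /*key*/, void* /*value*/) {}

int main() {
  auto cache = rocksdb::NewLRUCache(100, 0);

  // First insert: usage == 1 (the entry's charge).
  rocksdb::Cache::Handle* h1 = cache->Insert("k", nullptr, 1, &NoopDeleter);
  assert(cache->GetUsage() == 1);

  // Duplicate insert: the old entry leaves the hash table
  // (in_cache == false) but stays charged while h1 pins it, so usage == 2.
  rocksdb::Cache::Handle* h2 = cache->Insert("k", nullptr, 1, &NoopDeleter);
  assert(cache->GetUsage() == 2);

  // Releasing h1 drops the last reference to the detached entry; its
  // charge is returned and usage falls back to 1.
  cache->Release(h1);
  assert(cache->GetUsage() == 1);

  cache->Release(h2);
  return 0;
}
```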
found\n":" not found\n"); - // Only the first entry should be missing - ASSERT_TRUE(h || i == 0); + ASSERT_TRUE(h != nullptr); if (h) cache->Release(h); } + // the cache is over capacity since nothing could be evicted + ASSERT_EQ(n + 1, cache->GetUsage()); for (int i = 0; i < n+1; i++) { cache->Release(handles[i]); } - std::cout << "Poor entries\n"; + + // cache is under capacity now since elements were released + ASSERT_EQ(n, cache->GetUsage()); + + // element 0 is evicted and the rest is there + // This is consistent with the LRU policy since the element 0 + // was released first + for (int i = 0; i < n+1; i++) { + std::string key = ToString(i+1); + auto h = cache->Lookup(key); + if (h) { + ASSERT_NE(i, 0); + cache->Release(h); + } else { + ASSERT_EQ(i, 0); + } + } } namespace {