diff --git a/.gitignore b/.gitignore index 6364dfdc4..180fb4c50 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ ldb manifest_dump sst_dump blob_dump +block_cache_trace_analyzer column_aware_encoding_exp util/build_version.cc build_tools/VALGRIND_LOGS/ diff --git a/cache/cache_test.cc b/cache/cache_test.cc index 46ce78db6..b728c67c7 100644 --- a/cache/cache_test.cc +++ b/cache/cache_test.cc @@ -562,6 +562,7 @@ TEST_P(CacheTest, SetStrictCapacityLimit) { ASSERT_OK(s); ASSERT_NE(nullptr, handles[i]); } + ASSERT_EQ(10, cache->GetUsage()); // test2: set the flag to true. Insert and check if it fails. std::string extra_key = "extra"; @@ -571,6 +572,7 @@ TEST_P(CacheTest, SetStrictCapacityLimit) { s = cache->Insert(extra_key, extra_value, 1, &deleter, &handle); ASSERT_TRUE(s.IsIncomplete()); ASSERT_EQ(nullptr, handle); + ASSERT_EQ(10, cache->GetUsage()); for (size_t i = 0; i < 10; i++) { cache->Release(handles[i]); @@ -591,7 +593,7 @@ TEST_P(CacheTest, SetStrictCapacityLimit) { s = cache2->Insert(extra_key, extra_value, 1, &deleter); // AS if the key have been inserted into cache but get evicted immediately. ASSERT_OK(s); - ASSERT_EQ(5, cache->GetUsage()); + ASSERT_EQ(5, cache2->GetUsage()); ASSERT_EQ(nullptr, cache2->Lookup(extra_key)); for (size_t i = 0; i < 5; i++) { diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc index 676bed305..7c04cb909 100644 --- a/cache/lru_cache.cc +++ b/cache/lru_cache.cc @@ -24,7 +24,7 @@ LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) { LRUHandleTable::~LRUHandleTable() { ApplyToAllCacheEntries([](LRUHandle* h) { - if (h->refs == 1) { + if (!h->HasRefs()) { h->Free(); } }); @@ -113,29 +113,17 @@ LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit, SetCapacity(capacity); } -LRUCacheShard::~LRUCacheShard() {} - -bool LRUCacheShard::Unref(LRUHandle* e) { - assert(e->refs > 0); - e->refs--; - return e->refs == 0; -} - -// Call deleter and free - void LRUCacheShard::EraseUnRefEntries() { autovector last_reference_list; { MutexLock l(&mutex_); while (lru_.next != &lru_) { LRUHandle* old = lru_.next; - assert(old->InCache()); - assert(old->refs == - 1); // LRU list contains elements which may be evicted + // LRU list contains only elements which can be evicted + assert(old->InCache() && !old->HasRefs()); LRU_Remove(old); table_.Remove(old->key(), old->hash); old->SetInCache(false); - Unref(old); usage_ -= old->charge; last_reference_list.push_back(old); } @@ -148,22 +136,27 @@ void LRUCacheShard::EraseUnRefEntries() { void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t), bool thread_safe) { + const auto applyCallback = [&]() { + table_.ApplyToAllCacheEntries( + [callback](LRUHandle* h) { callback(h->value, h->charge); }); + }; + if (thread_safe) { - mutex_.Lock(); - } - table_.ApplyToAllCacheEntries( - [callback](LRUHandle* h) { callback(h->value, h->charge); }); - if (thread_safe) { - mutex_.Unlock(); + MutexLock l(&mutex_); + applyCallback(); + } else { + applyCallback(); } } void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) { + MutexLock l(&mutex_); *lru = &lru_; *lru_low_pri = lru_low_pri_; } size_t LRUCacheShard::TEST_GetLRUSize() { + MutexLock l(&mutex_); LRUHandle* lru_handle = lru_.next; size_t lru_size = 0; while (lru_handle != &lru_) { @@ -231,14 +224,13 @@ void LRUCacheShard::MaintainPoolSize() { void LRUCacheShard::EvictFromLRU(size_t charge, autovector* deleted) { - while (usage_ + charge > capacity_ && lru_.next != &lru_) { + while ((usage_ + charge) > capacity_ && lru_.next != &lru_) { LRUHandle* old = lru_.next; - assert(old->InCache()); - assert(old->refs == 1); // LRU list contains elements which may be evicted + // LRU list contains only elements which can be evicted + assert(old->InCache() && !old->HasRefs()); LRU_Remove(old); table_.Remove(old->key(), old->hash); old->SetInCache(false); - Unref(old); usage_ -= old->charge; deleted->push_back(old); } @@ -252,8 +244,8 @@ void LRUCacheShard::SetCapacity(size_t capacity) { high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_; EvictFromLRU(0, &last_reference_list); } - // we free the entries here outside of mutex for - // performance reasons + + // Free the entries outside of mutex for performance reasons for (auto entry : last_reference_list) { entry->Free(); } @@ -269,22 +261,22 @@ Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) { LRUHandle* e = table_.Lookup(key, hash); if (e != nullptr) { assert(e->InCache()); - if (e->refs == 1) { + if (!e->HasRefs()) { + // The entry is in LRU since it's in hash and has no external references LRU_Remove(e); } - e->refs++; + e->Ref(); e->SetHit(); } return reinterpret_cast(e); } bool LRUCacheShard::Ref(Cache::Handle* h) { - LRUHandle* handle = reinterpret_cast(h); + LRUHandle* e = reinterpret_cast(h); MutexLock l(&mutex_); - if (handle->InCache() && handle->refs == 1) { - LRU_Remove(handle); - } - handle->refs++; + // To create another reference - entry must be already externally referenced + assert(e->HasRefs()); + e->Ref(); return true; } @@ -303,30 +295,27 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) { bool last_reference = false; { MutexLock l(&mutex_); - last_reference = Unref(e); - if (last_reference) { - usage_ -= e->charge; - } - if (e->refs == 1 && e->InCache()) { + last_reference = e->Unref(); + if (last_reference && e->InCache()) { // The item is still in cache, and nobody else holds a reference to it if (usage_ > capacity_ || force_erase) { - // the cache is full // The LRU list must be empty since the cache is full - assert(!(usage_ > capacity_) || lru_.next == &lru_); - // take this opportunity and remove the item + assert(lru_.next == &lru_ || force_erase); + // Take this opportunity and remove the item table_.Remove(e->key(), e->hash); e->SetInCache(false); - Unref(e); - usage_ -= e->charge; - last_reference = true; } else { - // put the item on the list to be potentially freed + // Put the item back on the LRU list, and don't free it LRU_Insert(e); + last_reference = false; } } + if (last_reference) { + usage_ -= e->charge; + } } - // free outside of mutex + // Free the entry here outside of mutex for performance reasons if (last_reference) { e->Free(); } @@ -342,7 +331,7 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value, // It shouldn't happen very often though. LRUHandle* e = reinterpret_cast( new char[sizeof(LRUHandle) - 1 + key.size()]); - Status s; + Status s = Status::OK(); autovector last_reference_list; e->value = value; @@ -351,9 +340,7 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value, e->key_length = key.size(); e->flags = 0; e->hash = hash; - e->refs = (handle == nullptr - ? 1 - : 2); // One from LRUCache, one for the returned handle + e->refs = 0; e->next = e->prev = nullptr; e->SetInCache(true); e->SetPriority(priority); @@ -366,11 +353,12 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value, // is freed or the lru list is empty EvictFromLRU(charge, &last_reference_list); - if (usage_ - lru_usage_ + charge > capacity_ && + if ((usage_ + charge) > capacity_ && (strict_capacity_limit_ || handle == nullptr)) { if (handle == nullptr) { // Don't insert the entry but still return ok, as if the entry inserted // into cache and get evicted immediately. + e->SetInCache(false); last_reference_list.push_back(e); } else { delete[] reinterpret_cast(e); @@ -378,32 +366,30 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value, s = Status::Incomplete("Insert failed due to LRU cache being full."); } } else { - // insert into the cache - // note that the cache might get larger than its capacity if not enough - // space was freed + // Insert into the cache. Note that the cache might get larger than its + // capacity if not enough space was freed up. LRUHandle* old = table_.Insert(e); usage_ += e->charge; if (old != nullptr) { + assert(old->InCache()); old->SetInCache(false); - if (Unref(old)) { - usage_ -= old->charge; - // old is on LRU because it's in cache and its reference count - // was just 1 (Unref returned 0) + if (!old->HasRefs()) { + // old is on LRU because it's in cache and its reference count is 0 LRU_Remove(old); + usage_ -= old->charge; last_reference_list.push_back(old); } } if (handle == nullptr) { LRU_Insert(e); } else { + e->Ref(); *handle = reinterpret_cast(e); } - s = Status::OK(); } } - // we free the entries here outside of mutex for - // performance reasons + // Free the entries here outside of mutex for performance reasons for (auto entry : last_reference_list) { entry->Free(); } @@ -418,18 +404,18 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) { MutexLock l(&mutex_); e = table_.Remove(key, hash); if (e != nullptr) { - last_reference = Unref(e); - if (last_reference) { - usage_ -= e->charge; - } - if (last_reference && e->InCache()) { + assert(e->InCache()); + e->SetInCache(false); + if (!e->HasRefs()) { + // The entry is in LRU since it's in hash and has no external references LRU_Remove(e); + usage_ -= e->charge; + last_reference = true; } - e->SetInCache(false); } } - // mutex not held here + // Free the entry here outside of mutex for performance reasons // last_reference will only be true if e != nullptr if (last_reference) { e->Free(); diff --git a/cache/lru_cache.h b/cache/lru_cache.h index 0d9a31748..1ff765d15 100644 --- a/cache/lru_cache.h +++ b/cache/lru_cache.h @@ -17,31 +17,34 @@ namespace rocksdb { -// LRU cache implementation +// LRU cache implementation. This class is not thread-safe. // An entry is a variable length heap-allocated structure. // Entries are referenced by cache and/or by any external entity. -// The cache keeps all its entries in table. Some elements +// The cache keeps all its entries in a hash table. Some elements // are also stored on LRU list. // // LRUHandle can be in these states: // 1. Referenced externally AND in hash table. -// In that case the entry is *not* in the LRU. (refs > 1 && in_cache == true) -// 2. Not referenced externally and in hash table. In that case the entry is -// in the LRU and can be freed. (refs == 1 && in_cache == true) -// 3. Referenced externally and not in hash table. In that case the entry is -// in not on LRU and not in table. (refs >= 1 && in_cache == false) +// In that case the entry is *not* in the LRU list +// (refs >= 1 && in_cache == true) +// 2. Not referenced externally AND in hash table. +// In that case the entry is in the LRU list and can be freed. +// (refs == 0 && in_cache == true) +// 3. Referenced externally AND not in hash table. +// In that case the entry is not in the LRU list and not in hash table. +// The entry can be freed when refs becomes 0. +// (refs >= 1 && in_cache == false) // // All newly created LRUHandles are in state 1. If you call -// LRUCacheShard::Release -// on entry in state 1, it will go into state 2. To move from state 1 to -// state 3, either call LRUCacheShard::Erase or LRUCacheShard::Insert with the -// same key. +// LRUCacheShard::Release on entry in state 1, it will go into state 2. +// To move from state 1 to state 3, either call LRUCacheShard::Erase or +// LRUCacheShard::Insert with the same key (but possibly different value). // To move from state 2 to state 1, use LRUCacheShard::Lookup. // Before destruction, make sure that no handles are in state 1. This means // that any successful LRUCacheShard::Lookup/LRUCacheShard::Insert have a -// matching -// RUCache::Release (to move into state 2) or LRUCacheShard::Erase (for state 3) +// matching LRUCache::Release (to move into state 2) or LRUCacheShard::Erase +// (to move into state 3). struct LRUHandle { void* value; @@ -51,37 +54,42 @@ struct LRUHandle { LRUHandle* prev; size_t charge; // TODO(opt): Only allow uint32_t? size_t key_length; - uint32_t refs; // a number of refs to this entry - // cache itself is counted as 1 - - // Include the following flags: - // IN_CACHE: whether this entry is referenced by the hash table. - // IS_HIGH_PRI: whether this entry is high priority entry. - // IN_HIGH_PRI_POOL: whether this entry is in high-pri pool. - // HAS_HIT: whether this entry has had any lookups (hits). + // The hash of key(). Used for fast sharding and comparisons. + uint32_t hash; + // The number of external refs to this entry. The cache itself is not counted. + uint32_t refs; + enum Flags : uint8_t { + // Whether this entry is referenced by the hash table. IN_CACHE = (1 << 0), + // Whether this entry is high priority entry. IS_HIGH_PRI = (1 << 1), + // Whether this entry is in high-pri pool. IN_HIGH_PRI_POOL = (1 << 2), + // Wwhether this entry has had any lookups (hits). HAS_HIT = (1 << 3), }; uint8_t flags; - uint32_t hash; // Hash of key(); used for fast sharding and comparisons + // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!) + char key_data[1]; - char key_data[1]; // Beginning of key + Slice key() const { return Slice(key_data, key_length); } - Slice key() const { - // For cheaper lookups, we allow a temporary Handle object - // to store a pointer to a key in "value". - if (next == this) { - return *(reinterpret_cast(value)); - } else { - return Slice(key_data, key_length); - } + // Increase the reference count by 1. + void Ref() { refs++; } + + // Just reduce the reference count by 1. Return true if it was last reference. + bool Unref() { + assert(refs > 0); + refs--; + return refs == 0; } + // Return true if there are external refs, false otherwise. + bool HasRefs() const { return refs > 0; } + bool InCache() const { return flags & IN_CACHE; } bool IsHighPri() const { return flags & IS_HIGH_PRI; } bool InHighPriPool() const { return flags & IN_HIGH_PRI_POOL; } @@ -114,7 +122,7 @@ struct LRUHandle { void SetHit() { flags |= HAS_HIT; } void Free() { - assert((refs == 1 && InCache()) || (refs == 0 && !InCache())); + assert(refs == 0); if (deleter) { (*deleter)(key(), value); } @@ -169,7 +177,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard { public: LRUCacheShard(size_t capacity, bool strict_capacity_limit, double high_pri_pool_ratio, bool use_adaptive_mutex); - virtual ~LRUCacheShard(); + virtual ~LRUCacheShard() override = default; // Separate from constructor so caller can easily make an array of LRUCache // if current usage is more than new capacity, the function will attempt to @@ -225,10 +233,6 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard { // high-pri pool is no larger than the size specify by high_pri_pool_pct. void MaintainPoolSize(); - // Just reduce the reference count by 1. - // Return true if last reference - bool Unref(LRUHandle* e); - // Free some space following strict LRU policy until enough space // to hold (usage_ + charge) is freed or the lru list is empty // This function is not thread safe - it needs to be executed while