diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc
index 2a4c0f77a..f833374e7 100644
--- a/cache/lru_cache.cc
+++ b/cache/lru_cache.cc
@@ -22,7 +22,7 @@ namespace rocksdb {
-LRUHandleTable::LRUHandleTable() : length_(0), elems_(0), list_(nullptr) {
+LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
   Resize();
 }
@@ -100,7 +100,7 @@ void LRUHandleTable::Resize() {
 }
 
 LRUCacheShard::LRUCacheShard()
-    : usage_(0), lru_usage_(0), high_pri_pool_usage_(0) {
+    : high_pri_pool_usage_(0), usage_(0), lru_usage_(0) {
   // Make empty circular linked list
   lru_.next = &lru_;
   lru_.prev = &lru_;
@@ -233,6 +233,14 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
   }
 }
 
+void* LRUCacheShard::operator new(size_t size) {
+  return rocksdb::port::cacheline_aligned_alloc(size);
+}
+
+void LRUCacheShard::operator delete(void *memblock) {
+  rocksdb::port::cacheline_aligned_free(memblock);
+}
+
 void LRUCacheShard::SetCapacity(size_t capacity) {
   autovector<LRUHandle*> last_reference_list;
   {
@@ -449,7 +457,14 @@ LRUCache::LRUCache(size_t capacity, int num_shard_bits,
                    bool strict_capacity_limit, double high_pri_pool_ratio)
     : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
   num_shards_ = 1 << num_shard_bits;
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable: 4316) // We've validated the alignment with the new operators
+#endif
   shards_ = new LRUCacheShard[num_shards_];
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
   SetCapacity(capacity);
   SetStrictCapacityLimit(strict_capacity_limit);
   for (int i = 0; i < num_shards_; i++) {
diff --git a/cache/lru_cache.h b/cache/lru_cache.h
index 5fbe0f264..2fd44bbce 100644
--- a/cache/lru_cache.h
+++ b/cache/lru_cache.h
@@ -148,13 +148,13 @@ class LRUHandleTable {
   // The table consists of an array of buckets where each bucket is
   // a linked list of cache entries that hash into the bucket.
+  LRUHandle** list_;
   uint32_t length_;
   uint32_t elems_;
-  LRUHandle** list_;
 };
 
 // A single shard of sharded cache.
-class LRUCacheShard : public CacheShard {
+class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard {
  public:
   LRUCacheShard();
   virtual ~LRUCacheShard();
@@ -202,6 +202,11 @@ class LRUCacheShard : public CacheShard {
   //  not threadsafe
   size_t TEST_GetLRUSize();
 
+  // Overloading to align it to the cache line size
+  void* operator new(size_t);
+
+  void operator delete(void *);
+
  private:
   void LRU_Remove(LRUHandle* e);
   void LRU_Insert(LRUHandle* e);
@@ -223,12 +228,6 @@ class LRUCacheShard : public CacheShard {
   // Initialized before use.
   size_t capacity_;
 
-  // Memory size for entries residing in the cache
-  size_t usage_;
-
-  // Memory size for entries residing only in the LRU list
-  size_t lru_usage_;
-
   // Memory size for entries in high-pri pool.
   size_t high_pri_pool_usage_;
@@ -242,11 +241,6 @@ class LRUCacheShard : public CacheShard {
   // Remember the value to avoid recomputing each time.
   double high_pri_pool_capacity_;
 
-  // mutex_ protects the following state.
-  // We don't count mutex_ as the cache's internal state so semantically we
-  // don't mind mutex_ invoking the non-const actions.
-  mutable port::Mutex mutex_;
-
   // Dummy head of LRU list.
   // lru.prev is newest entry, lru.next is oldest entry.
   // LRU contains items which can be evicted, ie reference only by cache
@@ -255,7 +249,29 @@
   // Pointer to head of low-pri pool in LRU list.
   LRUHandle* lru_low_pri_;
 
+  // ------------^^^^^^^^^^^^^-----------
+  // Not frequently modified data members
+  // ------------------------------------
+  //
+  // We separate the data members that are updated frequently from the ones
+  // that are not frequently updated, so that they don't share the same
+  // cache line, which would lead to false sharing
+  //
+  // ------------------------------------
+  // Frequently modified data members
+  // ------------vvvvvvvvvvvvv-----------
   LRUHandleTable table_;
+
+  // Memory size for entries residing in the cache
+  size_t usage_;
+
+  // Memory size for entries residing only in the LRU list
+  size_t lru_usage_;
+
+  // mutex_ protects the following state.
+  // We don't count mutex_ as the cache's internal state so semantically we
+  // don't mind mutex_ invoking the non-const actions.
+  mutable port::Mutex mutex_;
 };
 
 class LRUCache : public ShardedCache {
diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc
index 87794fd16..1b83033c3 100644
--- a/cache/lru_cache_test.cc
+++ b/cache/lru_cache_test.cc
@@ -17,7 +17,16 @@ class LRUCacheTest : public testing::Test {
   ~LRUCacheTest() {}
 
   void NewCache(size_t capacity, double high_pri_pool_ratio = 0.0) {
-    cache_.reset(new LRUCacheShard());
+    cache_.reset(
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable: 4316) // We've validated the alignment with the new operators
+#endif
+        new LRUCacheShard()
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+    );
     cache_->SetCapacity(capacity);
     cache_->SetStrictCapacityLimit(false);
     cache_->SetHighPriorityPoolRatio(high_pri_pool_ratio);
diff --git a/port/port_posix.cc b/port/port_posix.cc
index 59241daff..ee073a55d 100644
--- a/port/port_posix.cc
+++ b/port/port_posix.cc
@@ -184,5 +184,22 @@ int GetMaxOpenFiles() {
   return -1;
 }
 
+void *cacheline_aligned_alloc(size_t size) {
+#if defined (_ISOC11_SOURCE)
+  return aligned_alloc(CACHE_LINE_SIZE, size);
+#elif ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || defined(__APPLE__))
+  void *m;
+  errno = posix_memalign(&m, CACHE_LINE_SIZE, size);
+  return errno ? NULL : m;
+#else
+  return malloc(size);
+#endif
+}
+
+void cacheline_aligned_free(void *memblock) {
+  free(memblock);
+}
+
 }  // namespace port
 }  // namespace rocksdb
diff --git a/port/port_posix.h b/port/port_posix.h
index 72beb0409..fe0d42644 100644
--- a/port/port_posix.h
+++ b/port/port_posix.h
@@ -193,6 +193,13 @@ extern void InitOnce(OnceType* once, void (*initializer)());
 #endif
 #endif
 
+extern void *cacheline_aligned_alloc(size_t size);
+
+extern void cacheline_aligned_free(void *memblock);
+
+#define ALIGN_AS(n) alignas(n)
+
 #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
 
 extern void Crash(const std::string& srcfile, int srcline);
diff --git a/port/win/port_win.h b/port/win/port_win.h
index bbc5feec3..1ec090683 100644
--- a/port/win/port_win.h
+++ b/port/win/port_win.h
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include <malloc.h>
 
 #include
@@ -239,6 +240,23 @@ extern void InitOnce(OnceType* once, void (*initializer)());
 #define CACHE_LINE_SIZE 64U
 #endif
 
+inline void *cacheline_aligned_alloc(size_t size) {
+  return _aligned_malloc(size, CACHE_LINE_SIZE);
+}
+
+inline void cacheline_aligned_free(void *memblock) {
+  _aligned_free(memblock);
+}
+
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52991 for MINGW32
+// could not be worked around by -mno-ms-bitfields
+#ifndef __MINGW32__
+#define ALIGN_AS(n) __declspec(align(n))
+#else
+#define ALIGN_AS(n)
+#endif
+
 static inline void AsmVolatilePause() {
 #if defined(_M_IX86) || defined(_M_X64)
   YieldProcessor();
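
Note on the approach (commentary, not part of the patch): the diff combines three tricks. ALIGN_AS(CACHE_LINE_SIZE) on the class makes the compiler pad each LRUCacheShard to a cache-line boundary; the class-level operator new/delete overloads make heap allocations actually honor that alignment; and the member reordering groups the rarely-written fields (capacity_, high_pri_pool_usage_, the LRU list heads) away from the frequently-written ones (table_, usage_, lru_usage_, mutex_) so that readers of the cold fields do not contend with writers of the hot ones. Below is a minimal standalone sketch of the same pattern, assuming a POSIX platform; Shard, kCacheLineSize, and AlignedAlloc are hypothetical names, not RocksDB identifiers. The sketch also overloads operator new[], since an expression like `new LRUCacheShard[num_shards_]` allocates through the array form, which a scalar-only overload such as the one in this patch does not intercept.

// shard_align_sketch.cc -- standalone illustration of the layout/alignment
// pattern above; Shard, kCacheLineSize, and AlignedAlloc are hypothetical
// names, not RocksDB identifiers. Assumes a POSIX platform (posix_memalign).
#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <new>

static const size_t kCacheLineSize = 64;  // plays the role of CACHE_LINE_SIZE

// Plays the role of port::cacheline_aligned_alloc.
static void* AlignedAlloc(size_t size) {
  void* m = nullptr;
  if (posix_memalign(&m, kCacheLineSize, size) != 0) {
    throw std::bad_alloc();  // a throwing operator new must not return null
  }
  return m;
}

class alignas(kCacheLineSize) Shard {  // ALIGN_AS(CACHE_LINE_SIZE) in the diff
 public:
  // Both scalar and array forms are overloaded: `new Shard[n]` goes through
  // operator new[], so overloading only the scalar form leaves the shard
  // array's alignment up to the default allocator.
  void* operator new(size_t size) { return AlignedAlloc(size); }
  void* operator new[](size_t size) { return AlignedAlloc(size); }
  void operator delete(void* p) { free(p); }
  void operator delete[](void* p) { free(p); }

  // Rarely-written member: set once at construction.
  size_t capacity_ = 0;

  // Hot member, bumped on every insert/erase; alignas pushes it onto its own
  // cache line so readers of capacity_ never contend with writers of usage_.
  alignas(kCacheLineSize) std::atomic<size_t> usage_{0};
};

int main() {
  Shard* shards = new Shard[4];
  // Each element starts on its own cache line, so shards[0].usage_ and
  // shards[1].usage_ can be written by different threads without false
  // sharing.
  std::printf("sizeof(Shard)=%zu  &shards[0]=%p  &shards[1]=%p\n",
              sizeof(Shard), static_cast<void*>(&shards[0]),
              static_cast<void*>(&shards[1]));
  delete[] shards;
  return 0;
}

Compiled with, say, g++ -std=c++11, the printed addresses should differ by a multiple of kCacheLineSize. On Windows the same effect comes from __declspec(align(n)) plus _aligned_malloc/_aligned_free, which is exactly what the port_win.h half of the patch provides.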