LRUCache midpoint insertion

Summary:
Implement a midpoint insertion strategy in which new blocks are inserted into the middle of the LRU list and then moved to the head on their first cache hit.
Closes https://github.com/facebook/rocksdb/pull/3877

Differential Revision: D8100895

Pulled By: yiwu-arbug

fbshipit-source-id: f4bd83cb8be469e5d02072cfc8bd66011391f3da
main
Yi Wu 7 years ago committed by Facebook Github Bot
parent 3db8504cde
commit bc7e8d472e
  1. 1
      HISTORY.md
  2. 15
      cache/lru_cache.cc
  3. 15
      cache/lru_cache.h
  4. 52
      cache/lru_cache_test.cc
  5. 5
      db/db_block_cache_test.cc
  6. 9
      include/rocksdb/cache.h
  7. 46
      tools/db_bench_tool.cc

@ -2,6 +2,7 @@
## Unreleased ## Unreleased
### Public API Change ### Public API Change
* For users of `Statistics` objects created via `CreateDBStatistics()`, the format of the string returned by its `ToString()` method has changed. * For users of `Statistics` objects created via `CreateDBStatistics()`, the format of the string returned by its `ToString()` method has changed.
* With LRUCache, when high_pri_pool_ratio > 0, the midpoint insertion strategy is enabled: low-pri items are placed at the tail of the low-pri list (the midpoint) when they are first inserted into the cache. This makes cache entries that never get hit age out faster, improving cache efficiency when a large background scan is present.
## 5.14.0 (5/16/2018) ## 5.14.0 (5/16/2018)
### Public API Change ### Public API Change

15
cache/lru_cache.cc vendored

@ -199,7 +199,7 @@ void LRUCacheShard::LRU_Remove(LRUHandle* e) {
void LRUCacheShard::LRU_Insert(LRUHandle* e) { void LRUCacheShard::LRU_Insert(LRUHandle* e) {
assert(e->next == nullptr); assert(e->next == nullptr);
assert(e->prev == nullptr); assert(e->prev == nullptr);
if (high_pri_pool_ratio_ > 0 && e->IsHighPri()) { if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
// Inset "e" to head of LRU list. // Inset "e" to head of LRU list.
e->next = &lru_; e->next = &lru_;
e->prev = lru_.prev; e->prev = lru_.prev;
@ -246,18 +246,6 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
} }
} }
// Allocates storage for a shard through port::cacheline_aligned_alloc so the
// object is aligned to the cache line size.
void* LRUCacheShard::operator new(size_t size) {
  void* const aligned_block = port::cacheline_aligned_alloc(size);
  return aligned_block;
}
// Placement new: performs no allocation and simply hands the caller-supplied
// `ptr` back.
void* LRUCacheShard::operator new(size_t /*size*/, void* ptr) { return ptr; }
// Releases `memblock` through port::cacheline_aligned_free.
// NOTE(review): presumably pairs with the cache-line-aligned operator new --
// confirm that every shard freed here was allocated that way.
void LRUCacheShard::operator delete(void *memblock) {
port::cacheline_aligned_free(memblock);
}
// Placement delete: intentionally does nothing.
void LRUCacheShard::operator delete(void* /*memblock*/, void* /*ptr*/) {}
void LRUCacheShard::SetCapacity(size_t capacity) { void LRUCacheShard::SetCapacity(size_t capacity) {
autovector<LRUHandle*> last_reference_list; autovector<LRUHandle*> last_reference_list;
{ {
@ -287,6 +275,7 @@ Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
LRU_Remove(e); LRU_Remove(e);
} }
e->refs++; e->refs++;
e->SetHit();
} }
return reinterpret_cast<Cache::Handle*>(e); return reinterpret_cast<Cache::Handle*>(e);
} }

15
cache/lru_cache.h vendored

@ -77,6 +77,7 @@ struct LRUHandle {
bool InCache() { return flags & 1; } bool InCache() { return flags & 1; }
bool IsHighPri() { return flags & 2; } bool IsHighPri() { return flags & 2; }
bool InHighPriPool() { return flags & 4; } bool InHighPriPool() { return flags & 4; }
// Reports whether the hit bit (value 8 in `flags`) is set; SetHit() is what
// sets it.
bool HasHit() { return (flags & 8) != 0; }
void SetInCache(bool in_cache) { void SetInCache(bool in_cache) {
if (in_cache) { if (in_cache) {
@ -102,6 +103,8 @@ struct LRUHandle {
} }
} }
// Sets the hit bit (value 8 in `flags`), which HasHit() reports. Other bits
// of `flags` are left untouched.
void SetHit() { flags |= 8; }
void Free() { void Free() {
assert((refs == 1 && InCache()) || (refs == 0 && !InCache())); assert((refs == 1 && InCache()) || (refs == 0 && !InCache()));
if (deleter) { if (deleter) {
@ -206,18 +209,6 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard {
// Retrives high pri pool ratio // Retrives high pri pool ratio
double GetHighPriPoolRatio(); double GetHighPriPoolRatio();
// Overloading to aligned it to cache line size
// They are used by tests.
void* operator new(size_t);
// placement new
void* operator new(size_t, void*);
void operator delete(void *);
// placement delete, does nothing.
void operator delete(void*, void*);
private: private:
void LRU_Remove(LRUHandle* e); void LRU_Remove(LRUHandle* e);
void LRU_Insert(LRUHandle* e); void LRU_Insert(LRUHandle* e);

@ -15,11 +15,22 @@ namespace rocksdb {
class LRUCacheTest : public testing::Test { class LRUCacheTest : public testing::Test {
public: public:
LRUCacheTest() {} LRUCacheTest() {}
~LRUCacheTest() {} ~LRUCacheTest() { DeleteCache(); }
// Tears down the current shard, if any: runs its destructor explicitly and
// then returns the storage via port::cacheline_aligned_free. Leaves cache_
// null, so calling this repeatedly is harmless.
void DeleteCache() {
  if (cache_ == nullptr) {
    return;
  }
  cache_->~LRUCacheShard();
  port::cacheline_aligned_free(cache_);
  cache_ = nullptr;
}
void NewCache(size_t capacity, double high_pri_pool_ratio = 0.0) { void NewCache(size_t capacity, double high_pri_pool_ratio = 0.0) {
cache_.reset(new LRUCacheShard(capacity, false /*strict_capcity_limit*/, DeleteCache();
high_pri_pool_ratio)); cache_ = reinterpret_cast<LRUCacheShard*>(
port::cacheline_aligned_alloc(sizeof(LRUCacheShard)));
new (cache_) LRUCacheShard(capacity, false /*strict_capcity_limit*/,
high_pri_pool_ratio);
} }
void Insert(const std::string& key, void Insert(const std::string& key,
@ -75,7 +86,7 @@ class LRUCacheTest : public testing::Test {
} }
private: private:
std::unique_ptr<LRUCacheShard> cache_; LRUCacheShard* cache_ = nullptr;
}; };
TEST_F(LRUCacheTest, BasicLRU) { TEST_F(LRUCacheTest, BasicLRU) {
@ -104,6 +115,29 @@ TEST_F(LRUCacheTest, BasicLRU) {
ValidateLRUList({"e", "z", "d", "u", "v"}); ValidateLRUList({"e", "z", "d", "u", "v"});
} }
// Checks where entries land in the LRU list when a high-pri pool is
// configured. Judging by the expectations below, ValidateLRUList lists the
// entries starting with the one that is evicted next.
// NOTE(review): the second ValidateLRUList argument looks like the expected
// number of entries in the high-pri pool -- confirm against the helper.
TEST_F(LRUCacheTest, MidpointInsertion) {
// Allocate 2 cache entries to high-pri pool.
NewCache(5, 0.45);
Insert("a", Cache::Priority::LOW);
Insert("b", Cache::Priority::LOW);
Insert("c", Cache::Priority::LOW);
Insert("x", Cache::Priority::HIGH);
Insert("y", Cache::Priority::HIGH);
ValidateLRUList({"a", "b", "c", "x", "y"}, 2);
// A new low-pri entry is inserted at the tail of the low-pri list (the
// midpoint): "d" pushes out "a" and lands between "c" and "x".
// Once it has been looked up, it moves to the tail of the full list.
Insert("d", Cache::Priority::LOW);
ValidateLRUList({"b", "c", "d", "x", "y"}, 2);
ASSERT_TRUE(Lookup("d"));
ValidateLRUList({"b", "c", "x", "y", "d"}, 2);
// A high-pri entry is inserted at the tail of the full list.
Insert("z", Cache::Priority::HIGH);
ValidateLRUList({"c", "x", "y", "d", "z"}, 2);
}
TEST_F(LRUCacheTest, EntriesWithPriority) { TEST_F(LRUCacheTest, EntriesWithPriority) {
// Allocate 2 cache entries to high-pri pool. // Allocate 2 cache entries to high-pri pool.
NewCache(5, 0.45); NewCache(5, 0.45);
@ -130,15 +164,15 @@ TEST_F(LRUCacheTest, EntriesWithPriority) {
Insert("a", Cache::Priority::LOW); Insert("a", Cache::Priority::LOW);
ValidateLRUList({"v", "X", "a", "Y", "Z"}, 2); ValidateLRUList({"v", "X", "a", "Y", "Z"}, 2);
// Low-pri entries will be inserted to head of low-pri pool after lookup. // Low-pri entries will be inserted to head of high-pri pool after lookup.
ASSERT_TRUE(Lookup("v")); ASSERT_TRUE(Lookup("v"));
ValidateLRUList({"X", "a", "v", "Y", "Z"}, 2); ValidateLRUList({"X", "a", "Y", "Z", "v"}, 2);
// High-pri entries will be inserted to the head of the list after lookup. // High-pri entries will be inserted to the head of the list after lookup.
ASSERT_TRUE(Lookup("X")); ASSERT_TRUE(Lookup("X"));
ValidateLRUList({"a", "v", "Y", "Z", "X"}, 2); ValidateLRUList({"a", "Y", "Z", "v", "X"}, 2);
ASSERT_TRUE(Lookup("Z")); ASSERT_TRUE(Lookup("Z"));
ValidateLRUList({"a", "v", "Y", "X", "Z"}, 2); ValidateLRUList({"a", "Y", "v", "X", "Z"}, 2);
Erase("Y"); Erase("Y");
ValidateLRUList({"a", "v", "X", "Z"}, 2); ValidateLRUList({"a", "v", "X", "Z"}, 2);
@ -151,7 +185,7 @@ TEST_F(LRUCacheTest, EntriesWithPriority) {
Insert("g", Cache::Priority::LOW); Insert("g", Cache::Priority::LOW);
ValidateLRUList({"d", "e", "f", "g", "Z"}, 1); ValidateLRUList({"d", "e", "f", "g", "Z"}, 1);
ASSERT_TRUE(Lookup("d")); ASSERT_TRUE(Lookup("d"));
ValidateLRUList({"e", "f", "g", "d", "Z"}, 1); ValidateLRUList({"e", "f", "g", "Z", "d"}, 2);
} }
} // namespace rocksdb } // namespace rocksdb

@ -390,7 +390,10 @@ class MockCache : public LRUCache {
static uint32_t high_pri_insert_count; static uint32_t high_pri_insert_count;
static uint32_t low_pri_insert_count; static uint32_t low_pri_insert_count;
MockCache() : LRUCache(1 << 25, 0, false, 0.0) {} MockCache()
: LRUCache((size_t)1 << 25 /*capacity*/, 0 /*num_shard_bits*/,
false /*strict_capacity_limit*/, 0.0 /*high_pri_pool_ratio*/) {
}
virtual Status Insert(const Slice& key, void* value, size_t charge, virtual Status Insert(const Slice& key, void* value, size_t charge,
void (*deleter)(const Slice& key, void* value), void (*deleter)(const Slice& key, void* value),

@ -47,6 +47,15 @@ struct LRUCacheOptions {
bool strict_capacity_limit = false; bool strict_capacity_limit = false;
// Percentage of cache reserved for high priority entries. // Percentage of cache reserved for high priority entries.
// If greater than zero, the LRU list will be split into a high-pri
// list and a low-pri list. High-pri entries will be inserted at the
// tail of the high-pri list, while low-pri entries will first be inserted at
// the tail of the low-pri list (the midpoint). This is referred to as the
// midpoint insertion strategy; it makes entries that never get hit in the
// cache age out faster.
//
// See also
// BlockBasedTableOptions::cache_index_and_filter_blocks_with_high_priority.
double high_pri_pool_ratio = 0.0; double high_pri_pool_ratio = 0.0;
LRUCacheOptions() {} LRUCacheOptions() {}

@ -100,13 +100,13 @@ DEFINE_string(
"readreverse," "readreverse,"
"compact," "compact,"
"compactall," "compactall,"
"readrandom,"
"multireadrandom," "multireadrandom,"
"readseq," "readseq,"
"readtocache," "readtocache,"
"readreverse," "readreverse,"
"readwhilewriting," "readwhilewriting,"
"readwhilemerging," "readwhilemerging,"
"readwhilescanning,"
"readrandomwriterandom," "readrandomwriterandom,"
"updaterandom," "updaterandom,"
"xorupdaterandom," "xorupdaterandom,"
@ -149,6 +149,8 @@ DEFINE_string(
"reads\n" "reads\n"
"\treadwhilemerging -- 1 merger, N threads doing random " "\treadwhilemerging -- 1 merger, N threads doing random "
"reads\n" "reads\n"
"\treadwhilescanning -- 1 thread doing full table scan, "
"N threads doing random reads\n"
"\treadrandomwriterandom -- N threads doing random-read, " "\treadrandomwriterandom -- N threads doing random-read, "
"random-write\n" "random-write\n"
"\tupdaterandom -- N threads doing read-modify-write for random " "\tupdaterandom -- N threads doing read-modify-write for random "
@ -2524,6 +2526,9 @@ void VerifyDBFromDB(std::string& truth_db_name) {
} else if (name == "readwhilemerging") { } else if (name == "readwhilemerging") {
num_threads++; // Add extra thread for writing num_threads++; // Add extra thread for writing
method = &Benchmark::ReadWhileMerging; method = &Benchmark::ReadWhileMerging;
} else if (name == "readwhilescanning") {
num_threads++; // Add extra thread for scaning
method = &Benchmark::ReadWhileScanning;
} else if (name == "readrandomwriterandom") { } else if (name == "readrandomwriterandom") {
method = &Benchmark::ReadRandomWriteRandom; method = &Benchmark::ReadRandomWriteRandom;
} else if (name == "readrandommergerandom") { } else if (name == "readrandommergerandom") {
@ -4507,6 +4512,45 @@ void VerifyDBFromDB(std::string& truth_db_name) {
thread->stats.AddBytes(bytes); thread->stats.AddBytes(bytes);
} }
// Entry point for the "readwhilescanning" benchmark. Threads with a positive
// tid run ReadRandom; every other thread runs the background scan in BGScan.
void ReadWhileScanning(ThreadState* thread) {
  const bool is_reader = thread->tid > 0;
  if (!is_reader) {
    BGScan(thread);
    return;
  }
  ReadRandom(thread);
}
// Background scan worker used by ReadWhileScanning. Walks the DB with a
// single iterator, seeking back to the first key whenever the iterator is not
// positioned on an entry, until `duration.Done(1)` reports completion. Every
// loop iteration (seek or step) is recorded as one kSeek operation on
// `thread->stats`.
//
// Aborts the process if multiple DBs are configured or if the iterator
// reports an error.
void BGScan(ThreadState* thread) {
  if (FLAGS_num_multi_db > 0) {
    fprintf(stderr, "Not supporting multiple DBs.\n");
    abort();
  }
  assert(db_.db != nullptr);
  ReadOptions read_options;
  Iterator* iter = db_.db->NewIterator(read_options);

  // Cast explicitly so the argument always matches the conversion specifier,
  // independent of how reads_ is declared on a given platform.
  fprintf(stderr, "num reads to do %llu\n",
          static_cast<unsigned long long>(reads_));
  Duration duration(FLAGS_duration, reads_);
  while (!duration.Done(1)) {
    if (!iter->Valid()) {
      // An iterator that hit an error is not Valid(), so the status has to be
      // examined on this path. Checking it only while the iterator is valid
      // would never fire and the scan would silently restart after a failure.
      if (!iter->status().ok()) {
        fprintf(stderr, "Iterator error: %s\n",
                iter->status().ToString().c_str());
        abort();
      }
      // Not positioned yet, or ran off the end: wrap around to the first key.
      iter->SeekToFirst();
    } else {
      iter->Next();
    }
    thread->stats.FinishedOps(&db_, db_.db, 1, kSeek);
  }
  delete iter;
}
// Given a key K and value V, this puts (K+"0", V), (K+"1", V), (K+"2", V) // Given a key K and value V, this puts (K+"0", V), (K+"1", V), (K+"2", V)
// in DB atomically i.e in a single batch. Also refer GetMany. // in DB atomically i.e in a single batch. Also refer GetMany.
Status PutMany(DB* db, const WriteOptions& writeoptions, const Slice& key, Status PutMany(DB* db, const WriteOptions& writeoptions, const Slice& key,

Loading…
Cancel
Save