From 1a59b6e2a97c9933d323bdeb379bb72c43dfc41c Mon Sep 17 00:00:00 2001
From: haoyuhuang
Date: Thu, 11 Jul 2019 12:40:08 -0700
Subject: [PATCH] Cache simulator: Add a ghost cache for admission control and a hybrid row-block cache. (#5534)

Summary:
This PR adds a ghost cache for admission control. Specifically, it admits an
entry on its second access. It also adds a hybrid row-block cache that caches
the referenced key-value pairs of a Get/MultiGet request instead of the blocks
it accesses.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5534

Test Plan: make clean && COMPILE_WITH_ASAN=1 make check -j32

Differential Revision: D16101124

Pulled By: HaoyuHuang

fbshipit-source-id: b99edda6418a888e94eb40f71ece45d375e234b1
---
 CMakeLists.txt                               |   1 +
 Makefile                                     |   4 +
 TARGETS                                      |   5 +
 src.mk                                       |   1 +
 tools/block_cache_trace_analyzer.cc          |  29 +-
 tools/block_cache_trace_analyzer_test.cc     |  11 +-
 trace_replay/block_cache_tracer.cc           |   8 +
 trace_replay/block_cache_tracer.h            |   1 +
 utilities/simulator_cache/cache_simulator.cc | 212 +++++++++--
 utilities/simulator_cache/cache_simulator.h  | 126 ++++++-
 .../simulator_cache/cache_simulator_test.cc  | 337 ++++++++++++++++++
 11 files changed, 684 insertions(+), 51 deletions(-)
 create mode 100644 utilities/simulator_cache/cache_simulator_test.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 50e082662..c47f9811e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1006,6 +1006,7 @@ if(WITH_TESTS)
         utilities/options/options_util_test.cc
         utilities/persistent_cache/hash_table_test.cc
         utilities/persistent_cache/persistent_cache_test.cc
+        utilities/simulator_cache/cache_simulator_test.cc
         utilities/simulator_cache/sim_cache_test.cc
         utilities/table_properties_collectors/compact_on_deletion_collector_test.cc
         utilities/transactions/optimistic_transaction_test.cc
diff --git a/Makefile b/Makefile
index f1834e0ec..1828b833b 100644
--- a/Makefile
+++ b/Makefile
@@ -510,6 +510,7 @@ TESTS = \
 	cassandra_serialize_test \
 	ttl_test \
 	backupable_db_test \
+	cache_simulator_test \
 	sim_cache_test \
 	version_edit_test \
 	version_set_test \
@@ -1321,6 +1322,9 @@ backupable_db_test: utilities/backupable/backupable_db_test.o $(LIBOBJECTS) $(TE
 checkpoint_test: utilities/checkpoint/checkpoint_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(AM_LINK)
 
+cache_simulator_test: utilities/simulator_cache/cache_simulator_test.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(AM_LINK)
+
 sim_cache_test: utilities/simulator_cache/sim_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(AM_LINK)
 
diff --git a/TARGETS b/TARGETS
index 82e1d375d..6ef3da179 100644
--- a/TARGETS
+++ b/TARGETS
@@ -423,6 +423,11 @@ ROCKS_TESTS = [
         "cache/cache_test.cc",
         "serial",
     ],
+    [
+        "cache_simulator_test",
+        "utilities/simulator_cache/cache_simulator_test.cc",
+        "serial",
+    ],
     [
         "cassandra_format_test",
         "utilities/cassandra/cassandra_format_test.cc",
diff --git a/src.mk b/src.mk
index 8b0122dbe..bc49b7ce0 100644
--- a/src.mk
+++ b/src.mk
@@ -405,6 +405,7 @@ MAIN_SOURCES = \
   utilities/object_registry_test.cc \
   utilities/option_change_migration/option_change_migration_test.cc \
   utilities/options/options_util_test.cc \
+  utilities/simulator_cache/cache_simulator_test.cc \
   utilities/simulator_cache/sim_cache_test.cc \
   utilities/table_properties_collectors/compact_on_deletion_collector_test.cc \
   utilities/transactions/optimistic_transaction_test.cc \
diff --git a/tools/block_cache_trace_analyzer.cc b/tools/block_cache_trace_analyzer.cc
index 4770348a7..bd8d8971b 100644
--- a/tools/block_cache_trace_analyzer.cc
+++ b/tools/block_cache_trace_analyzer.cc
@@ -23,9 +23,12 @@ DEFINE_string( block_cache_sim_config_path, "", "The config file path. One cache configuration per line. The format of a " "cache configuration is " - "cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N. " - "cache_name is lru or lru_priority. cache_capacity can be xK, xM or xG " - "where x is a positive number."); + "cache_name,num_shard_bits,ghost_capacity,cache_capacity_1,...,cache_" + "capacity_N. Supported cache names are lru, lru_priority, lru_hybrid, and " + "lru_hybrid_no_insert_on_row_miss. User may also add a prefix 'ghost_' to " + "a cache_name to add a ghost cache in front of the real cache. " + "ghost_capacity and cache_capacity can be xK, xM or xG where x is a " + "positive number."); DEFINE_int32(block_cache_trace_downsample_ratio, 1, "The trace collected accesses on one in every " "block_cache_trace_downsample_ratio blocks. We scale " @@ -104,6 +107,10 @@ const std::string kGroupbyAll = "all"; const std::set kGroupbyLabels{ kGroupbyBlock, kGroupbyColumnFamily, kGroupbySSTFile, kGroupbyLevel, kGroupbyBlockType, kGroupbyCaller, kGroupbyAll}; +const std::string kSupportedCacheNames = + " lru ghost_lru lru_priority ghost_lru_priority lru_hybrid " + "ghost_lru_hybrid lru_hybrid_no_insert_on_row_miss " + "ghost_lru_hybrid_no_insert_on_row_miss "; std::string block_type_to_string(TraceType type) { switch (type) { @@ -194,7 +201,8 @@ void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const { } // Write header. const std::string header = - "cache_name,num_shard_bits,capacity,miss_ratio,total_accesses"; + "cache_name,num_shard_bits,ghost_capacity,capacity,miss_ratio,total_" + "accesses"; out << header << std::endl; for (auto const& config_caches : cache_simulator_->sim_caches()) { const CacheConfiguration& config = config_caches.first; @@ -205,6 +213,8 @@ void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const { out << ","; out << config.num_shard_bits; out << ","; + out << config.ghost_cache_capacity; + out << ","; out << config.cache_capacities[i]; out << ","; out << std::fixed << std::setprecision(4) << miss_ratio; @@ -993,18 +1003,21 @@ std::vector parse_cache_config_file( config_strs.push_back(substr); } // Sanity checks. - if (config_strs.size() < 3) { + if (config_strs.size() < 4) { fprintf(stderr, "Invalid cache simulator configuration %s\n", line.c_str()); exit(1); } - if (config_strs[0] != "lru") { - fprintf(stderr, "We only support LRU cache %s\n", line.c_str()); + if (kSupportedCacheNames.find(" " + config_strs[0] + " ") == + std::string::npos) { + fprintf(stderr, "Invalid cache name %s. Supported cache names are %s\n", + line.c_str(), kSupportedCacheNames.c_str()); exit(1); } cache_config.cache_name = config_strs[0]; cache_config.num_shard_bits = ParseUint32(config_strs[1]); - for (uint32_t i = 2; i < config_strs.size(); i++) { + cache_config.ghost_cache_capacity = ParseUint64(config_strs[2]); + for (uint32_t i = 3; i < config_strs.size(); i++) { uint64_t capacity = ParseUint64(config_strs[i]); if (capacity == 0) { fprintf(stderr, "Invalid cache capacity %s, %s\n", diff --git a/tools/block_cache_trace_analyzer_test.cc b/tools/block_cache_trace_analyzer_test.cc index 21d8bcbbb..efb202cb4 100644 --- a/tools/block_cache_trace_analyzer_test.cc +++ b/tools/block_cache_trace_analyzer_test.cc @@ -205,7 +205,7 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) { } { // Generate a cache sim config. 
- std::string config = "lru,1,1K,1M,1G"; + std::string config = "lru,1,0,1K,1M,1G"; std::ofstream out(block_cache_sim_config_path_); ASSERT_TRUE(out.is_open()); out << config << std::endl; @@ -230,14 +230,15 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) { getline(ss, substr, ','); result_strs.push_back(substr); } - ASSERT_EQ(5, result_strs.size()); + ASSERT_EQ(6, result_strs.size()); ASSERT_LT(config_index, expected_capacities.size()); ASSERT_EQ("lru", result_strs[0]); // cache_name ASSERT_EQ("1", result_strs[1]); // num_shard_bits + ASSERT_EQ("0", result_strs[2]); // ghost_cache_capacity ASSERT_EQ(std::to_string(expected_capacities[config_index]), - result_strs[2]); // cache_capacity - ASSERT_EQ("100.0000", result_strs[3]); // miss_ratio - ASSERT_EQ("50", result_strs[4]); // number of accesses. + result_strs[3]); // cache_capacity + ASSERT_EQ("100.0000", result_strs[4]); // miss_ratio + ASSERT_EQ("50", result_strs[5]); // number of accesses. config_index++; } ASSERT_EQ(expected_capacities.size(), config_index); diff --git a/trace_replay/block_cache_tracer.cc b/trace_replay/block_cache_tracer.cc index 115a75d92..62db94204 100644 --- a/trace_replay/block_cache_tracer.cc +++ b/trace_replay/block_cache_tracer.cc @@ -45,6 +45,14 @@ bool BlockCacheTraceHelper::ShouldTraceGetId(TableReaderCaller caller) { caller == TableReaderCaller::kUserMultiGet; } +bool BlockCacheTraceHelper::IsUserAccess(TableReaderCaller caller) { + return caller == TableReaderCaller::kUserGet || + caller == TableReaderCaller::kUserMultiGet || + caller == TableReaderCaller::kUserIterator || + caller == TableReaderCaller::kUserApproximateSize || + caller == TableReaderCaller::kUserVerifyChecksum; +} + BlockCacheTraceWriter::BlockCacheTraceWriter( Env* env, const TraceOptions& trace_options, std::unique_ptr&& trace_writer) diff --git a/trace_replay/block_cache_tracer.h b/trace_replay/block_cache_tracer.h index 4788a3f44..66cbb5ade 100644 --- a/trace_replay/block_cache_tracer.h +++ b/trace_replay/block_cache_tracer.h @@ -23,6 +23,7 @@ class BlockCacheTraceHelper { static bool ShouldTraceReferencedKey(TraceType block_type, TableReaderCaller caller); static bool ShouldTraceGetId(TableReaderCaller caller); + static bool IsUserAccess(TableReaderCaller caller); static const std::string kUnknownColumnFamilyName; static const uint64_t kReservedGetId; diff --git a/utilities/simulator_cache/cache_simulator.cc b/utilities/simulator_cache/cache_simulator.cc index 65f626036..ebfc4cd0e 100644 --- a/utilities/simulator_cache/cache_simulator.cc +++ b/utilities/simulator_cache/cache_simulator.cc @@ -4,42 +4,177 @@ // (found in the LICENSE.Apache file in the root directory). 
#include "utilities/simulator_cache/cache_simulator.h" +#include "db/dbformat.h" namespace rocksdb { -CacheSimulator::CacheSimulator(std::shared_ptr sim_cache) + +namespace { +const std::string kGhostCachePrefix = "ghost_"; +} + +GhostCache::GhostCache(std::shared_ptr sim_cache) : sim_cache_(sim_cache) {} +bool GhostCache::Admit(const Slice& lookup_key) { + auto handle = sim_cache_->Lookup(lookup_key); + if (handle != nullptr) { + sim_cache_->Release(handle); + return true; + } + sim_cache_->Insert(lookup_key, /*value=*/nullptr, lookup_key.size(), + /*deleter=*/nullptr, /*handle=*/nullptr); + return false; +} + +CacheSimulator::CacheSimulator(std::unique_ptr&& ghost_cache, + std::shared_ptr sim_cache) + : ghost_cache_(std::move(ghost_cache)), sim_cache_(sim_cache) {} + void CacheSimulator::Access(const BlockCacheTraceRecord& access) { + bool admit = true; + const bool is_user_access = + BlockCacheTraceHelper::IsUserAccess(access.caller); + bool is_cache_miss = true; + if (ghost_cache_ && access.no_insert == Boolean::kFalse) { + admit = ghost_cache_->Admit(access.block_key); + } auto handle = sim_cache_->Lookup(access.block_key); - if (handle == nullptr && !access.no_insert) { - sim_cache_->Insert(access.block_key, /*value=*/nullptr, access.block_size, - /*deleter=*/nullptr, /*handle=*/nullptr); + if (handle != nullptr) { + sim_cache_->Release(handle); + is_cache_miss = false; + } else { + if (access.no_insert == Boolean::kFalse && admit) { + sim_cache_->Insert(access.block_key, /*value=*/nullptr, access.block_size, + /*deleter=*/nullptr, /*handle=*/nullptr); + } } + UpdateMetrics(is_user_access, is_cache_miss); } -void PrioritizedCacheSimulator::Access(const BlockCacheTraceRecord& access) { - auto handle = sim_cache_->Lookup(access.block_key); - if (handle == nullptr && !access.no_insert) { - Cache::Priority priority = Cache::Priority::LOW; - if (access.block_type == TraceType::kBlockTraceFilterBlock || - access.block_type == TraceType::kBlockTraceIndexBlock || - access.block_type == TraceType::kBlockTraceUncompressionDictBlock) { - priority = Cache::Priority::HIGH; +void CacheSimulator::UpdateMetrics(bool is_user_access, bool is_cache_miss) { + num_accesses_ += 1; + if (is_cache_miss) { + num_misses_ += 1; + } + if (is_user_access) { + user_accesses_ += 1; + if (is_cache_miss) { + user_misses_ += 1; } - sim_cache_->Insert(access.block_key, /*value=*/nullptr, access.block_size, + } +} + +Cache::Priority PrioritizedCacheSimulator::ComputeBlockPriority( + const BlockCacheTraceRecord& access) const { + if (access.block_type == TraceType::kBlockTraceFilterBlock || + access.block_type == TraceType::kBlockTraceIndexBlock || + access.block_type == TraceType::kBlockTraceUncompressionDictBlock) { + return Cache::Priority::HIGH; + } + return Cache::Priority::LOW; +} + +void PrioritizedCacheSimulator::AccessKVPair( + const Slice& key, uint64_t value_size, Cache::Priority priority, + bool no_insert, bool is_user_access, bool* is_cache_miss, bool* admitted, + bool update_metrics) { + assert(is_cache_miss); + assert(admitted); + *is_cache_miss = true; + *admitted = true; + if (ghost_cache_ && !no_insert) { + *admitted = ghost_cache_->Admit(key); + } + auto handle = sim_cache_->Lookup(key); + if (handle != nullptr) { + sim_cache_->Release(handle); + *is_cache_miss = false; + } else if (!no_insert && *admitted && value_size > 0) { + sim_cache_->Insert(key, /*value=*/nullptr, value_size, /*deleter=*/nullptr, /*handle=*/nullptr, priority); } + if (update_metrics) { + UpdateMetrics(is_user_access, 
*is_cache_miss); + } } -double CacheSimulator::miss_ratio() { - uint64_t hits = sim_cache_->get_hit_counter(); - uint64_t misses = sim_cache_->get_miss_counter(); - uint64_t accesses = hits + misses; - return static_cast(misses * 100.0 / accesses); +void PrioritizedCacheSimulator::Access(const BlockCacheTraceRecord& access) { + bool is_cache_miss = true; + bool admitted = true; + AccessKVPair(access.block_key, access.block_size, + ComputeBlockPriority(access), access.no_insert, + BlockCacheTraceHelper::IsUserAccess(access.caller), + &is_cache_miss, &admitted, /*update_metrics=*/true); } -uint64_t CacheSimulator::total_accesses() { - return sim_cache_->get_hit_counter() + sim_cache_->get_miss_counter(); +std::string HybridRowBlockCacheSimulator::ComputeRowKey( + const BlockCacheTraceRecord& access) { + assert(access.get_id != BlockCacheTraceHelper::kReservedGetId); + Slice key; + if (access.referenced_key_exist_in_block == Boolean::kTrue) { + key = ExtractUserKey(access.referenced_key); + } else { + key = access.referenced_key; + } + return std::to_string(access.sst_fd_number) + "_" + key.ToString(); +} + +void HybridRowBlockCacheSimulator::Access(const BlockCacheTraceRecord& access) { + bool is_cache_miss = true; + bool admitted = true; + if (access.get_id != BlockCacheTraceHelper::kReservedGetId) { + // This is a Get/MultiGet request. + const std::string& row_key = ComputeRowKey(access); + if (getid_getkeys_map_[access.get_id].find(row_key) == + getid_getkeys_map_[access.get_id].end()) { + // This is the first time that this key is accessed. Look up the key-value + // pair first. Do not update the miss/accesses metrics here since it will + // be updated later. + AccessKVPair(row_key, access.referenced_data_size, Cache::Priority::HIGH, + /*no_insert=*/false, + /*is_user_access=*/true, &is_cache_miss, &admitted, + /*update_metrics=*/false); + InsertResult result = InsertResult::NO_INSERT; + if (admitted && access.referenced_data_size > 0) { + result = InsertResult::INSERTED; + } else if (admitted) { + result = InsertResult::ADMITTED; + } + getid_getkeys_map_[access.get_id][row_key] = + std::make_pair(is_cache_miss, result); + } + std::pair miss_inserted = + getid_getkeys_map_[access.get_id][row_key]; + if (!miss_inserted.first) { + // This is a cache hit. Skip future accesses to its index/filter/data + // blocks. These block lookups are unnecessary if we observe a hit for the + // referenced key-value pair already. Thus, we treat these lookups as + // hits. This is also to ensure the total number of accesses are the same + // when comparing to other policies. + UpdateMetrics(/*is_user_access=*/true, /*is_cache_miss=*/false); + return; + } + // The key-value pair observes a cache miss. We need to access its + // index/filter/data blocks. 
+ AccessKVPair( + access.block_key, access.block_type, ComputeBlockPriority(access), + /*no_insert=*/!insert_blocks_upon_row_kvpair_miss_ || access.no_insert, + /*is_user_access=*/true, &is_cache_miss, &admitted, + /*update_metrics=*/true); + if (access.referenced_data_size > 0 && + miss_inserted.second == InsertResult::ADMITTED) { + sim_cache_->Insert( + row_key, /*value=*/nullptr, access.referenced_data_size, + /*deleter=*/nullptr, /*handle=*/nullptr, Cache::Priority::HIGH); + getid_getkeys_map_[access.get_id][row_key] = + std::make_pair(true, InsertResult::INSERTED); + } + return; + } + AccessKVPair(access.block_key, access.block_size, + ComputeBlockPriority(access), access.no_insert, + BlockCacheTraceHelper::IsUserAccess(access.caller), + &is_cache_miss, &admitted, /*update_metrics=*/true); } BlockCacheTraceSimulator::BlockCacheTraceSimulator( @@ -56,18 +191,41 @@ Status BlockCacheTraceSimulator::InitializeCaches() { // 1/'downsample_ratio' blocks. uint64_t simulate_cache_capacity = cache_capacity / downsample_ratio_; std::shared_ptr sim_cache; - if (config.cache_name == "lru") { - sim_cache = std::make_shared(NewSimCache( + std::unique_ptr ghost_cache; + std::string cache_name = config.cache_name; + if (cache_name.find(kGhostCachePrefix) != std::string::npos) { + ghost_cache.reset(new GhostCache( + NewLRUCache(config.ghost_cache_capacity, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0))); + cache_name = cache_name.substr(kGhostCachePrefix.size()); + } + if (cache_name == "lru") { + sim_cache = std::make_shared( + std::move(ghost_cache), NewLRUCache(simulate_cache_capacity, config.num_shard_bits, /*strict_capacity_limit=*/false, - /*high_pri_pool_ratio=*/0), - /*real_cache=*/nullptr, config.num_shard_bits)); - } else if (config.cache_name == "lru_priority") { - sim_cache = std::make_shared(NewSimCache( + /*high_pri_pool_ratio=*/0)); + } else if (cache_name == "lru_priority") { + sim_cache = std::make_shared( + std::move(ghost_cache), + NewLRUCache(simulate_cache_capacity, config.num_shard_bits, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0.5)); + } else if (cache_name == "lru_hybrid") { + sim_cache = std::make_shared( + std::move(ghost_cache), + NewLRUCache(simulate_cache_capacity, config.num_shard_bits, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0.5), + /*insert_blocks_upon_row_kvpair_miss=*/true); + } else if (cache_name == "lru_hybrid_no_insert_on_row_miss") { + sim_cache = std::make_shared( + std::move(ghost_cache), NewLRUCache(simulate_cache_capacity, config.num_shard_bits, /*strict_capacity_limit=*/false, /*high_pri_pool_ratio=*/0.5), - /*real_cache=*/nullptr, config.num_shard_bits)); + /*insert_blocks_upon_row_kvpair_miss=*/false); } else { // Not supported. return Status::InvalidArgument("Unknown cache name " + diff --git a/utilities/simulator_cache/cache_simulator.h b/utilities/simulator_cache/cache_simulator.h index b391d5dc8..b6667eeed 100644 --- a/utilities/simulator_cache/cache_simulator.h +++ b/utilities/simulator_cache/cache_simulator.h @@ -5,7 +5,6 @@ #pragma once -#include "rocksdb/utilities/sim_cache.h" #include "trace_replay/block_cache_tracer.h" namespace rocksdb { @@ -14,22 +13,46 @@ namespace rocksdb { struct CacheConfiguration { std::string cache_name; // LRU. uint32_t num_shard_bits; + uint64_t ghost_cache_capacity; // ghost cache capacity in bytes. std::vector cache_capacities; // simulate cache capacities in bytes. 
- bool operator=(const CacheConfiguration& o) const { - return cache_name == o.cache_name && num_shard_bits == o.num_shard_bits; + bool operator==(const CacheConfiguration& o) const { + return cache_name == o.cache_name && num_shard_bits == o.num_shard_bits && + ghost_cache_capacity == o.ghost_cache_capacity; } bool operator<(const CacheConfiguration& o) const { return cache_name < o.cache_name || - (cache_name == o.cache_name && num_shard_bits < o.num_shard_bits); + (cache_name == o.cache_name && num_shard_bits < o.num_shard_bits) || + (cache_name == o.cache_name && num_shard_bits == o.num_shard_bits && + ghost_cache_capacity < o.ghost_cache_capacity); } }; +// A ghost cache admits an entry on its second access. +class GhostCache { + public: + explicit GhostCache(std::shared_ptr sim_cache); + ~GhostCache() = default; + // No copy and move. + GhostCache(const GhostCache&) = delete; + GhostCache& operator=(const GhostCache&) = delete; + GhostCache(GhostCache&&) = delete; + GhostCache& operator=(GhostCache&&) = delete; + + // Returns true if the lookup_key is in the ghost cache. + // Returns false otherwise. + bool Admit(const Slice& lookup_key); + + private: + std::shared_ptr sim_cache_; +}; + // A cache simulator that runs against a block cache trace. class CacheSimulator { public: - CacheSimulator(std::shared_ptr sim_cache); + CacheSimulator(std::unique_ptr&& ghost_cache, + std::shared_ptr sim_cache); virtual ~CacheSimulator() = default; // No copy and move. CacheSimulator(const CacheSimulator&) = delete; @@ -38,12 +61,37 @@ class CacheSimulator { CacheSimulator& operator=(CacheSimulator&&) = delete; virtual void Access(const BlockCacheTraceRecord& access); - void reset_counter() { sim_cache_->reset_counter(); } - double miss_ratio(); - uint64_t total_accesses(); + void reset_counter() { + num_misses_ = 0; + num_accesses_ = 0; + user_accesses_ = 0; + user_misses_ = 0; + } + double miss_ratio() const { + if (num_accesses_ == 0) { + return -1; + } + return static_cast(num_misses_ * 100.0 / num_accesses_); + } + uint64_t total_accesses() const { return num_accesses_; } + + double user_miss_ratio() const { + if (user_accesses_ == 0) { + return -1; + } + return static_cast(user_misses_ * 100.0 / user_accesses_); + } + uint64_t user_accesses() const { return user_accesses_; } protected: - std::shared_ptr sim_cache_; + void UpdateMetrics(bool is_user_access, bool is_cache_miss); + + std::unique_ptr ghost_cache_; + std::shared_ptr sim_cache_; + uint64_t num_accesses_ = 0; + uint64_t num_misses_ = 0; + uint64_t user_accesses_ = 0; + uint64_t user_misses_ = 0; }; // A prioritized cache simulator that runs against a block cache trace. @@ -51,9 +99,65 @@ class CacheSimulator { // priority in the cache. class PrioritizedCacheSimulator : public CacheSimulator { public: - PrioritizedCacheSimulator(std::shared_ptr sim_cache) - : CacheSimulator(sim_cache) {} + PrioritizedCacheSimulator(std::unique_ptr&& ghost_cache, + std::shared_ptr sim_cache) + : CacheSimulator(std::move(ghost_cache), sim_cache) {} void Access(const BlockCacheTraceRecord& access) override; + + protected: + // Access the key-value pair and returns true upon a cache miss. + void AccessKVPair(const Slice& key, uint64_t value_size, + Cache::Priority priority, bool no_insert, + bool is_user_access, bool* is_cache_miss, bool* admitted, + bool update_metrics); + + Cache::Priority ComputeBlockPriority( + const BlockCacheTraceRecord& access) const; +}; + +// A hybrid row and block cache simulator. 
It looks up/inserts key-value pairs +// referenced by Get/MultiGet requests, and not their accessed index/filter/data +// blocks. +// +// Upon a Get/MultiGet request, it looks up the referenced key first. +// If it observes a cache hit, future block accesses on this key-value pair is +// skipped since the request is served already. Otherwise, it continues to look +// up/insert its index/filter/data blocks. It also inserts the referenced +// key-value pair in the cache for future lookups. +class HybridRowBlockCacheSimulator : public PrioritizedCacheSimulator { + public: + HybridRowBlockCacheSimulator(std::unique_ptr&& ghost_cache, + std::shared_ptr sim_cache, + bool insert_blocks_upon_row_kvpair_miss) + : PrioritizedCacheSimulator(std::move(ghost_cache), sim_cache), + insert_blocks_upon_row_kvpair_miss_( + insert_blocks_upon_row_kvpair_miss) {} + void Access(const BlockCacheTraceRecord& access) override; + + private: + // Row key is a concatenation of the access's fd_number and the referenced + // user key. + // TODO(haoyu): the row key should contain sequence number. + std::string ComputeRowKey(const BlockCacheTraceRecord& access); + + enum InsertResult : char { + INSERTED, + ADMITTED, + NO_INSERT, + }; + + // A map stores get_id to a map of row keys. For each row key, it stores a + // boolean and an enum. The first bool is true when we observe a miss upon the + // first time we encounter the row key. The second arg is INSERTED when the + // kv-pair has been inserted into the cache, ADMITTED if it should be inserted + // but haven't been, NO_INSERT if it should not be inserted. + // + // A kv-pair is in ADMITTED state when we encounter this kv-pair but do not + // know its size. This may happen if the first access on the referenced key is + // an index/filter block. + std::map>> + getid_getkeys_map_; + bool insert_blocks_upon_row_kvpair_miss_; }; // A block cache simulator that reports miss ratio curves given a set of cache diff --git a/utilities/simulator_cache/cache_simulator_test.cc b/utilities/simulator_cache/cache_simulator_test.cc new file mode 100644 index 000000000..fb0c9e849 --- /dev/null +++ b/utilities/simulator_cache/cache_simulator_test.cc @@ -0,0 +1,337 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "utilities/simulator_cache/cache_simulator.h" + +#include +#include "rocksdb/env.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" + +namespace rocksdb { +namespace { +const std::string kBlockKeyPrefix = "test-block-"; +const std::string kRefKeyPrefix = "test-get-"; +const uint64_t kGetId = 1; +const uint64_t kGetBlockId = 100; +const uint64_t kCompactionBlockId = 1000; +const uint64_t kCacheSize = 1024 * 1024 * 1024; +const uint64_t kGhostCacheSize = 1024 * 1024; +} // namespace + +class CacheSimulatorTest : public testing::Test { + public: + const size_t kNumBlocks = 5; + const size_t kValueSize = 1000; + + CacheSimulatorTest() { env_ = rocksdb::Env::Default(); } + + BlockCacheTraceRecord GenerateGetRecord(uint64_t getid) { + BlockCacheTraceRecord record; + record.block_type = TraceType::kBlockTraceDataBlock; + record.block_size = 4096; + record.block_key = kBlockKeyPrefix + std::to_string(kGetBlockId); + record.access_timestamp = env_->NowMicros(); + record.cf_id = 0; + record.cf_name = "test"; + record.caller = TableReaderCaller::kUserGet; + record.level = 6; + record.sst_fd_number = kGetBlockId; + record.get_id = getid; + record.is_cache_hit = Boolean::kFalse; + record.no_insert = Boolean::kFalse; + record.referenced_key = + kRefKeyPrefix + std::to_string(kGetId) + std::string(8, 'c'); + record.referenced_key_exist_in_block = Boolean::kTrue; + record.referenced_data_size = 100; + record.num_keys_in_block = 300; + return record; + } + + BlockCacheTraceRecord GenerateCompactionRecord() { + BlockCacheTraceRecord record; + record.block_type = TraceType::kBlockTraceDataBlock; + record.block_size = 4096; + record.block_key = kBlockKeyPrefix + std::to_string(kCompactionBlockId); + record.access_timestamp = env_->NowMicros(); + record.cf_id = 0; + record.cf_name = "test"; + record.caller = TableReaderCaller::kCompaction; + record.level = 6; + record.sst_fd_number = kCompactionBlockId; + record.is_cache_hit = Boolean::kFalse; + record.no_insert = Boolean::kTrue; + return record; + } + + Env* env_; +}; + +TEST_F(CacheSimulatorTest, GhostCache) { + const std::string key1 = "test1"; + const std::string key2 = "test2"; + std::unique_ptr ghost_cache(new GhostCache( + NewLRUCache(/*capacity=*/kGhostCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0))); + EXPECT_FALSE(ghost_cache->Admit(key1)); + EXPECT_TRUE(ghost_cache->Admit(key1)); + EXPECT_TRUE(ghost_cache->Admit(key1)); + EXPECT_FALSE(ghost_cache->Admit(key2)); + EXPECT_TRUE(ghost_cache->Admit(key2)); +} + +TEST_F(CacheSimulatorTest, CacheSimulator) { + const BlockCacheTraceRecord& access = GenerateGetRecord(kGetId); + const BlockCacheTraceRecord& compaction_access = GenerateCompactionRecord(); + std::shared_ptr sim_cache = + NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0); + std::unique_ptr cache_simulator( + new CacheSimulator(nullptr, sim_cache)); + cache_simulator->Access(access); + cache_simulator->Access(access); + ASSERT_EQ(2, cache_simulator->total_accesses()); + ASSERT_EQ(50, cache_simulator->miss_ratio()); + ASSERT_EQ(2, cache_simulator->user_accesses()); + ASSERT_EQ(50, cache_simulator->user_miss_ratio()); + + cache_simulator->Access(compaction_access); + cache_simulator->Access(compaction_access); + ASSERT_EQ(4, cache_simulator->total_accesses()); + ASSERT_EQ(75, cache_simulator->miss_ratio()); + ASSERT_EQ(2, cache_simulator->user_accesses()); + ASSERT_EQ(50, 
cache_simulator->user_miss_ratio()); + + cache_simulator->reset_counter(); + ASSERT_EQ(0, cache_simulator->total_accesses()); + ASSERT_EQ(-1, cache_simulator->miss_ratio()); + auto handle = sim_cache->Lookup(access.block_key); + ASSERT_NE(nullptr, handle); + sim_cache->Release(handle); + handle = sim_cache->Lookup(compaction_access.block_key); + ASSERT_EQ(nullptr, handle); +} + +TEST_F(CacheSimulatorTest, GhostCacheSimulator) { + const BlockCacheTraceRecord& access = GenerateGetRecord(kGetId); + std::unique_ptr ghost_cache(new GhostCache( + NewLRUCache(/*capacity=*/kGhostCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0))); + std::unique_ptr cache_simulator(new CacheSimulator( + std::move(ghost_cache), + NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0))); + cache_simulator->Access(access); + cache_simulator->Access(access); + ASSERT_EQ(2, cache_simulator->total_accesses()); + // Both of them will be miss since we have a ghost cache. + ASSERT_EQ(100, cache_simulator->miss_ratio()); +} + +TEST_F(CacheSimulatorTest, PrioritizedCacheSimulator) { + const BlockCacheTraceRecord& access = GenerateGetRecord(kGetId); + std::shared_ptr sim_cache = + NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0); + std::unique_ptr cache_simulator( + new PrioritizedCacheSimulator(nullptr, sim_cache)); + cache_simulator->Access(access); + cache_simulator->Access(access); + ASSERT_EQ(2, cache_simulator->total_accesses()); + ASSERT_EQ(50, cache_simulator->miss_ratio()); + + auto handle = sim_cache->Lookup(access.block_key); + ASSERT_NE(nullptr, handle); + sim_cache->Release(handle); +} + +TEST_F(CacheSimulatorTest, GhostPrioritizedCacheSimulator) { + const BlockCacheTraceRecord& access = GenerateGetRecord(kGetId); + std::unique_ptr ghost_cache(new GhostCache( + NewLRUCache(/*capacity=*/kGhostCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0))); + std::unique_ptr cache_simulator( + new PrioritizedCacheSimulator( + std::move(ghost_cache), + NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0))); + cache_simulator->Access(access); + cache_simulator->Access(access); + ASSERT_EQ(2, cache_simulator->total_accesses()); + // Both of them will be miss since we have a ghost cache. + ASSERT_EQ(100, cache_simulator->miss_ratio()); +} + +TEST_F(CacheSimulatorTest, HybridRowBlockCacheSimulator) { + uint64_t block_id = 100; + BlockCacheTraceRecord first_get = GenerateGetRecord(kGetId); + BlockCacheTraceRecord second_get = GenerateGetRecord(kGetId + 1); + second_get.referenced_data_size = 0; + second_get.referenced_key_exist_in_block = Boolean::kFalse; + second_get.referenced_key = kRefKeyPrefix + std::to_string(kGetId); + BlockCacheTraceRecord third_get = GenerateGetRecord(kGetId + 2); + third_get.referenced_data_size = 0; + third_get.referenced_key_exist_in_block = Boolean::kFalse; + third_get.referenced_key = kRefKeyPrefix + "third_get"; + // We didn't find the referenced key in the third get. 
+ third_get.referenced_key_exist_in_block = Boolean::kFalse; + third_get.referenced_data_size = 0; + std::shared_ptr sim_cache = + NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0); + std::unique_ptr cache_simulator( + new HybridRowBlockCacheSimulator( + nullptr, sim_cache, /*insert_blocks_row_kvpair_misses=*/true)); + // The first get request accesses 10 blocks. We should only report 10 accesses + // and 100% miss. + for (uint32_t i = 0; i < 10; i++) { + first_get.block_key = kBlockKeyPrefix + std::to_string(block_id); + cache_simulator->Access(first_get); + block_id++; + } + ASSERT_EQ(10, cache_simulator->total_accesses()); + ASSERT_EQ(100, cache_simulator->miss_ratio()); + ASSERT_EQ(10, cache_simulator->user_accesses()); + ASSERT_EQ(100, cache_simulator->user_miss_ratio()); + auto handle = + sim_cache->Lookup(ExtractUserKey(std::to_string(first_get.sst_fd_number) + + "_" + first_get.referenced_key)); + ASSERT_NE(nullptr, handle); + sim_cache->Release(handle); + for (uint32_t i = 100; i < block_id; i++) { + handle = sim_cache->Lookup(kBlockKeyPrefix + std::to_string(i)); + ASSERT_NE(nullptr, handle); + sim_cache->Release(handle); + } + + // The second get request accesses the same key. We should report 15 + // access and 66% miss, 10 misses with 15 accesses. + // We do not consider these 5 block lookups as misses since the row hits the + // cache. + for (uint32_t i = 0; i < 5; i++) { + second_get.block_key = kBlockKeyPrefix + std::to_string(block_id); + cache_simulator->Access(second_get); + block_id++; + } + ASSERT_EQ(15, cache_simulator->total_accesses()); + ASSERT_EQ(66, static_cast(cache_simulator->miss_ratio())); + ASSERT_EQ(15, cache_simulator->user_accesses()); + ASSERT_EQ(66, static_cast(cache_simulator->user_miss_ratio())); + handle = sim_cache->Lookup(std::to_string(second_get.sst_fd_number) + "_" + + second_get.referenced_key); + ASSERT_NE(nullptr, handle); + sim_cache->Release(handle); + for (uint32_t i = 100; i < block_id; i++) { + handle = sim_cache->Lookup(kBlockKeyPrefix + std::to_string(i)); + if (i < 110) { + ASSERT_NE(nullptr, handle) << i; + sim_cache->Release(handle); + } else { + ASSERT_EQ(nullptr, handle) << i; + } + } + + // The third get on a different key and does not have a size. + // This key should not be inserted into the cache. + for (uint32_t i = 0; i < 5; i++) { + third_get.block_key = kBlockKeyPrefix + std::to_string(block_id); + cache_simulator->Access(third_get); + block_id++; + } + ASSERT_EQ(20, cache_simulator->total_accesses()); + ASSERT_EQ(75, static_cast(cache_simulator->miss_ratio())); + ASSERT_EQ(20, cache_simulator->user_accesses()); + ASSERT_EQ(75, static_cast(cache_simulator->user_miss_ratio())); + // Assert that the third key is not inserted into the cache. 
+ handle = sim_cache->Lookup(std::to_string(third_get.sst_fd_number) + "_" + + third_get.referenced_key); + ASSERT_EQ(nullptr, handle); + for (uint32_t i = 100; i < block_id; i++) { + if (i < 110 || i >= 115) { + handle = sim_cache->Lookup(kBlockKeyPrefix + std::to_string(i)); + ASSERT_NE(nullptr, handle) << i; + sim_cache->Release(handle); + } else { + handle = sim_cache->Lookup(kBlockKeyPrefix + std::to_string(i)); + ASSERT_EQ(nullptr, handle) << i; + } + } +} + +TEST_F(CacheSimulatorTest, HybridRowBlockNoInsertCacheSimulator) { + uint64_t block_id = 100; + BlockCacheTraceRecord first_get = GenerateGetRecord(kGetId); + std::shared_ptr sim_cache = + NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0); + std::unique_ptr cache_simulator( + new HybridRowBlockCacheSimulator( + nullptr, sim_cache, /*insert_blocks_row_kvpair_misses=*/false)); + for (uint32_t i = 0; i < 9; i++) { + first_get.block_key = kBlockKeyPrefix + std::to_string(block_id); + cache_simulator->Access(first_get); + block_id++; + } + auto handle = + sim_cache->Lookup(ExtractUserKey(std::to_string(first_get.sst_fd_number) + + "_" + first_get.referenced_key)); + ASSERT_NE(nullptr, handle); + sim_cache->Release(handle); + // All blocks are missing from the cache since insert_blocks_row_kvpair_misses + // is set to false. + for (uint32_t i = 100; i < block_id; i++) { + handle = sim_cache->Lookup(kBlockKeyPrefix + std::to_string(i)); + ASSERT_EQ(nullptr, handle); + } +} + +TEST_F(CacheSimulatorTest, GhostHybridRowBlockCacheSimulator) { + std::unique_ptr ghost_cache(new GhostCache( + NewLRUCache(/*capacity=*/kGhostCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0))); + const BlockCacheTraceRecord& first_get = GenerateGetRecord(kGetId); + const BlockCacheTraceRecord& second_get = GenerateGetRecord(kGetId + 1); + const BlockCacheTraceRecord& third_get = GenerateGetRecord(kGetId + 2); + std::unique_ptr cache_simulator( + new HybridRowBlockCacheSimulator( + std::move(ghost_cache), + NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1, + /*strict_capacity_limit=*/false, + /*high_pri_pool_ratio=*/0), + /*insert_blocks_row_kvpair_misses=*/false)); + // Two get requests access the same key. + cache_simulator->Access(first_get); + cache_simulator->Access(second_get); + ASSERT_EQ(2, cache_simulator->total_accesses()); + ASSERT_EQ(100, cache_simulator->miss_ratio()); + ASSERT_EQ(2, cache_simulator->user_accesses()); + ASSERT_EQ(100, cache_simulator->user_miss_ratio()); + // We insert the key-value pair upon the second get request. A third get + // request should observe a hit. + for (uint32_t i = 0; i < 10; i++) { + cache_simulator->Access(third_get); + } + ASSERT_EQ(12, cache_simulator->total_accesses()); + ASSERT_EQ(16, static_cast(cache_simulator->miss_ratio())); + ASSERT_EQ(12, cache_simulator->user_accesses()); + ASSERT_EQ(16, static_cast(cache_simulator->user_miss_ratio())); +} + +} // namespace rocksdb + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +}
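
Usage note (illustration only, not part of the patch): with this change, each
line of the file passed to block_cache_trace_analyzer via
-block_cache_sim_config_path has the format

    cache_name,num_shard_bits,ghost_capacity,cache_capacity_1,...,cache_capacity_N

A hypothetical config file that simulates both a plain LRU cache and a
ghost-gated hybrid row-block cache at several capacities could look like the
following (the shard bits and capacities are made up; xK, xM, and xG suffixes
are accepted):

    lru,1,0,1K,1M,1G
    ghost_lru_hybrid,6,64M,1G,4G,16G

The ghost_ prefix places a ghost cache of ghost_capacity bytes in front of the
simulated cache, so a block or row key is only inserted on its second access.
For cache names without the prefix the ghost_capacity column is still required
but unused; the updated analyzer test simply passes 0.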
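
The new classes can also be driven directly. The sketch below is a minimal,
hypothetical driver, not part of the patch; it mirrors what
BlockCacheTraceSimulator::InitializeCaches() and the new unit tests do, and it
would have to be built inside the RocksDB tree because it uses internal
headers. All capacities, keys, and ids are made up for illustration.

#include <iostream>
#include <memory>
#include <string>

#include "rocksdb/cache.h"
#include "utilities/simulator_cache/cache_simulator.h"

int main() {
  using namespace rocksdb;

  // Ghost cache: tracks keys only, so an entry is admitted into the
  // simulated cache on its second access.
  std::unique_ptr<GhostCache> ghost_cache(new GhostCache(
      NewLRUCache(/*capacity=*/16 * 1024 * 1024, /*num_shard_bits=*/1,
                  /*strict_capacity_limit=*/false,
                  /*high_pri_pool_ratio=*/0)));

  // Hybrid row-block simulator: caches the referenced key-value pair of a
  // Get/MultiGet request in addition to its blocks.
  HybridRowBlockCacheSimulator sim(
      std::move(ghost_cache),
      NewLRUCache(/*capacity=*/1024 * 1024 * 1024, /*num_shard_bits=*/1,
                  /*strict_capacity_limit=*/false,
                  /*high_pri_pool_ratio=*/0.5),
      /*insert_blocks_upon_row_kvpair_miss=*/true);

  // A synthetic trace record for a user Get; field values are illustrative.
  BlockCacheTraceRecord record;
  record.block_type = TraceType::kBlockTraceDataBlock;
  record.block_size = 4096;
  record.block_key = "example-block-1";
  record.caller = TableReaderCaller::kUserGet;
  record.sst_fd_number = 42;
  record.get_id = 1;
  record.no_insert = Boolean::kFalse;
  // User key plus an 8-byte internal-key footer, as in the unit tests.
  record.referenced_key = "example-user-key" + std::string(8, 'c');
  record.referenced_key_exist_in_block = Boolean::kTrue;
  record.referenced_data_size = 100;

  sim.Access(record);  // Miss: the ghost cache rejects both row and block on first sight.
  sim.Access(record);  // Miss again; the block now passes the ghost cache and is inserted.
  sim.Access(record);  // Hit on the block inserted by the previous access.

  std::cout << "accesses=" << sim.total_accesses()
            << " miss_ratio=" << sim.miss_ratio()
            << " user_miss_ratio=" << sim.user_miss_ratio() << std::endl;
  return 0;
}

With the ghost cache in front, the first two accesses are misses since nothing
is admitted on first sight; the third access hits the block inserted by the
second one.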