Support computing miss ratio curves using sim_cache. (#5449)

Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".

When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in an output file.

This PR also adds a main source file for block_cache_trace_analyzer so that we can run the analyzer from the command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449

Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.

Differential Revision: D15797073

Pulled By: HaoyuHuang

fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
main
haoyuhuang 6 years ago committed by Facebook Github Bot
parent 7d8d56413d
commit 2d1dd5bce7
  1. 4
      Makefile
  2. 4
      include/rocksdb/utilities/sim_cache.h
  3. 1
      src.mk
  4. 254
      tools/block_cache_trace_analyzer.cc
  5. 60
      tools/block_cache_trace_analyzer.h
  6. 111
      tools/block_cache_trace_analyzer_test.cc
  7. 25
      tools/block_cache_trace_analyzer_tool.cc
  8. 22
      utilities/simulator_cache/sim_cache.cc

@ -608,6 +608,7 @@ TOOLS = \
rocksdb_undump \ rocksdb_undump \
blob_dump \ blob_dump \
trace_analyzer \ trace_analyzer \
block_cache_trace_analyzer \
TEST_LIBS = \ TEST_LIBS = \
librocksdb_env_basic_test.a librocksdb_env_basic_test.a
@ -1109,6 +1110,9 @@ db_bench: tools/db_bench.o $(BENCHTOOLOBJECTS)
trace_analyzer: tools/trace_analyzer.o $(ANALYZETOOLOBJECTS) $(LIBOBJECTS) trace_analyzer: tools/trace_analyzer.o $(ANALYZETOOLOBJECTS) $(LIBOBJECTS)
$(AM_LINK) $(AM_LINK)
block_cache_trace_analyzer: tools/block_cache_trace_analyzer_tool.o $(ANALYZETOOLOBJECTS) $(LIBOBJECTS)
$(AM_LINK)
cache_bench: cache/cache_bench.o $(LIBOBJECTS) $(TESTUTIL) cache_bench: cache/cache_bench.o $(LIBOBJECTS) $(TESTUTIL)
$(AM_LINK) $(AM_LINK)

@ -36,6 +36,10 @@ extern std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> cache,
size_t sim_capacity, size_t sim_capacity,
int num_shard_bits); int num_shard_bits);
extern std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> sim_cache,
std::shared_ptr<Cache> cache,
int num_shard_bits);
class SimCache : public Cache { class SimCache : public Cache {
public: public:
SimCache() {} SimCache() {}

@ -369,6 +369,7 @@ MAIN_SOURCES = \
table/table_test.cc \ table/table_test.cc \
third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc \ third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc \
tools/block_cache_trace_analyzer_test.cc \ tools/block_cache_trace_analyzer_test.cc \
tools/block_cache_trace_analyzer_tool.cc \
tools/db_bench.cc \ tools/db_bench.cc \
tools/db_bench_tool_test.cc \ tools/db_bench_tool_test.cc \
tools/db_sanity_test.cc \ tools/db_sanity_test.cc \

@ -3,11 +3,44 @@
// COPYING file in the root directory) and Apache 2.0 License // COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory). // (found in the LICENSE.Apache file in the root directory).
#ifndef ROCKSDB_LITE
#ifdef GFLAGS
#include "tools/block_cache_trace_analyzer.h" #include "tools/block_cache_trace_analyzer.h"
#include <cinttypes> #include <cinttypes>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <set> #include <set>
#include <sstream>
#include "monitoring/histogram.h" #include "monitoring/histogram.h"
#include "util/gflags_compat.h"
#include "util/string_util.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
// Command-line flags consumed by block_cache_trace_analyzer_tool().

// Required: the tool exits with an error when this is empty.
DEFINE_string(block_cache_trace_path, "", "The trace file path.");
// Optional: when no configuration can be read from this file, no cache
// simulator is created and no miss ratio curve is produced.
DEFINE_string(
block_cache_sim_config_path, "",
"The config file path. One cache configuration per line. The format of a "
"cache configuration is "
"cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N. "
"cache_name is lru. cache_capacity can be xK, xM or xG "
"where x is a positive number.");
// The three print_* flags each enable one optional section of the stdout
// report.
DEFINE_bool(print_block_size_stats, false,
"Print block size distribution and the distribution break down by "
"block type and column family.");
DEFINE_bool(print_access_count_stats, false,
"Print access count distribution and the distribution break down "
"by block type and column family.");
DEFINE_bool(print_data_block_access_count_stats, false,
"Print data block accesses by user Get and Multi-Get.");
// Values that are not positive are treated as 0 (no warmup) by the tool.
DEFINE_int32(cache_sim_warmup_seconds, 0,
"The number of seconds to warmup simulated caches. The hit/miss "
"counters are reset after the warmup completes.");
// When empty, the miss ratio curve is not written at all.
DEFINE_string(output_miss_ratio_curve_path, "",
"The output file to save the computed miss ratios. File format: "
"cache_name,num_shard_bits,capacity,miss_ratio,total_accesses");
namespace rocksdb { namespace rocksdb {
namespace { namespace {
@ -48,11 +81,101 @@ std::string caller_to_string(BlockCacheLookupCaller caller) {
// This cannot happen. // This cannot happen.
return "InvalidCaller"; return "InvalidCaller";
} }
// Separator line printed between sections of the analyzer's stdout report.
const char kBreakLine[] =
    "***************************************************************\n";

// Writes `num_break_lines` copies of kBreakLine to stdout.
void print_break_lines(uint32_t num_break_lines) {
  for (uint32_t i = 0; i < num_break_lines; i++) {
    // Emit the separator as data rather than as the format string: this stays
    // correct whatever characters kBreakLine holds and is clean under
    // -Wformat-security.
    fputs(kBreakLine, stdout);
  }
}
} // namespace } // namespace
// Creates one simulated cache per (configuration, capacity) pair. The caches
// are appended to sim_caches_ in the order the configurations and their
// capacities are listed.
BlockCacheTraceSimulator::BlockCacheTraceSimulator(
    uint64_t warmup_seconds,
    const std::vector<CacheConfiguration>& cache_configurations)
    : warmup_seconds_(warmup_seconds),
      cache_configurations_(cache_configurations) {
  for (const CacheConfiguration& cfg : cache_configurations_) {
    const auto shard_bits = cfg.num_shard_bits;
    for (const auto capacity : cfg.cache_capacities) {
      // Only the simulated cache is supplied; no real cache sits behind it.
      auto sim_cache = NewSimCache(NewLRUCache(capacity, shard_bits),
                                   /*real_cache=*/nullptr, shard_bits);
      sim_caches_.push_back(sim_cache);
    }
  }
}
void BlockCacheTraceSimulator::Access(const BlockCacheTraceRecord& access) {
if (trace_start_time_ == 0) {
trace_start_time_ = access.access_timestamp;
}
// access.access_timestamp is in microseconds.
if (!warmup_complete_ && trace_start_time_ + warmup_seconds_ * 1000000 <=
access.access_timestamp) {
for (auto& sim_cache : sim_caches_) {
sim_cache->reset_counter();
}
warmup_complete_ = true;
}
for (auto& sim_cache : sim_caches_) {
auto handle = sim_cache->Lookup(access.block_key);
if (handle == nullptr && !access.no_insert) {
sim_cache->Insert(access.block_key, /*value=*/nullptr, access.block_size,
/*deleter=*/nullptr);
}
}
}
// Writes the miss ratio of every simulated cache to
// output_miss_ratio_curve_path_ as CSV: a header line followed by one row
// "cache_name,num_shard_bits,capacity,miss_ratio,total_accesses" per
// (configuration, capacity) pair. miss_ratio is a percentage printed with four
// decimals.
//
// Does nothing when there is no cache simulator or the output path is empty.
// If the output file cannot be opened, an error is reported on stderr and
// nothing is written.
void BlockCacheTraceAnalyzer::PrintMissRatioCurves() const {
  if (!cache_simulator_ || output_miss_ratio_curve_path_.empty()) {
    return;
  }
  std::ofstream out(output_miss_ratio_curve_path_);
  if (!out.is_open()) {
    fprintf(stderr, "Cannot open the miss ratio curve output file %s\n",
            output_miss_ratio_curve_path_.c_str());
    return;
  }
  // Write header.
  out << "cache_name,num_shard_bits,capacity,miss_ratio,total_accesses"
      << '\n';
  // The simulator created its caches by walking the configurations and their
  // capacities in this same nested order, so one running index pairs each
  // (config, capacity) with its cache.
  const auto& sim_caches = cache_simulator_->sim_caches();
  uint64_t sim_cache_index = 0;
  for (auto const& config : cache_simulator_->cache_configurations()) {
    for (auto cache_capacity : config.cache_capacities) {
      const auto& sim_cache = sim_caches[sim_cache_index];
      const uint64_t hits = sim_cache->get_hit_counter();
      const uint64_t misses = sim_cache->get_miss_counter();
      const uint64_t total_accesses = hits + misses;
      // With no recorded accesses (e.g. the whole trace fell inside the warmup
      // window) the ratio would be 0/0; report 0 rather than writing NaN.
      const double miss_ratio =
          total_accesses == 0 ? 0.0 : misses * 100.0 / total_accesses;
      // Write the body.
      out << config.cache_name << ',' << config.num_shard_bits << ','
          << cache_capacity << ',' << std::fixed << std::setprecision(4)
          << miss_ratio << ',' << total_accesses << '\n';
      sim_cache_index++;
    }
  }
  out.close();
}
BlockCacheTraceAnalyzer::BlockCacheTraceAnalyzer( BlockCacheTraceAnalyzer::BlockCacheTraceAnalyzer(
const std::string& trace_file_path) const std::string& trace_file_path,
: trace_file_path_(trace_file_path) { const std::string& output_miss_ratio_curve_path,
std::unique_ptr<BlockCacheTraceSimulator>&& cache_simulator)
: trace_file_path_(trace_file_path),
output_miss_ratio_curve_path_(output_miss_ratio_curve_path),
cache_simulator_(std::move(cache_simulator)) {
env_ = rocksdb::Env::Default(); env_ = rocksdb::Env::Default();
} }
@ -88,6 +211,9 @@ Status BlockCacheTraceAnalyzer::Analyze() {
return s; return s;
} }
RecordAccess(access); RecordAccess(access);
if (cache_simulator_) {
cache_simulator_->Access(access);
}
} }
return Status::OK(); return Status::OK();
} }
@ -118,6 +244,7 @@ void BlockCacheTraceAnalyzer::PrintBlockSizeStats() const {
} }
fprintf(stdout, "Block size stats: \n%s", bs_stats.ToString().c_str()); fprintf(stdout, "Block size stats: \n%s", bs_stats.ToString().c_str());
for (auto const& bt_stats : bt_stats_map) { for (auto const& bt_stats : bt_stats_map) {
print_break_lines(/*num_break_lines=*/1);
fprintf(stdout, "Block size stats for block type %s: \n%s", fprintf(stdout, "Block size stats for block type %s: \n%s",
block_type_to_string(bt_stats.first).c_str(), block_type_to_string(bt_stats.first).c_str(),
bt_stats.second.ToString().c_str()); bt_stats.second.ToString().c_str());
@ -125,6 +252,7 @@ void BlockCacheTraceAnalyzer::PrintBlockSizeStats() const {
for (auto const& cf_bt_stats : cf_bt_stats_map) { for (auto const& cf_bt_stats : cf_bt_stats_map) {
const std::string& cf_name = cf_bt_stats.first; const std::string& cf_name = cf_bt_stats.first;
for (auto const& bt_stats : cf_bt_stats.second) { for (auto const& bt_stats : cf_bt_stats.second) {
print_break_lines(/*num_break_lines=*/1);
fprintf(stdout, fprintf(stdout,
"Block size stats for column family %s and block type %s: \n%s", "Block size stats for column family %s and block type %s: \n%s",
cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(), cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(),
@ -160,6 +288,7 @@ void BlockCacheTraceAnalyzer::PrintAccessCountStats() const {
fprintf(stdout, "Block access count stats: \n%s", fprintf(stdout, "Block access count stats: \n%s",
access_stats.ToString().c_str()); access_stats.ToString().c_str());
for (auto const& bt_stats : bt_stats_map) { for (auto const& bt_stats : bt_stats_map) {
print_break_lines(/*num_break_lines=*/1);
fprintf(stdout, "Block access count stats for block type %s: \n%s", fprintf(stdout, "Block access count stats for block type %s: \n%s",
block_type_to_string(bt_stats.first).c_str(), block_type_to_string(bt_stats.first).c_str(),
bt_stats.second.ToString().c_str()); bt_stats.second.ToString().c_str());
@ -167,6 +296,7 @@ void BlockCacheTraceAnalyzer::PrintAccessCountStats() const {
for (auto const& cf_bt_stats : cf_bt_stats_map) { for (auto const& cf_bt_stats : cf_bt_stats_map) {
const std::string& cf_name = cf_bt_stats.first; const std::string& cf_name = cf_bt_stats.first;
for (auto const& bt_stats : cf_bt_stats.second) { for (auto const& bt_stats : cf_bt_stats.second) {
print_break_lines(/*num_break_lines=*/1);
fprintf(stdout, fprintf(stdout,
"Block access count stats for column family %s and block type " "Block access count stats for column family %s and block type "
"%s: \n%s", "%s: \n%s",
@ -230,23 +360,28 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
"the total number of keys in a block: \n%s", "the total number of keys in a block: \n%s",
existing_keys_stats.ToString().c_str()); existing_keys_stats.ToString().c_str());
for (auto const& cf_stats : cf_existing_keys_stats_map) { for (auto const& cf_stats : cf_existing_keys_stats_map) {
print_break_lines(/*num_break_lines=*/1);
fprintf(stdout, "Break down by column family %s: \n%s", fprintf(stdout, "Break down by column family %s: \n%s",
cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
} }
print_break_lines(/*num_break_lines=*/1);
fprintf( fprintf(
stdout, stdout,
"Histogram on percentage of referenced keys DO NOT exist in a block over " "Histogram on percentage of referenced keys DO NOT exist in a block over "
"the total number of keys in a block: \n%s", "the total number of keys in a block: \n%s",
non_existing_keys_stats.ToString().c_str()); non_existing_keys_stats.ToString().c_str());
for (auto const& cf_stats : cf_non_existing_keys_stats_map) { for (auto const& cf_stats : cf_non_existing_keys_stats_map) {
print_break_lines(/*num_break_lines=*/1);
fprintf(stdout, "Break down by column family %s: \n%s", fprintf(stdout, "Break down by column family %s: \n%s",
cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
} }
print_break_lines(/*num_break_lines=*/1);
fprintf(stdout, fprintf(stdout,
"Histogram on percentage of accesses on keys exist in a block over " "Histogram on percentage of accesses on keys exist in a block over "
"the total number of accesses in a block: \n%s", "the total number of accesses in a block: \n%s",
block_access_stats.ToString().c_str()); block_access_stats.ToString().c_str());
for (auto const& cf_stats : cf_block_access_info) { for (auto const& cf_stats : cf_block_access_info) {
print_break_lines(/*num_break_lines=*/1);
fprintf(stdout, "Break down by column family %s: \n%s", fprintf(stdout, "Break down by column family %s: \n%s",
cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
} }
@ -318,15 +453,7 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
} }
// Print stats. // Print stats.
fprintf( print_break_lines(/*num_break_lines=*/3);
stdout,
"***************************************************************\n");
fprintf(
stdout,
"***************************************************************\n");
fprintf(
stdout,
"***************************************************************\n");
fprintf(stdout, "Statistics for column family %s:\n", cf_name.c_str()); fprintf(stdout, "Statistics for column family %s:\n", cf_name.c_str());
fprintf(stdout, fprintf(stdout,
"Number of files:%" PRIu64 "Number of blocks: %" PRIu64 "Number of files:%" PRIu64 "Number of blocks: %" PRIu64
@ -338,9 +465,7 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
block_type.second); block_type.second);
} }
for (auto caller : cf_caller_num_accesses_map) { for (auto caller : cf_caller_num_accesses_map) {
fprintf( print_break_lines(/*num_break_lines=*/1);
stdout,
"***************************************************************\n");
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n", fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n",
caller_to_string(caller.first).c_str(), caller.second); caller_to_string(caller.first).c_str(), caller.second);
fprintf(stdout, "Caller %s: Number of accesses per level break down\n", fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
@ -368,12 +493,7 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
} }
} }
} }
fprintf(stdout, print_break_lines(/*num_break_lines=*/3);
"***************************************************************\n");
fprintf(stdout,
"***************************************************************\n");
fprintf(stdout,
"***************************************************************\n");
fprintf(stdout, "Overall statistics:\n"); fprintf(stdout, "Overall statistics:\n");
fprintf(stdout, fprintf(stdout,
"Number of files: %" PRIu64 " Number of blocks: %" PRIu64 "Number of files: %" PRIu64 " Number of blocks: %" PRIu64
@ -384,9 +504,7 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
block_type_to_string(block_type.first).c_str(), block_type.second); block_type_to_string(block_type.first).c_str(), block_type.second);
} }
for (auto caller : caller_num_access_map) { for (auto caller : caller_num_access_map) {
fprintf( print_break_lines(/*num_break_lines=*/1);
stdout,
"***************************************************************\n");
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n", fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n",
caller_to_string(caller.first).c_str(), caller.second); caller_to_string(caller.first).c_str(), caller.second);
fprintf(stdout, "Caller %s: Number of accesses per level break down\n", fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
@ -405,4 +523,94 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
} }
} }
// Parses the cache simulator configuration file at `config_path`.
//
// Each non-blank line describes one configuration in the form
//   cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N
// where cache_name must be "lru". Blank (whitespace-only) lines are skipped.
//
// Returns the parsed configurations in file order, or an empty vector when the
// file cannot be opened. Any malformed line prints a message to stderr and
// terminates the process with exit(1).
std::vector<CacheConfiguration> parse_cache_config_file(
    const std::string& config_path) {
  std::ifstream file(config_path);
  if (!file.is_open()) {
    return {};
  }
  std::vector<CacheConfiguration> configs;
  std::string line;
  while (getline(file, line)) {
    // Tolerate blank lines (for example a trailing empty line) instead of
    // rejecting the whole file.
    if (line.find_first_not_of(" \t\r") == std::string::npos) {
      continue;
    }
    CacheConfiguration cache_config;
    std::stringstream ss(line);
    std::vector<std::string> config_strs;
    while (ss.good()) {
      std::string substr;
      getline(ss, substr, ',');
      config_strs.push_back(substr);
    }
    // Sanity checks.
    if (config_strs.size() < 3) {
      fprintf(stderr, "Invalid cache simulator configuration %s\n",
              line.c_str());
      exit(1);
    }
    if (config_strs[0] != "lru") {
      fprintf(stderr, "We only support LRU cache %s\n", line.c_str());
      exit(1);
    }
    cache_config.cache_name = config_strs[0];
    cache_config.num_shard_bits = ParseUint32(config_strs[1]);
    // NewSimCache refuses to create a cache with 20 or more shard bits and
    // returns nullptr, which the simulator would later dereference. Reject the
    // configuration here with a clear message instead.
    if (cache_config.num_shard_bits >= 20) {
      fprintf(stderr, "Invalid num_shard_bits %s, it must be less than 20, %s\n",
              config_strs[1].c_str(), line.c_str());
      exit(1);
    }
    for (uint32_t i = 2; i < config_strs.size(); i++) {
      uint64_t capacity = ParseUint64(config_strs[i]);
      if (capacity == 0) {
        fprintf(stderr, "Invalid cache capacity %s, %s\n",
                config_strs[i].c_str(), line.c_str());
        exit(1);
      }
      cache_config.cache_capacities.push_back(capacity);
    }
    configs.push_back(cache_config);
  }
  return configs;
}
int block_cache_trace_analyzer_tool(int argc, char** argv) {
ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_block_cache_trace_path.empty()) {
fprintf(stderr, "block cache trace path is empty\n");
exit(1);
}
uint64_t warmup_seconds =
FLAGS_cache_sim_warmup_seconds > 0 ? FLAGS_cache_sim_warmup_seconds : 0;
std::vector<CacheConfiguration> cache_configs =
parse_cache_config_file(FLAGS_block_cache_sim_config_path);
std::unique_ptr<BlockCacheTraceSimulator> cache_simulator;
if (!cache_configs.empty()) {
cache_simulator.reset(
new BlockCacheTraceSimulator(warmup_seconds, cache_configs));
}
BlockCacheTraceAnalyzer analyzer(FLAGS_block_cache_trace_path,
FLAGS_output_miss_ratio_curve_path,
std::move(cache_simulator));
Status s = analyzer.Analyze();
if (!s.IsIncomplete()) {
// Read all traces.
fprintf(stderr, "Cannot process the trace %s\n", s.ToString().c_str());
exit(1);
}
analyzer.PrintStatsSummary();
if (FLAGS_print_access_count_stats) {
print_break_lines(/*num_break_lines=*/3);
analyzer.PrintAccessCountStats();
}
if (FLAGS_print_block_size_stats) {
print_break_lines(/*num_break_lines=*/3);
analyzer.PrintBlockSizeStats();
}
if (FLAGS_print_data_block_access_count_stats) {
print_break_lines(/*num_break_lines=*/3);
analyzer.PrintDataBlockAccessStats();
}
print_break_lines(/*num_break_lines=*/3);
analyzer.PrintMissRatioCurves();
return 0;
}
} // namespace rocksdb } // namespace rocksdb
#endif // GFLAGS
#endif // ROCKSDB_LITE

@ -9,10 +9,56 @@
#include <vector> #include <vector>
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/utilities/sim_cache.h"
#include "trace_replay/block_cache_tracer.h" #include "trace_replay/block_cache_tracer.h"
namespace rocksdb { namespace rocksdb {
class BlockCacheTraceAnalyzer;
// A cache configuration provided by the user: one cache type and shard count
// combined with a list of capacities to simulate. The simulator creates one
// simulated cache per listed capacity.
struct CacheConfiguration {
std::string cache_name; // Cache type; the config file parser only accepts "lru".
// Number of shard bits used for every cache created from this configuration.
// NOTE(review): not default-initialized; set it before use.
uint32_t num_shard_bits;
std::vector<uint64_t>
cache_capacities; // Capacities, in bytes, of the caches to simulate.
};
// A block cache simulator that reports miss ratio curves given a set of cache
// configurations. It owns one simulated cache per (configuration, capacity)
// pair and replays trace records against all of them.
class BlockCacheTraceSimulator {
public:
// warmup_seconds: The number of seconds to warmup simulated caches. The
// hit/miss counters are reset after the warmup completes.
// cache_configurations: The configurations to simulate; they are copied into
// the simulator.
BlockCacheTraceSimulator(
uint64_t warmup_seconds,
const std::vector<CacheConfiguration>& cache_configurations);
~BlockCacheTraceSimulator() = default;
// No copy and move.
BlockCacheTraceSimulator(const BlockCacheTraceSimulator&) = delete;
BlockCacheTraceSimulator& operator=(const BlockCacheTraceSimulator&) = delete;
BlockCacheTraceSimulator(BlockCacheTraceSimulator&&) = delete;
BlockCacheTraceSimulator& operator=(BlockCacheTraceSimulator&&) = delete;
// Replays one trace record against every simulated cache.
void Access(const BlockCacheTraceRecord& access);
// The simulated caches, ordered by configuration and then by capacity, i.e.
// the same nested order in which cache_configurations() lists them.
const std::vector<std::shared_ptr<SimCache>>& sim_caches() const {
return sim_caches_;
}
// The configurations this simulator was constructed with.
const std::vector<CacheConfiguration>& cache_configurations() const {
return cache_configurations_;
}
private:
const uint64_t warmup_seconds_;
const std::vector<CacheConfiguration> cache_configurations_;
// Set once the hit/miss counters have been reset at the end of the warmup.
bool warmup_complete_ = false;
std::vector<std::shared_ptr<SimCache>> sim_caches_;
// Timestamp of the first replayed record; 0 until a record has been seen.
uint64_t trace_start_time_ = 0;
};
// Statistics of a block. // Statistics of a block.
struct BlockAccessInfo { struct BlockAccessInfo {
uint64_t num_accesses = 0; uint64_t num_accesses = 0;
@ -67,7 +113,10 @@ struct ColumnFamilyAccessInfoAggregate {
class BlockCacheTraceAnalyzer { class BlockCacheTraceAnalyzer {
public: public:
BlockCacheTraceAnalyzer(const std::string& trace_file_path); BlockCacheTraceAnalyzer(
const std::string& trace_file_path,
const std::string& output_miss_ratio_curve_path,
std::unique_ptr<BlockCacheTraceSimulator>&& cache_simulator);
~BlockCacheTraceAnalyzer() = default; ~BlockCacheTraceAnalyzer() = default;
// No copy and move. // No copy and move.
BlockCacheTraceAnalyzer(const BlockCacheTraceAnalyzer&) = delete; BlockCacheTraceAnalyzer(const BlockCacheTraceAnalyzer&) = delete;
@ -115,6 +164,8 @@ class BlockCacheTraceAnalyzer {
// accesses on keys exist in a data block and its break down by column family. // accesses on keys exist in a data block and its break down by column family.
void PrintDataBlockAccessStats() const; void PrintDataBlockAccessStats() const;
void PrintMissRatioCurves() const;
const std::map<std::string, ColumnFamilyAccessInfoAggregate>& const std::map<std::string, ColumnFamilyAccessInfoAggregate>&
TEST_cf_aggregates_map() const { TEST_cf_aggregates_map() const {
return cf_aggregates_map_; return cf_aggregates_map_;
@ -124,9 +175,14 @@ class BlockCacheTraceAnalyzer {
void RecordAccess(const BlockCacheTraceRecord& access); void RecordAccess(const BlockCacheTraceRecord& access);
rocksdb::Env* env_; rocksdb::Env* env_;
std::string trace_file_path_; const std::string trace_file_path_;
const std::string output_miss_ratio_curve_path_;
BlockCacheTraceHeader header_; BlockCacheTraceHeader header_;
std::unique_ptr<BlockCacheTraceSimulator> cache_simulator_;
std::map<std::string, ColumnFamilyAccessInfoAggregate> cf_aggregates_map_; std::map<std::string, ColumnFamilyAccessInfoAggregate> cf_aggregates_map_;
}; };
int block_cache_trace_analyzer_tool(int argc, char** argv);
} // namespace rocksdb } // namespace rocksdb

@ -3,6 +3,18 @@
// COPYING file in the root directory) and Apache 2.0 License // COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory). // (found in the LICENSE.Apache file in the root directory).
#ifndef ROCKSDB_LITE
#ifndef GFLAGS
#include <cstdio>
int main() {
fprintf(stderr,
"Please install gflags to run block_cache_trace_analyzer_test\n");
return 1;
}
#else
#include <fstream>
#include <iostream>
#include <map> #include <map>
#include <vector> #include <vector>
@ -25,6 +37,8 @@ const uint64_t kSSTStoringEvenKeys = 100;
const uint64_t kSSTStoringOddKeys = 101; const uint64_t kSSTStoringOddKeys = 101;
const std::string kRefKeyPrefix = "test-get-"; const std::string kRefKeyPrefix = "test-get-";
const uint64_t kNumKeysInBlock = 1024; const uint64_t kNumKeysInBlock = 1024;
const int kMaxArgCount = 100;
const size_t kArgBufferSize = 100000;
} // namespace } // namespace
class BlockCacheTracerTest : public testing::Test { class BlockCacheTracerTest : public testing::Test {
@ -34,6 +48,8 @@ class BlockCacheTracerTest : public testing::Test {
env_ = rocksdb::Env::Default(); env_ = rocksdb::Env::Default();
EXPECT_OK(env_->CreateDir(test_path_)); EXPECT_OK(env_->CreateDir(test_path_));
trace_file_path_ = test_path_ + "/block_cache_trace"; trace_file_path_ = test_path_ + "/block_cache_trace";
block_cache_sim_config_path_ = test_path_ + "/block_cache_sim_config";
output_miss_ratio_curve_path_ = test_path_ + "/out_miss_ratio_curve";
} }
~BlockCacheTracerTest() override { ~BlockCacheTracerTest() override {
@ -125,12 +141,94 @@ class BlockCacheTracerTest : public testing::Test {
} }
} }
// Runs the analyzer tool in-process with every print flag enabled, using the
// trace, simulator config and output paths of this fixture. The arguments are
// packed back to back (NUL-terminated) into one buffer to form a C-style
// argv.
void RunBlockCacheTraceAnalyzer() {
  std::vector<std::string> params = {
      "./block_cache_trace_analyzer",
      "-block_cache_trace_path=" + trace_file_path_,
      "-block_cache_sim_config_path=" + block_cache_sim_config_path_,
      "-output_miss_ratio_curve_path=" + output_miss_ratio_curve_path_,
      "-print_block_size_stats",
      "-print_access_count_stats",
      "-print_data_block_access_count_stats",
      "-cache_sim_warmup_seconds=0"};
  char arg_buffer[kArgBufferSize];
  char* argv[kMaxArgCount];
  int argc = 0;
  int cursor = 0;
  for (const std::string& param : params) {
    // Each argument needs its characters plus the terminating NUL.
    ASSERT_LE(cursor + param.size() + 1, kArgBufferSize);
    ASSERT_LE(argc + 1, kMaxArgCount);
    char* slot = arg_buffer + cursor;
    snprintf(slot, param.size() + 1, "%s", param.c_str());
    argv[argc] = slot;
    ++argc;
    cursor += static_cast<int>(param.size()) + 1;
  }
  ASSERT_EQ(0, rocksdb::block_cache_trace_analyzer_tool(argc, argv));
}
Env* env_; Env* env_;
EnvOptions env_options_; EnvOptions env_options_;
std::string output_miss_ratio_curve_path_;
std::string block_cache_sim_config_path_;
std::string trace_file_path_; std::string trace_file_path_;
std::string test_path_; std::string test_path_;
}; };
// End-to-end test of the analyzer tool: writes a trace and a simulator config,
// runs the tool, and checks that the miss ratio output file contains one row
// per configured capacity, each reporting 50 accesses and a 100% miss ratio.
TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) {
{
// Generate a trace file.
// The writer lives in its own scope so that it is destroyed before the
// analyzer reads the file.
TraceOptions trace_opt;
std::unique_ptr<TraceWriter> trace_writer;
ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
&trace_writer));
BlockCacheTraceWriter writer(env_, trace_opt, std::move(trace_writer));
ASSERT_OK(writer.WriteHeader());
// NOTE(review): presumably writes 50 data block accesses (matching the "50"
// expected below); WriteBlockAccess is defined outside this view -- confirm.
WriteBlockAccess(&writer, 0, TraceType::kBlockTraceDataBlock, 50);
ASSERT_OK(env_->FileExists(trace_file_path_));
}
{
// Generate a cache sim config.
// One LRU configuration with 1 shard bit and three capacities.
std::string config = "lru,1,1K,1M,1G";
std::ofstream out(block_cache_sim_config_path_);
ASSERT_TRUE(out.is_open());
out << config << std::endl;
out.close();
}
RunBlockCacheTraceAnalyzer();
{
// Validate the cache miss ratios.
// Capacities in bytes, in the order they appear in the config line above.
const std::vector<uint64_t> expected_capacities{1024, 1024 * 1024,
1024 * 1024 * 1024};
std::ifstream infile(output_miss_ratio_curve_path_);
uint32_t config_index = 0;
std::string line;
// Read header.
ASSERT_TRUE(getline(infile, line));
// Every remaining line is one CSV row for one capacity.
while (getline(infile, line)) {
std::stringstream ss(line);
std::vector<std::string> result_strs;
while (ss.good()) {
std::string substr;
getline(ss, substr, ',');
result_strs.push_back(substr);
}
ASSERT_EQ(5, result_strs.size());
ASSERT_LT(config_index, expected_capacities.size());
ASSERT_EQ("lru", result_strs[0]); // cache_name
ASSERT_EQ("1", result_strs[1]); // num_shard_bits
ASSERT_EQ(std::to_string(expected_capacities[config_index]),
result_strs[2]); // cache_capacity
ASSERT_EQ("100.0000", result_strs[3]); // miss_ratio
ASSERT_EQ("50", result_strs[4]); // number of accesses.
config_index++;
}
// Exactly one row must have been written per configured capacity.
ASSERT_EQ(expected_capacities.size(), config_index);
infile.close();
}
// Clean up the files generated by this test.
ASSERT_OK(env_->DeleteFile(output_miss_ratio_curve_path_));
ASSERT_OK(env_->DeleteFile(block_cache_sim_config_path_));
}
TEST_F(BlockCacheTracerTest, MixedBlocks) { TEST_F(BlockCacheTracerTest, MixedBlocks) {
{ {
// Generate a trace file containing a mix of blocks. // Generate a trace file containing a mix of blocks.
@ -164,7 +262,9 @@ TEST_F(BlockCacheTracerTest, MixedBlocks) {
ASSERT_EQ(kMajorVersion, header.rocksdb_major_version); ASSERT_EQ(kMajorVersion, header.rocksdb_major_version);
ASSERT_EQ(kMinorVersion, header.rocksdb_minor_version); ASSERT_EQ(kMinorVersion, header.rocksdb_minor_version);
// Read blocks. // Read blocks.
BlockCacheTraceAnalyzer analyzer(trace_file_path_); BlockCacheTraceAnalyzer analyzer(trace_file_path_,
/*output_miss_ratio_curve_path=*/"",
/*simulator=*/nullptr);
// The analyzer ends when it detects an incomplete access record. // The analyzer ends when it detects an incomplete access record.
ASSERT_EQ(Status::Incomplete(""), analyzer.Analyze()); ASSERT_EQ(Status::Incomplete(""), analyzer.Analyze());
const uint64_t expected_num_cfs = 1; const uint64_t expected_num_cfs = 1;
@ -228,3 +328,12 @@ int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv); ::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
} }
#endif // GFLAGS
#else
#include <stdio.h>
int main(int /*argc*/, char** /*argv*/) {
fprintf(stderr,
"block_cache_trace_analyzer_test is not supported in ROCKSDB_LITE\n");
return 0;
}
#endif // ROCKSDB_LITE

@ -0,0 +1,25 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Entry point of the block_cache_trace_analyzer binary. Exactly one of the
// three main() definitions below is compiled, selected by ROCKSDB_LITE and
// GFLAGS.
#ifndef ROCKSDB_LITE
#ifndef GFLAGS
#include <cstdio>
// Built without gflags: report the missing dependency and fail.
int main() {
fprintf(stderr, "Please install gflags to run rocksdb tools\n");
return 1;
}
#else // GFLAGS
#include "tools/block_cache_trace_analyzer.h"
// Regular build: forward the arguments to the analyzer tool and return its
// result as the process exit code.
int main(int argc, char** argv) {
return rocksdb::block_cache_trace_analyzer_tool(argc, argv);
}
#endif // GFLAGS
#else // ROCKSDB_LITE
#include <stdio.h>
// Lite build: the tool is not available; report that and fail.
int main(int /*argc*/, char** /*argv*/) {
fprintf(stderr, "Not supported in lite mode.\n");
return 1;
}
#endif // ROCKSDB_LITE

@ -152,10 +152,9 @@ class SimCacheImpl : public SimCache {
public: public:
// capacity for real cache (ShardedLRUCache) // capacity for real cache (ShardedLRUCache)
// test_capacity for key only cache // test_capacity for key only cache
SimCacheImpl(std::shared_ptr<Cache> cache, size_t sim_capacity, SimCacheImpl(std::shared_ptr<Cache> sim_cache, std::shared_ptr<Cache> cache)
int num_shard_bits)
: cache_(cache), : cache_(cache),
key_only_cache_(NewLRUCache(sim_capacity, num_shard_bits)), key_only_cache_(sim_cache),
miss_times_(0), miss_times_(0),
hit_times_(0), hit_times_(0),
stats_(nullptr) {} stats_(nullptr) {}
@ -185,7 +184,9 @@ class SimCacheImpl : public SimCache {
} }
cache_activity_logger_.ReportAdd(key, charge); cache_activity_logger_.ReportAdd(key, charge);
if (!cache_) {
return Status::OK();
}
return cache_->Insert(key, value, charge, deleter, handle, priority); return cache_->Insert(key, value, charge, deleter, handle, priority);
} }
@ -201,7 +202,9 @@ class SimCacheImpl : public SimCache {
} }
cache_activity_logger_.ReportLookup(key); cache_activity_logger_.ReportLookup(key);
if (!cache_) {
return nullptr;
}
return cache_->Lookup(key, stats); return cache_->Lookup(key, stats);
} }
@ -326,10 +329,17 @@ class SimCacheImpl : public SimCache {
// For instrumentation purpose, use NewSimCache instead // For instrumentation purpose, use NewSimCache instead
std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> cache, std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> cache,
size_t sim_capacity, int num_shard_bits) { size_t sim_capacity, int num_shard_bits) {
return NewSimCache(NewLRUCache(sim_capacity, num_shard_bits), cache,
num_shard_bits);
}
std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> sim_cache,
std::shared_ptr<Cache> cache,
int num_shard_bits) {
if (num_shard_bits >= 20) { if (num_shard_bits >= 20) {
return nullptr; // the cache cannot be sharded into too many fine pieces return nullptr; // the cache cannot be sharded into too many fine pieces
} }
return std::make_shared<SimCacheImpl>(cache, sim_capacity, num_shard_bits); return std::make_shared<SimCacheImpl>(sim_cache, cache);
} }
} // end namespace rocksdb } // end namespace rocksdb

Loading…
Cancel
Save