|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
#ifdef GFLAGS
|
|
|
|
#include "tools/block_cache_trace_analyzer.h"
|
|
|
|
|
|
|
|
#include <cinttypes>
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
#include <fstream>
|
|
|
|
#include <iomanip>
|
|
|
|
#include <iostream>
|
|
|
|
#include <set>
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
#include <sstream>
|
|
|
|
#include "monitoring/histogram.h"
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
#include "util/gflags_compat.h"
|
|
|
|
#include "util/string_util.h"
|
|
|
|
|
|
|
|
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
// Command-line flags (gflags) for the block cache trace analyzer.
// ---------------------------------------------------------------------------

// Input trace file. Defaults to the empty string.
DEFINE_string(block_cache_trace_path, "", "The trace file path.");
// Optional simulation config file; the line format is spelled out in the help
// text below. Defaults to the empty string.
DEFINE_string(
    block_cache_sim_config_path, "",
    "The config file path. One cache configuration per line. The format of a "
    "cache configuration is "
    "cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N. "
    "cache_name is lru. cache_capacity can be xK, xM or xG "
    "where x is a positive number.");
// Sampling ratio that was used when the trace was collected; the default of 1
// leaves simulated cache capacities unscaled.
DEFINE_int32(block_cache_trace_downsample_ratio, 1,
             "The trace collected accesses on one in every "
             "block_cache_trace_downsample_ratio blocks. We scale "
             "down the simulated cache size by this ratio.");
// The three print_* switches below each enable one report; all default to
// false.
DEFINE_bool(print_block_size_stats, false,
            "Print block size distribution and the distribution break down by "
            "block type and column family.");
DEFINE_bool(print_access_count_stats, false,
            "Print access count distribution and the distribution break down "
            "by block type and column family.");
DEFINE_bool(print_data_block_access_count_stats, false,
            "Print data block accesses by user Get and Multi-Get.");
// Warm-up window for the simulated caches, in seconds (0 by default).
DEFINE_int32(cache_sim_warmup_seconds, 0,
             "The number of seconds to warmup simulated caches. The hit/miss "
             "counters are reset after the warmup completes.");
// Destination of the miss ratio CSV. Defaults to the empty string.
DEFINE_string(output_miss_ratio_curve_path, "",
              "The output file to save the computed miss ratios. File format: "
              "cache_name,num_shard_bits,capacity,miss_ratio,total_accesses");
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
namespace {
|
|
|
|
// Translates a block trace type into the label used in the printed reports.
// Values without a dedicated case fall through to "InvalidType".
std::string block_type_to_string(TraceType type) {
  // Fallback label; the switch below overwrites it for every handled type.
  const char* label = "InvalidType";
  switch (type) {
    case kBlockTraceFilterBlock:
      label = "Filter";
      break;
    case kBlockTraceDataBlock:
      label = "Data";
      break;
    case kBlockTraceIndexBlock:
      label = "Index";
      break;
    case kBlockTraceRangeDeletionBlock:
      label = "RangeDeletion";
      break;
    case kBlockTraceUncompressionDictBlock:
      label = "UncompressionDict";
      break;
    default:
      // Not expected for block accesses; keep the fallback label.
      break;
  }
  return label;
}
|
|
|
|
|
|
|
|
// Translates a table reader caller into the label used in the printed
// reports. Values without a dedicated case fall through to "InvalidCaller".
std::string caller_to_string(TableReaderCaller caller) {
  // Fallback label; the switch below overwrites it for every handled caller.
  const char* label = "InvalidCaller";
  switch (caller) {
    case kUserGet:
      label = "Get";
      break;
    case kUserMultiGet:
      label = "MultiGet";
      break;
    case kUserIterator:
      label = "Iterator";
      break;
    case kUserApproximateSize:
      label = "ApproximateSize";
      break;
    case kUserVerifyChecksum:
      label = "VerifyChecksum";
      break;
    case kSSTDumpTool:
      label = "SSTDumpTool";
      break;
    case kExternalSSTIngestion:
      label = "ExternalSSTIngestion";
      break;
    case kRepair:
      label = "Repair";
      break;
    case kPrefetch:
      label = "Prefetch";
      break;
    case kCompaction:
      label = "Compaction";
      break;
    case kCompactionRefill:
      label = "CompactionRefill";
      break;
    case kFlush:
      label = "Flush";
      break;
    case kSSTFileReader:
      label = "SSTFileReader";
      break;
    case kUncategorized:
      label = "Uncategorized";
      break;
    default:
      // Not expected; keep the fallback label.
      break;
  }
  return label;
}
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
|
|
|
|
// Separator line written between report sections on stdout.
const char kBreakLine[] =
    "***************************************************************\n";

// Writes `num_break_lines` copies of kBreakLine to stdout.
//
// The separator is passed as an argument to a literal "%s" format rather than
// being used as the format string itself, so its contents are never
// interpreted as conversion specifiers (and -Wformat-security stays quiet).
void print_break_lines(uint32_t num_break_lines) {
  for (uint32_t i = 0; i < num_break_lines; i++) {
    fprintf(stdout, "%s", kBreakLine);
  }
}
|
|
|
|
|
|
|
|
// Returns numerator / denomenator expressed as a percentage.
// A zero denominator has no meaningful ratio; -1 is returned as a sentinel.
double percent(uint64_t numerator, uint64_t denomenator) {
  const bool has_ratio = (denomenator != 0);
  // Multiplying by 100.0 first promotes the whole expression to double.
  return has_ratio ? static_cast<double>(numerator * 100.0 / denomenator)
                   : -1;
}
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
// Builds one simulated cache per (configuration, capacity) pair.
//
// warmup_seconds:       stored in warmup_seconds_ (consumed by Access()).
// downsample_ratio:     divisor applied to every configured capacity. A value
//                       of 0 would be a division by zero, so it is treated as
//                       1 (capacities are left unscaled).
// cache_configurations: copied into cache_configurations_; sim_caches_ is
//                       filled in the same order, configuration by
//                       configuration and capacity by capacity.
//
// Note: config.cache_name is not consulted here; every simulated cache is
// created through NewLRUCache.
BlockCacheTraceSimulator::BlockCacheTraceSimulator(
    uint64_t warmup_seconds, uint32_t downsample_ratio,
    const std::vector<CacheConfiguration>& cache_configurations)
    : warmup_seconds_(warmup_seconds),
      downsample_ratio_(downsample_ratio),
      cache_configurations_(cache_configurations) {
  // Guard against an integer division by zero below.
  const uint64_t effective_ratio =
      downsample_ratio_ == 0 ? 1 : downsample_ratio_;
  for (auto const& config : cache_configurations_) {
    for (auto cache_capacity : config.cache_capacities) {
      // Scale down the cache capacity since the trace contains accesses on
      // 1/'downsample_ratio' blocks.
      uint64_t simulate_cache_capacity = cache_capacity / effective_ratio;
      sim_caches_.push_back(NewSimCache(
          NewLRUCache(simulate_cache_capacity, config.num_shard_bits),
          /*real_cache=*/nullptr, config.num_shard_bits));
    }
  }
}
|
|
|
|
|
|
|
|
void BlockCacheTraceSimulator::Access(const BlockCacheTraceRecord& access) {
|
|
|
|
if (trace_start_time_ == 0) {
|
|
|
|
trace_start_time_ = access.access_timestamp;
|
|
|
|
}
|
|
|
|
// access.access_timestamp is in microseconds.
|
|
|
|
if (!warmup_complete_ && trace_start_time_ + warmup_seconds_ * 1000000 <=
|
|
|
|
access.access_timestamp) {
|
|
|
|
for (auto& sim_cache : sim_caches_) {
|
|
|
|
sim_cache->reset_counter();
|
|
|
|
}
|
|
|
|
warmup_complete_ = true;
|
|
|
|
}
|
|
|
|
for (auto& sim_cache : sim_caches_) {
|
|
|
|
auto handle = sim_cache->Lookup(access.block_key);
|
|
|
|
if (handle == nullptr && !access.no_insert) {
|
|
|
|
sim_cache->Insert(access.block_key, /*value=*/nullptr, access.block_size,
|
|
|
|
/*deleter=*/nullptr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void BlockCacheTraceAnalyzer::PrintMissRatioCurves() const {
|
|
|
|
if (!cache_simulator_) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (output_miss_ratio_curve_path_.empty()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
std::ofstream out(output_miss_ratio_curve_path_);
|
|
|
|
if (!out.is_open()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Write header.
|
|
|
|
const std::string header =
|
|
|
|
"cache_name,num_shard_bits,capacity,miss_ratio,total_accesses";
|
|
|
|
out << header << std::endl;
|
|
|
|
uint64_t sim_cache_index = 0;
|
|
|
|
for (auto const& config : cache_simulator_->cache_configurations()) {
|
|
|
|
for (auto cache_capacity : config.cache_capacities) {
|
|
|
|
uint64_t hits =
|
|
|
|
cache_simulator_->sim_caches()[sim_cache_index]->get_hit_counter();
|
|
|
|
uint64_t misses =
|
|
|
|
cache_simulator_->sim_caches()[sim_cache_index]->get_miss_counter();
|
|
|
|
uint64_t total_accesses = hits + misses;
|
|
|
|
double miss_ratio = static_cast<double>(misses * 100.0 / total_accesses);
|
|
|
|
// Write the body.
|
|
|
|
out << config.cache_name;
|
|
|
|
out << ",";
|
|
|
|
out << config.num_shard_bits;
|
|
|
|
out << ",";
|
|
|
|
out << cache_capacity;
|
|
|
|
out << ",";
|
|
|
|
out << std::fixed << std::setprecision(4) << miss_ratio;
|
|
|
|
out << ",";
|
|
|
|
out << total_accesses;
|
|
|
|
out << std::endl;
|
|
|
|
sim_cache_index++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out.close();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stores the trace path and the miss ratio output path, takes ownership of
// the (possibly null) cache simulator, and binds the analyzer to the default
// Env.
BlockCacheTraceAnalyzer::BlockCacheTraceAnalyzer(
    const std::string& trace_file_path,
    const std::string& output_miss_ratio_curve_path,
    std::unique_ptr<BlockCacheTraceSimulator>&& cache_simulator)
    : trace_file_path_(trace_file_path),
      output_miss_ratio_curve_path_(output_miss_ratio_curve_path),
      cache_simulator_(std::move(cache_simulator)) {
  // Assigned in the body rather than the initializer list, as before.
  env_ = Env::Default();
}
|
|
|
|
|
|
|
|
void BlockCacheTraceAnalyzer::RecordAccess(
|
|
|
|
const BlockCacheTraceRecord& access) {
|
|
|
|
ColumnFamilyAccessInfoAggregate& cf_aggr = cf_aggregates_map_[access.cf_name];
|
|
|
|
SSTFileAccessInfoAggregate& file_aggr =
|
|
|
|
cf_aggr.fd_aggregates_map[access.sst_fd_number];
|
|
|
|
file_aggr.level = access.level;
|
|
|
|
BlockTypeAccessInfoAggregate& block_type_aggr =
|
|
|
|
file_aggr.block_type_aggregates_map[access.block_type];
|
|
|
|
BlockAccessInfo& block_access_info =
|
|
|
|
block_type_aggr.block_access_info_map[access.block_key];
|
|
|
|
block_access_info.AddAccess(access);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reads the trace at trace_file_path_ record by record, feeding each record
// to RecordAccess() and, when a simulator is attached, to the simulator.
//
// Returns the first non-OK status produced while opening the file, reading
// the header, or reading a record. The loop only ends on a non-OK read, so
// this function never returns OK after a successful header read.
// NOTE(review): presumably the end of the trace is itself reported as a non-OK
// status by ReadAccess and callers check for it -- confirm against the caller.
Status BlockCacheTraceAnalyzer::Analyze() {
  std::unique_ptr<TraceReader> file_reader;
  Status status =
      NewFileTraceReader(env_, EnvOptions(), trace_file_path_, &file_reader);
  if (!status.ok()) {
    return status;
  }
  BlockCacheTraceReader record_reader(std::move(file_reader));
  status = record_reader.ReadHeader(&header_);
  if (!status.ok()) {
    return status;
  }
  do {
    // A fresh record per iteration so no field carries over between reads.
    BlockCacheTraceRecord record;
    status = record_reader.ReadAccess(&record);
    if (status.ok()) {
      RecordAccess(record);
      if (cache_simulator_) {
        cache_simulator_->Access(record);
      }
    }
  } while (status.ok());
  return status;
}
|
|
|
|
|
|
|
|
void BlockCacheTraceAnalyzer::PrintBlockSizeStats() const {
|
|
|
|
HistogramStat bs_stats;
|
|
|
|
std::map<TraceType, HistogramStat> bt_stats_map;
|
|
|
|
std::map<std::string, std::map<TraceType, HistogramStat>> cf_bt_stats_map;
|
|
|
|
for (auto const& cf_aggregates : cf_aggregates_map_) {
|
|
|
|
// Stats per column family.
|
|
|
|
const std::string& cf_name = cf_aggregates.first;
|
|
|
|
for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
|
|
|
|
// Stats per SST file.
|
|
|
|
for (auto const& block_type_aggregates :
|
|
|
|
file_aggregates.second.block_type_aggregates_map) {
|
|
|
|
// Stats per block type.
|
|
|
|
const TraceType type = block_type_aggregates.first;
|
|
|
|
for (auto const& block_access_info :
|
|
|
|
block_type_aggregates.second.block_access_info_map) {
|
|
|
|
// Stats per block.
|
|
|
|
bs_stats.Add(block_access_info.second.block_size);
|
|
|
|
bt_stats_map[type].Add(block_access_info.second.block_size);
|
|
|
|
cf_bt_stats_map[cf_name][type].Add(
|
|
|
|
block_access_info.second.block_size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fprintf(stdout, "Block size stats: \n%s", bs_stats.ToString().c_str());
|
|
|
|
for (auto const& bt_stats : bt_stats_map) {
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(stdout, "Block size stats for block type %s: \n%s",
|
|
|
|
block_type_to_string(bt_stats.first).c_str(),
|
|
|
|
bt_stats.second.ToString().c_str());
|
|
|
|
}
|
|
|
|
for (auto const& cf_bt_stats : cf_bt_stats_map) {
|
|
|
|
const std::string& cf_name = cf_bt_stats.first;
|
|
|
|
for (auto const& bt_stats : cf_bt_stats.second) {
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(stdout,
|
|
|
|
"Block size stats for column family %s and block type %s: \n%s",
|
|
|
|
cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(),
|
|
|
|
bt_stats.second.ToString().c_str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void BlockCacheTraceAnalyzer::PrintAccessCountStats() const {
|
|
|
|
HistogramStat access_stats;
|
|
|
|
std::map<TraceType, HistogramStat> bt_stats_map;
|
|
|
|
std::map<std::string, std::map<TraceType, HistogramStat>> cf_bt_stats_map;
|
|
|
|
for (auto const& cf_aggregates : cf_aggregates_map_) {
|
|
|
|
// Stats per column family.
|
|
|
|
const std::string& cf_name = cf_aggregates.first;
|
|
|
|
for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
|
|
|
|
// Stats per SST file.
|
|
|
|
for (auto const& block_type_aggregates :
|
|
|
|
file_aggregates.second.block_type_aggregates_map) {
|
|
|
|
// Stats per block type.
|
|
|
|
const TraceType type = block_type_aggregates.first;
|
|
|
|
for (auto const& block_access_info :
|
|
|
|
block_type_aggregates.second.block_access_info_map) {
|
|
|
|
// Stats per block.
|
|
|
|
access_stats.Add(block_access_info.second.num_accesses);
|
|
|
|
bt_stats_map[type].Add(block_access_info.second.num_accesses);
|
|
|
|
cf_bt_stats_map[cf_name][type].Add(
|
|
|
|
block_access_info.second.num_accesses);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fprintf(stdout,
|
|
|
|
"Block access count stats: The number of accesses per block.\n%s",
|
|
|
|
access_stats.ToString().c_str());
|
|
|
|
for (auto const& bt_stats : bt_stats_map) {
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(stdout, "Break down by block type %s: \n%s",
|
|
|
|
block_type_to_string(bt_stats.first).c_str(),
|
|
|
|
bt_stats.second.ToString().c_str());
|
|
|
|
}
|
|
|
|
for (auto const& cf_bt_stats : cf_bt_stats_map) {
|
|
|
|
const std::string& cf_name = cf_bt_stats.first;
|
|
|
|
for (auto const& bt_stats : cf_bt_stats.second) {
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(stdout,
|
|
|
|
"Break down by column family %s and block type "
|
|
|
|
"%s: \n%s",
|
|
|
|
cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(),
|
|
|
|
bt_stats.second.ToString().c_str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
|
|
|
|
HistogramStat existing_keys_stats;
|
|
|
|
std::map<std::string, HistogramStat> cf_existing_keys_stats_map;
|
|
|
|
HistogramStat non_existing_keys_stats;
|
|
|
|
std::map<std::string, HistogramStat> cf_non_existing_keys_stats_map;
|
|
|
|
HistogramStat block_access_stats;
|
|
|
|
std::map<std::string, HistogramStat> cf_block_access_info;
|
|
|
|
HistogramStat percent_referenced_bytes;
|
|
|
|
std::map<std::string, HistogramStat> cf_percent_referenced_bytes;
|
|
|
|
// Total number of accesses in a data block / number of keys in a data block.
|
|
|
|
HistogramStat avg_naccesses_per_key_in_a_data_block;
|
|
|
|
std::map<std::string, HistogramStat> cf_avg_naccesses_per_key_in_a_data_block;
|
|
|
|
// The standard deviation on the number of accesses of a key in a data block.
|
|
|
|
HistogramStat stdev_naccesses_per_key_in_a_data_block;
|
|
|
|
std::map<std::string, HistogramStat>
|
|
|
|
cf_stdev_naccesses_per_key_in_a_data_block;
|
|
|
|
|
|
|
|
for (auto const& cf_aggregates : cf_aggregates_map_) {
|
|
|
|
// Stats per column family.
|
|
|
|
const std::string& cf_name = cf_aggregates.first;
|
|
|
|
for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
|
|
|
|
// Stats per SST file.
|
|
|
|
for (auto const& block_type_aggregates :
|
|
|
|
file_aggregates.second.block_type_aggregates_map) {
|
|
|
|
// Stats per block type.
|
|
|
|
for (auto const& block_access_info :
|
|
|
|
block_type_aggregates.second.block_access_info_map) {
|
|
|
|
// Stats per block.
|
|
|
|
if (block_access_info.second.num_keys == 0) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// Use four decimal points.
|
|
|
|
uint64_t percent_referenced_for_existing_keys = (uint64_t)(
|
|
|
|
((double)block_access_info.second.key_num_access_map.size() /
|
|
|
|
(double)block_access_info.second.num_keys) *
|
|
|
|
10000.0);
|
|
|
|
uint64_t percent_referenced_for_non_existing_keys =
|
|
|
|
(uint64_t)(((double)block_access_info.second
|
|
|
|
.non_exist_key_num_access_map.size() /
|
|
|
|
(double)block_access_info.second.num_keys) *
|
|
|
|
10000.0);
|
|
|
|
uint64_t percent_accesses_for_existing_keys = (uint64_t)(
|
|
|
|
((double)
|
|
|
|
block_access_info.second.num_referenced_key_exist_in_block /
|
|
|
|
(double)block_access_info.second.num_accesses) *
|
|
|
|
10000.0);
|
|
|
|
|
|
|
|
HistogramStat hist_naccess_per_key;
|
|
|
|
for (auto const& key_access :
|
|
|
|
block_access_info.second.key_num_access_map) {
|
|
|
|
hist_naccess_per_key.Add(key_access.second);
|
|
|
|
}
|
|
|
|
uint64_t avg_accesses = hist_naccess_per_key.Average();
|
|
|
|
uint64_t stdev_accesses = hist_naccess_per_key.StandardDeviation();
|
|
|
|
avg_naccesses_per_key_in_a_data_block.Add(avg_accesses);
|
|
|
|
cf_avg_naccesses_per_key_in_a_data_block[cf_name].Add(avg_accesses);
|
|
|
|
stdev_naccesses_per_key_in_a_data_block.Add(stdev_accesses);
|
|
|
|
cf_stdev_naccesses_per_key_in_a_data_block[cf_name].Add(
|
|
|
|
stdev_accesses);
|
|
|
|
|
|
|
|
existing_keys_stats.Add(percent_referenced_for_existing_keys);
|
|
|
|
cf_existing_keys_stats_map[cf_name].Add(
|
|
|
|
percent_referenced_for_existing_keys);
|
|
|
|
non_existing_keys_stats.Add(percent_referenced_for_non_existing_keys);
|
|
|
|
cf_non_existing_keys_stats_map[cf_name].Add(
|
|
|
|
percent_referenced_for_non_existing_keys);
|
|
|
|
block_access_stats.Add(percent_accesses_for_existing_keys);
|
|
|
|
cf_block_access_info[cf_name].Add(percent_accesses_for_existing_keys);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fprintf(stdout,
|
|
|
|
"Histogram on the number of referenced keys existing in a block over "
|
|
|
|
"the total number of keys in a block: \n%s",
|
|
|
|
existing_keys_stats.ToString().c_str());
|
|
|
|
for (auto const& cf_stats : cf_existing_keys_stats_map) {
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(stdout, "Break down by column family %s: \n%s",
|
|
|
|
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
|
|
|
|
}
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(
|
|
|
|
stdout,
|
|
|
|
"Histogram on the number of referenced keys DO NOT exist in a block over "
|
|
|
|
"the total number of keys in a block: \n%s",
|
|
|
|
non_existing_keys_stats.ToString().c_str());
|
|
|
|
for (auto const& cf_stats : cf_non_existing_keys_stats_map) {
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(stdout, "Break down by column family %s: \n%s",
|
|
|
|
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
|
|
|
|
}
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(stdout,
|
|
|
|
"Histogram on the number of accesses on keys exist in a block over "
|
|
|
|
"the total number of accesses in a block: \n%s",
|
|
|
|
block_access_stats.ToString().c_str());
|
|
|
|
for (auto const& cf_stats : cf_block_access_info) {
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(stdout, "Break down by column family %s: \n%s",
|
|
|
|
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
|
|
|
|
}
|
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(
|
|
|
|
stdout,
|
|
|
|
"Histogram on the average number of accesses per key in a block: \n%s",
|
|
|
|
avg_naccesses_per_key_in_a_data_block.ToString().c_str());
|
|
|
|
for (auto const& cf_stats : cf_avg_naccesses_per_key_in_a_data_block) {
|
|
|
|
fprintf(stdout, "Break down by column family %s: \n%s",
|
|
|
|
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
|
|
|
|
}
|
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(stdout,
|
|
|
|
"Histogram on the standard deviation of the number of accesses per "
|
|
|
|
"key in a block: \n%s",
|
|
|
|
stdev_naccesses_per_key_in_a_data_block.ToString().c_str());
|
|
|
|
for (auto const& cf_stats : cf_stdev_naccesses_per_key_in_a_data_block) {
|
|
|
|
fprintf(stdout, "Break down by column family %s: \n%s",
|
|
|
|
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
|
|
|
|
uint64_t total_num_files = 0;
|
|
|
|
uint64_t total_num_blocks = 0;
|
|
|
|
uint64_t total_num_accesses = 0;
|
|
|
|
std::map<TraceType, uint64_t> bt_num_blocks_map;
|
|
|
|
std::map<TableReaderCaller, uint64_t> caller_num_access_map;
|
|
|
|
std::map<TableReaderCaller, std::map<TraceType, uint64_t>>
|
|
|
|
caller_bt_num_access_map;
|
|
|
|
std::map<TableReaderCaller, std::map<uint32_t, uint64_t>>
|
|
|
|
caller_level_num_access_map;
|
|
|
|
for (auto const& cf_aggregates : cf_aggregates_map_) {
|
|
|
|
// Stats per column family.
|
|
|
|
const std::string& cf_name = cf_aggregates.first;
|
|
|
|
uint64_t cf_num_files = 0;
|
|
|
|
uint64_t cf_num_blocks = 0;
|
|
|
|
std::map<TraceType, uint64_t> cf_bt_blocks;
|
|
|
|
uint64_t cf_num_accesses = 0;
|
|
|
|
std::map<TableReaderCaller, uint64_t> cf_caller_num_accesses_map;
|
|
|
|
std::map<TableReaderCaller, std::map<uint64_t, uint64_t>>
|
|
|
|
cf_caller_level_num_accesses_map;
|
|
|
|
std::map<TableReaderCaller, std::map<uint64_t, uint64_t>>
|
|
|
|
cf_caller_file_num_accesses_map;
|
|
|
|
std::map<TableReaderCaller, std::map<TraceType, uint64_t>>
|
|
|
|
cf_caller_bt_num_accesses_map;
|
|
|
|
total_num_files += cf_aggregates.second.fd_aggregates_map.size();
|
|
|
|
for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
|
|
|
|
// Stats per SST file.
|
|
|
|
const uint64_t fd = file_aggregates.first;
|
|
|
|
const uint32_t level = file_aggregates.second.level;
|
|
|
|
cf_num_files++;
|
|
|
|
for (auto const& block_type_aggregates :
|
|
|
|
file_aggregates.second.block_type_aggregates_map) {
|
|
|
|
// Stats per block type.
|
|
|
|
const TraceType type = block_type_aggregates.first;
|
|
|
|
cf_bt_blocks[type] +=
|
|
|
|
block_type_aggregates.second.block_access_info_map.size();
|
|
|
|
total_num_blocks +=
|
|
|
|
block_type_aggregates.second.block_access_info_map.size();
|
|
|
|
bt_num_blocks_map[type] +=
|
|
|
|
block_type_aggregates.second.block_access_info_map.size();
|
|
|
|
for (auto const& block_access_info :
|
|
|
|
block_type_aggregates.second.block_access_info_map) {
|
|
|
|
// Stats per block.
|
|
|
|
cf_num_blocks++;
|
|
|
|
for (auto const& stats :
|
|
|
|
block_access_info.second.caller_num_access_map) {
|
|
|
|
// Stats per caller.
|
|
|
|
const TableReaderCaller caller = stats.first;
|
|
|
|
const uint64_t num_accesses = stats.second;
|
|
|
|
// Overall stats.
|
|
|
|
total_num_accesses += num_accesses;
|
|
|
|
caller_num_access_map[caller] += num_accesses;
|
|
|
|
caller_bt_num_access_map[caller][type] += num_accesses;
|
|
|
|
caller_level_num_access_map[caller][level] += num_accesses;
|
|
|
|
// Column Family stats.
|
Block cache tracing: Fix minor bugs with downsampling and some benchmark results. (#5473)
Summary:
As the code changes for block cache tracing are almost complete, I did a benchmark to compare the performance when block cache tracing is enabled/disabled.
With 1% downsampling ratio, the performance overhead of block cache tracing is negligible. When we trace all block accesses, the throughput drops by 6 folds with 16 threads issuing random reads and all reads are served in block cache.
Setup:
RocksDB: version 6.2
Date: Mon Jun 17 17:11:13 2019
CPU: 24 * Intel Core Processor (Skylake)
CPUCache: 16384 KB
Keys: 20 bytes each
Values: 100 bytes each (100 bytes after compression)
Entries: 10000000
Prefix: 20 bytes
Keys per prefix: 0
RawSize: 1144.4 MB (estimated)
FileSize: 1144.4 MB (estimated)
Write rate: 0 bytes/second
Read rate: 0 ops/second
Compression: NoCompression
Compression sampling rate: 0
Memtablerep: skip_list
Perf Level: 1
I ran the readrandom workload for 1 minute. Detailed throughput results: (ops/second)
Sample rate 0: no block cache tracing.
Sample rate 1: trace all block accesses.
Sample rate 100: trace accesses 1% blocks.
1 thread | | | -- | -- | -- | --
Sample rate | 0 | 1 | 100
1 MB block cache size | 13,094 | 13,166 | 13,341
10 GB block cache size | 202,243 | 188,677 | 229,182
16 threads | | | -- | -- | -- | --
Sample rate | 0 | 1 | 100
1 MB block cache size | 208,761 | 178,700 | 201,872
10 GB block cache size | 2,645,996 | 426,295 | 2,587,605
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5473
Differential Revision: D15869479
Pulled By: HaoyuHuang
fbshipit-source-id: 7ae802abe84811281a6af8649f489887cd7c4618
5 years ago
|
|
|
cf_num_accesses += num_accesses;
|
|
|
|
cf_caller_num_accesses_map[caller] += num_accesses;
|
|
|
|
cf_caller_level_num_accesses_map[caller][level] += num_accesses;
|
|
|
|
cf_caller_file_num_accesses_map[caller][fd] += num_accesses;
|
|
|
|
cf_caller_bt_num_accesses_map[caller][type] += num_accesses;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Print stats.
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/3);
|
|
|
|
fprintf(stdout, "Statistics for column family %s:\n", cf_name.c_str());
|
|
|
|
fprintf(stdout,
|
|
|
|
" Number of files:%" PRIu64 " Number of blocks: %" PRIu64
|
|
|
|
" Number of accesses: %" PRIu64 "\n",
|
|
|
|
cf_num_files, cf_num_blocks, cf_num_accesses);
|
|
|
|
for (auto block_type : cf_bt_blocks) {
|
|
|
|
fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n",
|
|
|
|
block_type_to_string(block_type.first).c_str(), block_type.second,
|
|
|
|
percent(block_type.second, cf_num_blocks));
|
|
|
|
}
|
|
|
|
for (auto caller : cf_caller_num_accesses_map) {
|
|
|
|
const uint64_t naccesses = caller.second;
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
fprintf(stdout,
|
|
|
|
"Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n",
|
|
|
|
caller_to_string(caller.first).c_str(), naccesses,
|
|
|
|
percent(naccesses, cf_num_accesses));
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
|
|
|
|
caller_to_string(caller.first).c_str());
|
|
|
|
for (auto naccess_level :
|
|
|
|
cf_caller_level_num_accesses_map[caller.first]) {
|
|
|
|
fprintf(stdout,
|
|
|
|
"\t Level %" PRIu64 ": Number of accesses: %" PRIu64
|
|
|
|
" Percent: %.2f\n",
|
|
|
|
naccess_level.first, naccess_level.second,
|
|
|
|
percent(naccess_level.second, naccesses));
|
|
|
|
}
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses per file break down\n",
|
|
|
|
caller_to_string(caller.first).c_str());
|
|
|
|
for (auto naccess_file : cf_caller_file_num_accesses_map[caller.first]) {
|
|
|
|
fprintf(stdout,
|
|
|
|
"\t File %" PRIu64 ": Number of accesses: %" PRIu64
|
|
|
|
" Percent: %.2f\n",
|
|
|
|
naccess_file.first, naccess_file.second,
|
|
|
|
percent(naccess_file.second, naccesses));
|
|
|
|
}
|
|
|
|
fprintf(stdout,
|
|
|
|
"Caller %s: Number of accesses per block type break down\n",
|
|
|
|
caller_to_string(caller.first).c_str());
|
|
|
|
for (auto naccess_type : cf_caller_bt_num_accesses_map[caller.first]) {
|
|
|
|
fprintf(stdout,
|
|
|
|
"\t Block Type %s: Number of accesses: %" PRIu64
|
|
|
|
" Percent: %.2f\n",
|
|
|
|
block_type_to_string(naccess_type.first).c_str(),
|
|
|
|
naccess_type.second, percent(naccess_type.second, naccesses));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/3);
|
|
|
|
fprintf(stdout, "Overall statistics:\n");
|
|
|
|
fprintf(stdout,
|
|
|
|
"Number of files: %" PRIu64 " Number of blocks: %" PRIu64
|
|
|
|
" Number of accesses: %" PRIu64 "\n",
|
|
|
|
total_num_files, total_num_blocks, total_num_accesses);
|
|
|
|
for (auto block_type : bt_num_blocks_map) {
|
|
|
|
fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n",
|
|
|
|
block_type_to_string(block_type.first).c_str(), block_type.second,
|
|
|
|
percent(block_type.second, total_num_blocks));
|
|
|
|
}
|
|
|
|
for (auto caller : caller_num_access_map) {
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
print_break_lines(/*num_break_lines=*/1);
|
|
|
|
uint64_t naccesses = caller.second;
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n",
|
|
|
|
caller_to_string(caller.first).c_str(), naccesses,
|
|
|
|
percent(naccesses, total_num_accesses));
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
|
|
|
|
caller_to_string(caller.first).c_str());
|
|
|
|
for (auto naccess_level : caller_level_num_access_map[caller.first]) {
|
|
|
|
fprintf(stdout,
|
|
|
|
"\t Level %d: Number of accesses: %" PRIu64 " Percent: %.2f\n",
|
|
|
|
naccess_level.first, naccess_level.second,
|
|
|
|
percent(naccess_level.second, naccesses));
|
|
|
|
}
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses per block type break down\n",
|
|
|
|
caller_to_string(caller.first).c_str());
|
|
|
|
for (auto naccess_type : caller_bt_num_access_map[caller.first]) {
|
|
|
|
fprintf(stdout,
|
|
|
|
"\t Block Type %s: Number of accesses: %" PRIu64
|
|
|
|
" Percent: %.2f\n",
|
|
|
|
block_type_to_string(naccess_type.first).c_str(),
|
|
|
|
naccess_type.second, percent(naccess_type.second, naccesses));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
std::vector<CacheConfiguration> parse_cache_config_file(
|
|
|
|
const std::string& config_path) {
|
|
|
|
std::ifstream file(config_path);
|
|
|
|
if (!file.is_open()) {
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
std::vector<CacheConfiguration> configs;
|
|
|
|
std::string line;
|
|
|
|
while (getline(file, line)) {
|
|
|
|
CacheConfiguration cache_config;
|
|
|
|
std::stringstream ss(line);
|
|
|
|
std::vector<std::string> config_strs;
|
|
|
|
while (ss.good()) {
|
|
|
|
std::string substr;
|
|
|
|
getline(ss, substr, ',');
|
|
|
|
config_strs.push_back(substr);
|
|
|
|
}
|
|
|
|
// Sanity checks.
|
|
|
|
if (config_strs.size() < 3) {
|
|
|
|
fprintf(stderr, "Invalid cache simulator configuration %s\n",
|
|
|
|
line.c_str());
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
if (config_strs[0] != "lru") {
|
|
|
|
fprintf(stderr, "We only support LRU cache %s\n", line.c_str());
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
cache_config.cache_name = config_strs[0];
|
|
|
|
cache_config.num_shard_bits = ParseUint32(config_strs[1]);
|
|
|
|
for (uint32_t i = 2; i < config_strs.size(); i++) {
|
|
|
|
uint64_t capacity = ParseUint64(config_strs[i]);
|
|
|
|
if (capacity == 0) {
|
|
|
|
fprintf(stderr, "Invalid cache capacity %s, %s\n",
|
|
|
|
config_strs[i].c_str(), line.c_str());
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
cache_config.cache_capacities.push_back(capacity);
|
|
|
|
}
|
|
|
|
configs.push_back(cache_config);
|
|
|
|
}
|
|
|
|
file.close();
|
|
|
|
return configs;
|
|
|
|
}
|
|
|
|
|
|
|
|
int block_cache_trace_analyzer_tool(int argc, char** argv) {
|
|
|
|
ParseCommandLineFlags(&argc, &argv, true);
|
|
|
|
if (FLAGS_block_cache_trace_path.empty()) {
|
|
|
|
fprintf(stderr, "block cache trace path is empty\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
uint64_t warmup_seconds =
|
|
|
|
FLAGS_cache_sim_warmup_seconds > 0 ? FLAGS_cache_sim_warmup_seconds : 0;
|
|
|
|
uint32_t downsample_ratio = FLAGS_block_cache_trace_downsample_ratio > 0
|
|
|
|
? FLAGS_block_cache_trace_downsample_ratio
|
|
|
|
: 0;
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
std::vector<CacheConfiguration> cache_configs =
|
|
|
|
parse_cache_config_file(FLAGS_block_cache_sim_config_path);
|
|
|
|
std::unique_ptr<BlockCacheTraceSimulator> cache_simulator;
|
|
|
|
if (!cache_configs.empty()) {
|
|
|
|
cache_simulator.reset(new BlockCacheTraceSimulator(
|
|
|
|
warmup_seconds, downsample_ratio, cache_configs));
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
}
|
|
|
|
BlockCacheTraceAnalyzer analyzer(FLAGS_block_cache_trace_path,
|
|
|
|
FLAGS_output_miss_ratio_curve_path,
|
|
|
|
std::move(cache_simulator));
|
|
|
|
Status s = analyzer.Analyze();
|
|
|
|
if (!s.IsIncomplete()) {
|
|
|
|
// Read all traces.
|
|
|
|
fprintf(stderr, "Cannot process the trace %s\n", s.ToString().c_str());
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
analyzer.PrintStatsSummary();
|
|
|
|
if (FLAGS_print_access_count_stats) {
|
|
|
|
print_break_lines(/*num_break_lines=*/3);
|
|
|
|
analyzer.PrintAccessCountStats();
|
|
|
|
}
|
|
|
|
if (FLAGS_print_block_size_stats) {
|
|
|
|
print_break_lines(/*num_break_lines=*/3);
|
|
|
|
analyzer.PrintBlockSizeStats();
|
|
|
|
}
|
|
|
|
if (FLAGS_print_data_block_access_count_stats) {
|
|
|
|
print_break_lines(/*num_break_lines=*/3);
|
|
|
|
analyzer.PrintDataBlockAccessStats();
|
|
|
|
}
|
|
|
|
print_break_lines(/*num_break_lines=*/3);
|
|
|
|
analyzer.PrintMissRatioCurves();
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
Support computing miss ratio curves using sim_cache. (#5449)
Summary:
This PR adds a BlockCacheTraceSimulator that reports the miss ratios given different cache configurations. A cache configuration contains "cache_name,num_shard_bits,cache_capacities". For example, "lru, 1, 1K, 2K, 4M, 4G".
When we replay the trace, we also perform lookups and inserts on the simulated caches.
In the end, it reports the miss ratio for each tuple <cache_name, num_shard_bits, cache_capacity> in a output file.
This PR also adds a main source block_cache_trace_analyzer so that we can run the analyzer in command line.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5449
Test Plan:
Added tests for block_cache_trace_analyzer.
COMPILE_WITH_ASAN=1 make check -j32.
Differential Revision: D15797073
Pulled By: HaoyuHuang
fbshipit-source-id: aef0c5c2e7938f3e8b6a10d4a6a50e6928ecf408
5 years ago
|
|
|
|
|
|
|
#endif // GFLAGS
|
|
|
|
#endif // ROCKSDB_LITE
|