|
|
|
@ -27,6 +27,10 @@ DEFINE_string( |
|
|
|
|
"cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N. " |
|
|
|
|
"cache_name is lru. cache_capacity can be xK, xM or xG " |
|
|
|
|
"where x is a positive number."); |
|
|
|
|
DEFINE_int32(block_cache_trace_downsample_ratio, 1, |
|
|
|
|
"The trace collected accesses on one in every " |
|
|
|
|
"block_cache_trace_downsample_ratio blocks. We scale " |
|
|
|
|
"down the simulated cache size by this ratio."); |
|
|
|
|
DEFINE_bool(print_block_size_stats, false, |
|
|
|
|
"Print block size distribution and the distribution break down by " |
|
|
|
|
"block type and column family."); |
|
|
|
@ -91,17 +95,29 @@ void print_break_lines(uint32_t num_break_lines) { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
double percent(uint64_t numerator, uint64_t denomenator) { |
|
|
|
|
if (denomenator == 0) { |
|
|
|
|
return -1; |
|
|
|
|
} |
|
|
|
|
return static_cast<double>(numerator * 100.0 / denomenator); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
BlockCacheTraceSimulator::BlockCacheTraceSimulator( |
|
|
|
|
uint64_t warmup_seconds, |
|
|
|
|
uint64_t warmup_seconds, uint32_t downsample_ratio, |
|
|
|
|
const std::vector<CacheConfiguration>& cache_configurations) |
|
|
|
|
: warmup_seconds_(warmup_seconds), |
|
|
|
|
downsample_ratio_(downsample_ratio), |
|
|
|
|
cache_configurations_(cache_configurations) { |
|
|
|
|
for (auto const& config : cache_configurations_) { |
|
|
|
|
for (auto cache_capacity : config.cache_capacities) { |
|
|
|
|
sim_caches_.push_back( |
|
|
|
|
NewSimCache(NewLRUCache(cache_capacity, config.num_shard_bits), |
|
|
|
|
// Scale down the cache capacity since the trace contains accesses on
|
|
|
|
|
// 1/'downsample_ratio' blocks.
|
|
|
|
|
uint64_t simulate_cache_capacity = |
|
|
|
|
cache_capacity / downsample_ratio_; |
|
|
|
|
sim_caches_.push_back(NewSimCache( |
|
|
|
|
NewLRUCache(simulate_cache_capacity, config.num_shard_bits), |
|
|
|
|
/*real_cache=*/nullptr, config.num_shard_bits)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -285,11 +301,12 @@ void BlockCacheTraceAnalyzer::PrintAccessCountStats() const { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
fprintf(stdout, "Block access count stats: \n%s", |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"Block access count stats: The number of accesses per block.\n%s", |
|
|
|
|
access_stats.ToString().c_str()); |
|
|
|
|
for (auto const& bt_stats : bt_stats_map) { |
|
|
|
|
print_break_lines(/*num_break_lines=*/1); |
|
|
|
|
fprintf(stdout, "Block access count stats for block type %s: \n%s", |
|
|
|
|
fprintf(stdout, "Break down by block type %s: \n%s", |
|
|
|
|
block_type_to_string(bt_stats.first).c_str(), |
|
|
|
|
bt_stats.second.ToString().c_str()); |
|
|
|
|
} |
|
|
|
@ -298,7 +315,7 @@ void BlockCacheTraceAnalyzer::PrintAccessCountStats() const { |
|
|
|
|
for (auto const& bt_stats : cf_bt_stats.second) { |
|
|
|
|
print_break_lines(/*num_break_lines=*/1); |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"Block access count stats for column family %s and block type " |
|
|
|
|
"Break down by column family %s and block type " |
|
|
|
|
"%s: \n%s", |
|
|
|
|
cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(), |
|
|
|
|
bt_stats.second.ToString().c_str()); |
|
|
|
@ -313,6 +330,15 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { |
|
|
|
|
std::map<std::string, HistogramStat> cf_non_existing_keys_stats_map; |
|
|
|
|
HistogramStat block_access_stats; |
|
|
|
|
std::map<std::string, HistogramStat> cf_block_access_info; |
|
|
|
|
HistogramStat percent_referenced_bytes; |
|
|
|
|
std::map<std::string, HistogramStat> cf_percent_referenced_bytes; |
|
|
|
|
// Total number of accesses in a data block / number of keys in a data block.
|
|
|
|
|
HistogramStat avg_naccesses_per_key_in_a_data_block; |
|
|
|
|
std::map<std::string, HistogramStat> cf_avg_naccesses_per_key_in_a_data_block; |
|
|
|
|
// The standard deviation on the number of accesses of a key in a data block.
|
|
|
|
|
HistogramStat stdev_naccesses_per_key_in_a_data_block; |
|
|
|
|
std::map<std::string, HistogramStat> |
|
|
|
|
cf_stdev_naccesses_per_key_in_a_data_block; |
|
|
|
|
|
|
|
|
|
for (auto const& cf_aggregates : cf_aggregates_map_) { |
|
|
|
|
// Stats per column family.
|
|
|
|
@ -343,6 +369,20 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { |
|
|
|
|
block_access_info.second.num_referenced_key_exist_in_block / |
|
|
|
|
(double)block_access_info.second.num_accesses) * |
|
|
|
|
10000.0); |
|
|
|
|
|
|
|
|
|
HistogramStat hist_naccess_per_key; |
|
|
|
|
for (auto const& key_access : |
|
|
|
|
block_access_info.second.key_num_access_map) { |
|
|
|
|
hist_naccess_per_key.Add(key_access.second); |
|
|
|
|
} |
|
|
|
|
uint64_t avg_accesses = hist_naccess_per_key.Average(); |
|
|
|
|
uint64_t stdev_accesses = hist_naccess_per_key.StandardDeviation(); |
|
|
|
|
avg_naccesses_per_key_in_a_data_block.Add(avg_accesses); |
|
|
|
|
cf_avg_naccesses_per_key_in_a_data_block[cf_name].Add(avg_accesses); |
|
|
|
|
stdev_naccesses_per_key_in_a_data_block.Add(stdev_accesses); |
|
|
|
|
cf_stdev_naccesses_per_key_in_a_data_block[cf_name].Add( |
|
|
|
|
stdev_accesses); |
|
|
|
|
|
|
|
|
|
existing_keys_stats.Add(percent_referenced_for_existing_keys); |
|
|
|
|
cf_existing_keys_stats_map[cf_name].Add( |
|
|
|
|
percent_referenced_for_existing_keys); |
|
|
|
@ -356,7 +396,7 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"Histogram on percentage of referenced keys existing in a block over " |
|
|
|
|
"Histogram on the number of referenced keys existing in a block over " |
|
|
|
|
"the total number of keys in a block: \n%s", |
|
|
|
|
existing_keys_stats.ToString().c_str()); |
|
|
|
|
for (auto const& cf_stats : cf_existing_keys_stats_map) { |
|
|
|
@ -367,7 +407,7 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { |
|
|
|
|
print_break_lines(/*num_break_lines=*/1); |
|
|
|
|
fprintf( |
|
|
|
|
stdout, |
|
|
|
|
"Histogram on percentage of referenced keys DO NOT exist in a block over " |
|
|
|
|
"Histogram on the number of referenced keys DO NOT exist in a block over " |
|
|
|
|
"the total number of keys in a block: \n%s", |
|
|
|
|
non_existing_keys_stats.ToString().c_str()); |
|
|
|
|
for (auto const& cf_stats : cf_non_existing_keys_stats_map) { |
|
|
|
@ -377,7 +417,7 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { |
|
|
|
|
} |
|
|
|
|
print_break_lines(/*num_break_lines=*/1); |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"Histogram on percentage of accesses on keys exist in a block over " |
|
|
|
|
"Histogram on the number of accesses on keys exist in a block over " |
|
|
|
|
"the total number of accesses in a block: \n%s", |
|
|
|
|
block_access_stats.ToString().c_str()); |
|
|
|
|
for (auto const& cf_stats : cf_block_access_info) { |
|
|
|
@ -385,6 +425,24 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { |
|
|
|
|
fprintf(stdout, "Break down by column family %s: \n%s", |
|
|
|
|
cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); |
|
|
|
|
} |
|
|
|
|
print_break_lines(/*num_break_lines=*/1); |
|
|
|
|
fprintf( |
|
|
|
|
stdout, |
|
|
|
|
"Histogram on the average number of accesses per key in a block: \n%s", |
|
|
|
|
avg_naccesses_per_key_in_a_data_block.ToString().c_str()); |
|
|
|
|
for (auto const& cf_stats : cf_avg_naccesses_per_key_in_a_data_block) { |
|
|
|
|
fprintf(stdout, "Break down by column family %s: \n%s", |
|
|
|
|
cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); |
|
|
|
|
} |
|
|
|
|
print_break_lines(/*num_break_lines=*/1); |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"Histogram on the standard deviation of the number of accesses per " |
|
|
|
|
"key in a block: \n%s", |
|
|
|
|
stdev_naccesses_per_key_in_a_data_block.ToString().c_str()); |
|
|
|
|
for (auto const& cf_stats : cf_stdev_naccesses_per_key_in_a_data_block) { |
|
|
|
|
fprintf(stdout, "Break down by column family %s: \n%s", |
|
|
|
|
cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void BlockCacheTraceAnalyzer::PrintStatsSummary() const { |
|
|
|
@ -460,36 +518,45 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const { |
|
|
|
|
" Number of accesses: %" PRIu64 "\n", |
|
|
|
|
cf_num_files, cf_num_blocks, cf_num_accesses); |
|
|
|
|
for (auto block_type : cf_bt_blocks) { |
|
|
|
|
fprintf(stdout, "Number of %s blocks: %" PRIu64 "\n", |
|
|
|
|
block_type_to_string(block_type.first).c_str(), |
|
|
|
|
block_type.second); |
|
|
|
|
fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n", |
|
|
|
|
block_type_to_string(block_type.first).c_str(), block_type.second, |
|
|
|
|
percent(block_type.second, cf_num_blocks)); |
|
|
|
|
} |
|
|
|
|
for (auto caller : cf_caller_num_accesses_map) { |
|
|
|
|
const uint64_t naccesses = caller.second; |
|
|
|
|
print_break_lines(/*num_break_lines=*/1); |
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n", |
|
|
|
|
caller_to_string(caller.first).c_str(), caller.second); |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n", |
|
|
|
|
caller_to_string(caller.first).c_str(), naccesses, |
|
|
|
|
percent(naccesses, cf_num_accesses)); |
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses per level break down\n", |
|
|
|
|
caller_to_string(caller.first).c_str()); |
|
|
|
|
for (auto naccess_level : |
|
|
|
|
cf_caller_level_num_accesses_map[caller.first]) { |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"\t Level %" PRIu64 ": Number of accesses: %" PRIu64 "\n", |
|
|
|
|
naccess_level.first, naccess_level.second); |
|
|
|
|
"\t Level %" PRIu64 ": Number of accesses: %" PRIu64 |
|
|
|
|
" Percent: %.2f\n", |
|
|
|
|
naccess_level.first, naccess_level.second, |
|
|
|
|
percent(naccess_level.second, naccesses)); |
|
|
|
|
} |
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses per file break down\n", |
|
|
|
|
caller_to_string(caller.first).c_str()); |
|
|
|
|
for (auto naccess_file : cf_caller_file_num_accesses_map[caller.first]) { |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"\t File %" PRIu64 ": Number of accesses: %" PRIu64 "\n", |
|
|
|
|
naccess_file.first, naccess_file.second); |
|
|
|
|
"\t File %" PRIu64 ": Number of accesses: %" PRIu64 |
|
|
|
|
" Percent: %.2f\n", |
|
|
|
|
naccess_file.first, naccess_file.second, |
|
|
|
|
percent(naccess_file.second, naccesses)); |
|
|
|
|
} |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"Caller %s: Number of accesses per block type break down\n", |
|
|
|
|
caller_to_string(caller.first).c_str()); |
|
|
|
|
for (auto naccess_type : cf_caller_bt_num_accesses_map[caller.first]) { |
|
|
|
|
fprintf(stdout, "\t Block Type %s: Number of accesses: %" PRIu64 "\n", |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"\t Block Type %s: Number of accesses: %" PRIu64 |
|
|
|
|
" Percent: %.2f\n", |
|
|
|
|
block_type_to_string(naccess_type.first).c_str(), |
|
|
|
|
naccess_type.second); |
|
|
|
|
naccess_type.second, percent(naccess_type.second, naccesses)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -500,25 +567,32 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const { |
|
|
|
|
" Number of accesses: %" PRIu64 "\n", |
|
|
|
|
total_num_files, total_num_blocks, total_num_accesses); |
|
|
|
|
for (auto block_type : bt_num_blocks_map) { |
|
|
|
|
fprintf(stdout, "Number of %s blocks: %" PRIu64 "\n", |
|
|
|
|
block_type_to_string(block_type.first).c_str(), block_type.second); |
|
|
|
|
fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n", |
|
|
|
|
block_type_to_string(block_type.first).c_str(), block_type.second, |
|
|
|
|
percent(block_type.second, total_num_blocks)); |
|
|
|
|
} |
|
|
|
|
for (auto caller : caller_num_access_map) { |
|
|
|
|
print_break_lines(/*num_break_lines=*/1); |
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n", |
|
|
|
|
caller_to_string(caller.first).c_str(), caller.second); |
|
|
|
|
uint64_t naccesses = caller.second; |
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n", |
|
|
|
|
caller_to_string(caller.first).c_str(), naccesses, |
|
|
|
|
percent(naccesses, total_num_accesses)); |
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses per level break down\n", |
|
|
|
|
caller_to_string(caller.first).c_str()); |
|
|
|
|
for (auto naccess_level : caller_level_num_access_map[caller.first]) { |
|
|
|
|
fprintf(stdout, "\t Level %d: Number of accesses: %" PRIu64 "\n", |
|
|
|
|
naccess_level.first, naccess_level.second); |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"\t Level %d: Number of accesses: %" PRIu64 " Percent: %.2f\n", |
|
|
|
|
naccess_level.first, naccess_level.second, |
|
|
|
|
percent(naccess_level.second, naccesses)); |
|
|
|
|
} |
|
|
|
|
fprintf(stdout, "Caller %s: Number of accesses per block type break down\n", |
|
|
|
|
caller_to_string(caller.first).c_str()); |
|
|
|
|
for (auto naccess_type : caller_bt_num_access_map[caller.first]) { |
|
|
|
|
fprintf(stdout, "\t Block Type %s: Number of accesses: %" PRIu64 "\n", |
|
|
|
|
fprintf(stdout, |
|
|
|
|
"\t Block Type %s: Number of accesses: %" PRIu64 |
|
|
|
|
" Percent: %.2f\n", |
|
|
|
|
block_type_to_string(naccess_type.first).c_str(), |
|
|
|
|
naccess_type.second); |
|
|
|
|
naccess_type.second, percent(naccess_type.second, naccesses)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -575,12 +649,15 @@ int block_cache_trace_analyzer_tool(int argc, char** argv) { |
|
|
|
|
} |
|
|
|
|
uint64_t warmup_seconds = |
|
|
|
|
FLAGS_cache_sim_warmup_seconds > 0 ? FLAGS_cache_sim_warmup_seconds : 0; |
|
|
|
|
uint32_t downsample_ratio = FLAGS_block_cache_trace_downsample_ratio > 0 |
|
|
|
|
? FLAGS_block_cache_trace_downsample_ratio |
|
|
|
|
: 0; |
|
|
|
|
std::vector<CacheConfiguration> cache_configs = |
|
|
|
|
parse_cache_config_file(FLAGS_block_cache_sim_config_path); |
|
|
|
|
std::unique_ptr<BlockCacheTraceSimulator> cache_simulator; |
|
|
|
|
if (!cache_configs.empty()) { |
|
|
|
|
cache_simulator.reset( |
|
|
|
|
new BlockCacheTraceSimulator(warmup_seconds, cache_configs)); |
|
|
|
|
cache_simulator.reset(new BlockCacheTraceSimulator( |
|
|
|
|
warmup_seconds, downsample_ratio, cache_configs)); |
|
|
|
|
} |
|
|
|
|
BlockCacheTraceAnalyzer analyzer(FLAGS_block_cache_trace_path, |
|
|
|
|
FLAGS_output_miss_ratio_curve_path, |
|
|
|
|