First commit for block cache trace analyzer (#5425)

Summary:
This PR contains the first commit for block cache trace analyzer. It reads a block cache trace file and prints statistics of the traces.

We will extend this class to provide more functionalities.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5425

Differential Revision: D15709580

Pulled By: HaoyuHuang

fbshipit-source-id: 2f43bd2311f460ab569880819d95eeae217c20bb
main
haoyuhuang 5 years ago committed by Facebook Github Bot
parent 58c4aee42e
commit 9bbccda01e
  1. 2
      CMakeLists.txt
  2. 4
      Makefile
  3. 2
      src.mk
  4. 408
      tools/block_cache_trace_analyzer.cc
  5. 131
      tools/block_cache_trace_analyzer.h
  6. 229
      tools/block_cache_trace_analyzer_test.cc
  7. 3
      trace_replay/block_cache_tracer.cc
  8. 2
      trace_replay/block_cache_tracer.h

@ -621,6 +621,7 @@ set(SOURCES
test_util/sync_point_impl.cc
test_util/testutil.cc
test_util/transaction_test_util.cc
tools/block_cache_trace_analyzer.cc
tools/db_bench_tool.cc
tools/dump/db_dump_tool.cc
tools/ldb_cmd.cc
@ -966,6 +967,7 @@ if(WITH_TESTS)
table/merger_test.cc
table/sst_file_reader_test.cc
table/table_test.cc
tools/block_cache_trace_analyzer_test.cc
tools/ldb_cmd_test.cc
tools/reduce_levels_test.cc
tools/sst_dump_test.cc

@ -562,6 +562,7 @@ TESTS = \
sst_file_reader_test \
db_secondary_test \
block_cache_tracer_test \
block_cache_trace_analyzer_test \
PARALLEL_TEST = \
backupable_db_test \
@ -1592,6 +1593,9 @@ db_secondary_test: db/db_impl/db_secondary_test.o db/db_test_util.o $(LIBOBJECTS
block_cache_tracer_test: trace_replay/block_cache_tracer_test.o trace_replay/block_cache_tracer.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
block_cache_trace_analyzer_test: tools/block_cache_trace_analyzer_test.o tools/block_cache_trace_analyzer.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
#-------------------------------------------------
# make install related stuff
INSTALL_PATH ?= /usr/local

@ -240,6 +240,7 @@ TOOL_LIB_SOURCES = \
utilities/blob_db/blob_dump_tool.cc \
ANALYZER_LIB_SOURCES = \
tools/block_cache_trace_analyzer.cc \
tools/trace_analyzer_tool.cc \
MOCK_LIB_SOURCES = \
@ -365,6 +366,7 @@ MAIN_SOURCES = \
table/table_reader_bench.cc \
table/table_test.cc \
third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc \
tools/block_cache_trace_analyzer_test.cc \
tools/db_bench.cc \
tools/db_bench_tool_test.cc \
tools/db_sanity_test.cc \

@ -0,0 +1,408 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "tools/block_cache_trace_analyzer.h"
#include <cinttypes>
#include <set>
#include "monitoring/histogram.h"
namespace rocksdb {
namespace {
std::string block_type_to_string(TraceType type) {
switch (type) {
case kBlockTraceFilterBlock:
return "Filter";
case kBlockTraceDataBlock:
return "Data";
case kBlockTraceIndexBlock:
return "Index";
case kBlockTraceRangeDeletionBlock:
return "RangeDeletion";
case kBlockTraceUncompressionDictBlock:
return "UncompressionDict";
default:
break;
}
// This cannot happen.
return "InvalidType";
}
std::string caller_to_string(BlockCacheLookupCaller caller) {
switch (caller) {
case kUserGet:
return "Get";
case kUserMGet:
return "MultiGet";
case kUserIterator:
return "Iterator";
case kPrefetch:
return "Prefetch";
case kCompaction:
return "Compaction";
default:
break;
}
// This cannot happen.
return "InvalidCaller";
}
} // namespace
BlockCacheTraceAnalyzer::BlockCacheTraceAnalyzer(
const std::string& trace_file_path)
: trace_file_path_(trace_file_path) {
env_ = rocksdb::Env::Default();
}
void BlockCacheTraceAnalyzer::RecordAccess(
const BlockCacheTraceRecord& access) {
ColumnFamilyAccessInfoAggregate& cf_aggr = cf_aggregates_map_[access.cf_name];
SSTFileAccessInfoAggregate& file_aggr =
cf_aggr.fd_aggregates_map[access.sst_fd_number];
file_aggr.level = access.level;
BlockTypeAccessInfoAggregate& block_type_aggr =
file_aggr.block_type_aggregates_map[access.block_type];
BlockAccessInfo& block_access_info =
block_type_aggr.block_access_info_map[access.block_key];
block_access_info.AddAccess(access);
}
Status BlockCacheTraceAnalyzer::Analyze() {
std::unique_ptr<TraceReader> trace_reader;
Status s =
NewFileTraceReader(env_, EnvOptions(), trace_file_path_, &trace_reader);
if (!s.ok()) {
return s;
}
BlockCacheTraceReader reader(std::move(trace_reader));
s = reader.ReadHeader(&header_);
if (!s.ok()) {
return s;
}
while (s.ok()) {
BlockCacheTraceRecord access;
s = reader.ReadAccess(&access);
if (!s.ok()) {
return s;
}
RecordAccess(access);
}
return Status::OK();
}
void BlockCacheTraceAnalyzer::PrintBlockSizeStats() const {
HistogramStat bs_stats;
std::map<TraceType, HistogramStat> bt_stats_map;
std::map<std::string, std::map<TraceType, HistogramStat>> cf_bt_stats_map;
for (auto const& cf_aggregates : cf_aggregates_map_) {
// Stats per column family.
const std::string& cf_name = cf_aggregates.first;
for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
// Stats per SST file.
for (auto const& block_type_aggregates :
file_aggregates.second.block_type_aggregates_map) {
// Stats per block type.
const TraceType type = block_type_aggregates.first;
for (auto const& block_access_info :
block_type_aggregates.second.block_access_info_map) {
// Stats per block.
bs_stats.Add(block_access_info.second.block_size);
bt_stats_map[type].Add(block_access_info.second.block_size);
cf_bt_stats_map[cf_name][type].Add(
block_access_info.second.block_size);
}
}
}
}
fprintf(stdout, "Block size stats: \n%s", bs_stats.ToString().c_str());
for (auto const& bt_stats : bt_stats_map) {
fprintf(stdout, "Block size stats for block type %s: \n%s",
block_type_to_string(bt_stats.first).c_str(),
bt_stats.second.ToString().c_str());
}
for (auto const& cf_bt_stats : cf_bt_stats_map) {
const std::string& cf_name = cf_bt_stats.first;
for (auto const& bt_stats : cf_bt_stats.second) {
fprintf(stdout,
"Block size stats for column family %s and block type %s: \n%s",
cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(),
bt_stats.second.ToString().c_str());
}
}
}
void BlockCacheTraceAnalyzer::PrintAccessCountStats() const {
HistogramStat access_stats;
std::map<TraceType, HistogramStat> bt_stats_map;
std::map<std::string, std::map<TraceType, HistogramStat>> cf_bt_stats_map;
for (auto const& cf_aggregates : cf_aggregates_map_) {
// Stats per column family.
const std::string& cf_name = cf_aggregates.first;
for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
// Stats per SST file.
for (auto const& block_type_aggregates :
file_aggregates.second.block_type_aggregates_map) {
// Stats per block type.
const TraceType type = block_type_aggregates.first;
for (auto const& block_access_info :
block_type_aggregates.second.block_access_info_map) {
// Stats per block.
access_stats.Add(block_access_info.second.num_accesses);
bt_stats_map[type].Add(block_access_info.second.num_accesses);
cf_bt_stats_map[cf_name][type].Add(
block_access_info.second.num_accesses);
}
}
}
}
fprintf(stdout, "Block access count stats: \n%s",
access_stats.ToString().c_str());
for (auto const& bt_stats : bt_stats_map) {
fprintf(stdout, "Block access count stats for block type %s: \n%s",
block_type_to_string(bt_stats.first).c_str(),
bt_stats.second.ToString().c_str());
}
for (auto const& cf_bt_stats : cf_bt_stats_map) {
const std::string& cf_name = cf_bt_stats.first;
for (auto const& bt_stats : cf_bt_stats.second) {
fprintf(stdout,
"Block access count stats for column family %s and block type "
"%s: \n%s",
cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(),
bt_stats.second.ToString().c_str());
}
}
}
void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
HistogramStat existing_keys_stats;
std::map<std::string, HistogramStat> cf_existing_keys_stats_map;
HistogramStat non_existing_keys_stats;
std::map<std::string, HistogramStat> cf_non_existing_keys_stats_map;
HistogramStat block_access_stats;
std::map<std::string, HistogramStat> cf_block_access_info;
for (auto const& cf_aggregates : cf_aggregates_map_) {
// Stats per column family.
const std::string& cf_name = cf_aggregates.first;
for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
// Stats per SST file.
for (auto const& block_type_aggregates :
file_aggregates.second.block_type_aggregates_map) {
// Stats per block type.
for (auto const& block_access_info :
block_type_aggregates.second.block_access_info_map) {
// Stats per block.
if (block_access_info.second.num_keys == 0) {
continue;
}
// Use four decimal points.
uint64_t percent_referenced_for_existing_keys = (uint64_t)(
((double)block_access_info.second.key_num_access_map.size() /
(double)block_access_info.second.num_keys) *
10000.0);
uint64_t percent_referenced_for_non_existing_keys =
(uint64_t)(((double)block_access_info.second
.non_exist_key_num_access_map.size() /
(double)block_access_info.second.num_keys) *
10000.0);
uint64_t percent_accesses_for_existing_keys = (uint64_t)(
((double)
block_access_info.second.num_referenced_key_exist_in_block /
(double)block_access_info.second.num_accesses) *
10000.0);
existing_keys_stats.Add(percent_referenced_for_existing_keys);
cf_existing_keys_stats_map[cf_name].Add(
percent_referenced_for_existing_keys);
non_existing_keys_stats.Add(percent_referenced_for_non_existing_keys);
cf_non_existing_keys_stats_map[cf_name].Add(
percent_referenced_for_non_existing_keys);
block_access_stats.Add(percent_accesses_for_existing_keys);
cf_block_access_info[cf_name].Add(percent_accesses_for_existing_keys);
}
}
}
}
fprintf(stdout,
"Histogram on percentage of referenced keys existing in a block over "
"the total number of keys in a block: \n%s",
existing_keys_stats.ToString().c_str());
for (auto const& cf_stats : cf_existing_keys_stats_map) {
fprintf(stdout, "Break down by column family %s: \n%s",
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
}
fprintf(
stdout,
"Histogram on percentage of referenced keys DO NOT exist in a block over "
"the total number of keys in a block: \n%s",
non_existing_keys_stats.ToString().c_str());
for (auto const& cf_stats : cf_non_existing_keys_stats_map) {
fprintf(stdout, "Break down by column family %s: \n%s",
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
}
fprintf(stdout,
"Histogram on percentage of accesses on keys exist in a block over "
"the total number of accesses in a block: \n%s",
block_access_stats.ToString().c_str());
for (auto const& cf_stats : cf_block_access_info) {
fprintf(stdout, "Break down by column family %s: \n%s",
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
}
}
void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
uint64_t total_num_files = 0;
uint64_t total_num_blocks = 0;
uint64_t total_num_accesses = 0;
std::map<TraceType, uint64_t> bt_num_blocks_map;
std::map<BlockCacheLookupCaller, uint64_t> caller_num_access_map;
std::map<BlockCacheLookupCaller, std::map<TraceType, uint64_t>>
caller_bt_num_access_map;
std::map<BlockCacheLookupCaller, std::map<uint32_t, uint64_t>>
caller_level_num_access_map;
for (auto const& cf_aggregates : cf_aggregates_map_) {
// Stats per column family.
const std::string& cf_name = cf_aggregates.first;
uint64_t cf_num_files = 0;
uint64_t cf_num_blocks = 0;
std::map<TraceType, uint64_t> cf_bt_blocks;
uint64_t cf_num_accesses = 0;
std::map<BlockCacheLookupCaller, uint64_t> cf_caller_num_accesses_map;
std::map<BlockCacheLookupCaller, std::map<uint64_t, uint64_t>>
cf_caller_level_num_accesses_map;
std::map<BlockCacheLookupCaller, std::map<uint64_t, uint64_t>>
cf_caller_file_num_accesses_map;
std::map<BlockCacheLookupCaller, std::map<TraceType, uint64_t>>
cf_caller_bt_num_accesses_map;
total_num_files += cf_aggregates.second.fd_aggregates_map.size();
for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
// Stats per SST file.
const uint64_t fd = file_aggregates.first;
const uint32_t level = file_aggregates.second.level;
cf_num_files++;
for (auto const& block_type_aggregates :
file_aggregates.second.block_type_aggregates_map) {
// Stats per block type.
const TraceType type = block_type_aggregates.first;
cf_bt_blocks[type] +=
block_type_aggregates.second.block_access_info_map.size();
total_num_blocks +=
block_type_aggregates.second.block_access_info_map.size();
bt_num_blocks_map[type] +=
block_type_aggregates.second.block_access_info_map.size();
for (auto const& block_access_info :
block_type_aggregates.second.block_access_info_map) {
// Stats per block.
cf_num_blocks++;
for (auto const& stats :
block_access_info.second.caller_num_access_map) {
// Stats per caller.
const BlockCacheLookupCaller caller = stats.first;
const uint64_t num_accesses = stats.second;
// Overall stats.
total_num_accesses += num_accesses;
caller_num_access_map[caller] += num_accesses;
caller_bt_num_access_map[caller][type] += num_accesses;
caller_level_num_access_map[caller][level] += num_accesses;
// Column Family stats.
cf_num_accesses++;
cf_caller_num_accesses_map[caller] += num_accesses;
cf_caller_level_num_accesses_map[caller][level] += num_accesses;
cf_caller_file_num_accesses_map[caller][fd] += num_accesses;
cf_caller_bt_num_accesses_map[caller][type] += num_accesses;
}
}
}
}
// Print stats.
fprintf(
stdout,
"***************************************************************\n");
fprintf(
stdout,
"***************************************************************\n");
fprintf(
stdout,
"***************************************************************\n");
fprintf(stdout, "Statistics for column family %s:\n", cf_name.c_str());
fprintf(stdout,
"Number of files:%" PRIu64 "Number of blocks: %" PRIu64
"Number of accesses: %" PRIu64 "\n",
cf_num_files, cf_num_blocks, cf_num_accesses);
for (auto block_type : cf_bt_blocks) {
fprintf(stdout, "Number of %s blocks: %" PRIu64 "\n",
block_type_to_string(block_type.first).c_str(),
block_type.second);
}
for (auto caller : cf_caller_num_accesses_map) {
fprintf(
stdout,
"***************************************************************\n");
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n",
caller_to_string(caller.first).c_str(), caller.second);
fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
caller_to_string(caller.first).c_str());
for (auto naccess_level :
cf_caller_level_num_accesses_map[caller.first]) {
fprintf(stdout,
"\t Level %" PRIu64 ": Number of accesses: %" PRIu64 "\n",
naccess_level.first, naccess_level.second);
}
fprintf(stdout, "Caller %s: Number of accesses per file break down\n",
caller_to_string(caller.first).c_str());
for (auto naccess_file : cf_caller_file_num_accesses_map[caller.first]) {
fprintf(stdout,
"\t File %" PRIu64 ": Number of accesses: %" PRIu64 "\n",
naccess_file.first, naccess_file.second);
}
fprintf(stdout,
"Caller %s: Number of accesses per block type break down\n",
caller_to_string(caller.first).c_str());
for (auto naccess_type : cf_caller_bt_num_accesses_map[caller.first]) {
fprintf(stdout, "\t Block Type %s: Number of accesses: %" PRIu64 "\n",
block_type_to_string(naccess_type.first).c_str(),
naccess_type.second);
}
}
}
fprintf(stdout,
"***************************************************************\n");
fprintf(stdout,
"***************************************************************\n");
fprintf(stdout,
"***************************************************************\n");
fprintf(stdout, "Overall statistics:\n");
fprintf(stdout,
"Number of files: %" PRIu64 " Number of blocks: %" PRIu64
" Number of accesses: %" PRIu64 "\n",
total_num_files, total_num_blocks, total_num_accesses);
for (auto block_type : bt_num_blocks_map) {
fprintf(stdout, "Number of %s blocks: %" PRIu64 "\n",
block_type_to_string(block_type.first).c_str(), block_type.second);
}
for (auto caller : caller_num_access_map) {
fprintf(
stdout,
"***************************************************************\n");
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n",
caller_to_string(caller.first).c_str(), caller.second);
fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
caller_to_string(caller.first).c_str());
for (auto naccess_level : caller_level_num_access_map[caller.first]) {
fprintf(stdout, "\t Level %d: Number of accesses: %" PRIu64 "\n",
naccess_level.first, naccess_level.second);
}
fprintf(stdout, "Caller %s: Number of accesses per block type break down\n",
caller_to_string(caller.first).c_str());
for (auto naccess_type : caller_bt_num_access_map[caller.first]) {
fprintf(stdout, "\t Block Type %s: Number of accesses: %" PRIu64 "\n",
block_type_to_string(naccess_type.first).c_str(),
naccess_type.second);
}
}
}
} // namespace rocksdb

@ -0,0 +1,131 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <map>
#include <vector>
#include "rocksdb/env.h"
#include "trace_replay/block_cache_tracer.h"
namespace rocksdb {
// Statistics of a block.
struct BlockAccessInfo {
uint64_t num_accesses = 0;
uint64_t block_size = 0;
uint64_t first_access_time = 0;
uint64_t last_access_time = 0;
uint64_t num_keys = 0;
std::map<std::string, uint64_t>
key_num_access_map; // for keys exist in this block.
std::map<std::string, uint64_t>
non_exist_key_num_access_map; // for keys do not exist in this block.
uint64_t num_referenced_key_exist_in_block = 0;
std::map<BlockCacheLookupCaller, uint64_t> caller_num_access_map;
void AddAccess(const BlockCacheTraceRecord& access) {
if (first_access_time == 0) {
first_access_time = access.access_timestamp;
}
last_access_time = access.access_timestamp;
block_size = access.block_size;
caller_num_access_map[access.caller]++;
num_accesses++;
if (ShouldTraceReferencedKey(access)) {
num_keys = access.num_keys_in_block;
if (access.is_referenced_key_exist_in_block == Boolean::kTrue) {
key_num_access_map[access.referenced_key]++;
num_referenced_key_exist_in_block++;
} else {
non_exist_key_num_access_map[access.referenced_key]++;
}
}
}
};
// Aggregates stats of a block given a block type.
struct BlockTypeAccessInfoAggregate {
std::map<std::string, BlockAccessInfo> block_access_info_map;
};
// Aggregates BlockTypeAggregate given a SST file.
struct SSTFileAccessInfoAggregate {
uint32_t level;
std::map<TraceType, BlockTypeAccessInfoAggregate> block_type_aggregates_map;
};
// Aggregates SSTFileAggregate given a column family.
struct ColumnFamilyAccessInfoAggregate {
std::map<uint64_t, SSTFileAccessInfoAggregate> fd_aggregates_map;
};
class BlockCacheTraceAnalyzer {
public:
BlockCacheTraceAnalyzer(const std::string& trace_file_path);
~BlockCacheTraceAnalyzer() = default;
// No copy and move.
BlockCacheTraceAnalyzer(const BlockCacheTraceAnalyzer&) = delete;
BlockCacheTraceAnalyzer& operator=(const BlockCacheTraceAnalyzer&) = delete;
BlockCacheTraceAnalyzer(BlockCacheTraceAnalyzer&&) = delete;
BlockCacheTraceAnalyzer& operator=(BlockCacheTraceAnalyzer&&) = delete;
// Read all access records in the given trace_file, maintains the stats of
// a block, and aggregates the information by block type, sst file, and column
// family. Subsequently, the caller may call Print* functions to print
// statistics.
Status Analyze();
// Print a summary of statistics of the trace, e.g.,
// Number of files: 2 Number of blocks: 50 Number of accesses: 50
// Number of Index blocks: 10
// Number of Filter blocks: 10
// Number of Data blocks: 10
// Number of UncompressionDict blocks: 10
// Number of RangeDeletion blocks: 10
// ***************************************************************
// Caller Get: Number of accesses 10
// Caller Get: Number of accesses per level break down
// Level 0: Number of accesses: 10
// Caller Get: Number of accesses per block type break down
// Block Type Index: Number of accesses: 2
// Block Type Filter: Number of accesses: 2
// Block Type Data: Number of accesses: 2
// Block Type UncompressionDict: Number of accesses: 2
// Block Type RangeDeletion: Number of accesses: 2
void PrintStatsSummary() const;
// Print block size distribution and the distribution break down by block type
// and column family.
void PrintBlockSizeStats() const;
// Print access count distribution and the distribution break down by block
// type and column family.
void PrintAccessCountStats() const;
// Print data block accesses by user Get and Multi-Get.
// It prints out 1) A histogram on the percentage of keys accessed in a data
// block break down by if a referenced key exists in the data block andthe
// histogram break down by column family. 2) A histogram on the percentage of
// accesses on keys exist in a data block and its break down by column family.
void PrintDataBlockAccessStats() const;
const std::map<std::string, ColumnFamilyAccessInfoAggregate>&
TEST_cf_aggregates_map() const {
return cf_aggregates_map_;
}
private:
void RecordAccess(const BlockCacheTraceRecord& access);
rocksdb::Env* env_;
std::string trace_file_path_;
BlockCacheTraceHeader header_;
std::map<std::string, ColumnFamilyAccessInfoAggregate> cf_aggregates_map_;
};
} // namespace rocksdb

@ -0,0 +1,229 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <map>
#include <vector>
#include "rocksdb/env.h"
#include "rocksdb/status.h"
#include "rocksdb/trace_reader_writer.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "tools/block_cache_trace_analyzer.h"
#include "trace_replay/block_cache_tracer.h"
namespace rocksdb {
namespace {
const uint64_t kBlockSize = 1024;
const std::string kBlockKeyPrefix = "test-block-";
const uint32_t kCFId = 0;
const uint32_t kLevel = 1;
const uint64_t kSSTStoringEvenKeys = 100;
const uint64_t kSSTStoringOddKeys = 101;
const std::string kRefKeyPrefix = "test-get-";
const uint64_t kNumKeysInBlock = 1024;
} // namespace
class BlockCacheTracerTest : public testing::Test {
public:
BlockCacheTracerTest() {
test_path_ = test::PerThreadDBPath("block_cache_tracer_test");
env_ = rocksdb::Env::Default();
EXPECT_OK(env_->CreateDir(test_path_));
trace_file_path_ = test_path_ + "/block_cache_trace";
}
~BlockCacheTracerTest() override {
if (getenv("KEEP_DB")) {
printf("The trace file is still at %s\n", trace_file_path_.c_str());
return;
}
EXPECT_OK(env_->DeleteFile(trace_file_path_));
EXPECT_OK(env_->DeleteDir(test_path_));
}
BlockCacheLookupCaller GetCaller(uint32_t key_id) {
uint32_t n = key_id % 5;
switch (n) {
case 0:
return BlockCacheLookupCaller::kPrefetch;
case 1:
return BlockCacheLookupCaller::kCompaction;
case 2:
return BlockCacheLookupCaller::kUserGet;
case 3:
return BlockCacheLookupCaller::kUserMGet;
case 4:
return BlockCacheLookupCaller::kUserIterator;
}
// This cannot happend.
assert(false);
return BlockCacheLookupCaller::kUserGet;
}
void WriteBlockAccess(BlockCacheTraceWriter* writer, uint32_t from_key_id,
TraceType block_type, uint32_t nblocks) {
assert(writer);
for (uint32_t i = 0; i < nblocks; i++) {
uint32_t key_id = from_key_id + i;
BlockCacheTraceRecord record;
record.block_type = block_type;
record.block_size = kBlockSize + key_id;
record.block_key = kBlockKeyPrefix + std::to_string(key_id);
record.access_timestamp = env_->NowMicros();
record.cf_id = kCFId;
record.cf_name = kDefaultColumnFamilyName;
record.caller = GetCaller(key_id);
record.level = kLevel;
if (key_id % 2 == 0) {
record.sst_fd_number = kSSTStoringEvenKeys;
} else {
record.sst_fd_number = kSSTStoringOddKeys;
}
record.is_cache_hit = Boolean::kFalse;
record.no_insert = Boolean::kFalse;
// Provide these fields for all block types.
// The writer should only write these fields for data blocks and the
// caller is either GET or MGET.
record.referenced_key = kRefKeyPrefix + std::to_string(key_id);
record.is_referenced_key_exist_in_block = Boolean::kTrue;
record.num_keys_in_block = kNumKeysInBlock;
ASSERT_OK(writer->WriteBlockAccess(record));
}
}
void AssertBlockAccessInfo(
uint32_t key_id, TraceType type,
const std::map<std::string, BlockAccessInfo>& block_access_info_map) {
auto key_id_str = kBlockKeyPrefix + std::to_string(key_id);
ASSERT_TRUE(block_access_info_map.find(key_id_str) !=
block_access_info_map.end());
auto& block_access_info = block_access_info_map.find(key_id_str)->second;
ASSERT_EQ(1, block_access_info.num_accesses);
ASSERT_EQ(kBlockSize + key_id, block_access_info.block_size);
ASSERT_GT(block_access_info.first_access_time, 0);
ASSERT_GT(block_access_info.last_access_time, 0);
ASSERT_EQ(1, block_access_info.caller_num_access_map.size());
BlockCacheLookupCaller expected_caller = GetCaller(key_id);
ASSERT_TRUE(block_access_info.caller_num_access_map.find(expected_caller) !=
block_access_info.caller_num_access_map.end());
ASSERT_EQ(
1,
block_access_info.caller_num_access_map.find(expected_caller)->second);
if ((expected_caller == BlockCacheLookupCaller::kUserGet ||
expected_caller == BlockCacheLookupCaller::kUserMGet) &&
type == TraceType::kBlockTraceDataBlock) {
ASSERT_EQ(kNumKeysInBlock, block_access_info.num_keys);
ASSERT_EQ(1, block_access_info.key_num_access_map.size());
ASSERT_EQ(0, block_access_info.non_exist_key_num_access_map.size());
ASSERT_EQ(1, block_access_info.num_referenced_key_exist_in_block);
}
}
Env* env_;
EnvOptions env_options_;
std::string trace_file_path_;
std::string test_path_;
};
TEST_F(BlockCacheTracerTest, MixedBlocks) {
{
// Generate a trace file containing a mix of blocks.
// It contains two SST files with 25 blocks of odd numbered block_key in
// kSSTStoringOddKeys and 25 blocks of even numbered blocks_key in
// kSSTStoringEvenKeys.
TraceOptions trace_opt;
std::unique_ptr<TraceWriter> trace_writer;
ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
&trace_writer));
BlockCacheTraceWriter writer(env_, trace_opt, std::move(trace_writer));
ASSERT_OK(writer.WriteHeader());
// Write blocks of different types.
WriteBlockAccess(&writer, 0, TraceType::kBlockTraceUncompressionDictBlock,
10);
WriteBlockAccess(&writer, 10, TraceType::kBlockTraceDataBlock, 10);
WriteBlockAccess(&writer, 20, TraceType::kBlockTraceFilterBlock, 10);
WriteBlockAccess(&writer, 30, TraceType::kBlockTraceIndexBlock, 10);
WriteBlockAccess(&writer, 40, TraceType::kBlockTraceRangeDeletionBlock, 10);
ASSERT_OK(env_->FileExists(trace_file_path_));
}
{
// Verify trace file is generated correctly.
std::unique_ptr<TraceReader> trace_reader;
ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_,
&trace_reader));
BlockCacheTraceReader reader(std::move(trace_reader));
BlockCacheTraceHeader header;
ASSERT_OK(reader.ReadHeader(&header));
ASSERT_EQ(kMajorVersion, header.rocksdb_major_version);
ASSERT_EQ(kMinorVersion, header.rocksdb_minor_version);
// Read blocks.
BlockCacheTraceAnalyzer analyzer(trace_file_path_);
// The analyzer ends when it detects an incomplete access record.
ASSERT_EQ(Status::Incomplete(""), analyzer.Analyze());
const uint64_t expected_num_cfs = 1;
std::vector<uint64_t> expected_fds{kSSTStoringOddKeys, kSSTStoringEvenKeys};
const std::vector<TraceType> expected_types{
TraceType::kBlockTraceUncompressionDictBlock,
TraceType::kBlockTraceDataBlock, TraceType::kBlockTraceFilterBlock,
TraceType::kBlockTraceIndexBlock,
TraceType::kBlockTraceRangeDeletionBlock};
const uint64_t expected_num_keys_per_type = 5;
auto& stats = analyzer.TEST_cf_aggregates_map();
ASSERT_EQ(expected_num_cfs, stats.size());
ASSERT_TRUE(stats.find(kDefaultColumnFamilyName) != stats.end());
auto& cf_stats = stats.find(kDefaultColumnFamilyName)->second;
ASSERT_EQ(expected_fds.size(), cf_stats.fd_aggregates_map.size());
for (auto fd_id : expected_fds) {
ASSERT_TRUE(cf_stats.fd_aggregates_map.find(fd_id) !=
cf_stats.fd_aggregates_map.end());
ASSERT_EQ(kLevel, cf_stats.fd_aggregates_map.find(fd_id)->second.level);
auto& block_type_aggregates_map = cf_stats.fd_aggregates_map.find(fd_id)
->second.block_type_aggregates_map;
ASSERT_EQ(expected_types.size(), block_type_aggregates_map.size());
uint32_t key_id = 0;
for (auto type : expected_types) {
ASSERT_TRUE(block_type_aggregates_map.find(type) !=
block_type_aggregates_map.end());
auto& block_access_info_map =
block_type_aggregates_map.find(type)->second.block_access_info_map;
// Each block type has 5 blocks.
ASSERT_EQ(expected_num_keys_per_type, block_access_info_map.size());
for (uint32_t i = 0; i < 10; i++) {
// Verify that odd numbered blocks are stored in kSSTStoringOddKeys
// and even numbered blocks are stored in kSSTStoringEvenKeys.
auto key_id_str = kBlockKeyPrefix + std::to_string(key_id);
if (fd_id == kSSTStoringOddKeys) {
if (key_id % 2 == 1) {
AssertBlockAccessInfo(key_id, type, block_access_info_map);
} else {
ASSERT_TRUE(block_access_info_map.find(key_id_str) ==
block_access_info_map.end());
}
} else {
if (key_id % 2 == 1) {
ASSERT_TRUE(block_access_info_map.find(key_id_str) ==
block_access_info_map.end());
} else {
AssertBlockAccessInfo(key_id, type, block_access_info_map);
}
}
key_id++;
}
}
}
}
}
} // namespace rocksdb
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

@ -15,12 +15,13 @@ namespace rocksdb {
namespace {
const unsigned int kCharSize = 1;
} // namespace
bool ShouldTraceReferencedKey(const BlockCacheTraceRecord& record) {
return (record.block_type == TraceType::kBlockTraceDataBlock) &&
(record.caller == BlockCacheLookupCaller::kUserGet ||
record.caller == BlockCacheLookupCaller::kUserMGet);
}
} // namespace
BlockCacheTraceWriter::BlockCacheTraceWriter(
Env* env, const TraceOptions& trace_options,

@ -77,6 +77,8 @@ struct BlockCacheTraceHeader {
uint32_t rocksdb_minor_version;
};
bool ShouldTraceReferencedKey(const BlockCacheTraceRecord& record);
// BlockCacheTraceWriter captures all RocksDB block cache accesses using a
// user-provided TraceWriter. Every RocksDB operation is written as a single
// trace. Each trace will have a timestamp and type, followed by the trace

Loading…
Cancel
Save