diff --git a/CMakeLists.txt b/CMakeLists.txt index cef1f85d7..006f67986 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -621,6 +621,7 @@ set(SOURCES test_util/sync_point_impl.cc test_util/testutil.cc test_util/transaction_test_util.cc + tools/block_cache_trace_analyzer.cc tools/db_bench_tool.cc tools/dump/db_dump_tool.cc tools/ldb_cmd.cc @@ -966,6 +967,7 @@ if(WITH_TESTS) table/merger_test.cc table/sst_file_reader_test.cc table/table_test.cc + tools/block_cache_trace_analyzer_test.cc tools/ldb_cmd_test.cc tools/reduce_levels_test.cc tools/sst_dump_test.cc diff --git a/Makefile b/Makefile index 3ee85ad67..425c75eb5 100644 --- a/Makefile +++ b/Makefile @@ -562,6 +562,7 @@ TESTS = \ sst_file_reader_test \ db_secondary_test \ block_cache_tracer_test \ + block_cache_trace_analyzer_test \ PARALLEL_TEST = \ backupable_db_test \ @@ -1592,6 +1593,9 @@ db_secondary_test: db/db_impl/db_secondary_test.o db/db_test_util.o $(LIBOBJECTS block_cache_tracer_test: trace_replay/block_cache_tracer_test.o trace_replay/block_cache_tracer.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) +block_cache_trace_analyzer_test: tools/block_cache_trace_analyzer_test.o tools/block_cache_trace_analyzer.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + #------------------------------------------------- # make install related stuff INSTALL_PATH ?= /usr/local diff --git a/src.mk b/src.mk index 6303997cd..150b1c10a 100644 --- a/src.mk +++ b/src.mk @@ -240,6 +240,7 @@ TOOL_LIB_SOURCES = \ utilities/blob_db/blob_dump_tool.cc \ ANALYZER_LIB_SOURCES = \ + tools/block_cache_trace_analyzer.cc \ tools/trace_analyzer_tool.cc \ MOCK_LIB_SOURCES = \ @@ -365,6 +366,7 @@ MAIN_SOURCES = \ table/table_reader_bench.cc \ table/table_test.cc \ third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc \ + tools/block_cache_trace_analyzer_test.cc \ tools/db_bench.cc \ tools/db_bench_tool_test.cc \ tools/db_sanity_test.cc \ diff --git a/tools/block_cache_trace_analyzer.cc b/tools/block_cache_trace_analyzer.cc new file mode 100644 index 000000000..5d9b2d184 --- /dev/null +++ b/tools/block_cache_trace_analyzer.cc @@ -0,0 +1,408 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "tools/block_cache_trace_analyzer.h" + +#include +#include +#include "monitoring/histogram.h" + +namespace rocksdb { +namespace { +std::string block_type_to_string(TraceType type) { + switch (type) { + case kBlockTraceFilterBlock: + return "Filter"; + case kBlockTraceDataBlock: + return "Data"; + case kBlockTraceIndexBlock: + return "Index"; + case kBlockTraceRangeDeletionBlock: + return "RangeDeletion"; + case kBlockTraceUncompressionDictBlock: + return "UncompressionDict"; + default: + break; + } + // This cannot happen. + return "InvalidType"; +} + +std::string caller_to_string(BlockCacheLookupCaller caller) { + switch (caller) { + case kUserGet: + return "Get"; + case kUserMGet: + return "MultiGet"; + case kUserIterator: + return "Iterator"; + case kPrefetch: + return "Prefetch"; + case kCompaction: + return "Compaction"; + default: + break; + } + // This cannot happen. + return "InvalidCaller"; +} +} // namespace + +BlockCacheTraceAnalyzer::BlockCacheTraceAnalyzer( + const std::string& trace_file_path) + : trace_file_path_(trace_file_path) { + env_ = rocksdb::Env::Default(); +} + +void BlockCacheTraceAnalyzer::RecordAccess( + const BlockCacheTraceRecord& access) { + ColumnFamilyAccessInfoAggregate& cf_aggr = cf_aggregates_map_[access.cf_name]; + SSTFileAccessInfoAggregate& file_aggr = + cf_aggr.fd_aggregates_map[access.sst_fd_number]; + file_aggr.level = access.level; + BlockTypeAccessInfoAggregate& block_type_aggr = + file_aggr.block_type_aggregates_map[access.block_type]; + BlockAccessInfo& block_access_info = + block_type_aggr.block_access_info_map[access.block_key]; + block_access_info.AddAccess(access); +} + +Status BlockCacheTraceAnalyzer::Analyze() { + std::unique_ptr trace_reader; + Status s = + NewFileTraceReader(env_, EnvOptions(), trace_file_path_, &trace_reader); + if (!s.ok()) { + return s; + } + BlockCacheTraceReader reader(std::move(trace_reader)); + s = reader.ReadHeader(&header_); + if (!s.ok()) { + return s; + } + while (s.ok()) { + BlockCacheTraceRecord access; + s = reader.ReadAccess(&access); + if (!s.ok()) { + return s; + } + RecordAccess(access); + } + return Status::OK(); +} + +void BlockCacheTraceAnalyzer::PrintBlockSizeStats() const { + HistogramStat bs_stats; + std::map bt_stats_map; + std::map> cf_bt_stats_map; + for (auto const& cf_aggregates : cf_aggregates_map_) { + // Stats per column family. + const std::string& cf_name = cf_aggregates.first; + for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) { + // Stats per SST file. + for (auto const& block_type_aggregates : + file_aggregates.second.block_type_aggregates_map) { + // Stats per block type. + const TraceType type = block_type_aggregates.first; + for (auto const& block_access_info : + block_type_aggregates.second.block_access_info_map) { + // Stats per block. + bs_stats.Add(block_access_info.second.block_size); + bt_stats_map[type].Add(block_access_info.second.block_size); + cf_bt_stats_map[cf_name][type].Add( + block_access_info.second.block_size); + } + } + } + } + fprintf(stdout, "Block size stats: \n%s", bs_stats.ToString().c_str()); + for (auto const& bt_stats : bt_stats_map) { + fprintf(stdout, "Block size stats for block type %s: \n%s", + block_type_to_string(bt_stats.first).c_str(), + bt_stats.second.ToString().c_str()); + } + for (auto const& cf_bt_stats : cf_bt_stats_map) { + const std::string& cf_name = cf_bt_stats.first; + for (auto const& bt_stats : cf_bt_stats.second) { + fprintf(stdout, + "Block size stats for column family %s and block type %s: \n%s", + cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(), + bt_stats.second.ToString().c_str()); + } + } +} + +void BlockCacheTraceAnalyzer::PrintAccessCountStats() const { + HistogramStat access_stats; + std::map bt_stats_map; + std::map> cf_bt_stats_map; + for (auto const& cf_aggregates : cf_aggregates_map_) { + // Stats per column family. + const std::string& cf_name = cf_aggregates.first; + for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) { + // Stats per SST file. + for (auto const& block_type_aggregates : + file_aggregates.second.block_type_aggregates_map) { + // Stats per block type. + const TraceType type = block_type_aggregates.first; + for (auto const& block_access_info : + block_type_aggregates.second.block_access_info_map) { + // Stats per block. + access_stats.Add(block_access_info.second.num_accesses); + bt_stats_map[type].Add(block_access_info.second.num_accesses); + cf_bt_stats_map[cf_name][type].Add( + block_access_info.second.num_accesses); + } + } + } + } + fprintf(stdout, "Block access count stats: \n%s", + access_stats.ToString().c_str()); + for (auto const& bt_stats : bt_stats_map) { + fprintf(stdout, "Block access count stats for block type %s: \n%s", + block_type_to_string(bt_stats.first).c_str(), + bt_stats.second.ToString().c_str()); + } + for (auto const& cf_bt_stats : cf_bt_stats_map) { + const std::string& cf_name = cf_bt_stats.first; + for (auto const& bt_stats : cf_bt_stats.second) { + fprintf(stdout, + "Block access count stats for column family %s and block type " + "%s: \n%s", + cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(), + bt_stats.second.ToString().c_str()); + } + } +} + +void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { + HistogramStat existing_keys_stats; + std::map cf_existing_keys_stats_map; + HistogramStat non_existing_keys_stats; + std::map cf_non_existing_keys_stats_map; + HistogramStat block_access_stats; + std::map cf_block_access_info; + + for (auto const& cf_aggregates : cf_aggregates_map_) { + // Stats per column family. + const std::string& cf_name = cf_aggregates.first; + for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) { + // Stats per SST file. + for (auto const& block_type_aggregates : + file_aggregates.second.block_type_aggregates_map) { + // Stats per block type. + for (auto const& block_access_info : + block_type_aggregates.second.block_access_info_map) { + // Stats per block. + if (block_access_info.second.num_keys == 0) { + continue; + } + // Use four decimal points. + uint64_t percent_referenced_for_existing_keys = (uint64_t)( + ((double)block_access_info.second.key_num_access_map.size() / + (double)block_access_info.second.num_keys) * + 10000.0); + uint64_t percent_referenced_for_non_existing_keys = + (uint64_t)(((double)block_access_info.second + .non_exist_key_num_access_map.size() / + (double)block_access_info.second.num_keys) * + 10000.0); + uint64_t percent_accesses_for_existing_keys = (uint64_t)( + ((double) + block_access_info.second.num_referenced_key_exist_in_block / + (double)block_access_info.second.num_accesses) * + 10000.0); + existing_keys_stats.Add(percent_referenced_for_existing_keys); + cf_existing_keys_stats_map[cf_name].Add( + percent_referenced_for_existing_keys); + non_existing_keys_stats.Add(percent_referenced_for_non_existing_keys); + cf_non_existing_keys_stats_map[cf_name].Add( + percent_referenced_for_non_existing_keys); + block_access_stats.Add(percent_accesses_for_existing_keys); + cf_block_access_info[cf_name].Add(percent_accesses_for_existing_keys); + } + } + } + } + fprintf(stdout, + "Histogram on percentage of referenced keys existing in a block over " + "the total number of keys in a block: \n%s", + existing_keys_stats.ToString().c_str()); + for (auto const& cf_stats : cf_existing_keys_stats_map) { + fprintf(stdout, "Break down by column family %s: \n%s", + cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); + } + fprintf( + stdout, + "Histogram on percentage of referenced keys DO NOT exist in a block over " + "the total number of keys in a block: \n%s", + non_existing_keys_stats.ToString().c_str()); + for (auto const& cf_stats : cf_non_existing_keys_stats_map) { + fprintf(stdout, "Break down by column family %s: \n%s", + cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); + } + fprintf(stdout, + "Histogram on percentage of accesses on keys exist in a block over " + "the total number of accesses in a block: \n%s", + block_access_stats.ToString().c_str()); + for (auto const& cf_stats : cf_block_access_info) { + fprintf(stdout, "Break down by column family %s: \n%s", + cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); + } +} + +void BlockCacheTraceAnalyzer::PrintStatsSummary() const { + uint64_t total_num_files = 0; + uint64_t total_num_blocks = 0; + uint64_t total_num_accesses = 0; + std::map bt_num_blocks_map; + std::map caller_num_access_map; + std::map> + caller_bt_num_access_map; + std::map> + caller_level_num_access_map; + for (auto const& cf_aggregates : cf_aggregates_map_) { + // Stats per column family. + const std::string& cf_name = cf_aggregates.first; + uint64_t cf_num_files = 0; + uint64_t cf_num_blocks = 0; + std::map cf_bt_blocks; + uint64_t cf_num_accesses = 0; + std::map cf_caller_num_accesses_map; + std::map> + cf_caller_level_num_accesses_map; + std::map> + cf_caller_file_num_accesses_map; + std::map> + cf_caller_bt_num_accesses_map; + total_num_files += cf_aggregates.second.fd_aggregates_map.size(); + for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) { + // Stats per SST file. + const uint64_t fd = file_aggregates.first; + const uint32_t level = file_aggregates.second.level; + cf_num_files++; + for (auto const& block_type_aggregates : + file_aggregates.second.block_type_aggregates_map) { + // Stats per block type. + const TraceType type = block_type_aggregates.first; + cf_bt_blocks[type] += + block_type_aggregates.second.block_access_info_map.size(); + total_num_blocks += + block_type_aggregates.second.block_access_info_map.size(); + bt_num_blocks_map[type] += + block_type_aggregates.second.block_access_info_map.size(); + for (auto const& block_access_info : + block_type_aggregates.second.block_access_info_map) { + // Stats per block. + cf_num_blocks++; + for (auto const& stats : + block_access_info.second.caller_num_access_map) { + // Stats per caller. + const BlockCacheLookupCaller caller = stats.first; + const uint64_t num_accesses = stats.second; + // Overall stats. + total_num_accesses += num_accesses; + caller_num_access_map[caller] += num_accesses; + caller_bt_num_access_map[caller][type] += num_accesses; + caller_level_num_access_map[caller][level] += num_accesses; + // Column Family stats. + cf_num_accesses++; + cf_caller_num_accesses_map[caller] += num_accesses; + cf_caller_level_num_accesses_map[caller][level] += num_accesses; + cf_caller_file_num_accesses_map[caller][fd] += num_accesses; + cf_caller_bt_num_accesses_map[caller][type] += num_accesses; + } + } + } + } + + // Print stats. + fprintf( + stdout, + "***************************************************************\n"); + fprintf( + stdout, + "***************************************************************\n"); + fprintf( + stdout, + "***************************************************************\n"); + fprintf(stdout, "Statistics for column family %s:\n", cf_name.c_str()); + fprintf(stdout, + "Number of files:%" PRIu64 "Number of blocks: %" PRIu64 + "Number of accesses: %" PRIu64 "\n", + cf_num_files, cf_num_blocks, cf_num_accesses); + for (auto block_type : cf_bt_blocks) { + fprintf(stdout, "Number of %s blocks: %" PRIu64 "\n", + block_type_to_string(block_type.first).c_str(), + block_type.second); + } + for (auto caller : cf_caller_num_accesses_map) { + fprintf( + stdout, + "***************************************************************\n"); + fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n", + caller_to_string(caller.first).c_str(), caller.second); + fprintf(stdout, "Caller %s: Number of accesses per level break down\n", + caller_to_string(caller.first).c_str()); + for (auto naccess_level : + cf_caller_level_num_accesses_map[caller.first]) { + fprintf(stdout, + "\t Level %" PRIu64 ": Number of accesses: %" PRIu64 "\n", + naccess_level.first, naccess_level.second); + } + fprintf(stdout, "Caller %s: Number of accesses per file break down\n", + caller_to_string(caller.first).c_str()); + for (auto naccess_file : cf_caller_file_num_accesses_map[caller.first]) { + fprintf(stdout, + "\t File %" PRIu64 ": Number of accesses: %" PRIu64 "\n", + naccess_file.first, naccess_file.second); + } + fprintf(stdout, + "Caller %s: Number of accesses per block type break down\n", + caller_to_string(caller.first).c_str()); + for (auto naccess_type : cf_caller_bt_num_accesses_map[caller.first]) { + fprintf(stdout, "\t Block Type %s: Number of accesses: %" PRIu64 "\n", + block_type_to_string(naccess_type.first).c_str(), + naccess_type.second); + } + } + } + fprintf(stdout, + "***************************************************************\n"); + fprintf(stdout, + "***************************************************************\n"); + fprintf(stdout, + "***************************************************************\n"); + fprintf(stdout, "Overall statistics:\n"); + fprintf(stdout, + "Number of files: %" PRIu64 " Number of blocks: %" PRIu64 + " Number of accesses: %" PRIu64 "\n", + total_num_files, total_num_blocks, total_num_accesses); + for (auto block_type : bt_num_blocks_map) { + fprintf(stdout, "Number of %s blocks: %" PRIu64 "\n", + block_type_to_string(block_type.first).c_str(), block_type.second); + } + for (auto caller : caller_num_access_map) { + fprintf( + stdout, + "***************************************************************\n"); + fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n", + caller_to_string(caller.first).c_str(), caller.second); + fprintf(stdout, "Caller %s: Number of accesses per level break down\n", + caller_to_string(caller.first).c_str()); + for (auto naccess_level : caller_level_num_access_map[caller.first]) { + fprintf(stdout, "\t Level %d: Number of accesses: %" PRIu64 "\n", + naccess_level.first, naccess_level.second); + } + fprintf(stdout, "Caller %s: Number of accesses per block type break down\n", + caller_to_string(caller.first).c_str()); + for (auto naccess_type : caller_bt_num_access_map[caller.first]) { + fprintf(stdout, "\t Block Type %s: Number of accesses: %" PRIu64 "\n", + block_type_to_string(naccess_type.first).c_str(), + naccess_type.second); + } + } +} + +} // namespace rocksdb diff --git a/tools/block_cache_trace_analyzer.h b/tools/block_cache_trace_analyzer.h new file mode 100644 index 000000000..9dde8a939 --- /dev/null +++ b/tools/block_cache_trace_analyzer.h @@ -0,0 +1,131 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "rocksdb/env.h" +#include "trace_replay/block_cache_tracer.h" + +namespace rocksdb { + +// Statistics of a block. +struct BlockAccessInfo { + uint64_t num_accesses = 0; + uint64_t block_size = 0; + uint64_t first_access_time = 0; + uint64_t last_access_time = 0; + uint64_t num_keys = 0; + std::map + key_num_access_map; // for keys exist in this block. + std::map + non_exist_key_num_access_map; // for keys do not exist in this block. + uint64_t num_referenced_key_exist_in_block = 0; + std::map caller_num_access_map; + + void AddAccess(const BlockCacheTraceRecord& access) { + if (first_access_time == 0) { + first_access_time = access.access_timestamp; + } + last_access_time = access.access_timestamp; + block_size = access.block_size; + caller_num_access_map[access.caller]++; + num_accesses++; + if (ShouldTraceReferencedKey(access)) { + num_keys = access.num_keys_in_block; + + if (access.is_referenced_key_exist_in_block == Boolean::kTrue) { + key_num_access_map[access.referenced_key]++; + num_referenced_key_exist_in_block++; + } else { + non_exist_key_num_access_map[access.referenced_key]++; + } + } + } +}; + +// Aggregates stats of a block given a block type. +struct BlockTypeAccessInfoAggregate { + std::map block_access_info_map; +}; + +// Aggregates BlockTypeAggregate given a SST file. +struct SSTFileAccessInfoAggregate { + uint32_t level; + std::map block_type_aggregates_map; +}; + +// Aggregates SSTFileAggregate given a column family. +struct ColumnFamilyAccessInfoAggregate { + std::map fd_aggregates_map; +}; + +class BlockCacheTraceAnalyzer { + public: + BlockCacheTraceAnalyzer(const std::string& trace_file_path); + ~BlockCacheTraceAnalyzer() = default; + // No copy and move. + BlockCacheTraceAnalyzer(const BlockCacheTraceAnalyzer&) = delete; + BlockCacheTraceAnalyzer& operator=(const BlockCacheTraceAnalyzer&) = delete; + BlockCacheTraceAnalyzer(BlockCacheTraceAnalyzer&&) = delete; + BlockCacheTraceAnalyzer& operator=(BlockCacheTraceAnalyzer&&) = delete; + + // Read all access records in the given trace_file, maintains the stats of + // a block, and aggregates the information by block type, sst file, and column + // family. Subsequently, the caller may call Print* functions to print + // statistics. + Status Analyze(); + + // Print a summary of statistics of the trace, e.g., + // Number of files: 2 Number of blocks: 50 Number of accesses: 50 + // Number of Index blocks: 10 + // Number of Filter blocks: 10 + // Number of Data blocks: 10 + // Number of UncompressionDict blocks: 10 + // Number of RangeDeletion blocks: 10 + // *************************************************************** + // Caller Get: Number of accesses 10 + // Caller Get: Number of accesses per level break down + // Level 0: Number of accesses: 10 + // Caller Get: Number of accesses per block type break down + // Block Type Index: Number of accesses: 2 + // Block Type Filter: Number of accesses: 2 + // Block Type Data: Number of accesses: 2 + // Block Type UncompressionDict: Number of accesses: 2 + // Block Type RangeDeletion: Number of accesses: 2 + void PrintStatsSummary() const; + + // Print block size distribution and the distribution break down by block type + // and column family. + void PrintBlockSizeStats() const; + + // Print access count distribution and the distribution break down by block + // type and column family. + void PrintAccessCountStats() const; + + // Print data block accesses by user Get and Multi-Get. + // It prints out 1) A histogram on the percentage of keys accessed in a data + // block break down by if a referenced key exists in the data block andthe + // histogram break down by column family. 2) A histogram on the percentage of + // accesses on keys exist in a data block and its break down by column family. + void PrintDataBlockAccessStats() const; + + const std::map& + TEST_cf_aggregates_map() const { + return cf_aggregates_map_; + } + + private: + void RecordAccess(const BlockCacheTraceRecord& access); + + rocksdb::Env* env_; + std::string trace_file_path_; + BlockCacheTraceHeader header_; + std::map cf_aggregates_map_; +}; + +} // namespace rocksdb diff --git a/tools/block_cache_trace_analyzer_test.cc b/tools/block_cache_trace_analyzer_test.cc new file mode 100644 index 000000000..96f52c1ec --- /dev/null +++ b/tools/block_cache_trace_analyzer_test.cc @@ -0,0 +1,229 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include +#include + +#include "rocksdb/env.h" +#include "rocksdb/status.h" +#include "rocksdb/trace_reader_writer.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "tools/block_cache_trace_analyzer.h" +#include "trace_replay/block_cache_tracer.h" + +namespace rocksdb { + +namespace { +const uint64_t kBlockSize = 1024; +const std::string kBlockKeyPrefix = "test-block-"; +const uint32_t kCFId = 0; +const uint32_t kLevel = 1; +const uint64_t kSSTStoringEvenKeys = 100; +const uint64_t kSSTStoringOddKeys = 101; +const std::string kRefKeyPrefix = "test-get-"; +const uint64_t kNumKeysInBlock = 1024; +} // namespace + +class BlockCacheTracerTest : public testing::Test { + public: + BlockCacheTracerTest() { + test_path_ = test::PerThreadDBPath("block_cache_tracer_test"); + env_ = rocksdb::Env::Default(); + EXPECT_OK(env_->CreateDir(test_path_)); + trace_file_path_ = test_path_ + "/block_cache_trace"; + } + + ~BlockCacheTracerTest() override { + if (getenv("KEEP_DB")) { + printf("The trace file is still at %s\n", trace_file_path_.c_str()); + return; + } + EXPECT_OK(env_->DeleteFile(trace_file_path_)); + EXPECT_OK(env_->DeleteDir(test_path_)); + } + + BlockCacheLookupCaller GetCaller(uint32_t key_id) { + uint32_t n = key_id % 5; + switch (n) { + case 0: + return BlockCacheLookupCaller::kPrefetch; + case 1: + return BlockCacheLookupCaller::kCompaction; + case 2: + return BlockCacheLookupCaller::kUserGet; + case 3: + return BlockCacheLookupCaller::kUserMGet; + case 4: + return BlockCacheLookupCaller::kUserIterator; + } + // This cannot happend. + assert(false); + return BlockCacheLookupCaller::kUserGet; + } + + void WriteBlockAccess(BlockCacheTraceWriter* writer, uint32_t from_key_id, + TraceType block_type, uint32_t nblocks) { + assert(writer); + for (uint32_t i = 0; i < nblocks; i++) { + uint32_t key_id = from_key_id + i; + BlockCacheTraceRecord record; + record.block_type = block_type; + record.block_size = kBlockSize + key_id; + record.block_key = kBlockKeyPrefix + std::to_string(key_id); + record.access_timestamp = env_->NowMicros(); + record.cf_id = kCFId; + record.cf_name = kDefaultColumnFamilyName; + record.caller = GetCaller(key_id); + record.level = kLevel; + if (key_id % 2 == 0) { + record.sst_fd_number = kSSTStoringEvenKeys; + } else { + record.sst_fd_number = kSSTStoringOddKeys; + } + record.is_cache_hit = Boolean::kFalse; + record.no_insert = Boolean::kFalse; + // Provide these fields for all block types. + // The writer should only write these fields for data blocks and the + // caller is either GET or MGET. + record.referenced_key = kRefKeyPrefix + std::to_string(key_id); + record.is_referenced_key_exist_in_block = Boolean::kTrue; + record.num_keys_in_block = kNumKeysInBlock; + ASSERT_OK(writer->WriteBlockAccess(record)); + } + } + + void AssertBlockAccessInfo( + uint32_t key_id, TraceType type, + const std::map& block_access_info_map) { + auto key_id_str = kBlockKeyPrefix + std::to_string(key_id); + ASSERT_TRUE(block_access_info_map.find(key_id_str) != + block_access_info_map.end()); + auto& block_access_info = block_access_info_map.find(key_id_str)->second; + ASSERT_EQ(1, block_access_info.num_accesses); + ASSERT_EQ(kBlockSize + key_id, block_access_info.block_size); + ASSERT_GT(block_access_info.first_access_time, 0); + ASSERT_GT(block_access_info.last_access_time, 0); + ASSERT_EQ(1, block_access_info.caller_num_access_map.size()); + BlockCacheLookupCaller expected_caller = GetCaller(key_id); + ASSERT_TRUE(block_access_info.caller_num_access_map.find(expected_caller) != + block_access_info.caller_num_access_map.end()); + ASSERT_EQ( + 1, + block_access_info.caller_num_access_map.find(expected_caller)->second); + + if ((expected_caller == BlockCacheLookupCaller::kUserGet || + expected_caller == BlockCacheLookupCaller::kUserMGet) && + type == TraceType::kBlockTraceDataBlock) { + ASSERT_EQ(kNumKeysInBlock, block_access_info.num_keys); + ASSERT_EQ(1, block_access_info.key_num_access_map.size()); + ASSERT_EQ(0, block_access_info.non_exist_key_num_access_map.size()); + ASSERT_EQ(1, block_access_info.num_referenced_key_exist_in_block); + } + } + + Env* env_; + EnvOptions env_options_; + std::string trace_file_path_; + std::string test_path_; +}; + +TEST_F(BlockCacheTracerTest, MixedBlocks) { + { + // Generate a trace file containing a mix of blocks. + // It contains two SST files with 25 blocks of odd numbered block_key in + // kSSTStoringOddKeys and 25 blocks of even numbered blocks_key in + // kSSTStoringEvenKeys. + TraceOptions trace_opt; + std::unique_ptr trace_writer; + ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, + &trace_writer)); + BlockCacheTraceWriter writer(env_, trace_opt, std::move(trace_writer)); + ASSERT_OK(writer.WriteHeader()); + // Write blocks of different types. + WriteBlockAccess(&writer, 0, TraceType::kBlockTraceUncompressionDictBlock, + 10); + WriteBlockAccess(&writer, 10, TraceType::kBlockTraceDataBlock, 10); + WriteBlockAccess(&writer, 20, TraceType::kBlockTraceFilterBlock, 10); + WriteBlockAccess(&writer, 30, TraceType::kBlockTraceIndexBlock, 10); + WriteBlockAccess(&writer, 40, TraceType::kBlockTraceRangeDeletionBlock, 10); + ASSERT_OK(env_->FileExists(trace_file_path_)); + } + + { + // Verify trace file is generated correctly. + std::unique_ptr trace_reader; + ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_, + &trace_reader)); + BlockCacheTraceReader reader(std::move(trace_reader)); + BlockCacheTraceHeader header; + ASSERT_OK(reader.ReadHeader(&header)); + ASSERT_EQ(kMajorVersion, header.rocksdb_major_version); + ASSERT_EQ(kMinorVersion, header.rocksdb_minor_version); + // Read blocks. + BlockCacheTraceAnalyzer analyzer(trace_file_path_); + // The analyzer ends when it detects an incomplete access record. + ASSERT_EQ(Status::Incomplete(""), analyzer.Analyze()); + const uint64_t expected_num_cfs = 1; + std::vector expected_fds{kSSTStoringOddKeys, kSSTStoringEvenKeys}; + const std::vector expected_types{ + TraceType::kBlockTraceUncompressionDictBlock, + TraceType::kBlockTraceDataBlock, TraceType::kBlockTraceFilterBlock, + TraceType::kBlockTraceIndexBlock, + TraceType::kBlockTraceRangeDeletionBlock}; + const uint64_t expected_num_keys_per_type = 5; + + auto& stats = analyzer.TEST_cf_aggregates_map(); + ASSERT_EQ(expected_num_cfs, stats.size()); + ASSERT_TRUE(stats.find(kDefaultColumnFamilyName) != stats.end()); + auto& cf_stats = stats.find(kDefaultColumnFamilyName)->second; + ASSERT_EQ(expected_fds.size(), cf_stats.fd_aggregates_map.size()); + for (auto fd_id : expected_fds) { + ASSERT_TRUE(cf_stats.fd_aggregates_map.find(fd_id) != + cf_stats.fd_aggregates_map.end()); + ASSERT_EQ(kLevel, cf_stats.fd_aggregates_map.find(fd_id)->second.level); + auto& block_type_aggregates_map = cf_stats.fd_aggregates_map.find(fd_id) + ->second.block_type_aggregates_map; + ASSERT_EQ(expected_types.size(), block_type_aggregates_map.size()); + uint32_t key_id = 0; + for (auto type : expected_types) { + ASSERT_TRUE(block_type_aggregates_map.find(type) != + block_type_aggregates_map.end()); + auto& block_access_info_map = + block_type_aggregates_map.find(type)->second.block_access_info_map; + // Each block type has 5 blocks. + ASSERT_EQ(expected_num_keys_per_type, block_access_info_map.size()); + for (uint32_t i = 0; i < 10; i++) { + // Verify that odd numbered blocks are stored in kSSTStoringOddKeys + // and even numbered blocks are stored in kSSTStoringEvenKeys. + auto key_id_str = kBlockKeyPrefix + std::to_string(key_id); + if (fd_id == kSSTStoringOddKeys) { + if (key_id % 2 == 1) { + AssertBlockAccessInfo(key_id, type, block_access_info_map); + } else { + ASSERT_TRUE(block_access_info_map.find(key_id_str) == + block_access_info_map.end()); + } + } else { + if (key_id % 2 == 1) { + ASSERT_TRUE(block_access_info_map.find(key_id_str) == + block_access_info_map.end()); + } else { + AssertBlockAccessInfo(key_id, type, block_access_info_map); + } + } + key_id++; + } + } + } + } +} + +} // namespace rocksdb + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/trace_replay/block_cache_tracer.cc b/trace_replay/block_cache_tracer.cc index 8d0119a68..58c7df70b 100644 --- a/trace_replay/block_cache_tracer.cc +++ b/trace_replay/block_cache_tracer.cc @@ -15,12 +15,13 @@ namespace rocksdb { namespace { const unsigned int kCharSize = 1; +} // namespace + bool ShouldTraceReferencedKey(const BlockCacheTraceRecord& record) { return (record.block_type == TraceType::kBlockTraceDataBlock) && (record.caller == BlockCacheLookupCaller::kUserGet || record.caller == BlockCacheLookupCaller::kUserMGet); } -} // namespace BlockCacheTraceWriter::BlockCacheTraceWriter( Env* env, const TraceOptions& trace_options, diff --git a/trace_replay/block_cache_tracer.h b/trace_replay/block_cache_tracer.h index 5fd14cbf1..e24d5a5ef 100644 --- a/trace_replay/block_cache_tracer.h +++ b/trace_replay/block_cache_tracer.h @@ -77,6 +77,8 @@ struct BlockCacheTraceHeader { uint32_t rocksdb_minor_version; }; +bool ShouldTraceReferencedKey(const BlockCacheTraceRecord& record); + // BlockCacheTraceWriter captures all RocksDB block cache accesses using a // user-provided TraceWriter. Every RocksDB operation is written as a single // trace. Each trace will have a timestamp and type, followed by the trace