You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
rocksdb/include/rocksdb/block_cache_trace_writer.h

150 lines
5.2 KiB

// Copyright (c) 2022, Meta Platforms, Inc. and affiliates. All rights
// reserved. This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
Refactor block cache tracing APIs (#10811) Summary: Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Currently, only a TraceWriter is supported, with a default built-in implementation of FileTraceWriter. The TraceWriter, however, takes a flat trace record and is thus only suitable for file tracing. This PR introduces an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. `DB::StartBlockTrace` will internally redirect to changed `BlockCacheTrace::StartBlockCacheTrace`. New API `DB::StartBlockTrace` is also added that directly takes `BlockCacheTraceWriter` pointer. This same philosophy can be applied to KV and IO tracing as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10811 Test Plan: existing unit tests Old API DB::StartBlockTrace checked with db_bench tool create database ``` ./db_bench --benchmarks="fillseq" \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 ``` To trace block cache accesses when running readrandom benchmark: ``` ./db_bench --benchmarks="readrandom" --use_existing_db --duration=60 \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 \ --threads=16 \ -block_cache_trace_file="/tmp/binary_trace_test_example" \ -block_cache_trace_max_trace_file_size_in_bytes=1073741824 \ -block_cache_trace_sampling_frequency=1 ``` 
Reviewed By: anand1976 Differential Revision: D40435289 Pulled By: akankshamahajan15 fbshipit-source-id: fa2755f4788185e19f4605e731641cfd21ab3282
2 years ago
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <utility>

#include "rocksdb/options.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table_reader_caller.h"
#include "rocksdb/trace_reader_writer.h"
#include "rocksdb/trace_record.h"
namespace ROCKSDB_NAMESPACE {
// A record for block cache lookups/inserts. This is passed by the table
// reader to the BlockCacheTraceWriter for every block cache op.
//
// All fields carry in-class defaults, so a default-constructed record is
// fully initialized. Fields are grouped by the kind of information they
// describe; the numbering inside each group is only for readability.
struct BlockCacheTraceRecord {
  // Required fields for all accesses.
  uint64_t access_timestamp = 0;

  // Info related to the block being looked up or inserted
  //
  // 1. The cache key for the block
  std::string block_key;
  // 2. The type of block
  TraceType block_type = TraceType::kTraceMax;
  // 3. Size of the block
  uint64_t block_size = 0;

  // Info about the SST file the block is in
  //
  // 1. Column family ID
  uint64_t cf_id = 0;
  // 2. Column family name
  std::string cf_name;
  // 3. LSM level of the file
  uint32_t level = 0;
  // 4. SST file number
  uint64_t sst_fd_number = 0;

  // Info about the calling context
  //
  // 1. The higher level request triggering the block cache request
  TableReaderCaller caller = TableReaderCaller::kMaxBlockCacheLookupCaller;
  // 2. Cache lookup hit/miss. Not relevant for inserts
  bool is_cache_hit = false;
  // 3. Whether this request is a lookup
  bool no_insert = false;

  // Get/MultiGet specific info
  //
  // 1. A unique ID for Get/MultiGet
  uint64_t get_id = kReservedGetId;
  // 2. Whether the Get/MultiGet is from a user-specified snapshot
  bool get_from_user_specified_snapshot = false;
  // 3. The target user key in the block
  std::string referenced_key;

  // Required fields for data block and user Get/Multi-Get only.
  //
  // 1. Size of the useful data in the block
  uint64_t referenced_data_size = 0;
  // 2. Only for MultiGet, number of keys from the batch found in the block
  uint64_t num_keys_in_block = 0;
  // 3. Whether the key was found in the block or not (false positive)
  bool referenced_key_exist_in_block = false;

  // Default value of `get_id`. Declared here only; the definition lives
  // outside this header.
  static const uint64_t kReservedGetId;

  // Creates a record with every field set to its in-class default.
  BlockCacheTraceRecord() = default;

  // Creates a record from explicit values. The trailing parameters (from
  // `_get_from_user_specified_snapshot` onwards) are optional and default to
  // the same values as the corresponding in-class initializers.
  //
  // The string parameters are taken by value and moved into the members, so
  // a caller passing an rvalue pays no copy and a caller passing an lvalue
  // pays exactly one.
  BlockCacheTraceRecord(uint64_t _access_timestamp, std::string _block_key,
                        TraceType _block_type, uint64_t _block_size,
                        uint64_t _cf_id, std::string _cf_name, uint32_t _level,
                        uint64_t _sst_fd_number, TableReaderCaller _caller,
                        bool _is_cache_hit, bool _no_insert, uint64_t _get_id,
                        bool _get_from_user_specified_snapshot = false,
                        std::string _referenced_key = "",
                        uint64_t _referenced_data_size = 0,
                        uint64_t _num_keys_in_block = 0,
                        bool _referenced_key_exist_in_block = false)
      : access_timestamp(_access_timestamp),
        block_key(std::move(_block_key)),
        block_type(_block_type),
        block_size(_block_size),
        cf_id(_cf_id),
        cf_name(std::move(_cf_name)),
        level(_level),
        sst_fd_number(_sst_fd_number),
        caller(_caller),
        is_cache_hit(_is_cache_hit),
        no_insert(_no_insert),
        get_id(_get_id),
        get_from_user_specified_snapshot(_get_from_user_specified_snapshot),
        referenced_key(std::move(_referenced_key)),
        referenced_data_size(_referenced_data_size),
        num_keys_in_block(_num_keys_in_block),
        referenced_key_exist_in_block(_referenced_key_exist_in_block) {}
};
// Settings that control how block cache accesses are traced.
struct BlockCacheTraceOptions {
  // Trace sampling rate: one request is captured out of every
  // `sampling_frequency` requests. The default value of 1 captures every
  // request.
  uint64_t sampling_frequency{1};
};
// Settings for the built-in BlockCacheTraceWriter implementation.
struct BlockCacheTraceWriterOptions {
  // Defaults to 64 GiB (64 * 2^30). The computation is done in uint64_t so
  // the value cannot overflow a 32-bit int.
  // NOTE(review): presumably the upper bound, in bytes, on the trace file
  // size -- confirm against the writer implementation.
  uint64_t max_trace_file_size = uint64_t{64} << 30;
};
// Abstract interface that captures all RocksDB block cache accesses. Each
// traced operation is handed to WriteBlockAccess() as a structured
// BlockCacheTraceRecord; how the record is formatted and stored is left to
// the concrete implementation.
class BlockCacheTraceWriter {
 public:
  virtual ~BlockCacheTraceWriter() = default;

  // Writes one block cache access described by `record`. `block_key`,
  // `cf_name` and `referenced_key` are passed as Slice references to avoid
  // copying them.
  virtual Status WriteBlockAccess(const BlockCacheTraceRecord& record,
                                  const Slice& block_key, const Slice& cf_name,
                                  const Slice& referenced_key) = 0;

  // Writes the trace header. Typically called once at the beginning, when a
  // trace is initiated; the header carries metadata such as a magic number
  // and the RocksDB version.
  virtual Status WriteHeader() = 0;
};
// Allocates an instance of the built-in BlockCacheTraceWriter implementation,
// which traces all block cache accesses to a user-provided TraceWriter. Each
// access is traced to a file with a timestamp and type, followed by the
// payload.
//
// Parameters:
//   clock         - NOTE(review): presumably the time source used by the
//                   writer; confirm against the implementation.
//   trace_options - options for the built-in writer implementation.
//   trace_writer  - the user-provided TraceWriter that receives the trace.
//                   Taken as an rvalue reference to a unique_ptr;
//                   NOTE(review): presumably ownership is transferred to the
//                   returned writer -- confirm against the implementation.
//
// Returns the newly allocated writer, owned by the caller.
std::unique_ptr<BlockCacheTraceWriter> NewBlockCacheTraceWriter(
SystemClock* clock, const BlockCacheTraceWriterOptions& trace_options,
std::unique_ptr<TraceWriter>&& trace_writer);
} // namespace ROCKSDB_NAMESPACE