Refactor block cache tracing APIs (#10811)
Summary: Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Currently, only a TraceWriter is supported, with a default built-in implementation of FileTraceWriter. The TraceWriter, however, takes a flat trace record and is thus only suitable for file tracing. This PR introduces an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. `DB::StartBlockTrace` will internally redirect to changed `BlockCacheTrace::StartBlockCacheTrace`. New API `DB::StartBlockTrace` is also added that directly takes `BlockCacheTraceWriter` pointer. This same philosophy can be applied to KV and IO tracing as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10811 Test Plan: existing unit tests Old API DB::StartBlockTrace checked with db_bench tool create database ``` ./db_bench --benchmarks="fillseq" \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 ``` To trace block cache accesses when running readrandom benchmark: ``` ./db_bench --benchmarks="readrandom" --use_existing_db --duration=60 \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 \ --threads=16 \ -block_cache_trace_file="/tmp/binary_trace_test_example" \ -block_cache_trace_max_trace_file_size_in_bytes=1073741824 \ -block_cache_trace_sampling_frequency=1 ``` Reviewed By: anand1976 Differential Revision: D40435289 Pulled By: akankshamahajan15 fbshipit-source-id: fa2755f4788185e19f4605e731641cfd21ab3282main
parent
b6e33dbc0e
commit
0e7b27bfcf
@ -0,0 +1,149 @@ |
|||||||
|
// Copyright (c) 2022, Meta, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "rocksdb/system_clock.h" |
||||||
|
#include "rocksdb/table_reader_caller.h" |
||||||
|
#include "rocksdb/trace_reader_writer.h" |
||||||
|
#include "rocksdb/trace_record.h" |
||||||
|
|
||||||
|
namespace ROCKSDB_NAMESPACE { |
||||||
|
// A record for block cache lookups/inserts. This is passed by the table
|
||||||
|
// reader to the BlockCacheTraceWriter for every block cache op.
|
||||||
|
struct BlockCacheTraceRecord { |
||||||
|
// Required fields for all accesses.
|
||||||
|
uint64_t access_timestamp = 0; |
||||||
|
|
||||||
|
// Info related to the block being looked up or inserted
|
||||||
|
//
|
||||||
|
// 1. The cache key for the block
|
||||||
|
std::string block_key; |
||||||
|
|
||||||
|
// 2. The type of block
|
||||||
|
TraceType block_type = TraceType::kTraceMax; |
||||||
|
|
||||||
|
// 3. Size of the block
|
||||||
|
uint64_t block_size = 0; |
||||||
|
|
||||||
|
// Info about the SST file the block is in
|
||||||
|
//
|
||||||
|
// 1. Column family ID
|
||||||
|
uint64_t cf_id = 0; |
||||||
|
|
||||||
|
// 2. Column family name
|
||||||
|
std::string cf_name; |
||||||
|
|
||||||
|
// 3. LSM level of the file
|
||||||
|
uint32_t level = 0; |
||||||
|
|
||||||
|
// 4. SST file number
|
||||||
|
uint64_t sst_fd_number = 0; |
||||||
|
|
||||||
|
// Info about the calling context
|
||||||
|
//
|
||||||
|
// 1. The higher level request triggering the block cache request
|
||||||
|
TableReaderCaller caller = TableReaderCaller::kMaxBlockCacheLookupCaller; |
||||||
|
|
||||||
|
// 2. Cache lookup hit/miss. Not relevant for inserts
|
||||||
|
bool is_cache_hit = false; |
||||||
|
|
||||||
|
// 3. Whether this request is a lookup
|
||||||
|
bool no_insert = false; |
||||||
|
|
||||||
|
// Get/MultiGet specific info
|
||||||
|
//
|
||||||
|
// 1. A unique ID for Get/MultiGet
|
||||||
|
uint64_t get_id = kReservedGetId; |
||||||
|
|
||||||
|
// 2. Whether the Get/MultiGet is from a user-specified snapshot
|
||||||
|
bool get_from_user_specified_snapshot = false; |
||||||
|
|
||||||
|
// 3. The target user key in the block
|
||||||
|
std::string referenced_key; |
||||||
|
|
||||||
|
// Required fields for data block and user Get/Multi-Get only.
|
||||||
|
//
|
||||||
|
// 1. Size of te useful data in the block
|
||||||
|
uint64_t referenced_data_size = 0; |
||||||
|
|
||||||
|
// 2. Only for MultiGet, number of keys from the batch found in the block
|
||||||
|
uint64_t num_keys_in_block = 0; |
||||||
|
|
||||||
|
// 3. Whether the key was found in the block or not (false positive)
|
||||||
|
bool referenced_key_exist_in_block = false; |
||||||
|
|
||||||
|
static const uint64_t kReservedGetId; |
||||||
|
|
||||||
|
BlockCacheTraceRecord() {} |
||||||
|
|
||||||
|
BlockCacheTraceRecord(uint64_t _access_timestamp, std::string _block_key, |
||||||
|
TraceType _block_type, uint64_t _block_size, |
||||||
|
uint64_t _cf_id, std::string _cf_name, uint32_t _level, |
||||||
|
uint64_t _sst_fd_number, TableReaderCaller _caller, |
||||||
|
bool _is_cache_hit, bool _no_insert, uint64_t _get_id, |
||||||
|
bool _get_from_user_specified_snapshot = false, |
||||||
|
std::string _referenced_key = "", |
||||||
|
uint64_t _referenced_data_size = 0, |
||||||
|
uint64_t _num_keys_in_block = 0, |
||||||
|
bool _referenced_key_exist_in_block = false) |
||||||
|
: access_timestamp(_access_timestamp), |
||||||
|
block_key(_block_key), |
||||||
|
block_type(_block_type), |
||||||
|
block_size(_block_size), |
||||||
|
cf_id(_cf_id), |
||||||
|
cf_name(_cf_name), |
||||||
|
level(_level), |
||||||
|
sst_fd_number(_sst_fd_number), |
||||||
|
caller(_caller), |
||||||
|
is_cache_hit(_is_cache_hit), |
||||||
|
no_insert(_no_insert), |
||||||
|
get_id(_get_id), |
||||||
|
get_from_user_specified_snapshot(_get_from_user_specified_snapshot), |
||||||
|
referenced_key(_referenced_key), |
||||||
|
referenced_data_size(_referenced_data_size), |
||||||
|
num_keys_in_block(_num_keys_in_block), |
||||||
|
referenced_key_exist_in_block(_referenced_key_exist_in_block) {} |
||||||
|
}; |
||||||
|
|
||||||
|
// Options for tracing block cache accesses
|
||||||
|
struct BlockCacheTraceOptions { |
||||||
|
// Specify trace sampling option, i.e. capture one per how many requests.
|
||||||
|
// Default to 1 (capture every request).
|
||||||
|
uint64_t sampling_frequency = 1; |
||||||
|
}; |
||||||
|
|
||||||
|
// Options for the built-in implementation of BlockCacheTraceWriter
|
||||||
|
struct BlockCacheTraceWriterOptions { |
||||||
|
uint64_t max_trace_file_size = uint64_t{64} * 1024 * 1024 * 1024; |
||||||
|
}; |
||||||
|
|
||||||
|
// BlockCacheTraceWriter is an abstract class that captures all RocksDB block
|
||||||
|
// cache accesses. Every RocksDB operation is passed to WriteBlockAccess()
|
||||||
|
// with a BlockCacheTraceRecord.
|
||||||
|
class BlockCacheTraceWriter { |
||||||
|
public: |
||||||
|
virtual ~BlockCacheTraceWriter() {} |
||||||
|
|
||||||
|
// Pass Slice references to avoid copy.
|
||||||
|
virtual Status WriteBlockAccess(const BlockCacheTraceRecord& record, |
||||||
|
const Slice& block_key, const Slice& cf_name, |
||||||
|
const Slice& referenced_key) = 0; |
||||||
|
|
||||||
|
// Write a trace header at the beginning, typically on initiating a trace,
|
||||||
|
// with some metadata like a magic number and RocksDB version.
|
||||||
|
virtual Status WriteHeader() = 0; |
||||||
|
}; |
||||||
|
|
||||||
|
// Allocate an instance of the built-in BlockCacheTraceWriter implementation,
|
||||||
|
// that traces all block cache accesses to a user-provided TraceWriter. Each
|
||||||
|
// access is traced to a file with a timestamp and type, followed by the
|
||||||
|
// payload.
|
||||||
|
std::unique_ptr<BlockCacheTraceWriter> NewBlockCacheTraceWriter( |
||||||
|
SystemClock* clock, const BlockCacheTraceWriterOptions& trace_options, |
||||||
|
std::unique_ptr<TraceWriter>&& trace_writer); |
||||||
|
|
||||||
|
} // namespace ROCKSDB_NAMESPACE
|
Loading…
Reference in new issue