Block cache tracing: Associate a unique id with Get and MultiGet (#5514)

Summary:
This PR associates a unique id with Get and MultiGet. This enables us to track how many blocks a Get/MultiGet request accesses. We can also measure the impact of row cache vs block cache.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5514

Test Plan: make clean && COMPILE_WITH_ASAN=1 make check -j32

Differential Revision: D16032681

Pulled By: HaoyuHuang

fbshipit-source-id: 775b05f4440badd58de6667e3ec9f4fc87a0af4c
main
haoyuhuang 6 years ago committed by Facebook Github Bot
parent 84c5c9aab1
commit 6edc5d0719
  1. 16
      db/version_set.cc
  2. 63
      table/block_based/block_based_table_reader.cc
  3. 19
      table/get_context.cc
  4. 8
      table/get_context.h
  5. 46
      table/table_test.cc
  6. 29
      trace_replay/block_cache_tracer.cc
  7. 38
      trace_replay/block_cache_tracer.h
  8. 38
      trace_replay/block_cache_tracer_test.cc

@ -1663,11 +1663,17 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
} }
PinnedIteratorsManager pinned_iters_mgr; PinnedIteratorsManager pinned_iters_mgr;
uint64_t tracing_get_id = BlockCacheTraceHelper::kReservedGetId;
if (vset_ && vset_->block_cache_tracer_ &&
vset_->block_cache_tracer_->is_tracing_enabled()) {
tracing_get_id = vset_->block_cache_tracer_->NextGetId();
}
GetContext get_context( GetContext get_context(
user_comparator(), merge_operator_, info_log_, db_statistics_, user_comparator(), merge_operator_, info_log_, db_statistics_,
status->ok() ? GetContext::kNotFound : GetContext::kMerge, user_key, status->ok() ? GetContext::kNotFound : GetContext::kMerge, user_key,
value, value_found, merge_context, max_covering_tombstone_seq, this->env_, value, value_found, merge_context, max_covering_tombstone_seq, this->env_,
seq, merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob); seq, merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob,
tracing_get_id);
// Pin blocks that we read to hold merge operands // Pin blocks that we read to hold merge operands
if (merge_operator_) { if (merge_operator_) {
@ -1785,7 +1791,12 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
if (merge_operator_) { if (merge_operator_) {
pinned_iters_mgr.StartPinning(); pinned_iters_mgr.StartPinning();
} }
uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId;
if (vset_ && vset_->block_cache_tracer_ &&
vset_->block_cache_tracer_->is_tracing_enabled()) {
tracing_mget_id = vset_->block_cache_tracer_->NextGetId();
}
// Even though we know the batch size won't be > MAX_BATCH_SIZE, // Even though we know the batch size won't be > MAX_BATCH_SIZE,
// use autovector in order to avoid unnecessary construction of GetContext // use autovector in order to avoid unnecessary construction of GetContext
// objects, which is expensive // objects, which is expensive
@ -1797,7 +1808,8 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
iter->s->ok() ? GetContext::kNotFound : GetContext::kMerge, iter->ukey, iter->s->ok() ? GetContext::kNotFound : GetContext::kMerge, iter->ukey,
iter->value, nullptr, &(iter->merge_context), iter->value, nullptr, &(iter->merge_context),
&iter->max_covering_tombstone_seq, this->env_, &iter->seq, &iter->max_covering_tombstone_seq, this->env_, &iter->seq,
merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob); merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob,
tracing_mget_id);
} }
int get_ctx_index = 0; int get_ctx_index = 0;
for (auto iter = range->begin(); iter != range->end(); for (auto iter = range->begin(); iter != range->end();

@ -1983,7 +1983,7 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
/*block_size=*/usage, rep_->cf_id_for_tracing(), /*block_size=*/usage, rep_->cf_id_for_tracing(),
/*cf_name=*/"", rep_->level_for_tracing(), /*cf_name=*/"", rep_->level_for_tracing(),
rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit, rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
/*no_insert=*/no_io); /*no_insert=*/no_io, lookup_context->get_id);
block_cache_tracer_->WriteBlockAccess(access_record, key, block_cache_tracer_->WriteBlockAccess(access_record, key,
rep_->cf_name_for_tracing(), rep_->cf_name_for_tracing(),
/*referenced_key=*/nullptr); /*referenced_key=*/nullptr);
@ -2065,7 +2065,7 @@ CachableEntry<UncompressionDict> BlockBasedTable::GetUncompressionDict(
/*block_size=*/usage, rep_->cf_id_for_tracing(), /*block_size=*/usage, rep_->cf_id_for_tracing(),
/*cf_name=*/"", rep_->level_for_tracing(), /*cf_name=*/"", rep_->level_for_tracing(),
rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit, rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
/*no_insert=*/no_io); /*no_insert=*/no_io, lookup_context->get_id);
block_cache_tracer_->WriteBlockAccess(access_record, cache_key, block_cache_tracer_->WriteBlockAccess(access_record, cache_key,
rep_->cf_name_for_tracing(), rep_->cf_name_for_tracing(),
/*referenced_key=*/nullptr); /*referenced_key=*/nullptr);
@ -2426,7 +2426,7 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
/*block_size=*/usage, rep_->cf_id_for_tracing(), /*block_size=*/usage, rep_->cf_id_for_tracing(),
/*cf_name=*/"", rep_->level_for_tracing(), /*cf_name=*/"", rep_->level_for_tracing(),
rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit, rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
no_insert); no_insert, lookup_context->get_id);
block_cache_tracer_->WriteBlockAccess(access_record, key, block_cache_tracer_->WriteBlockAccess(access_record, key,
rep_->cf_name_for_tracing(), rep_->cf_name_for_tracing(),
/*referenced_key=*/nullptr); /*referenced_key=*/nullptr);
@ -3340,7 +3340,10 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
CachableEntry<FilterBlockReader> filter_entry; CachableEntry<FilterBlockReader> filter_entry;
bool may_match; bool may_match;
FilterBlockReader* filter = nullptr; FilterBlockReader* filter = nullptr;
BlockCacheLookupContext lookup_context{TableReaderCaller::kUserGet}; uint64_t tracing_get_id = get_context ? get_context->tracing_get_id()
: BlockCacheTraceHelper::kReservedGetId;
BlockCacheLookupContext lookup_context{TableReaderCaller::kUserGet,
tracing_get_id};
{ {
if (!skip_filters) { if (!skip_filters) {
filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr, filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr,
@ -3406,7 +3409,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
} }
BlockCacheLookupContext lookup_data_block_context{ BlockCacheLookupContext lookup_data_block_context{
TableReaderCaller::kUserGet}; TableReaderCaller::kUserGet, tracing_get_id};
bool does_referenced_key_exist = false; bool does_referenced_key_exist = false;
DataBlockIter biter; DataBlockIter biter;
uint64_t referenced_data_size = 0; uint64_t referenced_data_size = 0;
@ -3447,8 +3450,10 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
if (!get_context->SaveValue( if (!get_context->SaveValue(
parsed_key, biter.value(), &matched, parsed_key, biter.value(), &matched,
biter.IsValuePinned() ? &biter : nullptr)) { biter.IsValuePinned() ? &biter : nullptr)) {
does_referenced_key_exist = true; if (get_context->State() == GetContext::GetState::kFound) {
referenced_data_size = biter.key().size() + biter.value().size(); does_referenced_key_exist = true;
referenced_data_size = biter.key().size() + biter.value().size();
}
done = true; done = true;
break; break;
} }
@ -3459,6 +3464,12 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) { if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
// Avoid making copy of block_key, cf_name, and referenced_key when // Avoid making copy of block_key, cf_name, and referenced_key when
// constructing the access record. // constructing the access record.
Slice referenced_key;
if (does_referenced_key_exist) {
referenced_key = biter.key();
} else {
referenced_key = ExtractUserKey(key);
}
BlockCacheTraceRecord access_record( BlockCacheTraceRecord access_record(
rep_->ioptions.env->NowMicros(), rep_->ioptions.env->NowMicros(),
/*block_key=*/"", lookup_data_block_context.block_type, /*block_key=*/"", lookup_data_block_context.block_type,
@ -3467,12 +3478,13 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
rep_->sst_number_for_tracing(), lookup_data_block_context.caller, rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
lookup_data_block_context.is_cache_hit, lookup_data_block_context.is_cache_hit,
lookup_data_block_context.no_insert, lookup_data_block_context.no_insert,
lookup_data_block_context.get_id,
/*referenced_key=*/"", referenced_data_size, /*referenced_key=*/"", referenced_data_size,
lookup_data_block_context.num_keys_in_block, lookup_data_block_context.num_keys_in_block,
does_referenced_key_exist); does_referenced_key_exist);
block_cache_tracer_->WriteBlockAccess( block_cache_tracer_->WriteBlockAccess(
access_record, lookup_data_block_context.block_key, access_record, lookup_data_block_context.block_key,
rep_->cf_name_for_tracing(), key); rep_->cf_name_for_tracing(), referenced_key);
} }
if (done) { if (done) {
@ -3498,14 +3510,19 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
const MultiGetRange* mget_range, const MultiGetRange* mget_range,
const SliceTransform* prefix_extractor, const SliceTransform* prefix_extractor,
bool skip_filters) { bool skip_filters) {
BlockCacheLookupContext lookup_context{TableReaderCaller::kUserMultiGet};
const bool no_io = read_options.read_tier == kBlockCacheTier; const bool no_io = read_options.read_tier == kBlockCacheTier;
CachableEntry<FilterBlockReader> filter_entry; CachableEntry<FilterBlockReader> filter_entry;
FilterBlockReader* filter = nullptr; FilterBlockReader* filter = nullptr;
MultiGetRange sst_file_range(*mget_range, mget_range->begin(), MultiGetRange sst_file_range(*mget_range, mget_range->begin(),
mget_range->end()); mget_range->end());
{ uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId;
if (!skip_filters) { if (!sst_file_range.empty() && sst_file_range.begin()->get_context) {
tracing_mget_id = sst_file_range.begin()->get_context->tracing_get_id();
}
BlockCacheLookupContext lookup_context{TableReaderCaller::kUserMultiGet,
tracing_mget_id};
if (!skip_filters) {
{
// TODO: Figure out where the stats should go // TODO: Figure out where the stats should go
filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr, filter_entry = GetFilter(prefix_extractor, /*prefetch_buffer=*/nullptr,
read_options.read_tier == kBlockCacheTier, read_options.read_tier == kBlockCacheTier,
@ -3644,7 +3661,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
uint64_t referenced_data_size = 0; uint64_t referenced_data_size = 0;
bool does_referenced_key_exist = false; bool does_referenced_key_exist = false;
BlockCacheLookupContext lookup_data_block_context( BlockCacheLookupContext lookup_data_block_context(
TableReaderCaller::kUserMultiGet); TableReaderCaller::kUserMultiGet, tracing_mget_id);
if (first_block) { if (first_block) {
if (!block_handles[idx_in_batch].IsNull() || if (!block_handles[idx_in_batch].IsNull() ||
!results[idx_in_batch].IsEmpty()) { !results[idx_in_batch].IsEmpty()) {
@ -3703,7 +3720,6 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
ParsedInternalKey parsed_key; ParsedInternalKey parsed_key;
Cleanable dummy; Cleanable dummy;
Cleanable* value_pinner = nullptr; Cleanable* value_pinner = nullptr;
if (!ParseInternalKey(biter->key(), &parsed_key)) { if (!ParseInternalKey(biter->key(), &parsed_key)) {
s = Status::Corruption(Slice()); s = Status::Corruption(Slice());
} }
@ -3719,11 +3735,13 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
value_pinner = biter; value_pinner = biter;
} }
} }
if (!get_context->SaveValue(parsed_key, biter->value(), &matched,
if (!get_context->SaveValue( value_pinner)) {
parsed_key, biter->value(), &matched, value_pinner)) { if (get_context->State() == GetContext::GetState::kFound) {
does_referenced_key_exist = true; does_referenced_key_exist = true;
referenced_data_size = biter->key().size() + biter->value().size(); referenced_data_size =
biter->key().size() + biter->value().size();
}
done = true; done = true;
break; break;
} }
@ -3733,6 +3751,12 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) { if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
// Avoid making copy of block_key, cf_name, and referenced_key when // Avoid making copy of block_key, cf_name, and referenced_key when
// constructing the access record. // constructing the access record.
Slice referenced_key;
if (does_referenced_key_exist) {
referenced_key = biter->key();
} else {
referenced_key = ExtractUserKey(key);
}
BlockCacheTraceRecord access_record( BlockCacheTraceRecord access_record(
rep_->ioptions.env->NowMicros(), rep_->ioptions.env->NowMicros(),
/*block_key=*/"", lookup_data_block_context.block_type, /*block_key=*/"", lookup_data_block_context.block_type,
@ -3741,12 +3765,13 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
rep_->sst_number_for_tracing(), lookup_data_block_context.caller, rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
lookup_data_block_context.is_cache_hit, lookup_data_block_context.is_cache_hit,
lookup_data_block_context.no_insert, lookup_data_block_context.no_insert,
lookup_data_block_context.get_id,
/*referenced_key=*/"", referenced_data_size, /*referenced_key=*/"", referenced_data_size,
lookup_data_block_context.num_keys_in_block, lookup_data_block_context.num_keys_in_block,
does_referenced_key_exist); does_referenced_key_exist);
block_cache_tracer_->WriteBlockAccess( block_cache_tracer_->WriteBlockAccess(
access_record, lookup_data_block_context.block_key, access_record, lookup_data_block_context.block_key,
rep_->cf_name_for_tracing(), key); rep_->cf_name_for_tracing(), referenced_key);
} }
s = biter->status(); s = biter->status();
if (done) { if (done) {

@ -38,15 +38,13 @@ void appendToReplayLog(std::string* replay_log, ValueType type, Slice value) {
} // namespace } // namespace
GetContext::GetContext(const Comparator* ucmp, GetContext::GetContext(
const MergeOperator* merge_operator, Logger* logger, const Comparator* ucmp, const MergeOperator* merge_operator, Logger* logger,
Statistics* statistics, GetState init_state, Statistics* statistics, GetState init_state, const Slice& user_key,
const Slice& user_key, PinnableSlice* pinnable_val, PinnableSlice* pinnable_val, bool* value_found, MergeContext* merge_context,
bool* value_found, MergeContext* merge_context, SequenceNumber* _max_covering_tombstone_seq, Env* env, SequenceNumber* seq,
SequenceNumber* _max_covering_tombstone_seq, Env* env, PinnedIteratorsManager* _pinned_iters_mgr, ReadCallback* callback,
SequenceNumber* seq, bool* is_blob_index, uint64_t tracing_get_id)
PinnedIteratorsManager* _pinned_iters_mgr,
ReadCallback* callback, bool* is_blob_index)
: ucmp_(ucmp), : ucmp_(ucmp),
merge_operator_(merge_operator), merge_operator_(merge_operator),
logger_(logger), logger_(logger),
@ -62,7 +60,8 @@ GetContext::GetContext(const Comparator* ucmp,
replay_log_(nullptr), replay_log_(nullptr),
pinned_iters_mgr_(_pinned_iters_mgr), pinned_iters_mgr_(_pinned_iters_mgr),
callback_(callback), callback_(callback),
is_blob_index_(is_blob_index) { is_blob_index_(is_blob_index),
tracing_get_id_(tracing_get_id) {
if (seq_) { if (seq_) {
*seq_ = kMaxSequenceNumber; *seq_ = kMaxSequenceNumber;
} }

@ -85,7 +85,8 @@ class GetContext {
SequenceNumber* max_covering_tombstone_seq, Env* env, SequenceNumber* max_covering_tombstone_seq, Env* env,
SequenceNumber* seq = nullptr, SequenceNumber* seq = nullptr,
PinnedIteratorsManager* _pinned_iters_mgr = nullptr, PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
ReadCallback* callback = nullptr, bool* is_blob_index = nullptr); ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
uint64_t tracing_get_id = 0);
GetContext() = default; GetContext() = default;
@ -135,6 +136,8 @@ class GetContext {
void ReportCounters(); void ReportCounters();
uint64_t tracing_get_id() const { return tracing_get_id_; }
private: private:
const Comparator* ucmp_; const Comparator* ucmp_;
const MergeOperator* merge_operator_; const MergeOperator* merge_operator_;
@ -158,6 +161,9 @@ class GetContext {
ReadCallback* callback_; ReadCallback* callback_;
bool sample_; bool sample_;
bool* is_blob_index_; bool* is_blob_index_;
// Used for block cache tracing only. A tracing get id uniquely identifies a
// Get or a MultiGet.
const uint64_t tracing_get_id_;
}; };
// Call this to replay a log and bring the get_context up to date. The replay // Call this to replay a log and bring the get_context up to date. The replay

@ -2563,23 +2563,25 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) {
GetPlainInternalComparator(options.comparator), &keys, &kvmap); GetPlainInternalComparator(options.comparator), &keys, &kvmap);
auto reader = c.GetTableReader(); auto reader = c.GetTableReader();
PinnableSlice value; PinnableSlice value;
GetContext get_context(options.comparator, nullptr, nullptr, nullptr, {
GetContext::kNotFound, user_key, &value, nullptr, GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr); GetContext::kNotFound, user_key, &value, nullptr,
get_perf_context()->Reset(); nullptr, nullptr, nullptr);
ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context, get_perf_context()->Reset();
moptions.prefix_extractor.get())); ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
if (index_and_filter_in_cache) { moptions.prefix_extractor.get()));
// data, index and filter block if (index_and_filter_in_cache) {
ASSERT_EQ(get_perf_context()->block_read_count, 3); // data, index and filter block
ASSERT_EQ(get_perf_context()->index_block_read_count, 1); ASSERT_EQ(get_perf_context()->block_read_count, 3);
ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
} else { ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
// just the data block } else {
ASSERT_EQ(get_perf_context()->block_read_count, 1); // just the data block
ASSERT_EQ(get_perf_context()->block_read_count, 1);
}
ASSERT_EQ(get_context.State(), GetContext::kFound);
ASSERT_STREQ(value.data(), "hello");
} }
ASSERT_EQ(get_context.State(), GetContext::kFound);
ASSERT_STREQ(value.data(), "hello");
// Get non-existing key // Get non-existing key
user_key = "does-not-exist"; user_key = "does-not-exist";
@ -2587,13 +2589,15 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) {
encoded_key = internal_key.Encode().ToString(); encoded_key = internal_key.Encode().ToString();
value.Reset(); value.Reset();
get_context = GetContext(options.comparator, nullptr, nullptr, nullptr, {
GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
GetContext::kNotFound, user_key, &value, nullptr, GetContext::kNotFound, user_key, &value, nullptr,
nullptr, nullptr, nullptr); nullptr, nullptr, nullptr);
get_perf_context()->Reset(); get_perf_context()->Reset();
ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context, ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
moptions.prefix_extractor.get())); moptions.prefix_extractor.get()));
ASSERT_EQ(get_context.State(), GetContext::kNotFound); ASSERT_EQ(get_context.State(), GetContext::kNotFound);
}
if (index_and_filter_in_cache) { if (index_and_filter_in_cache) {
if (bloom_filter_type == 0) { if (bloom_filter_type == 0) {

@ -31,6 +31,7 @@ bool ShouldTrace(const Slice& block_key, const TraceOptions& trace_options) {
const uint64_t kMicrosInSecond = 1000 * 1000; const uint64_t kMicrosInSecond = 1000 * 1000;
const std::string BlockCacheTraceHelper::kUnknownColumnFamilyName = const std::string BlockCacheTraceHelper::kUnknownColumnFamilyName =
"UnknownColumnFamily"; "UnknownColumnFamily";
const uint64_t BlockCacheTraceHelper::kReservedGetId = 0;
bool BlockCacheTraceHelper::ShouldTraceReferencedKey(TraceType block_type, bool BlockCacheTraceHelper::ShouldTraceReferencedKey(TraceType block_type,
TableReaderCaller caller) { TableReaderCaller caller) {
@ -39,6 +40,11 @@ bool BlockCacheTraceHelper::ShouldTraceReferencedKey(TraceType block_type,
caller == TableReaderCaller::kUserMultiGet); caller == TableReaderCaller::kUserMultiGet);
} }
// Reports whether a get id accompanies block accesses made by `caller`.
// Only user-issued Get and MultiGet lookups qualify; the trace writer and
// reader consult this predicate to decide whether the get_id field is
// present in an encoded access record.
bool BlockCacheTraceHelper::ShouldTraceGetId(TableReaderCaller caller) {
  if (caller == TableReaderCaller::kUserGet) {
    return true;
  }
  return caller == TableReaderCaller::kUserMultiGet;
}
BlockCacheTraceWriter::BlockCacheTraceWriter( BlockCacheTraceWriter::BlockCacheTraceWriter(
Env* env, const TraceOptions& trace_options, Env* env, const TraceOptions& trace_options,
std::unique_ptr<TraceWriter>&& trace_writer) std::unique_ptr<TraceWriter>&& trace_writer)
@ -65,6 +71,9 @@ Status BlockCacheTraceWriter::WriteBlockAccess(
trace.payload.push_back(record.caller); trace.payload.push_back(record.caller);
trace.payload.push_back(record.is_cache_hit); trace.payload.push_back(record.is_cache_hit);
trace.payload.push_back(record.no_insert); trace.payload.push_back(record.no_insert);
if (BlockCacheTraceHelper::ShouldTraceGetId(record.caller)) {
PutFixed64(&trace.payload, record.get_id);
}
if (BlockCacheTraceHelper::ShouldTraceReferencedKey(record.block_type, if (BlockCacheTraceHelper::ShouldTraceReferencedKey(record.block_type,
record.caller)) { record.caller)) {
PutLengthPrefixedSlice(&trace.payload, referenced_key); PutLengthPrefixedSlice(&trace.payload, referenced_key);
@ -197,7 +206,12 @@ Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
} }
record->no_insert = static_cast<Boolean>(enc_slice[0]); record->no_insert = static_cast<Boolean>(enc_slice[0]);
enc_slice.remove_prefix(kCharSize); enc_slice.remove_prefix(kCharSize);
if (BlockCacheTraceHelper::ShouldTraceGetId(record->caller)) {
if (!GetFixed64(&enc_slice, &record->get_id)) {
return Status::Incomplete(
"Incomplete access record: Failed to read the get id.");
}
}
if (BlockCacheTraceHelper::ShouldTraceReferencedKey(record->block_type, if (BlockCacheTraceHelper::ShouldTraceReferencedKey(record->block_type,
record->caller)) { record->caller)) {
Slice referenced_key; Slice referenced_key;
@ -236,6 +250,7 @@ Status BlockCacheTracer::StartTrace(
if (writer_.load()) { if (writer_.load()) {
return Status::Busy(); return Status::Busy();
} }
get_id_counter_.store(1);
trace_options_ = trace_options; trace_options_ = trace_options;
writer_.store( writer_.store(
new BlockCacheTraceWriter(env, trace_options, std::move(trace_writer))); new BlockCacheTraceWriter(env, trace_options, std::move(trace_writer)));
@ -266,4 +281,16 @@ Status BlockCacheTracer::WriteBlockAccess(const BlockCacheTraceRecord& record,
referenced_key); referenced_key);
} }
// Hands out the id to attach to the next Get/MultiGet request.
//
// While no trace writer is installed, every call yields
// BlockCacheTraceHelper::kReservedGetId. Otherwise the counter is
// post-incremented and its previous value is returned. Should that previous
// value happen to be the reserved id (the counter is unsigned, so it can wrap
// around to it), the counter is advanced once more and that second value is
// returned instead.
uint64_t BlockCacheTracer::NextGetId() {
  const bool has_writer = writer_.load(std::memory_order_relaxed) != nullptr;
  if (!has_writer) {
    return BlockCacheTraceHelper::kReservedGetId;
  }
  const uint64_t candidate_id = get_id_counter_.fetch_add(1);
  if (candidate_id != BlockCacheTraceHelper::kReservedGetId) {
    return candidate_id;
  }
  // The reserved id must never be handed out: take the following value.
  return get_id_counter_.fetch_add(1);
}
} // namespace rocksdb } // namespace rocksdb

@ -18,6 +18,16 @@ namespace rocksdb {
extern const uint64_t kMicrosInSecond; extern const uint64_t kMicrosInSecond;
// Static predicates and constants shared by the block cache trace writer,
// reader and tracer. The class holds no per-instance state.
class BlockCacheTraceHelper {
public:
// Decides whether the referenced-key fields of an access record are traced
// for the given block type and caller.
// NOTE(review): presumably true only for data blocks accessed by user
// Get/MultiGet -- confirm against the definition.
static bool ShouldTraceReferencedKey(TraceType block_type,
TableReaderCaller caller);
// Returns true when `caller` is kUserGet or kUserMultiGet, i.e. when the
// get_id field is written to / read from an access record.
static bool ShouldTraceGetId(TableReaderCaller caller);
// Column family name constant, defined as "UnknownColumnFamily".
static const std::string kUnknownColumnFamilyName;
// Get id that is never assigned to a real request (defined as 0). It is the
// default get_id of a trace record and the value NextGetId() returns while
// no trace writer is installed.
static const uint64_t kReservedGetId;
};
// Lookup context for tracing block cache accesses. // Lookup context for tracing block cache accesses.
// We trace block accesses at five places: // We trace block accesses at five places:
// 1. BlockBasedTable::GetFilter // 1. BlockBasedTable::GetFilter
@ -38,8 +48,10 @@ extern const uint64_t kMicrosInSecond;
// 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or // 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or
// kUserApproximateSize). // kUserApproximateSize).
struct BlockCacheLookupContext { struct BlockCacheLookupContext {
BlockCacheLookupContext(const TableReaderCaller& _caller) : caller(_caller) {} BlockCacheLookupContext(const TableReaderCaller& _caller) : caller(_caller) {}
const TableReaderCaller caller; BlockCacheLookupContext(const TableReaderCaller& _caller, uint64_t _get_id)
: caller(_caller), get_id(_get_id) {}
const TableReaderCaller caller;
// These are populated when we perform lookup/insert on block cache. The block // These are populated when we perform lookup/insert on block cache. The block
// cache tracer uses this information when logging the block access at // cache tracer uses this information when logging the block access at
// BlockBasedTable::GET and BlockBasedTable::MultiGet. // BlockBasedTable::GET and BlockBasedTable::MultiGet.
@ -49,6 +61,10 @@ const TableReaderCaller caller;
uint64_t block_size = 0; uint64_t block_size = 0;
std::string block_key; std::string block_key;
uint64_t num_keys_in_block = 0; uint64_t num_keys_in_block = 0;
// The unique id associated with Get and MultiGet. This enables us to track
// how many blocks a Get/MultiGet request accesses. We can also measure the
// impact of row cache vs block cache.
uint64_t get_id = 0;
void FillLookupContext(bool _is_cache_hit, bool _no_insert, void FillLookupContext(bool _is_cache_hit, bool _no_insert,
TraceType _block_type, uint64_t _block_size, TraceType _block_type, uint64_t _block_size,
@ -78,7 +94,8 @@ struct BlockCacheTraceRecord {
TableReaderCaller caller = TableReaderCaller::kMaxBlockCacheLookupCaller; TableReaderCaller caller = TableReaderCaller::kMaxBlockCacheLookupCaller;
Boolean is_cache_hit = Boolean::kFalse; Boolean is_cache_hit = Boolean::kFalse;
Boolean no_insert = Boolean::kFalse; Boolean no_insert = Boolean::kFalse;
// Required field for Get and MultiGet
uint64_t get_id = BlockCacheTraceHelper::kReservedGetId;
// Required fields for data block and user Get/Multi-Get only. // Required fields for data block and user Get/Multi-Get only.
std::string referenced_key; std::string referenced_key;
uint64_t referenced_data_size = 0; uint64_t referenced_data_size = 0;
@ -91,7 +108,7 @@ struct BlockCacheTraceRecord {
TraceType _block_type, uint64_t _block_size, TraceType _block_type, uint64_t _block_size,
uint64_t _cf_id, std::string _cf_name, uint32_t _level, uint64_t _cf_id, std::string _cf_name, uint32_t _level,
uint64_t _sst_fd_number, TableReaderCaller _caller, uint64_t _sst_fd_number, TableReaderCaller _caller,
bool _is_cache_hit, bool _no_insert, bool _is_cache_hit, bool _no_insert, uint64_t _get_id,
std::string _referenced_key = "", std::string _referenced_key = "",
uint64_t _referenced_data_size = 0, uint64_t _referenced_data_size = 0,
uint64_t _num_keys_in_block = 0, uint64_t _num_keys_in_block = 0,
@ -107,6 +124,7 @@ struct BlockCacheTraceRecord {
caller(_caller), caller(_caller),
is_cache_hit(_is_cache_hit ? Boolean::kTrue : Boolean::kFalse), is_cache_hit(_is_cache_hit ? Boolean::kTrue : Boolean::kFalse),
no_insert(_no_insert ? Boolean::kTrue : Boolean::kFalse), no_insert(_no_insert ? Boolean::kTrue : Boolean::kFalse),
get_id(_get_id),
referenced_key(_referenced_key), referenced_key(_referenced_key),
referenced_data_size(_referenced_data_size), referenced_data_size(_referenced_data_size),
num_keys_in_block(_num_keys_in_block), num_keys_in_block(_num_keys_in_block),
@ -121,14 +139,6 @@ struct BlockCacheTraceHeader {
uint32_t rocksdb_minor_version; uint32_t rocksdb_minor_version;
}; };
class BlockCacheTraceHelper {
public:
static bool ShouldTraceReferencedKey(TraceType block_type,
TableReaderCaller caller);
static const std::string kUnknownColumnFamilyName;
};
// BlockCacheTraceWriter captures all RocksDB block cache accesses using a // BlockCacheTraceWriter captures all RocksDB block cache accesses using a
// user-provided TraceWriter. Every RocksDB operation is written as a single // user-provided TraceWriter. Every RocksDB operation is written as a single
// trace. Each trace will have a timestamp and type, followed by the trace // trace. Each trace will have a timestamp and type, followed by the trace
@ -207,11 +217,15 @@ class BlockCacheTracer {
const Slice& block_key, const Slice& cf_name, const Slice& block_key, const Slice& cf_name,
const Slice& referenced_key); const Slice& referenced_key);
// GetId cycles from 1 to port::kMaxUint64.
uint64_t NextGetId();
private: private:
TraceOptions trace_options_; TraceOptions trace_options_;
// A mutex protects the writer_. // A mutex protects the writer_.
InstrumentedMutex trace_writer_mutex_; InstrumentedMutex trace_writer_mutex_;
std::atomic<BlockCacheTraceWriter*> writer_; std::atomic<BlockCacheTraceWriter*> writer_;
std::atomic<uint64_t> get_id_counter_;
}; };
} // namespace rocksdb } // namespace rocksdb

@ -71,6 +71,9 @@ class BlockCacheTracerTest : public testing::Test {
record.sst_fd_number = kSSTFDNumber + key_id; record.sst_fd_number = kSSTFDNumber + key_id;
record.is_cache_hit = Boolean::kFalse; record.is_cache_hit = Boolean::kFalse;
record.no_insert = Boolean::kFalse; record.no_insert = Boolean::kFalse;
// Provide get_id for all callers. The writer should only write get_id
// when the caller is either GET or MGET.
record.get_id = key_id + 1;
// Provide these fields for all block types. // Provide these fields for all block types.
// The writer should only write these fields for data blocks and the // The writer should only write these fields for data blocks and the
// caller is either GET or MGET. // caller is either GET or MGET.
@ -120,6 +123,12 @@ class BlockCacheTracerTest : public testing::Test {
ASSERT_EQ(kSSTFDNumber + key_id, record.sst_fd_number); ASSERT_EQ(kSSTFDNumber + key_id, record.sst_fd_number);
ASSERT_EQ(Boolean::kFalse, record.is_cache_hit); ASSERT_EQ(Boolean::kFalse, record.is_cache_hit);
ASSERT_EQ(Boolean::kFalse, record.no_insert); ASSERT_EQ(Boolean::kFalse, record.no_insert);
if (record.caller == TableReaderCaller::kUserGet ||
record.caller == TableReaderCaller::kUserMultiGet) {
ASSERT_EQ(key_id + 1, record.get_id);
} else {
ASSERT_EQ(BlockCacheTraceHelper::kReservedGetId, record.get_id);
}
if (block_type == TraceType::kBlockTraceDataBlock && if (block_type == TraceType::kBlockTraceDataBlock &&
(record.caller == TableReaderCaller::kUserGet || (record.caller == TableReaderCaller::kUserGet ||
record.caller == TableReaderCaller::kUserMultiGet)) { record.caller == TableReaderCaller::kUserMultiGet)) {
@ -239,6 +248,35 @@ TEST_F(BlockCacheTracerTest, AtomicNoWriteAfterEndTrace) {
} }
} }
// Verifies the id sequence produced by BlockCacheTracer::NextGetId():
// the reserved id (0) while no trace is running, consecutive ids starting at
// 1 once StartTrace() succeeds, and a restart from 1 for a second trace on
// the same tracer object.
TEST_F(BlockCacheTracerTest, NextGetId) {
BlockCacheTracer writer;
{
TraceOptions trace_opt;
std::unique_ptr<TraceWriter> trace_writer;
ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
&trace_writer));
// Before StartTrace() is called, every call must return 0
// (BlockCacheTraceHelper::kReservedGetId).
ASSERT_EQ(0, writer.NextGetId());
ASSERT_EQ(0, writer.NextGetId());
ASSERT_OK(writer.StartTrace(env_, trace_opt, std::move(trace_writer)));
// StartTrace() sets the counter to 1, so ids are handed out as 1, 2, ...
ASSERT_EQ(1, writer.NextGetId());
ASSERT_EQ(2, writer.NextGetId());
writer.EndTrace();
// After EndTrace() the tracer is expected to return the reserved id again.
ASSERT_EQ(0, writer.NextGetId());
}
// Start a trace again on the same tracer: the sequence must restart at 1
// rather than continue from the previous trace.
{
TraceOptions trace_opt;
std::unique_ptr<TraceWriter> trace_writer;
ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_,
&trace_writer));
ASSERT_OK(writer.StartTrace(env_, trace_opt, std::move(trace_writer)));
ASSERT_EQ(1, writer.NextGetId());
}
}
TEST_F(BlockCacheTracerTest, MixedBlocks) { TEST_F(BlockCacheTracerTest, MixedBlocks) {
{ {
// Generate a trace file containing a mix of blocks. // Generate a trace file containing a mix of blocks.

Loading…
Cancel
Save