Add PerfContextByLevel to provide per level perf context information (#4226)

Summary:
Current implementation of perf context is level agnostic. Making it hard to do performance evaluation for the LSM tree. This PR adds `PerfContextByLevel` to decompose the counters by level.
This will be helpful when analyzing point and range query performance as well as tuning bloom filter
Also replaced __thread with thread_local keyword for perf_context
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4226

Differential Revision: D10369509

Pulled By: miasantreble

fbshipit-source-id: f1ced4e0de5fcebdb7f9cff36164516bc6382d82
main
Zhongyi Xie 6 years ago committed by Facebook Github Bot
parent 1e3845805d
commit d6ec288703
  1. 3
      HISTORY.md
  2. 23
      db/db_bloom_filter_test.cc
  3. 34
      db/perf_context_test.cc
  4. 26
      include/rocksdb/perf_context.h
  5. 54
      monitoring/perf_context.cc
  6. 18
      monitoring/perf_context_imp.h
  7. 8
      table/block_based_table_reader.cc
  8. 1
      tools/db_bench_tool.cc

@ -1,7 +1,8 @@
# Rocksdb Change Log
## Unreleased
### New Features
* Introduced CacheAllocator, which lets the user specify custom allocator for memory in block cache.
* Introduced `CacheAllocator`, which lets the user specify custom allocator for memory in block cache.
* Introduced `PerfContextByLevel` as part of `PerfContext` which allows storing perf context at each level. Also replaced `__thread` with `thread_local` keyword for perf_context.
### Bug Fixes
* Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set.

@ -147,6 +147,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) {
options.prefix_extractor =
std::make_shared<SliceTransformLimitedDomainGeneric>();
options.statistics = rocksdb::CreateDBStatistics();
get_perf_context()->EnablePerLevelPerfContext();
BlockBasedTableOptions bbto;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
if (partition_filters) {
@ -171,20 +172,29 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) {
ASSERT_EQ("foo", Get("barbarbar"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
// TODO (Zhongyi): uncomment the asserts involving level_to_perf_context when per
// level perf context is enabled in block based table reader
// ASSERT_EQ(0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
ASSERT_EQ("foo2", Get("barbarbar2"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
// ASSERT_EQ(0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
// ASSERT_EQ(0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
// ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
// ASSERT_EQ(2, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
ro.total_order_seek = true;
ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound());
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
// ASSERT_EQ(2, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
get_perf_context()->Reset();
}
}
@ -193,6 +203,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
Options options = last_options_;
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
options.statistics = rocksdb::CreateDBStatistics();
get_perf_context()->EnablePerLevelPerfContext();
BlockBasedTableOptions bbto;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
if (partition_filters) {
@ -231,6 +242,8 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
ro.total_order_seek = true;
ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound());
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
// ASSERT_EQ(2, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
get_perf_context()->Reset();
}
}
@ -239,6 +252,7 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
Options options = last_options_;
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
options.statistics = rocksdb::CreateDBStatistics();
get_perf_context()->EnablePerLevelPerfContext();
BlockBasedTableOptions bbto;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
@ -386,6 +400,8 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
ASSERT_EQ("bar", Get("barfoo"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
// ASSERT_EQ(12, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
get_perf_context()->Reset();
}
}
@ -490,6 +506,7 @@ TEST_F(DBBloomFilterTest, BloomFilterRate) {
while (ChangeFilterOptions()) {
Options options = CurrentOptions();
options.statistics = rocksdb::CreateDBStatistics();
get_perf_context()->EnablePerLevelPerfContext();
CreateAndReopenWithCF({"pikachu"}, options);
const int maxKey = 10000;
@ -511,6 +528,8 @@ TEST_F(DBBloomFilterTest, BloomFilterRate) {
ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333)));
}
ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98);
// ASSERT_GE((*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful, maxKey*0.98);
get_perf_context()->Reset();
}
}
@ -1032,6 +1051,7 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) {
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
options.optimize_filters_for_hits = true;
options.statistics = rocksdb::CreateDBStatistics();
get_perf_context()->EnablePerLevelPerfContext();
CreateAndReopenWithCF({"mypikachu"}, options);
int numkeys = 200000;
@ -1078,6 +1098,8 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) {
// no bloom filter. Most keys be checked bloom filters twice.
ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2);
ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2);
// ASSERT_GT((*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful, 65000*2);
// ASSERT_LT((*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful, 120000*2);
for (int i = 0; i < numkeys; i += 2) {
ASSERT_EQ(Get(1, Key(i)), "val");
@ -1187,6 +1209,7 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) {
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
ASSERT_EQ(2 /* index and data block */,
TestGetTickerCount(options, BLOCK_CACHE_ADD));
get_perf_context()->Reset();
}
int CountIter(std::unique_ptr<Iterator>& iter, const Slice& key) {

@ -10,6 +10,7 @@
#include "monitoring/histogram.h"
#include "monitoring/instrumented_mutex.h"
#include "monitoring/perf_context_imp.h"
#include "monitoring/thread_status_util.h"
#include "port/port.h"
#include "rocksdb/db.h"
@ -579,18 +580,18 @@ TEST_F(PerfContextTest, SeekKeyComparison) {
TEST_F(PerfContextTest, DBMutexLockCounter) {
int stats_code[] = {0, static_cast<int>(DB_MUTEX_WAIT_MICROS)};
for (PerfLevel perf_level :
for (PerfLevel perf_level_test :
{PerfLevel::kEnableTimeExceptForMutex, PerfLevel::kEnableTime}) {
for (int c = 0; c < 2; ++c) {
InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]);
mutex.Lock();
rocksdb::port::Thread child_thread([&] {
SetPerfLevel(perf_level);
SetPerfLevel(perf_level_test);
get_perf_context()->Reset();
ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0);
mutex.Lock();
mutex.Unlock();
if (perf_level == PerfLevel::kEnableTimeExceptForMutex ||
if (perf_level_test == PerfLevel::kEnableTimeExceptForMutex ||
stats_code[c] != DB_MUTEX_WAIT_MICROS) {
ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0);
} else {
@ -686,7 +687,34 @@ TEST_F(PerfContextTest, MergeOperatorTime) {
delete db;
}
TEST_F(PerfContextTest, PerfContextByLevelGetSet) {
get_perf_context()->Reset();
get_perf_context()->EnablePerLevelPerfContext();
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, 0);
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5);
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 7);
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 7);
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, 2);
ASSERT_EQ(
0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
ASSERT_EQ(
1, (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful);
ASSERT_EQ(
2, (*(get_perf_context()->level_to_perf_context))[7].bloom_filter_useful);
ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[0]
.bloom_filter_full_positive);
ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[2]
.bloom_filter_full_true_positive);
std::string zero_excluded = get_perf_context()->ToString(true);
ASSERT_NE(std::string::npos,
zero_excluded.find("bloom_filter_useful = 1@level5, 2@level7"));
ASSERT_NE(std::string::npos,
zero_excluded.find("bloom_filter_full_positive = 1@level0"));
ASSERT_NE(std::string::npos,
zero_excluded.find("bloom_filter_full_true_positive = 1@level2"));
}
} // namespace rocksdb
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);

@ -5,6 +5,7 @@
#pragma once
#include <map>
#include <stdint.h>
#include <string>
@ -16,12 +17,35 @@ namespace rocksdb {
// and transparently.
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
struct PerfContextByLevel {
// # of times bloom filter has avoided file reads, i.e., negatives.
uint64_t bloom_filter_useful = 0;
// # of times bloom FullFilter has not avoided the reads.
uint64_t bloom_filter_full_positive = 0;
// # of times bloom FullFilter has not avoided the reads and data actually
// exist.
uint64_t bloom_filter_full_true_positive = 0;
void Reset(); // reset all performance counters to zero
};
struct PerfContext {
~PerfContext();
void Reset(); // reset all performance counters to zero
std::string ToString(bool exclude_zero_counters = false) const;
// enable per level perf context and allocate storage for PerfContextByLevel
void EnablePerLevelPerfContext();
// temporarily disable per level perf contxt by setting the flag to false
void DisablePerLevelPerfContext();
// free the space for PerfContextByLevel, also disable per level perf context
void ClearPerLevelPerfContext();
uint64_t user_key_comparison_count; // total number of user key comparisons
uint64_t block_cache_hit_count; // total number of block cache hits
uint64_t block_read_count; // total number of block reads (with IO)
@ -168,6 +192,8 @@ struct PerfContext {
uint64_t env_lock_file_nanos;
uint64_t env_unlock_file_nanos;
uint64_t env_new_logger_nanos;
std::map<uint32_t, PerfContextByLevel>* level_to_perf_context;
bool per_level_perf_context_enabled;
};
// Get Thread-local PerfContext object pointer

@ -15,7 +15,7 @@ PerfContext perf_context;
#if defined(OS_SOLARIS)
__thread PerfContext perf_context_;
#else
__thread PerfContext perf_context;
thread_local PerfContext perf_context;
#endif
#endif
@ -31,6 +31,12 @@ PerfContext* get_perf_context() {
#endif
}
PerfContext::~PerfContext() {
#if !defined(NPERF_CONTEXT) && defined(ROCKSDB_SUPPORT_THREAD_LOCAL) && !defined(OS_SOLARIS)
ClearPerLevelPerfContext();
#endif
}
void PerfContext::Reset() {
#ifndef NPERF_CONTEXT
user_key_comparison_count = 0;
@ -104,6 +110,11 @@ void PerfContext::Reset() {
env_lock_file_nanos = 0;
env_unlock_file_nanos = 0;
env_new_logger_nanos = 0;
if (per_level_perf_context_enabled && level_to_perf_context) {
for (auto& kv : *level_to_perf_context) {
kv.second.Reset();
}
}
#endif
}
@ -112,6 +123,25 @@ void PerfContext::Reset() {
ss << #counter << " = " << counter << ", "; \
}
#define PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(counter) \
if (per_level_perf_context_enabled && \
level_to_perf_context) { \
ss << #counter << " = "; \
for (auto& kv : *level_to_perf_context) { \
if (!exclude_zero_counters || (kv.second.counter > 0)) { \
ss << kv.second.counter << "@level" << kv.first << ", "; \
} \
} \
}
void PerfContextByLevel::Reset() {
#ifndef NPERF_CONTEXT
bloom_filter_useful = 0;
bloom_filter_full_positive = 0;
bloom_filter_full_true_positive = 0;
#endif
}
std::string PerfContext::ToString(bool exclude_zero_counters) const {
#ifdef NPERF_CONTEXT
return "";
@ -186,8 +216,30 @@ std::string PerfContext::ToString(bool exclude_zero_counters) const {
PERF_CONTEXT_OUTPUT(env_lock_file_nanos);
PERF_CONTEXT_OUTPUT(env_unlock_file_nanos);
PERF_CONTEXT_OUTPUT(env_new_logger_nanos);
PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_useful);
PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_positive);
PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_true_positive);
return ss.str();
#endif
}
void PerfContext::EnablePerLevelPerfContext() {
if (!level_to_perf_context) {
level_to_perf_context = new std::map<uint32_t, PerfContextByLevel>();
}
per_level_perf_context_enabled = true;
}
void PerfContext::DisablePerLevelPerfContext(){
per_level_perf_context_enabled = false;
}
void PerfContext::ClearPerLevelPerfContext(){
if (level_to_perf_context) {
delete level_to_perf_context;
level_to_perf_context = nullptr;
}
per_level_perf_context_enabled = false;
}
}

@ -16,7 +16,7 @@ extern PerfContext perf_context;
extern __thread PerfContext perf_context_;
#define perf_context (*get_perf_context())
#else
extern __thread PerfContext perf_context;
extern thread_local PerfContext perf_context;
#endif
#endif
@ -59,6 +59,22 @@ extern __thread PerfContext perf_context;
perf_context.metric += value; \
}
// Increase metric value
#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) \
if (perf_level >= PerfLevel::kEnableCount && \
perf_context.per_level_perf_context_enabled && \
perf_context.level_to_perf_context) { \
if ((*(perf_context.level_to_perf_context)).find(level) != \
(*(perf_context.level_to_perf_context)).end()) { \
(*(perf_context.level_to_perf_context))[level].metric += value; \
} \
else { \
PerfContextByLevel empty_context; \
(*(perf_context.level_to_perf_context))[level] = empty_context; \
(*(perf_context.level_to_perf_context))[level].metric += value; \
} \
} \
#endif
}

@ -2315,6 +2315,8 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
}
if (may_match) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE);
// TODO(Zhongyi): use the correct level here
// PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, /*level*/);
}
return may_match;
}
@ -2339,6 +2341,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
if (!FullFilterKeyMayMatch(read_options, filter, key, no_io,
prefix_extractor)) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
// TODO(Zhongyi): use the correct level here
// PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, /*level*/);
} else {
IndexBlockIter iiter_on_stack;
// if prefix_extractor found in block differs from options, disable
@ -2371,6 +2375,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
// TODO: think about interaction with Merge. If a user key cannot
// cross one data block, we should be fine.
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
// TODO(Zhongyi): use the correct level here
// PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, /*level*/);
break;
} else {
DataBlockIter biter;
@ -2423,6 +2429,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
}
if (matched && filter != nullptr && !filter->IsBlockBased()) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE);
// TODO(Zhongyi): use the correct level here
// PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, /*level*/);
}
if (s.ok()) {
s = iiter->status();

@ -2849,6 +2849,7 @@ void VerifyDBFromDB(std::string& truth_db_name) {
}
SetPerfLevel(static_cast<PerfLevel> (shared->perf_level));
perf_context.EnablePerLevelPerfContext();
thread->stats.Start(thread->tid);
(arg->bm->*(arg->method))(thread);
thread->stats.Stop();

Loading…
Cancel
Save