diff --git a/HISTORY.md b/HISTORY.md index 06269acdf..b7685cd58 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,7 +1,8 @@ # Rocksdb Change Log ## Unreleased ### New Features -* Introduced CacheAllocator, which lets the user specify custom allocator for memory in block cache. +* Introduced `CacheAllocator`, which lets the user specify custom allocator for memory in block cache. +* Introduced `PerfContextByLevel` as part of `PerfContext` which allows storing perf context at each level. Also replaced `__thread` with `thread_local` keyword for perf_context. ### Bug Fixes * Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set. diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index badc23f8f..ee0582bf7 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -147,6 +147,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) { options.prefix_extractor = std::make_shared(); options.statistics = rocksdb::CreateDBStatistics(); + get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); if (partition_filters) { @@ -171,20 +172,29 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) { ASSERT_EQ("foo", Get("barbarbar")); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + // TODO (Zhongyi): uncomment the asserts involving level_to_perf_context when per + // level perf context is enabled in block based table reader + // ASSERT_EQ(0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); ASSERT_EQ("foo2", Get("barbarbar2")); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + // ASSERT_EQ(0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + // ASSERT_EQ(0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + // ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); ASSERT_EQ("NOT_FOUND", Get("foobarbar")); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); + // ASSERT_EQ(2, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); ro.total_order_seek = true; ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound()); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); + // ASSERT_EQ(2, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + get_perf_context()->Reset(); } } @@ -193,6 +203,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) { Options options = last_options_; options.prefix_extractor.reset(NewFixedPrefixTransform(8)); options.statistics = rocksdb::CreateDBStatistics(); + get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); if (partition_filters) { @@ -231,6 +242,8 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) { ro.total_order_seek = true; ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound()); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); + // ASSERT_EQ(2, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + get_perf_context()->Reset(); } } @@ -239,6 +252,7 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { Options options = last_options_; options.prefix_extractor.reset(NewFixedPrefixTransform(3)); options.statistics = rocksdb::CreateDBStatistics(); + get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); @@ -386,6 +400,8 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); ASSERT_EQ("bar", Get("barfoo")); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); + // ASSERT_EQ(12, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + get_perf_context()->Reset(); } } @@ -490,6 +506,7 @@ TEST_F(DBBloomFilterTest, BloomFilterRate) { while (ChangeFilterOptions()) { Options options = CurrentOptions(); options.statistics = rocksdb::CreateDBStatistics(); + get_perf_context()->EnablePerLevelPerfContext(); CreateAndReopenWithCF({"pikachu"}, options); const int maxKey = 10000; @@ -511,6 +528,8 @@ TEST_F(DBBloomFilterTest, BloomFilterRate) { ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); } ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98); + // ASSERT_GE((*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful, maxKey*0.98); + get_perf_context()->Reset(); } } @@ -1032,6 +1051,7 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { options.table_factory.reset(NewBlockBasedTableFactory(bbto)); options.optimize_filters_for_hits = true; options.statistics = rocksdb::CreateDBStatistics(); + get_perf_context()->EnablePerLevelPerfContext(); CreateAndReopenWithCF({"mypikachu"}, options); int numkeys = 200000; @@ -1078,6 +1098,8 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { // no bloom filter. Most keys be checked bloom filters twice. ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2); ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2); + // ASSERT_GT((*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful, 65000*2); + // ASSERT_LT((*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful, 120000*2); for (int i = 0; i < numkeys; i += 2) { ASSERT_EQ(Get(1, Key(i)), "val"); @@ -1187,6 +1209,7 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(2 /* index and data block */, TestGetTickerCount(options, BLOCK_CACHE_ADD)); + get_perf_context()->Reset(); } int CountIter(std::unique_ptr& iter, const Slice& key) { diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index f9135c79a..e7dc97646 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -10,6 +10,7 @@ #include "monitoring/histogram.h" #include "monitoring/instrumented_mutex.h" +#include "monitoring/perf_context_imp.h" #include "monitoring/thread_status_util.h" #include "port/port.h" #include "rocksdb/db.h" @@ -579,18 +580,18 @@ TEST_F(PerfContextTest, SeekKeyComparison) { TEST_F(PerfContextTest, DBMutexLockCounter) { int stats_code[] = {0, static_cast(DB_MUTEX_WAIT_MICROS)}; - for (PerfLevel perf_level : + for (PerfLevel perf_level_test : {PerfLevel::kEnableTimeExceptForMutex, PerfLevel::kEnableTime}) { for (int c = 0; c < 2; ++c) { InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]); mutex.Lock(); rocksdb::port::Thread child_thread([&] { - SetPerfLevel(perf_level); + SetPerfLevel(perf_level_test); get_perf_context()->Reset(); ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0); mutex.Lock(); mutex.Unlock(); - if (perf_level == PerfLevel::kEnableTimeExceptForMutex || + if (perf_level_test == PerfLevel::kEnableTimeExceptForMutex || stats_code[c] != DB_MUTEX_WAIT_MICROS) { ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0); } else { @@ -686,7 +687,34 @@ TEST_F(PerfContextTest, MergeOperatorTime) { delete db; } + +TEST_F(PerfContextTest, PerfContextByLevelGetSet) { + get_perf_context()->Reset(); + get_perf_context()->EnablePerLevelPerfContext(); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, 0); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 7); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 7); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, 2); + ASSERT_EQ( + 0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); + ASSERT_EQ( + 1, (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful); + ASSERT_EQ( + 2, (*(get_perf_context()->level_to_perf_context))[7].bloom_filter_useful); + ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[0] + .bloom_filter_full_positive); + ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[2] + .bloom_filter_full_true_positive); + std::string zero_excluded = get_perf_context()->ToString(true); + ASSERT_NE(std::string::npos, + zero_excluded.find("bloom_filter_useful = 1@level5, 2@level7")); + ASSERT_NE(std::string::npos, + zero_excluded.find("bloom_filter_full_positive = 1@level0")); + ASSERT_NE(std::string::npos, + zero_excluded.find("bloom_filter_full_true_positive = 1@level2")); } +} // namespace rocksdb int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index d3771d3f0..bd7a5de4c 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -5,6 +5,7 @@ #pragma once +#include #include #include @@ -16,12 +17,35 @@ namespace rocksdb { // and transparently. // Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats. +struct PerfContextByLevel { + // # of times bloom filter has avoided file reads, i.e., negatives. + uint64_t bloom_filter_useful = 0; + // # of times bloom FullFilter has not avoided the reads. + uint64_t bloom_filter_full_positive = 0; + // # of times bloom FullFilter has not avoided the reads and data actually + // exist. + uint64_t bloom_filter_full_true_positive = 0; + + void Reset(); // reset all performance counters to zero +}; + struct PerfContext { + ~PerfContext(); + void Reset(); // reset all performance counters to zero std::string ToString(bool exclude_zero_counters = false) const; + // enable per level perf context and allocate storage for PerfContextByLevel + void EnablePerLevelPerfContext(); + + // temporarily disable per level perf contxt by setting the flag to false + void DisablePerLevelPerfContext(); + + // free the space for PerfContextByLevel, also disable per level perf context + void ClearPerLevelPerfContext(); + uint64_t user_key_comparison_count; // total number of user key comparisons uint64_t block_cache_hit_count; // total number of block cache hits uint64_t block_read_count; // total number of block reads (with IO) @@ -168,6 +192,8 @@ struct PerfContext { uint64_t env_lock_file_nanos; uint64_t env_unlock_file_nanos; uint64_t env_new_logger_nanos; + std::map* level_to_perf_context; + bool per_level_perf_context_enabled; }; // Get Thread-local PerfContext object pointer diff --git a/monitoring/perf_context.cc b/monitoring/perf_context.cc index 9bba841f8..423443869 100644 --- a/monitoring/perf_context.cc +++ b/monitoring/perf_context.cc @@ -15,7 +15,7 @@ PerfContext perf_context; #if defined(OS_SOLARIS) __thread PerfContext perf_context_; #else -__thread PerfContext perf_context; +thread_local PerfContext perf_context; #endif #endif @@ -31,6 +31,12 @@ PerfContext* get_perf_context() { #endif } +PerfContext::~PerfContext() { +#if !defined(NPERF_CONTEXT) && defined(ROCKSDB_SUPPORT_THREAD_LOCAL) && !defined(OS_SOLARIS) + ClearPerLevelPerfContext(); +#endif +} + void PerfContext::Reset() { #ifndef NPERF_CONTEXT user_key_comparison_count = 0; @@ -104,6 +110,11 @@ void PerfContext::Reset() { env_lock_file_nanos = 0; env_unlock_file_nanos = 0; env_new_logger_nanos = 0; + if (per_level_perf_context_enabled && level_to_perf_context) { + for (auto& kv : *level_to_perf_context) { + kv.second.Reset(); + } + } #endif } @@ -112,6 +123,25 @@ void PerfContext::Reset() { ss << #counter << " = " << counter << ", "; \ } +#define PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(counter) \ + if (per_level_perf_context_enabled && \ + level_to_perf_context) { \ + ss << #counter << " = "; \ + for (auto& kv : *level_to_perf_context) { \ + if (!exclude_zero_counters || (kv.second.counter > 0)) { \ + ss << kv.second.counter << "@level" << kv.first << ", "; \ + } \ + } \ + } + +void PerfContextByLevel::Reset() { +#ifndef NPERF_CONTEXT + bloom_filter_useful = 0; + bloom_filter_full_positive = 0; + bloom_filter_full_true_positive = 0; +#endif +} + std::string PerfContext::ToString(bool exclude_zero_counters) const { #ifdef NPERF_CONTEXT return ""; @@ -186,8 +216,30 @@ std::string PerfContext::ToString(bool exclude_zero_counters) const { PERF_CONTEXT_OUTPUT(env_lock_file_nanos); PERF_CONTEXT_OUTPUT(env_unlock_file_nanos); PERF_CONTEXT_OUTPUT(env_new_logger_nanos); + PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_useful); + PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_positive); + PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_true_positive); return ss.str(); #endif } +void PerfContext::EnablePerLevelPerfContext() { + if (!level_to_perf_context) { + level_to_perf_context = new std::map(); + } + per_level_perf_context_enabled = true; +} + +void PerfContext::DisablePerLevelPerfContext(){ + per_level_perf_context_enabled = false; +} + +void PerfContext::ClearPerLevelPerfContext(){ + if (level_to_perf_context) { + delete level_to_perf_context; + level_to_perf_context = nullptr; + } + per_level_perf_context_enabled = false; +} + } diff --git a/monitoring/perf_context_imp.h b/monitoring/perf_context_imp.h index cfcded1c9..d67654914 100644 --- a/monitoring/perf_context_imp.h +++ b/monitoring/perf_context_imp.h @@ -16,7 +16,7 @@ extern PerfContext perf_context; extern __thread PerfContext perf_context_; #define perf_context (*get_perf_context()) #else -extern __thread PerfContext perf_context; +extern thread_local PerfContext perf_context; #endif #endif @@ -59,6 +59,22 @@ extern __thread PerfContext perf_context; perf_context.metric += value; \ } +// Increase metric value +#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) \ + if (perf_level >= PerfLevel::kEnableCount && \ + perf_context.per_level_perf_context_enabled && \ + perf_context.level_to_perf_context) { \ + if ((*(perf_context.level_to_perf_context)).find(level) != \ + (*(perf_context.level_to_perf_context)).end()) { \ + (*(perf_context.level_to_perf_context))[level].metric += value; \ + } \ + else { \ + PerfContextByLevel empty_context; \ + (*(perf_context.level_to_perf_context))[level] = empty_context; \ + (*(perf_context.level_to_perf_context))[level].metric += value; \ + } \ + } \ + #endif } diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index bd9255c8c..faf5826f2 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -2315,6 +2315,8 @@ bool BlockBasedTable::FullFilterKeyMayMatch( } if (may_match) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE); + // TODO(Zhongyi): use the correct level here + // PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, /*level*/); } return may_match; } @@ -2339,6 +2341,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, if (!FullFilterKeyMayMatch(read_options, filter, key, no_io, prefix_extractor)) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); + // TODO(Zhongyi): use the correct level here + // PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, /*level*/); } else { IndexBlockIter iiter_on_stack; // if prefix_extractor found in block differs from options, disable @@ -2371,6 +2375,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, // TODO: think about interaction with Merge. If a user key cannot // cross one data block, we should be fine. RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); + // TODO(Zhongyi): use the correct level here + // PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, /*level*/); break; } else { DataBlockIter biter; @@ -2423,6 +2429,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, } if (matched && filter != nullptr && !filter->IsBlockBased()) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE); + // TODO(Zhongyi): use the correct level here + // PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, /*level*/); } if (s.ok()) { s = iiter->status(); diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 9b72f3a91..d9e8eb2c7 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -2849,6 +2849,7 @@ void VerifyDBFromDB(std::string& truth_db_name) { } SetPerfLevel(static_cast (shared->perf_level)); + perf_context.EnablePerLevelPerfContext(); thread->stats.Start(thread->tid); (arg->bm->*(arg->method))(thread); thread->stats.Stop();