New stat rocksdb.{cf|db}-write-stall-stats exposed in a structural way (#11300)
Summary: **Context/Summary:** Users are interested in figuring out what has caused a write stall. - Refactor write-stall-related stats from property `kCFStats` into its own db property `rocksdb.cf-write-stall-stats`, available as a map or a string. For now, this only contains the counts of the different combinations of (CF-scope `WriteStallCause`) + (`WriteStallCondition`) - Add new `WriteStallCause::kWriteBufferManagerLimit` to reflect write stalls caused by the write buffer manager - Add new `rocksdb.db-write-stall-stats`. For now, this only contains `WriteStallCause::kWriteBufferManagerLimit` + `WriteStallCondition::kStopped` - Expose functions in new class `WriteStallStatsMapKeys` for examining the above two properties when returned as a map - Misc: rename/comment some write stall InternalStats for clarity Pull Request resolved: https://github.com/facebook/rocksdb/pull/11300 Test Plan: - New UT - Stress test `python3 tools/db_crashtest.py blackbox --simple --get_property_one_in=1` - Perf test: Both converge very slowly at similar rates, but post-change has higher average ops/sec than pre-change even though they are run at the same time. 
``` ./db_bench -seed=1679014417652004 -db=/dev/shm/testdb/ -statistics=false -benchmarks="fillseq[-X60]" -key_size=32 -value_size=512 -num=100000 -db_write_buffer_size=655 -target_file_size_base=655 -disable_auto_compactions=false -compression_type=none -bloom_bits=3 ``` pre-change: ``` fillseq [AVG 15 runs] : 1176 (± 732) ops/sec; 0.6 (± 0.4) MB/sec fillseq : 1052.671 micros/op 949 ops/sec 105.267 seconds 100000 operations; 0.5 MB/s fillseq [AVG 16 runs] : 1162 (± 685) ops/sec; 0.6 (± 0.4) MB/sec fillseq : 1387.330 micros/op 720 ops/sec 138.733 seconds 100000 operations; 0.4 MB/s fillseq [AVG 17 runs] : 1136 (± 646) ops/sec; 0.6 (± 0.3) MB/sec fillseq : 1232.011 micros/op 811 ops/sec 123.201 seconds 100000 operations; 0.4 MB/s fillseq [AVG 18 runs] : 1118 (± 610) ops/sec; 0.6 (± 0.3) MB/sec fillseq : 1282.567 micros/op 779 ops/sec 128.257 seconds 100000 operations; 0.4 MB/s fillseq [AVG 19 runs] : 1100 (± 578) ops/sec; 0.6 (± 0.3) MB/sec fillseq : 1914.336 micros/op 522 ops/sec 191.434 seconds 100000 operations; 0.3 MB/s fillseq [AVG 20 runs] : 1071 (± 551) ops/sec; 0.6 (± 0.3) MB/sec fillseq : 1227.510 micros/op 814 ops/sec 122.751 seconds 100000 operations; 0.4 MB/s fillseq [AVG 21 runs] : 1059 (± 525) ops/sec; 0.5 (± 0.3) MB/sec ``` post-change: ``` fillseq [AVG 15 runs] : 1226 (± 732) ops/sec; 0.6 (± 0.4) MB/sec fillseq : 1323.825 micros/op 755 ops/sec 132.383 seconds 100000 operations; 0.4 MB/s fillseq [AVG 16 runs] : 1196 (± 687) ops/sec; 0.6 (± 0.4) MB/sec fillseq : 1223.905 micros/op 817 ops/sec 122.391 seconds 100000 operations; 0.4 MB/s fillseq [AVG 17 runs] : 1174 (± 647) ops/sec; 0.6 (± 0.3) MB/sec fillseq : 1168.996 micros/op 855 ops/sec 116.900 seconds 100000 operations; 0.4 MB/s fillseq [AVG 18 runs] : 1156 (± 611) ops/sec; 0.6 (± 0.3) MB/sec fillseq : 1348.729 micros/op 741 ops/sec 134.873 seconds 100000 operations; 0.4 MB/s fillseq [AVG 19 runs] : 1134 (± 579) ops/sec; 0.6 (± 0.3) MB/sec fillseq : 1196.887 micros/op 835 ops/sec 119.689 seconds 
100000 operations; 0.4 MB/s fillseq [AVG 20 runs] : 1119 (± 550) ops/sec; 0.6 (± 0.3) MB/sec fillseq : 1193.697 micros/op 837 ops/sec 119.370 seconds 100000 operations; 0.4 MB/s fillseq [AVG 21 runs] : 1106 (± 524) ops/sec; 0.6 (± 0.3) MB/sec ``` Reviewed By: ajkr Differential Revision: D44159541 Pulled By: hx235 fbshipit-source-id: 8d29efb70001fdc52d34535eeb3364fc3e71e40b oxigraph-8.1.1
parent
204fcff751
commit
cb58477185
@ -0,0 +1,159 @@ |
||||
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#include "db/write_stall_stats.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
const std::string kInvalidWriteStallCauseHyphenString = "invalid"; |
||||
|
||||
const std::array<std::string, static_cast<uint32_t>(WriteStallCause::kNone)> |
||||
kWriteStallCauseToHyphenString{{ |
||||
"memtable-limit", |
||||
"l0-file-count-limit", |
||||
"pending-compaction-bytes", |
||||
// WriteStallCause::kCFScopeWriteStallCauseEnumMax
|
||||
kInvalidWriteStallCauseHyphenString, |
||||
"write-buffer-manager-limit", |
||||
// WriteStallCause::kDBScopeWriteStallCauseEnumMax
|
||||
kInvalidWriteStallCauseHyphenString, |
||||
}}; |
||||
|
||||
const std::array<std::string, |
||||
static_cast<uint32_t>(WriteStallCondition::kNormal)> |
||||
kWriteStallConditionToHyphenString{{ |
||||
"delays", |
||||
"stops", |
||||
}}; |
||||
|
||||
// Maps a (cause, condition) pair to the column-family level internal stat
// that counts it.
//
// Returns `InternalStats::INTERNAL_CF_STATS_ENUM_MAX` when `cause` is not one
// of the three causes handled below, or when `condition` is neither
// `kDelayed` nor `kStopped`.
InternalStats::InternalCFStatsType InternalCFStat(
    WriteStallCause cause, WriteStallCondition condition) {
  using StatType = InternalStats::InternalCFStatsType;

  // Selects between the "delays" and the "stops" counter of a single cause.
  const auto pick = [condition](StatType on_delay,
                                StatType on_stop) -> StatType {
    if (condition == WriteStallCondition::kDelayed) {
      return on_delay;
    }
    if (condition == WriteStallCondition::kStopped) {
      return on_stop;
    }
    return InternalStats::INTERNAL_CF_STATS_ENUM_MAX;
  };

  switch (cause) {
    case WriteStallCause::kMemtableLimit:
      return pick(InternalStats::MEMTABLE_LIMIT_DELAYS,
                  InternalStats::MEMTABLE_LIMIT_STOPS);
    case WriteStallCause::kL0FileCountLimit:
      return pick(InternalStats::L0_FILE_COUNT_LIMIT_DELAYS,
                  InternalStats::L0_FILE_COUNT_LIMIT_STOPS);
    case WriteStallCause::kPendingCompactionBytes:
      return pick(InternalStats::PENDING_COMPACTION_BYTES_LIMIT_DELAYS,
                  InternalStats::PENDING_COMPACTION_BYTES_LIMIT_STOPS);
    default:
      break;
  }
  return InternalStats::INTERNAL_CF_STATS_ENUM_MAX;
}
||||
|
||||
// Maps a (cause, condition) pair to the DB level internal stat that counts
// it.
//
// The only pair with a dedicated counter is
// (`kWriteBufferManagerLimit`, `kStopped`); every other combination yields
// `InternalStats::kIntStatsNumMax`.
InternalStats::InternalDBStatsType InternalDBStat(
    WriteStallCause cause, WriteStallCondition condition) {
  const bool is_wbm_stop =
      cause == WriteStallCause::kWriteBufferManagerLimit &&
      condition == WriteStallCondition::kStopped;
  if (is_wbm_stop) {
    return InternalStats::kIntStatsWriteBufferManagerLimitStopsCounts;
  }
  return InternalStats::kIntStatsNumMax;
}
||||
|
||||
// Returns true iff the integer value of `cause` lies in the closed range
// [kCFScopeWriteStallCauseEnumMax - kNumCFScopeWriteStallCauses,
//  kCFScopeWriteStallCauseEnumMax - 1].
bool isCFScopeWriteStallCause(WriteStallCause cause) {
  const uint32_t section_end =
      static_cast<uint32_t>(WriteStallCause::kCFScopeWriteStallCauseEnumMax);
  const uint32_t first = section_end - kNumCFScopeWriteStallCauses;
  const uint32_t last = section_end - 1;

  const uint32_t value = static_cast<uint32_t>(cause);
  if (value < first) {
    return false;
  }
  return value <= last;
}
||||
|
||||
// Returns true iff the integer value of `cause` lies in the closed range
// [kDBScopeWriteStallCauseEnumMax - kNumDBScopeWriteStallCauses,
//  kDBScopeWriteStallCauseEnumMax - 1].
bool isDBScopeWriteStallCause(WriteStallCause cause) {
  const uint32_t section_end =
      static_cast<uint32_t>(WriteStallCause::kDBScopeWriteStallCauseEnumMax);
  const uint32_t first = section_end - kNumDBScopeWriteStallCauses;
  const uint32_t last = section_end - 1;

  const uint32_t value = static_cast<uint32_t>(cause);
  if (value < first) {
    return false;
  }
  return value <= last;
}
||||
|
||||
// Returns the key string "total-stops". The string is a function-local
// static, so the returned reference stays valid after the call.
const std::string& WriteStallStatsMapKeys::TotalStops() {
  static const std::string key("total-stops");
  return key;
}
||||
|
||||
// Returns the key string "total-delays". The string is a function-local
// static, so the returned reference stays valid after the call.
const std::string& WriteStallStatsMapKeys::TotalDelays() {
  static const std::string key("total-delays");
  return key;
}
||||
|
||||
const std::string& |
||||
WriteStallStatsMapKeys::CFL0FileCountLimitDelaysWithOngoingCompaction() { |
||||
static const std::string ret = |
||||
"cf-l0-file-count-limit-delays-with-ongoing-compaction"; |
||||
return ret; |
||||
} |
||||
|
||||
const std::string& |
||||
WriteStallStatsMapKeys::CFL0FileCountLimitStopsWithOngoingCompaction() { |
||||
static const std::string ret = |
||||
"cf-l0-file-count-limit-stops-with-ongoing-compaction"; |
||||
return ret; |
||||
} |
||||
|
||||
// Builds the key "<cause-name>-<condition-name>" for the given pair, where the
// two names are taken from `kWriteStallCauseToHyphenString` and
// `kWriteStallConditionToHyphenString`.
//
// REQUIRES:
// `cause` is a CF-scope or DB-scope `WriteStallCause`
//
// REQUIRES:
// `condition` has an entry in `kWriteStallConditionToHyphenString`, i.e. it
// is not `WriteStallCondition::kNormal`
//
// If either requirement is violated the function asserts in debug builds and
// returns an empty string otherwise.
std::string WriteStallStatsMapKeys::CauseConditionCount(
    WriteStallCause cause, WriteStallCondition condition) {
  if (!isCFScopeWriteStallCause(cause) && !isDBScopeWriteStallCause(cause)) {
    assert(false);
    return "";
  }

  // The condition table only has slots for the enumerators that precede
  // `kNormal`; indexing it with anything else would read past its end.
  const uint32_t condition_index = static_cast<uint32_t>(condition);
  if (condition_index >= kWriteStallConditionToHyphenString.size()) {
    assert(false);
    return "";
  }

  const std::string& cause_name =
      kWriteStallCauseToHyphenString[static_cast<uint32_t>(cause)];
  const std::string& condition_name =
      kWriteStallConditionToHyphenString[condition_index];

  std::string cause_condition_count_name;
  cause_condition_count_name.reserve(cause_name.size() + 1 +
                                     condition_name.size());
  cause_condition_count_name.append(cause_name);
  cause_condition_count_name.append("-");
  cause_condition_count_name.append(condition_name);

  return cause_condition_count_name;
}
||||
} // namespace ROCKSDB_NAMESPACE
|
@ -0,0 +1,50 @@ |
||||
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once |
||||
|
||||
#include <array> |
||||
|
||||
#include "db/internal_stats.h" |
||||
#include "rocksdb/types.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
extern const std::string kInvalidWriteStallCauseHyphenString; |
||||
|
||||
extern const std::array<std::string, |
||||
static_cast<uint32_t>(WriteStallCause::kNone)> |
||||
kWriteStallCauseToHyphenString; |
||||
|
||||
extern const std::array<std::string, |
||||
static_cast<uint32_t>(WriteStallCondition::kNormal)> |
||||
kWriteStallConditionToHyphenString; |
||||
|
||||
// REQUIRES:
|
||||
// cause` is CF-scope `WriteStallCause`, see `WriteStallCause` for more
|
||||
//
|
||||
// REQUIRES:
|
||||
// `condition` != `WriteStallCondition::kNormal`
|
||||
extern InternalStats::InternalCFStatsType InternalCFStat( |
||||
WriteStallCause cause, WriteStallCondition condition); |
||||
|
||||
// REQUIRES:
|
||||
// cause` is DB-scope `WriteStallCause`, see `WriteStallCause` for more
|
||||
//
|
||||
// REQUIRES:
|
||||
// `condition` != `WriteStallCondition::kNormal`
|
||||
extern InternalStats::InternalDBStatsType InternalDBStat( |
||||
WriteStallCause cause, WriteStallCondition condition); |
||||
|
||||
extern bool isCFScopeWriteStallCause(WriteStallCause cause); |
||||
extern bool isDBScopeWriteStallCause(WriteStallCause cause); |
||||
|
||||
constexpr uint32_t kNumCFScopeWriteStallCauses = |
||||
static_cast<uint32_t>(WriteStallCause::kCFScopeWriteStallCauseEnumMax) - |
||||
static_cast<uint32_t>(WriteStallCause::kMemtableLimit); |
||||
|
||||
constexpr uint32_t kNumDBScopeWriteStallCauses = |
||||
static_cast<uint32_t>(WriteStallCause::kDBScopeWriteStallCauseEnumMax) - |
||||
static_cast<uint32_t>(WriteStallCause::kWriteBufferManagerLimit); |
||||
} // namespace ROCKSDB_NAMESPACE
|
Loading…
Reference in new issue