New stat rocksdb.{cf|db}-write-stall-stats exposed in a structured way (#11300)

Summary:
Users are interested in figuring out what has caused a write stall. This change:
- Refactor write stall related stats from property `kCFStats` into their own db property `rocksdb.cf-write-stall-stats`, available as a map or string. For now, this contains only the count of each combination of CF-scope `WriteStallCause` and `WriteStallCondition`
- Add new `WriteStallCause::kWriteBufferManagerLimit` to reflect write stalls caused by the write buffer manager
- Add new property `rocksdb.db-write-stall-stats`. For now, this contains only the count for `WriteStallCause::kWriteBufferManagerLimit` + `WriteStallCondition::kStopped`
- Expose functions in the new class `WriteStallStatsMapKeys` for examining the two properties above when returned as maps (see the sketch after this list)
- Misc: rename/comment some write stall `InternalStats` for clarity
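To make the new surface concrete, here is a minimal sketch of consuming both properties through `GetMapProperty()` and the `WriteStallStatsMapKeys` helpers (the DB path and `main()` wrapper are illustrative, not part of this change):
```cpp
#include <cassert>
#include <iostream>
#include <map>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/types.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  DB* db = nullptr;
  Options options;
  options.create_if_missing = true;
  Status s = DB::Open(options, "/tmp/write_stall_stats_demo", &db);
  assert(s.ok());

  // CF-scope stats: one count per (CF-scope WriteStallCause,
  // WriteStallCondition) combination plus totals, keyed by the strings
  // exposed through WriteStallStatsMapKeys.
  std::map<std::string, std::string> cf_stats;
  db->GetMapProperty(DB::Properties::kCFWriteStallStats, &cf_stats);
  std::cout << "total-stops: "
            << cf_stats[WriteStallStatsMapKeys::TotalStops()]
            << ", total-delays: "
            << cf_stats[WriteStallStatsMapKeys::TotalDelays()] << std::endl;

  // DB-scope stats: for now only the write buffer manager stop count.
  std::map<std::string, std::string> db_stats;
  db->GetMapProperty(DB::Properties::kDBWriteStallStats, &db_stats);
  std::cout << db_stats[WriteStallStatsMapKeys::CauseConditionCount(
                   WriteStallCause::kWriteBufferManagerLimit,
                   WriteStallCondition::kStopped)]
            << std::endl;

  delete db;
  return 0;
}
```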

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11300

Test Plan:
- New UT
- Stress test
`python3 tools/db_crashtest.py blackbox --simple --get_property_one_in=1`
- Perf test: pre- and post-change runs both converge slowly at similar rates, and the post-change build shows a higher average ops/sec than pre-change even though the two were run at the same time.
```
./db_bench -seed=1679014417652004 -db=/dev/shm/testdb/ -statistics=false -benchmarks="fillseq[-X60]" -key_size=32 -value_size=512 -num=100000 -db_write_buffer_size=655 -target_file_size_base=655 -disable_auto_compactions=false -compression_type=none -bloom_bits=3
```
pre-change:
```
fillseq [AVG 15 runs] : 1176 (± 732) ops/sec;    0.6 (± 0.4) MB/sec
fillseq      :    1052.671 micros/op 949 ops/sec 105.267 seconds 100000 operations;    0.5 MB/s
fillseq [AVG 16 runs] : 1162 (± 685) ops/sec;    0.6 (± 0.4) MB/sec
fillseq      :    1387.330 micros/op 720 ops/sec 138.733 seconds 100000 operations;    0.4 MB/s
fillseq [AVG 17 runs] : 1136 (± 646) ops/sec;    0.6 (± 0.3) MB/sec
fillseq      :    1232.011 micros/op 811 ops/sec 123.201 seconds 100000 operations;    0.4 MB/s
fillseq [AVG 18 runs] : 1118 (± 610) ops/sec;    0.6 (± 0.3) MB/sec
fillseq      :    1282.567 micros/op 779 ops/sec 128.257 seconds 100000 operations;    0.4 MB/s
fillseq [AVG 19 runs] : 1100 (± 578) ops/sec;    0.6 (± 0.3) MB/sec
fillseq      :    1914.336 micros/op 522 ops/sec 191.434 seconds 100000 operations;    0.3 MB/s
fillseq [AVG 20 runs] : 1071 (± 551) ops/sec;    0.6 (± 0.3) MB/sec
fillseq      :    1227.510 micros/op 814 ops/sec 122.751 seconds 100000 operations;    0.4 MB/s
fillseq [AVG 21 runs] : 1059 (± 525) ops/sec;    0.5 (± 0.3) MB/sec
```
post-change:
```
fillseq [AVG 15 runs] : 1226 (± 732) ops/sec;    0.6 (± 0.4) MB/sec
fillseq      :    1323.825 micros/op 755 ops/sec 132.383 seconds 100000 operations;    0.4 MB/s
fillseq [AVG 16 runs] : 1196 (± 687) ops/sec;    0.6 (± 0.4) MB/sec
fillseq      :    1223.905 micros/op 817 ops/sec 122.391 seconds 100000 operations;    0.4 MB/s
fillseq [AVG 17 runs] : 1174 (± 647) ops/sec;    0.6 (± 0.3) MB/sec
fillseq      :    1168.996 micros/op 855 ops/sec 116.900 seconds 100000 operations;    0.4 MB/s
fillseq [AVG 18 runs] : 1156 (± 611) ops/sec;    0.6 (± 0.3) MB/sec
fillseq      :    1348.729 micros/op 741 ops/sec 134.873 seconds 100000 operations;    0.4 MB/s
fillseq [AVG 19 runs] : 1134 (± 579) ops/sec;    0.6 (± 0.3) MB/sec
fillseq      :    1196.887 micros/op 835 ops/sec 119.689 seconds 100000 operations;    0.4 MB/s
fillseq [AVG 20 runs] : 1119 (± 550) ops/sec;    0.6 (± 0.3) MB/sec
fillseq      :    1193.697 micros/op 837 ops/sec 119.370 seconds 100000 operations;    0.4 MB/s
fillseq [AVG 21 runs] : 1106 (± 524) ops/sec;    0.6 (± 0.3) MB/sec
```

Reviewed By: ajkr

Differential Revision: D44159541

Pulled By: hx235

fbshipit-source-id: 8d29efb70001fdc52d34535eeb3364fc3e71e40b
Hui Xiao authored, committed by Facebook GitHub Bot
parent 204fcff751
commit cb58477185
Changed files (lines changed):

- CMakeLists.txt (1)
- HISTORY.md (1)
- TARGETS (2)
- db/column_family.cc (15)
- db/column_family.h (6)
- db/db_impl/db_impl_write.cc (3)
- db/db_properties_test.cc (175)
- db/internal_stats.cc (236)
- db/internal_stats.h (43)
- db/write_stall_stats.cc (159)
- db/write_stall_stats.h (50)
- include/rocksdb/db.h (30)
- include/rocksdb/listener.h (6)
- include/rocksdb/types.h (28)
- java/src/main/java/org/rocksdb/WriteStallCondition.java (6)
- java/src/test/java/org/rocksdb/EventListenerTest.java (2)
- src.mk (1)

CMakeLists.txt:
```diff
@@ -748,6 +748,7 @@ set(SOURCES
   db/write_batch.cc
   db/write_batch_base.cc
   db/write_controller.cc
+  db/write_stall_stats.cc
   db/write_thread.cc
   env/composite_env.cc
   env/env.cc
```

HISTORY.md:
```diff
@@ -15,6 +15,7 @@
 * Added a new PerfContext counter `internal_merge_point_lookup_count` which tracks the number of Merge operands applied while serving point lookup queries.
 * Add new statistics rocksdb.table.open.prefetch.tail.read.bytes, rocksdb.table.open.prefetch.tail.{miss|hit}
 * Add support for SecondaryCache with HyperClockCache (`HyperClockCacheOptions` inherits `secondary_cache` option from `ShardedCacheOptions`)
+* Add new db properties `rocksdb.cf-write-stall-stats`, `rocksdb.db-write-stall-stats` and APIs to examine them in a structured way. In particular, users of `GetMapProperty()` with property `kCFWriteStallStats`/`kDBWriteStallStats` can now use the functions in `WriteStallStatsMapKeys` to find stats in the map.
 ### Public API Changes
 * Changed various functions and features in `Cache` that are mostly relevant to custom implementations or wrappers. Especially, asynchronous lookup functionality is moved from `Lookup()` to a new `StartAsyncLookup()` function.
```

TARGETS:
```diff
@@ -106,6 +106,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
         "db/write_batch.cc",
         "db/write_batch_base.cc",
         "db/write_controller.cc",
+        "db/write_stall_stats.cc",
         "db/write_thread.cc",
         "env/composite_env.cc",
         "env/env.cc",
@@ -451,6 +452,7 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
         "db/write_batch.cc",
         "db/write_batch_base.cc",
         "db/write_controller.cc",
+        "db/write_stall_stats.cc",
         "db/write_thread.cc",
         "env/composite_env.cc",
         "env/env.cc",
```

db/column_family.cc:
```diff
@@ -869,7 +869,7 @@ int GetL0ThresholdSpeedupCompaction(int level0_file_num_compaction_trigger,
   }
 }  // anonymous namespace

-std::pair<WriteStallCondition, ColumnFamilyData::WriteStallCause>
+std::pair<WriteStallCondition, WriteStallCause>
 ColumnFamilyData::GetWriteStallConditionAndCause(
     int num_unflushed_memtables, int num_l0_files,
     uint64_t num_compaction_needed_bytes,
@@ -942,7 +942,8 @@ WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions(
       internal_stats_->AddCFStats(InternalStats::L0_FILE_COUNT_LIMIT_STOPS, 1);
       if (compaction_picker_->IsLevel0CompactionInProgress()) {
         internal_stats_->AddCFStats(
-            InternalStats::LOCKED_L0_FILE_COUNT_LIMIT_STOPS, 1);
+            InternalStats::L0_FILE_COUNT_LIMIT_STOPS_WITH_ONGOING_COMPACTION,
+            1);
       }
       ROCKS_LOG_WARN(ioptions_.logger,
                      "[%s] Stopping writes because we have %d level-0 files",
@@ -963,7 +964,7 @@ WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions(
       SetupDelay(write_controller, compaction_needed_bytes,
                  prev_compaction_needed_bytes_, was_stopped,
                  mutable_cf_options.disable_auto_compactions);
-      internal_stats_->AddCFStats(InternalStats::MEMTABLE_LIMIT_SLOWDOWNS, 1);
+      internal_stats_->AddCFStats(InternalStats::MEMTABLE_LIMIT_DELAYS, 1);
       ROCKS_LOG_WARN(
           ioptions_.logger,
           "[%s] Stalling writes because we have %d immutable memtables "
@@ -981,11 +982,11 @@ WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions(
       SetupDelay(write_controller, compaction_needed_bytes,
                  prev_compaction_needed_bytes_, was_stopped || near_stop,
                  mutable_cf_options.disable_auto_compactions);
-      internal_stats_->AddCFStats(InternalStats::L0_FILE_COUNT_LIMIT_SLOWDOWNS,
-                                  1);
+      internal_stats_->AddCFStats(InternalStats::L0_FILE_COUNT_LIMIT_DELAYS, 1);
       if (compaction_picker_->IsLevel0CompactionInProgress()) {
         internal_stats_->AddCFStats(
-            InternalStats::LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS, 1);
+            InternalStats::L0_FILE_COUNT_LIMIT_DELAYS_WITH_ONGOING_COMPACTION,
+            1);
       }
       ROCKS_LOG_WARN(ioptions_.logger,
                      "[%s] Stalling writes because we have %d level-0 files "
@@ -1011,7 +1012,7 @@ WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions(
                  prev_compaction_needed_bytes_, was_stopped || near_stop,
                  mutable_cf_options.disable_auto_compactions);
       internal_stats_->AddCFStats(
-          InternalStats::PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS, 1);
+          InternalStats::PENDING_COMPACTION_BYTES_LIMIT_DELAYS, 1);
       ROCKS_LOG_WARN(
           ioptions_.logger,
           "[%s] Stalling writes because of estimated pending compaction "
```

db/column_family.h:
```diff
@@ -462,12 +462,6 @@ class ColumnFamilyData {
   bool queued_for_flush() { return queued_for_flush_; }
   bool queued_for_compaction() { return queued_for_compaction_; }

-  enum class WriteStallCause {
-    kNone,
-    kMemtableLimit,
-    kL0FileCountLimit,
-    kPendingCompactionBytes,
-  };
   static std::pair<WriteStallCondition, WriteStallCause>
   GetWriteStallConditionAndCause(
       int num_unflushed_memtables, int num_l0_files,
```

db/db_impl/db_impl_write.cc:
```diff
@@ -1211,6 +1211,9 @@ Status DBImpl::PreprocessWrite(const WriteOptions& write_options,
   // exceeded at this point so no new write (including current one) will go
   // through until memory usage is decreased.
   if (UNLIKELY(status.ok() && write_buffer_manager_->ShouldStall())) {
+    default_cf_internal_stats_->AddDBStats(
+        InternalStats::kIntStatsWriteBufferManagerLimitStopsCounts, 1,
+        true /* concurrent */);
     if (write_options.no_slowdown) {
       status = Status::Incomplete("Write stall");
     } else {
```
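This is the point where the new DB-scope stat gets counted: each time `WriteBufferManager::ShouldStall()` forces a stop, `kIntStatsWriteBufferManagerLimitStopsCounts` is bumped on the default CF's `InternalStats`. A stall of this kind only occurs when the write buffer manager is created with stalling enabled; a minimal sketch (the 100 KB budget mirrors the test below and is illustrative):
```cpp
#include "rocksdb/options.h"
#include "rocksdb/write_buffer_manager.h"

using namespace ROCKSDB_NAMESPACE;

// Sketch: options whose write buffer manager may stop writes once total
// memtable memory crosses the budget; such a stop is what increments the
// write-buffer-manager-limit stops counter above.
Options MakeOptionsWithWbmStalling() {
  Options options;
  options.write_buffer_manager.reset(new WriteBufferManager(
      100000 /* buffer_size */, nullptr /* cache */, true /* allow_stall */));
  return options;
}
```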

db/db_properties_test.cc:
```diff
@@ -13,6 +13,7 @@
 #include <string>

 #include "db/db_test_util.h"
+#include "db/write_stall_stats.h"
 #include "options/cf_options.h"
 #include "port/stack_trace.h"
 #include "rocksdb/listener.h"
@@ -2106,6 +2107,180 @@ TEST_F(DBPropertiesTest, GetMapPropertyBlockCacheEntryStats) {
   ASSERT_EQ(3 * kNumCacheEntryRoles + 4, values.size());
 }

+TEST_F(DBPropertiesTest, WriteStallStatsSanityCheck) {
+  for (uint32_t i = 0; i < static_cast<uint32_t>(WriteStallCause::kNone); ++i) {
+    std::string str = kWriteStallCauseToHyphenString[i];
+    ASSERT_TRUE(!str.empty())
+        << "Please ensure mapping from `WriteStallCause` to "
+           "`kWriteStallCauseToHyphenString` is complete";
+    WriteStallCause cause = static_cast<WriteStallCause>(i);
+    if (cause == WriteStallCause::kCFScopeWriteStallCauseEnumMax ||
+        cause == WriteStallCause::kDBScopeWriteStallCauseEnumMax) {
+      ASSERT_EQ(str, kInvalidWriteStallCauseHyphenString)
+          << "Please ensure order in `kWriteStallCauseToHyphenString` is "
+             "consistent with `WriteStallCause`";
+    }
+  }
+
+  for (uint32_t i = 0; i < static_cast<uint32_t>(WriteStallCondition::kNormal);
+       ++i) {
+    std::string str = kWriteStallConditionToHyphenString[i];
+    ASSERT_TRUE(!str.empty())
+        << "Please ensure mapping from `WriteStallCondition` to "
+           "`kWriteStallConditionToHyphenString` is complete";
+  }
+
+  for (uint32_t i = 0; i < static_cast<uint32_t>(WriteStallCause::kNone); ++i) {
+    for (uint32_t j = 0;
+         j < static_cast<uint32_t>(WriteStallCondition::kNormal); ++j) {
+      WriteStallCause cause = static_cast<WriteStallCause>(i);
+      WriteStallCondition condition = static_cast<WriteStallCondition>(j);
+      if (isCFScopeWriteStallCause(cause)) {
+        ASSERT_TRUE(InternalCFStat(cause, condition) !=
+                    InternalStats::INTERNAL_CF_STATS_ENUM_MAX)
+            << "Please ensure the combination of WriteStallCause(" +
+                   std::to_string(static_cast<uint32_t>(cause)) +
+                   ") + WriteStallCondition(" +
+                   std::to_string(static_cast<uint32_t>(condition)) +
+                   ") is correctly mapped to a valid `InternalStats` or bypass "
+                   "its check in this test";
+      } else if (isDBScopeWriteStallCause(cause)) {
+        InternalStats::InternalDBStatsType internal_db_stat =
+            InternalDBStat(cause, condition);
+        if (internal_db_stat == InternalStats::kIntStatsNumMax) {
+          ASSERT_TRUE(cause == WriteStallCause::kWriteBufferManagerLimit &&
+                      condition == WriteStallCondition::kDelayed)
+              << "Please ensure the combination of WriteStallCause(" +
+                     std::to_string(static_cast<uint32_t>(cause)) +
+                     ") + WriteStallCondition(" +
+                     std::to_string(static_cast<uint32_t>(condition)) +
+                     ") is correctly mapped to a valid `InternalStats` or "
+                     "bypass its check in this test";
+        }
+      } else if (cause != WriteStallCause::kCFScopeWriteStallCauseEnumMax &&
+                 cause != WriteStallCause::kDBScopeWriteStallCauseEnumMax) {
+        ASSERT_TRUE(false) << "Please ensure the WriteStallCause(" +
+                                  std::to_string(static_cast<uint32_t>(cause)) +
+                                  ") is either CF-scope or DB-scope write "
+                                  "stall cause in enum `WriteStallCause`";
+      }
+    }
+  }
+}
+
+TEST_F(DBPropertiesTest, GetMapPropertyWriteStallStats) {
+  Options options = CurrentOptions();
+  CreateAndReopenWithCF({"heavy_write_cf"}, options);
+
+  for (auto test_cause : {WriteStallCause::kWriteBufferManagerLimit,
+                          WriteStallCause::kMemtableLimit}) {
+    if (test_cause == WriteStallCause::kWriteBufferManagerLimit) {
+      options.write_buffer_manager.reset(
+          new WriteBufferManager(100000, nullptr, true));
+    } else if (test_cause == WriteStallCause::kMemtableLimit) {
+      options.max_write_buffer_number = 2;
+      options.disable_auto_compactions = true;
+    }
+    ReopenWithColumnFamilies({"default", "heavy_write_cf"}, options);
+
+    // Assert initial write stall stats are all 0
+    std::map<std::string, std::string> db_values;
+    ASSERT_TRUE(dbfull()->GetMapProperty(DB::Properties::kDBWriteStallStats,
+                                         &db_values));
+    ASSERT_EQ(std::stoi(db_values[WriteStallStatsMapKeys::CauseConditionCount(
+                  WriteStallCause::kWriteBufferManagerLimit,
+                  WriteStallCondition::kStopped)]),
+              0);
+
+    for (int cf = 0; cf <= 1; ++cf) {
+      std::map<std::string, std::string> cf_values;
+      ASSERT_TRUE(dbfull()->GetMapProperty(
+          handles_[cf], DB::Properties::kCFWriteStallStats, &cf_values));
+      ASSERT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalStops()]), 0);
+      ASSERT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalDelays()]), 0);
+    }
+
+    // Pause flush thread to help coerce write stall
+    std::unique_ptr<test::SleepingBackgroundTask> sleeping_task(
+        new test::SleepingBackgroundTask());
+    env_->SetBackgroundThreads(1, Env::HIGH);
+    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
+                   sleeping_task.get(), Env::Priority::HIGH);
+    sleeping_task->WaitUntilSleeping();
+
+    // Coerce write stall
+    if (test_cause == WriteStallCause::kWriteBufferManagerLimit) {
+      ASSERT_OK(dbfull()->Put(
+          WriteOptions(), handles_[1], Key(1),
+          DummyString(options.write_buffer_manager->buffer_size())));
+
+      WriteOptions wo;
+      wo.no_slowdown = true;
+      Status s = dbfull()->Put(
+          wo, handles_[1], Key(2),
+          DummyString(options.write_buffer_manager->buffer_size()));
+      ASSERT_TRUE(s.IsIncomplete());
+      ASSERT_TRUE(s.ToString().find("Write stall") != std::string::npos);
+    } else if (test_cause == WriteStallCause::kMemtableLimit) {
+      FlushOptions fo;
+      fo.allow_write_stall = true;
+      fo.wait = false;
+
+      ASSERT_OK(
+          dbfull()->Put(WriteOptions(), handles_[1], Key(1), DummyString(1)));
+      ASSERT_OK(dbfull()->Flush(fo, handles_[1]));
+
+      ASSERT_OK(
+          dbfull()->Put(WriteOptions(), handles_[1], Key(2), DummyString(1)));
+      ASSERT_OK(dbfull()->Flush(fo, handles_[1]));
+    }
+
+    if (test_cause == WriteStallCause::kWriteBufferManagerLimit) {
+      db_values.clear();
+      EXPECT_TRUE(dbfull()->GetMapProperty(DB::Properties::kDBWriteStallStats,
+                                           &db_values));
+      EXPECT_EQ(std::stoi(db_values[WriteStallStatsMapKeys::CauseConditionCount(
+                    WriteStallCause::kWriteBufferManagerLimit,
+                    WriteStallCondition::kStopped)]),
+                1);
+      // `WriteStallCause::kWriteBufferManagerLimit` should not result in any
+      // CF-scope write stall stats changes
+      for (int cf = 0; cf <= 1; ++cf) {
+        std::map<std::string, std::string> cf_values;
+        EXPECT_TRUE(dbfull()->GetMapProperty(
+            handles_[cf], DB::Properties::kCFWriteStallStats, &cf_values));
+        EXPECT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalStops()]),
+                  0);
+        EXPECT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalDelays()]),
+                  0);
+      }
+    } else if (test_cause == WriteStallCause::kMemtableLimit) {
+      for (int cf = 0; cf <= 1; ++cf) {
+        std::map<std::string, std::string> cf_values;
+        EXPECT_TRUE(dbfull()->GetMapProperty(
+            handles_[cf], DB::Properties::kCFWriteStallStats, &cf_values));
+        EXPECT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalStops()]),
+                  cf == 1 ? 1 : 0);
+        EXPECT_EQ(
+            std::stoi(cf_values[WriteStallStatsMapKeys::CauseConditionCount(
+                WriteStallCause::kMemtableLimit,
+                WriteStallCondition::kStopped)]),
+            cf == 1 ? 1 : 0);
+        EXPECT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalDelays()]),
+                  0);
+        EXPECT_EQ(
+            std::stoi(cf_values[WriteStallStatsMapKeys::CauseConditionCount(
+                WriteStallCause::kMemtableLimit,
+                WriteStallCondition::kDelayed)]),
+            0);
+      }
+    }
+
+    sleeping_task->WakeUp();
+    sleeping_task->WaitUntilDone();
+  }
+}
+
 namespace {
 std::string PopMetaIndexKey(InternalIterator* meta_iter) {
   Status s = meta_iter->status();
```

db/internal_stats.cc:
```diff
@@ -23,6 +23,7 @@
 #include "cache/cache_entry_stats.h"
 #include "db/column_family.h"
 #include "db/db_impl/db_impl.h"
+#include "db/write_stall_stats.h"
 #include "port/port.h"
 #include "rocksdb/system_clock.h"
 #include "rocksdb/table.h"
@@ -77,6 +78,10 @@ const std::map<InternalStats::InternalDBStatsType, DBStatInfo>
      DBStatInfo{"db.user_writes_with_wal"}},
     {InternalStats::kIntStatsWriteStallMicros,
      DBStatInfo{"db.user_write_stall_micros"}},
+    {InternalStats::kIntStatsWriteBufferManagerLimitStopsCounts,
+     DBStatInfo{WriteStallStatsMapKeys::CauseConditionCount(
+         WriteStallCause::kWriteBufferManagerLimit,
+         WriteStallCondition::kStopped)}},
 };

 namespace {
@@ -243,7 +248,9 @@ static const std::string cfstats = "cfstats";
 static const std::string cfstats_no_file_histogram =
     "cfstats-no-file-histogram";
 static const std::string cf_file_histogram = "cf-file-histogram";
+static const std::string cf_write_stall_stats = "cf-write-stall-stats";
 static const std::string dbstats = "dbstats";
+static const std::string db_write_stall_stats = "db-write-stall-stats";
 static const std::string levelstats = "levelstats";
 static const std::string block_cache_entry_stats = "block-cache-entry-stats";
 static const std::string fast_block_cache_entry_stats =
@@ -323,6 +330,10 @@ const std::string DB::Properties::kCFStatsNoFileHistogram =
     rocksdb_prefix + cfstats_no_file_histogram;
 const std::string DB::Properties::kCFFileHistogram =
     rocksdb_prefix + cf_file_histogram;
+const std::string DB::Properties::kCFWriteStallStats =
+    rocksdb_prefix + cf_write_stall_stats;
+const std::string DB::Properties::kDBWriteStallStats =
+    rocksdb_prefix + db_write_stall_stats;
 const std::string DB::Properties::kDBStats = rocksdb_prefix + dbstats;
 const std::string DB::Properties::kLevelStats = rocksdb_prefix + levelstats;
 const std::string DB::Properties::kBlockCacheEntryStats =
@@ -450,9 +461,15 @@ const UnorderedMap<std::string, DBPropertyInfo>
         {DB::Properties::kCFFileHistogram,
          {false, &InternalStats::HandleCFFileHistogram, nullptr, nullptr,
           nullptr}},
+        {DB::Properties::kCFWriteStallStats,
+         {false, &InternalStats::HandleCFWriteStallStats, nullptr,
+          &InternalStats::HandleCFWriteStallStatsMap, nullptr}},
         {DB::Properties::kDBStats,
          {false, &InternalStats::HandleDBStats, nullptr,
           &InternalStats::HandleDBMapStats, nullptr}},
+        {DB::Properties::kDBWriteStallStats,
+         {false, &InternalStats::HandleDBWriteStallStats, nullptr,
+          &InternalStats::HandleDBWriteStallStatsMap, nullptr}},
         {DB::Properties::kBlockCacheEntryStats,
          {true, &InternalStats::HandleBlockCacheEntryStats, nullptr,
           &InternalStats::HandleBlockCacheEntryStatsMap, nullptr}},
@@ -1087,6 +1104,18 @@ bool InternalStats::HandleCFFileHistogram(std::string* value,
   return true;
 }

+bool InternalStats::HandleCFWriteStallStats(std::string* value,
+                                            Slice /*suffix*/) {
+  DumpCFStatsWriteStall(value);
+  return true;
+}
+
+bool InternalStats::HandleCFWriteStallStatsMap(
+    std::map<std::string, std::string>* value, Slice /*suffix*/) {
+  DumpCFMapStatsWriteStall(value);
+  return true;
+}
+
 bool InternalStats::HandleDBMapStats(
     std::map<std::string, std::string>* db_stats, Slice /*suffix*/) {
   DumpDBMapStats(db_stats);
@@ -1098,6 +1127,18 @@ bool InternalStats::HandleDBStats(std::string* value, Slice /*suffix*/) {
   return true;
 }

+bool InternalStats::HandleDBWriteStallStats(std::string* value,
+                                            Slice /*suffix*/) {
+  DumpDBStatsWriteStall(value);
+  return true;
+}
+
+bool InternalStats::HandleDBWriteStallStatsMap(
+    std::map<std::string, std::string>* value, Slice /*suffix*/) {
+  DumpDBMapStatsWriteStall(value);
+  return true;
+}
+
 bool InternalStats::HandleSsTables(std::string* value, Slice /*suffix*/) {
   auto* current = cfd_->current();
   *value = current->DebugString(true, true);
@@ -1580,6 +1621,10 @@ void InternalStats::DumpDBStats(std::string* value) {
            10000.0 / std::max(interval_seconds_up, 0.001));
   value->append(buf);

+  std::string write_stall_stats;
+  DumpDBStatsWriteStall(&write_stall_stats);
+  value->append(write_stall_stats);
+
   db_stats_snapshot_.seconds_up = seconds_up;
   db_stats_snapshot_.ingest_bytes = user_bytes_written;
   db_stats_snapshot_.write_other = write_other;
@@ -1591,6 +1636,50 @@ void InternalStats::DumpDBStats(std::string* value) {
   db_stats_snapshot_.write_stall_micros = write_stall_micros;
 }

+void InternalStats::DumpDBMapStatsWriteStall(
+    std::map<std::string, std::string>* value) {
+  constexpr uint32_t max_db_scope_write_stall_cause =
+      static_cast<uint32_t>(WriteStallCause::kDBScopeWriteStallCauseEnumMax);
+  for (uint32_t i =
+           max_db_scope_write_stall_cause - kNumDBScopeWriteStallCauses;
+       i < max_db_scope_write_stall_cause; ++i) {
+    for (uint32_t j = 0;
+         j < static_cast<uint32_t>(WriteStallCondition::kNormal); ++j) {
+      WriteStallCause cause = static_cast<WriteStallCause>(i);
+      WriteStallCondition condition = static_cast<WriteStallCondition>(j);
+      InternalStats::InternalDBStatsType internal_db_stat =
+          InternalDBStat(cause, condition);
+      if (internal_db_stat == InternalStats::kIntStatsNumMax) {
+        continue;
+      }
+      std::string name =
+          WriteStallStatsMapKeys::CauseConditionCount(cause, condition);
+      uint64_t stat =
+          db_stats_[static_cast<std::size_t>(internal_db_stat)].load(
+              std::memory_order_relaxed);
+      (*value)[name] = std::to_string(stat);
+    }
+  }
+}
+
+void InternalStats::DumpDBStatsWriteStall(std::string* value) {
+  assert(value);
+
+  std::map<std::string, std::string> write_stall_stats_map;
+  DumpDBMapStatsWriteStall(&write_stall_stats_map);
+
+  std::ostringstream str;
+  str << "Write Stall (count): ";
+
+  for (const auto& name_and_stat : write_stall_stats_map) {
+    str << name_and_stat.first << ": " << name_and_stat.second << ", ";
+  }
+  *value = str.str();
+}
+
 /**
  * Dump Compaction Level stats to a map of stat name with "compaction." prefix
  * to value in double as string. The level in stat name is represented with
@@ -1617,7 +1706,7 @@ void InternalStats::DumpCFMapStats(
     }
   }

-  DumpCFMapStatsIOStalls(cf_stats);
+  DumpCFMapStatsWriteStall(cf_stats);
 }

 void InternalStats::DumpCFMapStats(
@@ -1709,36 +1798,81 @@ void InternalStats::DumpCFMapStatsByPriority(
   }
 }

-void InternalStats::DumpCFMapStatsIOStalls(
-    std::map<std::string, std::string>* cf_stats) {
-  (*cf_stats)["io_stalls.level0_slowdown"] =
-      std::to_string(cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS]);
-  (*cf_stats)["io_stalls.level0_slowdown_with_compaction"] =
-      std::to_string(cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS]);
-  (*cf_stats)["io_stalls.level0_numfiles"] =
-      std::to_string(cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS]);
-  (*cf_stats)["io_stalls.level0_numfiles_with_compaction"] =
-      std::to_string(cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_STOPS]);
-  (*cf_stats)["io_stalls.stop_for_pending_compaction_bytes"] =
-      std::to_string(cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS]);
-  (*cf_stats)["io_stalls.slowdown_for_pending_compaction_bytes"] =
-      std::to_string(cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS]);
-  (*cf_stats)["io_stalls.memtable_compaction"] =
-      std::to_string(cf_stats_count_[MEMTABLE_LIMIT_STOPS]);
-  (*cf_stats)["io_stalls.memtable_slowdown"] =
-      std::to_string(cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS]);
-
-  uint64_t total_stop = cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS] +
-                        cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS] +
-                        cf_stats_count_[MEMTABLE_LIMIT_STOPS];
-
-  uint64_t total_slowdown =
-      cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS] +
-      cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS] +
-      cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS];
-
-  (*cf_stats)["io_stalls.total_stop"] = std::to_string(total_stop);
-  (*cf_stats)["io_stalls.total_slowdown"] = std::to_string(total_slowdown);
+void InternalStats::DumpCFMapStatsWriteStall(
+    std::map<std::string, std::string>* value) {
+  uint64_t total_delays = 0;
+  uint64_t total_stops = 0;
+  constexpr uint32_t max_cf_scope_write_stall_cause =
+      static_cast<uint32_t>(WriteStallCause::kCFScopeWriteStallCauseEnumMax);
+
+  for (uint32_t i =
+           max_cf_scope_write_stall_cause - kNumCFScopeWriteStallCauses;
+       i < max_cf_scope_write_stall_cause; ++i) {
+    for (uint32_t j = 0;
+         j < static_cast<uint32_t>(WriteStallCondition::kNormal); ++j) {
+      WriteStallCause cause = static_cast<WriteStallCause>(i);
+      WriteStallCondition condition = static_cast<WriteStallCondition>(j);
+      InternalStats::InternalCFStatsType internal_cf_stat =
+          InternalCFStat(cause, condition);
+
+      if (internal_cf_stat == InternalStats::INTERNAL_CF_STATS_ENUM_MAX) {
+        continue;
+      }
+
+      std::string name =
+          WriteStallStatsMapKeys::CauseConditionCount(cause, condition);
+      uint64_t stat =
+          cf_stats_count_[static_cast<std::size_t>(internal_cf_stat)];
+      (*value)[name] = std::to_string(stat);
+
+      if (condition == WriteStallCondition::kDelayed) {
+        total_delays += stat;
+      } else if (condition == WriteStallCondition::kStopped) {
+        total_stops += stat;
+      }
+    }
+  }
+
+  (*value)[WriteStallStatsMapKeys::
+               CFL0FileCountLimitDelaysWithOngoingCompaction()] =
+      std::to_string(
+          cf_stats_count_[L0_FILE_COUNT_LIMIT_DELAYS_WITH_ONGOING_COMPACTION]);
+  (*value)[WriteStallStatsMapKeys::
+               CFL0FileCountLimitStopsWithOngoingCompaction()] =
+      std::to_string(
+          cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS_WITH_ONGOING_COMPACTION]);
+
+  (*value)[WriteStallStatsMapKeys::TotalStops()] = std::to_string(total_stops);
+  (*value)[WriteStallStatsMapKeys::TotalDelays()] =
+      std::to_string(total_delays);
+}
+
+void InternalStats::DumpCFStatsWriteStall(std::string* value,
+                                          uint64_t* total_stall_count) {
+  assert(value);
+
+  std::map<std::string, std::string> write_stall_stats_map;
+  DumpCFMapStatsWriteStall(&write_stall_stats_map);
+
+  std::ostringstream str;
+  str << "Write Stall (count): ";
+
+  for (const auto& name_and_stat : write_stall_stats_map) {
+    str << name_and_stat.first << ": " << name_and_stat.second << ", ";
+  }
+
+  if (total_stall_count) {
+    *total_stall_count =
+        ParseUint64(
+            write_stall_stats_map[WriteStallStatsMapKeys::TotalStops()]) +
+        ParseUint64(
+            write_stall_stats_map[WriteStallStatsMapKeys::TotalDelays()]);
+    if (*total_stall_count > 0) {
+      str << "interval: " << *total_stall_count - cf_stats_snapshot_.stall_count
+          << " total count\n";
+    }
+  }
+  *value = str.str();
 }

 void InternalStats::DumpCFStats(std::string* value) {
@@ -1776,14 +1910,6 @@ void InternalStats::DumpCFStatsNoFileHistogram(bool is_periodic,
   uint64_t ingest_l0_files_addfile =
       cf_stats_value_[INGESTED_LEVEL0_NUM_FILES_TOTAL];
   uint64_t ingest_keys_addfile = cf_stats_value_[INGESTED_NUM_KEYS_TOTAL];
-  // Cumulative summary
-  uint64_t total_stall_count =
-      cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS] +
-      cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS] +
-      cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS] +
-      cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS] +
-      cf_stats_count_[MEMTABLE_LIMIT_STOPS] +
-      cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS];
   // Interval summary
   uint64_t interval_flush_ingest =
       flush_ingest - cf_stats_snapshot_.ingest_bytes_flush;
@@ -1905,34 +2031,10 @@ void InternalStats::DumpCFStatsNoFileHistogram(bool is_periodic,
     cf_stats_snapshot_.compact_micros = compact_micros;
   }

-  snprintf(buf, sizeof(buf),
-           "Stalls(count): %" PRIu64
-           " level0_slowdown, "
-           "%" PRIu64
-           " level0_slowdown_with_compaction, "
-           "%" PRIu64
-           " level0_numfiles, "
-           "%" PRIu64
-           " level0_numfiles_with_compaction, "
-           "%" PRIu64
-           " stop for pending_compaction_bytes, "
-           "%" PRIu64
-           " slowdown for pending_compaction_bytes, "
-           "%" PRIu64
-           " memtable_compaction, "
-           "%" PRIu64
-           " memtable_slowdown, "
-           "interval %" PRIu64 " total count\n",
-           cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS],
-           cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS],
-           cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS],
-           cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_STOPS],
-           cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS],
-           cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS],
-           cf_stats_count_[MEMTABLE_LIMIT_STOPS],
-           cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS],
-           total_stall_count - cf_stats_snapshot_.stall_count);
-  value->append(buf);
+  std::string write_stall_stats;
+  uint64_t total_stall_count;
+  DumpCFStatsWriteStall(&write_stall_stats, &total_stall_count);
+  value->append(write_stall_stats);

   if (is_periodic) {
     cf_stats_snapshot_.seconds_up = seconds_up;
```

db/internal_stats.h:
```diff
@@ -104,15 +104,20 @@ class InternalStats {
   static const std::map<LevelStatType, LevelStat> compaction_level_stats;

   enum InternalCFStatsType {
-    L0_FILE_COUNT_LIMIT_SLOWDOWNS,
-    LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS,
+    MEMTABLE_LIMIT_DELAYS,
     MEMTABLE_LIMIT_STOPS,
-    MEMTABLE_LIMIT_SLOWDOWNS,
+    L0_FILE_COUNT_LIMIT_DELAYS,
     L0_FILE_COUNT_LIMIT_STOPS,
-    LOCKED_L0_FILE_COUNT_LIMIT_STOPS,
-    PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS,
+    PENDING_COMPACTION_BYTES_LIMIT_DELAYS,
     PENDING_COMPACTION_BYTES_LIMIT_STOPS,
+    // Write slowdown caused by l0 file count limit while there is ongoing L0
+    // compaction
+    L0_FILE_COUNT_LIMIT_DELAYS_WITH_ONGOING_COMPACTION,
+    // Write stop caused by l0 file count limit while there is ongoing L0
+    // compaction
+    L0_FILE_COUNT_LIMIT_STOPS_WITH_ONGOING_COMPACTION,
     WRITE_STALLS_ENUM_MAX,
+    // End of all write stall stats
     BYTES_FLUSHED,
     BYTES_INGESTED_ADD_FILE,
     INGESTED_NUM_FILES_TOTAL,
@@ -129,7 +134,18 @@ class InternalStats {
     kIntStatsWriteDoneByOther,
     kIntStatsWriteDoneBySelf,
     kIntStatsWriteWithWal,
+    // TODO(hx235): Currently `kIntStatsWriteStallMicros` only measures
+    // "delayed" time of CF-scope write stalls, not including the "stopped"
+    // time nor any DB-scope write stalls (e.g, ones triggered by
+    // `WriteBufferManager`).
+    //
+    // However, the word "write stall" includes both "delayed" and "stopped"
+    // (see `WriteStallCondition`) and DB-scope writes stalls (see
+    // `WriteStallCause`).
+    //
+    // So we should improve, rename or clarify it
     kIntStatsWriteStallMicros,
+    kIntStatsWriteBufferManagerLimitStopsCounts,
     kIntStatsNumMax,
   };
@@ -599,6 +615,10 @@ class InternalStats {
  private:
   void DumpDBMapStats(std::map<std::string, std::string>* db_stats);
   void DumpDBStats(std::string* value);
+  void DumpDBMapStatsWriteStall(std::map<std::string, std::string>* value);
+  void DumpDBStatsWriteStall(std::string* value);
   void DumpCFMapStats(std::map<std::string, std::string>* cf_stats);
   void DumpCFMapStats(
       const VersionStorageInfo* vstorage,
@@ -606,7 +626,6 @@ class InternalStats {
       CompactionStats* compaction_stats_sum);
   void DumpCFMapStatsByPriority(
       std::map<int, std::map<LevelStatType, double>>* priorities_stats);
-  void DumpCFMapStatsIOStalls(std::map<std::string, std::string>* cf_stats);
   void DumpCFStats(std::string* value);
   // if is_periodic = true, it is an internal call by RocksDB periodically to
   // dump the status.
@@ -615,6 +634,10 @@ class InternalStats {
   // dump the status.
   void DumpCFFileHistogram(std::string* value);

+  void DumpCFMapStatsWriteStall(std::map<std::string, std::string>* value);
+  void DumpCFStatsWriteStall(std::string* value,
+                             uint64_t* total_stall_count = nullptr);
+
   Cache* GetBlockCacheForStats();
   Cache* GetBlobCacheForStats();
@@ -648,7 +671,7 @@ class InternalStats {
     // ColumnFamily-level stats
     CompactionStats comp_stats;
     uint64_t ingest_bytes_flush;  // Bytes written to L0 (Flush)
-    uint64_t stall_count;         // Stall count
+    uint64_t stall_count;         // Total counts of CF-scope write stalls
     // Stats from compaction jobs - bytes written, bytes read, duration.
     uint64_t compact_bytes_write;
     uint64_t compact_bytes_read;
@@ -743,9 +766,15 @@ class InternalStats {
   bool HandleCFStatsNoFileHistogram(std::string* value, Slice suffix);
   bool HandleCFFileHistogram(std::string* value, Slice suffix);
   bool HandleCFStatsPeriodic(std::string* value, Slice suffix);
+  bool HandleCFWriteStallStats(std::string* value, Slice suffix);
+  bool HandleCFWriteStallStatsMap(std::map<std::string, std::string>* values,
+                                  Slice suffix);
   bool HandleDBMapStats(std::map<std::string, std::string>* compaction_stats,
                         Slice suffix);
   bool HandleDBStats(std::string* value, Slice suffix);
+  bool HandleDBWriteStallStats(std::string* value, Slice suffix);
+  bool HandleDBWriteStallStatsMap(std::map<std::string, std::string>* values,
+                                  Slice suffix);
   bool HandleSsTables(std::string* value, Slice suffix);
   bool HandleAggregatedTableProperties(std::string* value, Slice suffix);
   bool HandleAggregatedTablePropertiesAtLevel(std::string* value, Slice suffix);
```

db/write_stall_stats.cc (new file):
```cpp
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#include "db/write_stall_stats.h"

namespace ROCKSDB_NAMESPACE {
const std::string kInvalidWriteStallCauseHyphenString = "invalid";

const std::array<std::string, static_cast<uint32_t>(WriteStallCause::kNone)>
    kWriteStallCauseToHyphenString{{
        "memtable-limit",
        "l0-file-count-limit",
        "pending-compaction-bytes",
        // WriteStallCause::kCFScopeWriteStallCauseEnumMax
        kInvalidWriteStallCauseHyphenString,
        "write-buffer-manager-limit",
        // WriteStallCause::kDBScopeWriteStallCauseEnumMax
        kInvalidWriteStallCauseHyphenString,
    }};

const std::array<std::string,
                 static_cast<uint32_t>(WriteStallCondition::kNormal)>
    kWriteStallConditionToHyphenString{{
        "delays",
        "stops",
    }};

InternalStats::InternalCFStatsType InternalCFStat(
    WriteStallCause cause, WriteStallCondition condition) {
  switch (cause) {
    case WriteStallCause::kMemtableLimit: {
      switch (condition) {
        case WriteStallCondition::kDelayed:
          return InternalStats::MEMTABLE_LIMIT_DELAYS;
        case WriteStallCondition::kStopped:
          return InternalStats::MEMTABLE_LIMIT_STOPS;
        case WriteStallCondition::kNormal:
          break;
      }
      break;
    }
    case WriteStallCause::kL0FileCountLimit: {
      switch (condition) {
        case WriteStallCondition::kDelayed:
          return InternalStats::L0_FILE_COUNT_LIMIT_DELAYS;
        case WriteStallCondition::kStopped:
          return InternalStats::L0_FILE_COUNT_LIMIT_STOPS;
        case WriteStallCondition::kNormal:
          break;
      }
      break;
    }
    case WriteStallCause::kPendingCompactionBytes: {
      switch (condition) {
        case WriteStallCondition::kDelayed:
          return InternalStats::PENDING_COMPACTION_BYTES_LIMIT_DELAYS;
        case WriteStallCondition::kStopped:
          return InternalStats::PENDING_COMPACTION_BYTES_LIMIT_STOPS;
        case WriteStallCondition::kNormal:
          break;
      }
      break;
    }
    default:
      break;
  }
  return InternalStats::INTERNAL_CF_STATS_ENUM_MAX;
}

InternalStats::InternalDBStatsType InternalDBStat(
    WriteStallCause cause, WriteStallCondition condition) {
  switch (cause) {
    case WriteStallCause::kWriteBufferManagerLimit: {
      switch (condition) {
        case WriteStallCondition::kStopped:
          return InternalStats::kIntStatsWriteBufferManagerLimitStopsCounts;
        default:
          break;
      }
      break;
    }
    default:
      break;
  }
  return InternalStats::kIntStatsNumMax;
}

bool isCFScopeWriteStallCause(WriteStallCause cause) {
  uint32_t int_cause = static_cast<uint32_t>(cause);
  uint32_t lower_bound =
      static_cast<uint32_t>(WriteStallCause::kCFScopeWriteStallCauseEnumMax) -
      kNumCFScopeWriteStallCauses;
  uint32_t upper_bound =
      static_cast<uint32_t>(WriteStallCause::kCFScopeWriteStallCauseEnumMax) -
      1;
  return lower_bound <= int_cause && int_cause <= upper_bound;
}

bool isDBScopeWriteStallCause(WriteStallCause cause) {
  uint32_t int_cause = static_cast<uint32_t>(cause);
  uint32_t lower_bound =
      static_cast<uint32_t>(WriteStallCause::kDBScopeWriteStallCauseEnumMax) -
      kNumDBScopeWriteStallCauses;
  uint32_t upper_bound =
      static_cast<uint32_t>(WriteStallCause::kDBScopeWriteStallCauseEnumMax) -
      1;
  return lower_bound <= int_cause && int_cause <= upper_bound;
}

const std::string& WriteStallStatsMapKeys::TotalStops() {
  static const std::string kTotalStops = "total-stops";
  return kTotalStops;
}

const std::string& WriteStallStatsMapKeys::TotalDelays() {
  static const std::string kTotalDelays = "total-delays";
  return kTotalDelays;
}

const std::string&
WriteStallStatsMapKeys::CFL0FileCountLimitDelaysWithOngoingCompaction() {
  static const std::string ret =
      "cf-l0-file-count-limit-delays-with-ongoing-compaction";
  return ret;
}

const std::string&
WriteStallStatsMapKeys::CFL0FileCountLimitStopsWithOngoingCompaction() {
  static const std::string ret =
      "cf-l0-file-count-limit-stops-with-ongoing-compaction";
  return ret;
}

std::string WriteStallStatsMapKeys::CauseConditionCount(
    WriteStallCause cause, WriteStallCondition condition) {
  std::string cause_condition_count_name;

  std::string cause_name;
  if (isCFScopeWriteStallCause(cause) || isDBScopeWriteStallCause(cause)) {
    cause_name = kWriteStallCauseToHyphenString[static_cast<uint32_t>(cause)];
  } else {
    assert(false);
    return "";
  }

  const std::string& condition_name =
      kWriteStallConditionToHyphenString[static_cast<uint32_t>(condition)];

  cause_condition_count_name.reserve(cause_name.size() + 1 +
                                     condition_name.size());
  cause_condition_count_name.append(cause_name);
  cause_condition_count_name.append("-");
  cause_condition_count_name.append(condition_name);

  return cause_condition_count_name;
}
}  // namespace ROCKSDB_NAMESPACE
```
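The map keys are simply the hyphenated cause and condition names from the two arrays above joined with a `-`. A quick sketch of what `CauseConditionCount()` returns (the `main()` wrapper is illustrative):
```cpp
#include <iostream>

#include "rocksdb/db.h"     // WriteStallStatsMapKeys
#include "rocksdb/types.h"  // WriteStallCause, WriteStallCondition

using namespace ROCKSDB_NAMESPACE;

int main() {
  // "memtable-limit" + "-" + "delays" -> "memtable-limit-delays"
  std::cout << WriteStallStatsMapKeys::CauseConditionCount(
                   WriteStallCause::kMemtableLimit,
                   WriteStallCondition::kDelayed)
            << std::endl;
  // "write-buffer-manager-limit" + "-" + "stops"
  //     -> "write-buffer-manager-limit-stops"
  std::cout << WriteStallStatsMapKeys::CauseConditionCount(
                   WriteStallCause::kWriteBufferManagerLimit,
                   WriteStallCondition::kStopped)
            << std::endl;
  return 0;
}
```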

db/write_stall_stats.h (new file):
```cpp
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <array>

#include "db/internal_stats.h"
#include "rocksdb/types.h"

namespace ROCKSDB_NAMESPACE {
extern const std::string kInvalidWriteStallCauseHyphenString;

extern const std::array<std::string,
                        static_cast<uint32_t>(WriteStallCause::kNone)>
    kWriteStallCauseToHyphenString;

extern const std::array<std::string,
                        static_cast<uint32_t>(WriteStallCondition::kNormal)>
    kWriteStallConditionToHyphenString;

// REQUIRES:
// `cause` is a CF-scope `WriteStallCause`, see `WriteStallCause` for more
//
// REQUIRES:
// `condition` != `WriteStallCondition::kNormal`
extern InternalStats::InternalCFStatsType InternalCFStat(
    WriteStallCause cause, WriteStallCondition condition);

// REQUIRES:
// `cause` is a DB-scope `WriteStallCause`, see `WriteStallCause` for more
//
// REQUIRES:
// `condition` != `WriteStallCondition::kNormal`
extern InternalStats::InternalDBStatsType InternalDBStat(
    WriteStallCause cause, WriteStallCondition condition);

extern bool isCFScopeWriteStallCause(WriteStallCause cause);
extern bool isDBScopeWriteStallCause(WriteStallCause cause);

constexpr uint32_t kNumCFScopeWriteStallCauses =
    static_cast<uint32_t>(WriteStallCause::kCFScopeWriteStallCauseEnumMax) -
    static_cast<uint32_t>(WriteStallCause::kMemtableLimit);

constexpr uint32_t kNumDBScopeWriteStallCauses =
    static_cast<uint32_t>(WriteStallCause::kDBScopeWriteStallCauseEnumMax) -
    static_cast<uint32_t>(WriteStallCause::kWriteBufferManagerLimit);
}  // namespace ROCKSDB_NAMESPACE
```

include/rocksdb/db.h:
```diff
@@ -953,6 +953,18 @@ class DB {
     //      level, as well as the histogram of latency of single requests.
     static const std::string kCFFileHistogram;

+    //  "rocksdb.cf-write-stall-stats" - returns a multi-line string or
+    //      map with statistics on CF-scope write stalls for a given CF.
+    //      See `WriteStallStatsMapKeys` for structured representation of keys
+    //      available in the map form.
+    static const std::string kCFWriteStallStats;
+
+    //  "rocksdb.db-write-stall-stats" - returns a multi-line string or
+    //      map with statistics on DB-scope write stalls.
+    //      See `WriteStallStatsMapKeys` for structured representation of keys
+    //      available in the map form.
+    static const std::string kDBWriteStallStats;
+
     //  "rocksdb.dbstats" - As a string property, returns a multi-line string
     //      with general database stats, both cumulative (over the db's
     //      lifetime) and interval (since the last retrieval of kDBStats).
@@ -1873,6 +1885,24 @@ class DB {
   }
 };

+struct WriteStallStatsMapKeys {
+  static const std::string& TotalStops();
+  static const std::string& TotalDelays();
+
+  static const std::string& CFL0FileCountLimitDelaysWithOngoingCompaction();
+  static const std::string& CFL0FileCountLimitStopsWithOngoingCompaction();
+
+  // REQUIRES:
+  // `cause` isn't any of these: `WriteStallCause::kNone`,
+  // `WriteStallCause::kCFScopeWriteStallCauseEnumMax`,
+  // `WriteStallCause::kDBScopeWriteStallCauseEnumMax`
+  //
+  // REQUIRES:
+  // `condition` isn't any of these: `WriteStallCondition::kNormal`
+  static std::string CauseConditionCount(WriteStallCause cause,
+                                         WriteStallCondition condition);
+};
+
 // Overloaded operators for enum class SizeApproximationFlags.
 inline DB::SizeApproximationFlags operator&(DB::SizeApproximationFlags lhs,
                                             DB::SizeApproximationFlags rhs) {
```
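Both properties also work through plain `GetProperty()`, which returns the one-line `Write Stall (count): ...` rendering produced by the `DumpCFStatsWriteStall()`/`DumpDBStatsWriteStall()` functions above. A minimal sketch (the helper name is illustrative):
```cpp
#include <iostream>
#include <string>

#include "rocksdb/db.h"

using namespace ROCKSDB_NAMESPACE;

// Prints the string form of both new properties, e.g. for a fresh DB
// something like "Write Stall (count): memtable-limit-delays: 0, ...".
void PrintWriteStallStats(DB* db) {
  std::string str;
  if (db->GetProperty(DB::Properties::kCFWriteStallStats, &str)) {
    std::cout << str << std::endl;
  }
  if (db->GetProperty(DB::Properties::kDBWriteStallStats, &str)) {
    std::cout << str << std::endl;
  }
}
```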

include/rocksdb/listener.h:
```diff
@@ -194,12 +194,6 @@ enum class BackgroundErrorReason {
   kManifestWriteNoWAL,
 };

-enum class WriteStallCondition {
-  kNormal,
-  kDelayed,
-  kStopped,
-};
-
 struct WriteStallInfo {
   // the name of the column family
   std::string cf_name;
```

include/rocksdb/types.h:
```diff
@@ -63,4 +63,32 @@ enum EntryType {
   kEntryOther,
 };

+enum class WriteStallCause {
+  // Beginning of CF-scope write stall causes
+  //
+  // Always keep `kMemtableLimit` as the first stat in this section
+  kMemtableLimit,
+  kL0FileCountLimit,
+  kPendingCompactionBytes,
+  kCFScopeWriteStallCauseEnumMax,
+  // End of CF-scope write stall causes
+
+  // Beginning of DB-scope write stall causes
+  //
+  // Always keep `kWriteBufferManagerLimit` as the first stat in this section
+  kWriteBufferManagerLimit,
+  kDBScopeWriteStallCauseEnumMax,
+  // End of DB-scope write stall causes
+
+  // Always add new WriteStallCause before `kNone`
+  kNone,
+};
+
+enum class WriteStallCondition {
+  kDelayed,
+  kStopped,
+  // Always add new WriteStallCondition before `kNormal`
+  kNormal,
+};
+
 }  // namespace ROCKSDB_NAMESPACE
```
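The scope helpers and array sizes in `db/write_stall_stats.h` lean on the ordering spelled out in these comments: each scope section ends with its `EnumMax` sentinel, and `kNone`/`kNormal` stay last. Hypothetical compile-time guards, not part of this change, could pin those invariants down:
```cpp
#include <cstdint>

#include "rocksdb/types.h"

namespace ROCKSDB_NAMESPACE {
// Hypothetical guards encoding the enum-ordering invariants described above.
static_assert(static_cast<uint32_t>(WriteStallCause::kMemtableLimit) == 0,
              "kMemtableLimit must open the CF-scope section");
static_assert(WriteStallCause::kCFScopeWriteStallCauseEnumMax <
                  WriteStallCause::kWriteBufferManagerLimit,
              "CF-scope causes must precede DB-scope causes");
static_assert(WriteStallCause::kDBScopeWriteStallCauseEnumMax <
                  WriteStallCause::kNone,
              "kNone must stay last in WriteStallCause");
static_assert(WriteStallCondition::kStopped < WriteStallCondition::kNormal,
              "kNormal must stay last in WriteStallCondition");
}  // namespace ROCKSDB_NAMESPACE
```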

java/src/main/java/org/rocksdb/WriteStallCondition.java:
```diff
@@ -6,9 +6,9 @@
 package org.rocksdb;

 public enum WriteStallCondition {
-  NORMAL((byte) 0x0),
-  DELAYED((byte) 0x1),
-  STOPPED((byte) 0x2);
+  DELAYED((byte) 0x0),
+  STOPPED((byte) 0x1),
+  NORMAL((byte) 0x2);

   private final byte value;
```

java/src/test/java/org/rocksdb/EventListenerTest.java:
```diff
@@ -266,7 +266,7 @@ public class EventListenerTest {
     final FileOperationInfo fileOperationInfoTestData = new FileOperationInfo("/file/path",
         TEST_LONG_VAL, TEST_LONG_VAL, 1_600_699_420_000_000_000L, 5_000_000_000L, statusTestData);
     final WriteStallInfo writeStallInfoTestData =
-        new WriteStallInfo("columnFamilyName", (byte) 0x1, (byte) 0x2);
+        new WriteStallInfo("columnFamilyName", (byte) 0x0, (byte) 0x1);
     final ExternalFileIngestionInfo externalFileIngestionInfoTestData =
         new ExternalFileIngestionInfo("columnFamilyName", "/external/file/path",
             "/internal/file/path", TEST_LONG_VAL, tablePropertiesTestData);
```

src.mk:
```diff
@@ -97,6 +97,7 @@ LIB_SOURCES = \
   db/write_batch.cc \
   db/write_batch_base.cc \
   db/write_controller.cc \
+  db/write_stall_stats.cc \
   db/write_thread.cc \
   env/composite_env.cc \
   env/env.cc \
```
