From 18afff2e631698c312daa3c6c44bb540542fe3ab Mon Sep 17 00:00:00 2001 From: Jim Paton Date: Mon, 29 Jul 2013 10:34:23 -0700 Subject: [PATCH] Add stall counts to statistics Summary: Previously, statistics are kept on how much time is spent on stalls of different types. This patch adds support for keeping number of stalls of each type. For example, instead of just reporting how many microseconds are spent waiting for memtables to be compacted, it will also report how many times a write stalled for that to occur. Test Plan: make -j32 check ./db_stress # Not really sure what else should be done... Reviewers: dhruba, MarkCallaghan, haobo Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D11841 --- db/db_impl.cc | 53 ++++++++++++++++++++++++++++++------ db/db_impl.h | 4 +++ include/leveldb/statistics.h | 15 ++++++++-- 3 files changed, 60 insertions(+), 12 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 0587774fb..4ba270e91 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -179,6 +179,9 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) stall_level0_slowdown_(0), stall_memtable_compaction_(0), stall_level0_num_files_(0), + stall_level0_slowdown_count_(0), + stall_memtable_compaction_count_(0), + stall_level0_num_files_count_(0), started_at_(options.env->NowMicros()), flush_on_destroy_(false), stats_(options.num_levels), @@ -193,8 +196,11 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) env_->GetAbsolutePath(dbname, &db_absolute_path_); stall_leveln_slowdown_.resize(options.num_levels); - for (int i = 0; i < options.num_levels; ++i) + stall_leveln_slowdown_count_.resize(options.num_levels); + for (int i = 0; i < options.num_levels; ++i) { stall_leveln_slowdown_[i] = 0; + stall_leveln_slowdown_count_[i] = 0; + } // Reserve ten files or so for other uses and give the rest to TableCache. const int table_cache_size = options_.max_open_files - 10; @@ -2437,10 +2443,14 @@ Status DBImpl::MakeRoomForWrite(bool force) { // case it is sharing the same core as the writer. mutex_.Unlock(); uint64_t t1 = env_->NowMicros(); - env_->SleepForMicroseconds(1000); + { + StopWatch sw(env_, options_.statistics, STALL_L0_SLOWDOWN_COUNT); + env_->SleepForMicroseconds(1000); + } uint64_t delayed = env_->NowMicros() - t1; RecordTick(options_.statistics, STALL_L0_SLOWDOWN_MICROS, delayed); stall_level0_slowdown_ += delayed; + stall_level0_slowdown_count_++; allow_delay = false; // Do not delay a single write more than once //Log(options_.info_log, // "delaying write %llu usecs for level0_slowdown_writes_trigger\n", @@ -2460,20 +2470,29 @@ Status DBImpl::MakeRoomForWrite(bool force) { DelayLoggingAndReset(); Log(options_.info_log, "wait for memtable compaction...\n"); uint64_t t1 = env_->NowMicros(); - bg_cv_.Wait(); + { + StopWatch sw(env_, options_.statistics, + STALL_MEMTABLE_COMPACTION_COUNT); + bg_cv_.Wait(); + } const uint64_t stall = env_->NowMicros() -t1; RecordTick(options_.statistics, STALL_MEMTABLE_COMPACTION_MICROS, stall); stall_memtable_compaction_ += stall; + stall_memtable_compaction_count_++; } else if (versions_->NumLevelFiles(0) >= options_.level0_stop_writes_trigger) { // There are too many level-0 files. DelayLoggingAndReset(); uint64_t t1 = env_->NowMicros(); Log(options_.info_log, "wait for fewer level0 files...\n"); - bg_cv_.Wait(); + { + StopWatch sw(env_, options_.statistics, STALL_L0_NUM_FILES_COUNT); + bg_cv_.Wait(); + } const uint64_t stall = env_->NowMicros() - t1; RecordTick(options_.statistics, STALL_L0_NUM_FILES_MICROS, stall); stall_level0_num_files_ += stall; + stall_level0_num_files_count_++; } else if ( allow_rate_limit_delay && options_.rate_limit > 1.0 && @@ -2482,9 +2501,13 @@ Status DBImpl::MakeRoomForWrite(bool force) { int max_level = versions_->MaxCompactionScoreLevel(); mutex_.Unlock(); uint64_t t1 = env_->NowMicros(); - env_->SleepForMicroseconds(1000); + { + StopWatch sw(env_, options_.statistics, RATE_LIMIT_DELAY_COUNT); + env_->SleepForMicroseconds(1000); + } uint64_t delayed = env_->NowMicros() - t1; stall_leveln_slowdown_[max_level] += delayed; + stall_leveln_slowdown_count_[max_level]++; // Make sure the following value doesn't round to zero. uint64_t rate_limit = std::max((delayed / 1000), (uint64_t) 1); rate_limit_delay_millis += rate_limit; @@ -2579,6 +2602,7 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { // Add "+1" to make sure seconds_up is > 0 and avoid NaN later double seconds_up = (micros_up + 1) / 1000000.0; uint64_t total_slowdown = 0; + uint64_t total_slowdown_count = 0; uint64_t interval_bytes_written = 0; uint64_t interval_bytes_read = 0; uint64_t interval_bytes_new = 0; @@ -2587,8 +2611,8 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { // Pardon the long line but I think it is easier to read this way. snprintf(buf, sizeof(buf), " Compactions\n" - "Level Files Size(MB) Score Time(sec) Read(MB) Write(MB) Rn(MB) Rnp1(MB) Wnew(MB) Amplify Read(MB/s) Write(MB/s) Rn Rnp1 Wnp1 NewW Count Ln-stall\n" - "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n" + "Level Files Size(MB) Score Time(sec) Read(MB) Write(MB) Rn(MB) Rnp1(MB) Wnew(MB) Amplify Read(MB/s) Write(MB/s) Rn Rnp1 Wnp1 NewW Count Ln-stall Stall-cnt\n" + "--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n" ); value->append(buf); for (int level = 0; level < NumberLevels(); level++) { @@ -2608,7 +2632,7 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { snprintf( buf, sizeof(buf), - "%3d %8d %8.0f %5.1f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %7.1f %9.1f %11.1f %8d %8d %8d %8d %8d %9.1f\n", + "%3d %8d %8.0f %5.1f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %7.1f %9.1f %11.1f %8d %8d %8d %8d %8d %9.1f %9lu\n", level, files, versions_->NumLevelBytes(level) / 1048576.0, @@ -2630,8 +2654,10 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { stats_[level].files_out_levelnp1, stats_[level].files_out_levelnp1 - stats_[level].files_in_levelnp1, stats_[level].count, - stall_leveln_slowdown_[level] / 1000000.0); + stall_leveln_slowdown_[level] / 1000000.0, + (unsigned long) stall_leveln_slowdown_count_[level]); total_slowdown += stall_leveln_slowdown_[level]; + total_slowdown_count += stall_leveln_slowdown_count_[level]; value->append(buf); } } @@ -2707,6 +2733,15 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { total_slowdown / 1000000.0); value->append(buf); + snprintf(buf, sizeof(buf), + "Stalls(count): %lu level0_slowdown, %lu level0_numfiles, " + "%lu memtable_compaction, %lu leveln_slowdown\n", + (unsigned long) stall_level0_slowdown_count_, + (unsigned long) stall_level0_num_files_count_, + (unsigned long) stall_memtable_compaction_count_, + (unsigned long) total_slowdown_count); + value->append(buf); + last_stats_.bytes_read_ = total_bytes_read; last_stats_.bytes_written_ = total_bytes_written; last_stats_.bytes_new_ = stats_[0].bytes_written; diff --git a/db/db_impl.h b/db/db_impl.h index b7f48dcac..fa20fe2b1 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -313,6 +313,10 @@ class DBImpl : public DB { uint64_t stall_memtable_compaction_; uint64_t stall_level0_num_files_; std::vector stall_leveln_slowdown_; + uint64_t stall_level0_slowdown_count_; + uint64_t stall_memtable_compaction_count_; + uint64_t stall_level0_num_files_count_; + std::vector stall_leveln_slowdown_count_; // Time at which this instance was started. const uint64_t started_at_; diff --git a/include/leveldb/statistics.h b/include/leveldb/statistics.h index 4ce4f6d1b..6bd4a05fa 100644 --- a/include/leveldb/statistics.h +++ b/include/leveldb/statistics.h @@ -80,7 +80,7 @@ const std::vector> TickersNameMap = { { STALL_L0_SLOWDOWN_MICROS, "rocksdb.l0.slowdown.micros" }, { STALL_MEMTABLE_COMPACTION_MICROS, "rocksdb.memtable.compaction.micros" }, { STALL_L0_NUM_FILES_MICROS, "rocksdb.l0.num.files.stall.micros" }, - { RATE_LIMIT_DELAY_MILLIS, "rocksdb.rate.limit.dleay.millis" }, + { RATE_LIMIT_DELAY_MILLIS, "rocksdb.rate.limit.delay.millis" }, { NO_ITERATORS, "rocksdb.num.iterators" }, { NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get" }, { NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read" }, @@ -109,7 +109,12 @@ enum Histograms { READ_BLOCK_COMPACTION_MICROS = 9, READ_BLOCK_GET_MICROS = 10, WRITE_RAW_BLOCK_MICROS = 11, - HISTOGRAM_ENUM_MAX = 12 + + STALL_L0_SLOWDOWN_COUNT = 12, + STALL_MEMTABLE_COMPACTION_COUNT = 13, + STALL_L0_NUM_FILES_COUNT = 14, + RATE_LIMIT_DELAY_COUNT = 15, + HISTOGRAM_ENUM_MAX = 16 }; const std::vector> HistogramsNameMap = { @@ -124,7 +129,11 @@ const std::vector> HistogramsNameMap = { { DB_MULTIGET, "rocksdb.db.multiget.micros" }, { READ_BLOCK_COMPACTION_MICROS, "rocksdb.read.block.compaction.micros" }, { READ_BLOCK_GET_MICROS, "rocksdb.read.block.get.micros" }, - { WRITE_RAW_BLOCK_MICROS, "rocksdb.write.raw.block.micros" } + { WRITE_RAW_BLOCK_MICROS, "rocksdb.write.raw.block.micros" }, + { STALL_L0_SLOWDOWN_COUNT, "rocksdb.l0.slowdown.count"}, + { STALL_MEMTABLE_COMPACTION_COUNT, "rocksdb.memtable.compaction.count"}, + { STALL_L0_NUM_FILES_COUNT, "rocksdb.num.files.stall.count"}, + { RATE_LIMIT_DELAY_COUNT, "rocksdb.rate.limit.delay.count"} }; struct HistogramData {