From d989300ad158c0797a1c201bee964f180d0599ad Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 31 Oct 2022 09:51:38 -0700 Subject: [PATCH] Avoid repeat periodic stats printing when there is no change (#10891) Summary: When there is a column family that doesn't get any traffic, its stats are still dumped when options.options.stats_dump_period_sec triggers. This sometimes spam the information logs. With this change, we skip the printing if there is not change, until 8 periods. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10891 Test Plan: Manually test the behavior with hacked db_bench setups. Reviewed By: jay-zhuang Differential Revision: D40777183 fbshipit-source-id: ef0b9a793e4f6282df099b464f01d1fb4c5a2cab --- HISTORY.md | 2 ++ db/db_impl/db_impl.cc | 13 +------- db/internal_stats.cc | 73 ++++++++++++++++++++++++++++++++++--------- db/internal_stats.h | 17 +++++++++- 4 files changed, 78 insertions(+), 27 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 308954c2f..cd3953249 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -36,6 +36,8 @@ ### Behavior Changes * Sanitize min_write_buffer_number_to_merge to 1 if atomic flush is enabled to prevent unexpected data loss when WAL is disabled in a multi-column-family setting (#10773). +* With periodic stat dumper waits up every options.stats_dump_period_sec seconds, it won't dump stats for a CF if it has no change in the period, unless 7 periods have been skipped. +* Only periodic stats dumper triggered by options.stats_dump_period_sec will update stats interval. Ones triggered by DB::GetProperty() will not update stats interval and will report based on an interval since the last time stats dump period. ### Public API changes * Make kXXH3 checksum the new default, because it is faster on common hardware, especially with kCRC32c affected by a performance bug in some versions of clang (https://github.com/facebook/rocksdb/issues/9891). DBs written with this new setting can be read by RocksDB 6.27 and newer. diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 380aabb6e..6fe5bf01b 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1079,18 +1079,7 @@ void DBImpl::DumpStats() { default_cf_internal_stats_->GetStringProperty(*property_info, *property, &stats); - property = &DB::Properties::kCFStatsNoFileHistogram; - property_info = GetPropertyInfo(*property); - assert(property_info != nullptr); - assert(!property_info->need_out_of_mutex); - for (auto cfd : *versions_->GetColumnFamilySet()) { - if (cfd->initialized()) { - cfd->internal_stats()->GetStringProperty(*property_info, *property, - &stats); - } - } - - property = &DB::Properties::kCFFileHistogram; + property = &InternalStats::kPeriodicCFStats; property_info = GetPropertyInfo(*property); assert(property_info != nullptr); assert(!property_info->need_out_of_mutex); diff --git a/db/internal_stats.cc b/db/internal_stats.cc index dfe7e6e70..ac5b81f3e 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -423,6 +423,10 @@ const std::string DB::Properties::kBlobCacheUsage = const std::string DB::Properties::kBlobCachePinnedUsage = rocksdb_prefix + blob_cache_pinned_usage; +const std::string InternalStats::kPeriodicCFStats = + DB::Properties::kCFStats + ".periodic"; +const int InternalStats::kMaxNoChangePeriodSinceDump = 8; + const UnorderedMap InternalStats::ppt_name_to_info = { {DB::Properties::kNumFilesAtLevelPrefix, @@ -438,6 +442,9 @@ const UnorderedMap {DB::Properties::kCFStats, {false, &InternalStats::HandleCFStats, nullptr, &InternalStats::HandleCFMapStats, nullptr}}, + {InternalStats::kPeriodicCFStats, + {false, &InternalStats::HandleCFStatsPeriodic, nullptr, nullptr, + nullptr}}, {DB::Properties::kCFStatsNoFileHistogram, {false, &InternalStats::HandleCFStatsNoFileHistogram, nullptr, nullptr, nullptr}}, @@ -605,6 +612,7 @@ InternalStats::InternalStats(int num_levels, SystemClock* clock, comp_stats_(num_levels), comp_stats_by_pri_(Env::Priority::TOTAL), file_read_latency_(num_levels), + has_cf_change_since_dump_(true), bg_error_count_(0), number_levels_(num_levels), clock_(clock), @@ -1041,9 +1049,41 @@ bool InternalStats::HandleCFStats(std::string* value, Slice /*suffix*/) { return true; } +bool InternalStats::HandleCFStatsPeriodic(std::string* value, + Slice /*suffix*/) { + bool has_change = has_cf_change_since_dump_; + if (!has_change) { + // If file histogram changes, there is activity in this period too. + uint64_t new_histogram_num = 0; + for (int level = 0; level < number_levels_; level++) { + new_histogram_num += file_read_latency_[level].num(); + } + new_histogram_num += blob_file_read_latency_.num(); + if (new_histogram_num != last_histogram_num) { + has_change = true; + last_histogram_num = new_histogram_num; + } + } + if (has_change) { + no_cf_change_period_since_dump_ = 0; + has_cf_change_since_dump_ = false; + } else if (no_cf_change_period_since_dump_++ > 0) { + // Not ready to sync + if (no_cf_change_period_since_dump_ == kMaxNoChangePeriodSinceDump) { + // Next periodic, we need to dump stats even if there is no change. + no_cf_change_period_since_dump_ = 0; + } + return true; + } + + DumpCFStatsNoFileHistogram(/*is_periodic=*/true, value); + DumpCFFileHistogram(value); + return true; +} + bool InternalStats::HandleCFStatsNoFileHistogram(std::string* value, Slice /*suffix*/) { - DumpCFStatsNoFileHistogram(value); + DumpCFStatsNoFileHistogram(/*is_periodic=*/false, value); return true; } @@ -1708,11 +1748,12 @@ void InternalStats::DumpCFMapStatsIOStalls( } void InternalStats::DumpCFStats(std::string* value) { - DumpCFStatsNoFileHistogram(value); + DumpCFStatsNoFileHistogram(/*is_periodic=*/false, value); DumpCFFileHistogram(value); } -void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) { +void InternalStats::DumpCFStatsNoFileHistogram(bool is_periodic, + std::string* value) { char buf[2000]; // Per-ColumnFamily stats PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName(), "Level"); @@ -1864,9 +1905,11 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) { interval_compact_bytes_read / kMB / std::max(interval_seconds_up, 0.001), interval_compact_micros / kMicrosInSec); value->append(buf); - cf_stats_snapshot_.compact_bytes_write = compact_bytes_write; - cf_stats_snapshot_.compact_bytes_read = compact_bytes_read; - cf_stats_snapshot_.compact_micros = compact_micros; + if (is_periodic) { + cf_stats_snapshot_.compact_bytes_write = compact_bytes_write; + cf_stats_snapshot_.compact_bytes_read = compact_bytes_read; + cf_stats_snapshot_.compact_micros = compact_micros; + } snprintf(buf, sizeof(buf), "Stalls(count): %" PRIu64 @@ -1897,14 +1940,16 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) { total_stall_count - cf_stats_snapshot_.stall_count); value->append(buf); - cf_stats_snapshot_.seconds_up = seconds_up; - cf_stats_snapshot_.ingest_bytes_flush = flush_ingest; - cf_stats_snapshot_.ingest_bytes_addfile = add_file_ingest; - cf_stats_snapshot_.ingest_files_addfile = ingest_files_addfile; - cf_stats_snapshot_.ingest_l0_files_addfile = ingest_l0_files_addfile; - cf_stats_snapshot_.ingest_keys_addfile = ingest_keys_addfile; - cf_stats_snapshot_.comp_stats = compaction_stats_sum; - cf_stats_snapshot_.stall_count = total_stall_count; + if (is_periodic) { + cf_stats_snapshot_.seconds_up = seconds_up; + cf_stats_snapshot_.ingest_bytes_flush = flush_ingest; + cf_stats_snapshot_.ingest_bytes_addfile = add_file_ingest; + cf_stats_snapshot_.ingest_files_addfile = ingest_files_addfile; + cf_stats_snapshot_.ingest_l0_files_addfile = ingest_l0_files_addfile; + cf_stats_snapshot_.ingest_keys_addfile = ingest_keys_addfile; + cf_stats_snapshot_.comp_stats = compaction_stats_sum; + cf_stats_snapshot_.stall_count = total_stall_count; + } // Do not gather cache entry stats during CFStats because DB // mutex is held. Only dump last cached collection (rely on DB diff --git a/db/internal_stats.h b/db/internal_stats.h index 386eef42d..cb29e82e9 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -506,6 +506,7 @@ class InternalStats { db_stats_snapshot_.Clear(); bg_error_count_ = 0; started_at_ = clock_->NowMicros(); + has_cf_change_since_dump_ = true; } void AddCompactionStats(int level, Env::Priority thread_pri, @@ -528,6 +529,7 @@ class InternalStats { } void AddCFStats(InternalCFStatsType type, uint64_t value) { + has_cf_change_since_dump_ = true; cf_stats_value_[type] += value; ++cf_stats_count_[type]; } @@ -593,6 +595,8 @@ class InternalStats { // DBPropertyInfo struct used internally for retrieving properties. static const UnorderedMap ppt_name_to_info; + static const std::string kPeriodicCFStats; + private: void DumpDBMapStats(std::map* db_stats); void DumpDBStats(std::string* value); @@ -605,7 +609,11 @@ class InternalStats { std::map>* priorities_stats); void DumpCFMapStatsIOStalls(std::map* cf_stats); void DumpCFStats(std::string* value); - void DumpCFStatsNoFileHistogram(std::string* value); + // if is_periodic = true, it is an internal call by RocksDB periodically to + // dump the status. + void DumpCFStatsNoFileHistogram(bool is_periodic, std::string* value); + // if is_periodic = true, it is an internal call by RocksDB periodically to + // dump the status. void DumpCFFileHistogram(std::string* value); Cache* GetBlockCacheForStats(); @@ -629,6 +637,12 @@ class InternalStats { CompactionStats per_key_placement_comp_stats_; std::vector file_read_latency_; HistogramImpl blob_file_read_latency_; + bool has_cf_change_since_dump_; + // How many periods of no change since the last time stats are dumped for + // a periodic dump. + int no_cf_change_period_since_dump_ = 0; + uint64_t last_histogram_num = std::numeric_limits::max(); + static const int kMaxNoChangePeriodSinceDump; // Used to compute per-interval statistics struct CFStatsSnapshot { @@ -729,6 +743,7 @@ class InternalStats { bool HandleCFStats(std::string* value, Slice suffix); bool HandleCFStatsNoFileHistogram(std::string* value, Slice suffix); bool HandleCFFileHistogram(std::string* value, Slice suffix); + bool HandleCFStatsPeriodic(std::string* value, Slice suffix); bool HandleDBMapStats(std::map* compaction_stats, Slice suffix); bool HandleDBStats(std::string* value, Slice suffix);