Avoid repeat periodic stats printing when there is no change (#10891)

Summary:
When there is a column family that doesn't get any traffic, its stats are still dumped when options.options.stats_dump_period_sec triggers. This sometimes spam the information logs. With this change, we skip the printing if there is not change, until 8 periods.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10891

Test Plan: Manually test the behavior with hacked db_bench setups.

Reviewed By: jay-zhuang

Differential Revision: D40777183

fbshipit-source-id: ef0b9a793e4f6282df099b464f01d1fb4c5a2cab
main
sdong 2 years ago committed by Facebook GitHub Bot
parent 9079895aae
commit d989300ad1
  1. 2
      HISTORY.md
  2. 13
      db/db_impl/db_impl.cc
  3. 51
      db/internal_stats.cc
  4. 17
      db/internal_stats.h

@ -36,6 +36,8 @@
### Behavior Changes ### Behavior Changes
* Sanitize min_write_buffer_number_to_merge to 1 if atomic flush is enabled to prevent unexpected data loss when WAL is disabled in a multi-column-family setting (#10773). * Sanitize min_write_buffer_number_to_merge to 1 if atomic flush is enabled to prevent unexpected data loss when WAL is disabled in a multi-column-family setting (#10773).
* With periodic stat dumper waits up every options.stats_dump_period_sec seconds, it won't dump stats for a CF if it has no change in the period, unless 7 periods have been skipped.
* Only periodic stats dumper triggered by options.stats_dump_period_sec will update stats interval. Ones triggered by DB::GetProperty() will not update stats interval and will report based on an interval since the last time stats dump period.
### Public API changes ### Public API changes
* Make kXXH3 checksum the new default, because it is faster on common hardware, especially with kCRC32c affected by a performance bug in some versions of clang (https://github.com/facebook/rocksdb/issues/9891). DBs written with this new setting can be read by RocksDB 6.27 and newer. * Make kXXH3 checksum the new default, because it is faster on common hardware, especially with kCRC32c affected by a performance bug in some versions of clang (https://github.com/facebook/rocksdb/issues/9891). DBs written with this new setting can be read by RocksDB 6.27 and newer.

@ -1079,18 +1079,7 @@ void DBImpl::DumpStats() {
default_cf_internal_stats_->GetStringProperty(*property_info, *property, default_cf_internal_stats_->GetStringProperty(*property_info, *property,
&stats); &stats);
property = &DB::Properties::kCFStatsNoFileHistogram; property = &InternalStats::kPeriodicCFStats;
property_info = GetPropertyInfo(*property);
assert(property_info != nullptr);
assert(!property_info->need_out_of_mutex);
for (auto cfd : *versions_->GetColumnFamilySet()) {
if (cfd->initialized()) {
cfd->internal_stats()->GetStringProperty(*property_info, *property,
&stats);
}
}
property = &DB::Properties::kCFFileHistogram;
property_info = GetPropertyInfo(*property); property_info = GetPropertyInfo(*property);
assert(property_info != nullptr); assert(property_info != nullptr);
assert(!property_info->need_out_of_mutex); assert(!property_info->need_out_of_mutex);

@ -423,6 +423,10 @@ const std::string DB::Properties::kBlobCacheUsage =
const std::string DB::Properties::kBlobCachePinnedUsage = const std::string DB::Properties::kBlobCachePinnedUsage =
rocksdb_prefix + blob_cache_pinned_usage; rocksdb_prefix + blob_cache_pinned_usage;
const std::string InternalStats::kPeriodicCFStats =
DB::Properties::kCFStats + ".periodic";
const int InternalStats::kMaxNoChangePeriodSinceDump = 8;
const UnorderedMap<std::string, DBPropertyInfo> const UnorderedMap<std::string, DBPropertyInfo>
InternalStats::ppt_name_to_info = { InternalStats::ppt_name_to_info = {
{DB::Properties::kNumFilesAtLevelPrefix, {DB::Properties::kNumFilesAtLevelPrefix,
@ -438,6 +442,9 @@ const UnorderedMap<std::string, DBPropertyInfo>
{DB::Properties::kCFStats, {DB::Properties::kCFStats,
{false, &InternalStats::HandleCFStats, nullptr, {false, &InternalStats::HandleCFStats, nullptr,
&InternalStats::HandleCFMapStats, nullptr}}, &InternalStats::HandleCFMapStats, nullptr}},
{InternalStats::kPeriodicCFStats,
{false, &InternalStats::HandleCFStatsPeriodic, nullptr, nullptr,
nullptr}},
{DB::Properties::kCFStatsNoFileHistogram, {DB::Properties::kCFStatsNoFileHistogram,
{false, &InternalStats::HandleCFStatsNoFileHistogram, nullptr, nullptr, {false, &InternalStats::HandleCFStatsNoFileHistogram, nullptr, nullptr,
nullptr}}, nullptr}},
@ -605,6 +612,7 @@ InternalStats::InternalStats(int num_levels, SystemClock* clock,
comp_stats_(num_levels), comp_stats_(num_levels),
comp_stats_by_pri_(Env::Priority::TOTAL), comp_stats_by_pri_(Env::Priority::TOTAL),
file_read_latency_(num_levels), file_read_latency_(num_levels),
has_cf_change_since_dump_(true),
bg_error_count_(0), bg_error_count_(0),
number_levels_(num_levels), number_levels_(num_levels),
clock_(clock), clock_(clock),
@ -1041,9 +1049,41 @@ bool InternalStats::HandleCFStats(std::string* value, Slice /*suffix*/) {
return true; return true;
} }
bool InternalStats::HandleCFStatsPeriodic(std::string* value,
Slice /*suffix*/) {
bool has_change = has_cf_change_since_dump_;
if (!has_change) {
// If file histogram changes, there is activity in this period too.
uint64_t new_histogram_num = 0;
for (int level = 0; level < number_levels_; level++) {
new_histogram_num += file_read_latency_[level].num();
}
new_histogram_num += blob_file_read_latency_.num();
if (new_histogram_num != last_histogram_num) {
has_change = true;
last_histogram_num = new_histogram_num;
}
}
if (has_change) {
no_cf_change_period_since_dump_ = 0;
has_cf_change_since_dump_ = false;
} else if (no_cf_change_period_since_dump_++ > 0) {
// Not ready to sync
if (no_cf_change_period_since_dump_ == kMaxNoChangePeriodSinceDump) {
// Next periodic, we need to dump stats even if there is no change.
no_cf_change_period_since_dump_ = 0;
}
return true;
}
DumpCFStatsNoFileHistogram(/*is_periodic=*/true, value);
DumpCFFileHistogram(value);
return true;
}
bool InternalStats::HandleCFStatsNoFileHistogram(std::string* value, bool InternalStats::HandleCFStatsNoFileHistogram(std::string* value,
Slice /*suffix*/) { Slice /*suffix*/) {
DumpCFStatsNoFileHistogram(value); DumpCFStatsNoFileHistogram(/*is_periodic=*/false, value);
return true; return true;
} }
@ -1708,11 +1748,12 @@ void InternalStats::DumpCFMapStatsIOStalls(
} }
void InternalStats::DumpCFStats(std::string* value) { void InternalStats::DumpCFStats(std::string* value) {
DumpCFStatsNoFileHistogram(value); DumpCFStatsNoFileHistogram(/*is_periodic=*/false, value);
DumpCFFileHistogram(value); DumpCFFileHistogram(value);
} }
void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) { void InternalStats::DumpCFStatsNoFileHistogram(bool is_periodic,
std::string* value) {
char buf[2000]; char buf[2000];
// Per-ColumnFamily stats // Per-ColumnFamily stats
PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName(), "Level"); PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName(), "Level");
@ -1864,9 +1905,11 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) {
interval_compact_bytes_read / kMB / std::max(interval_seconds_up, 0.001), interval_compact_bytes_read / kMB / std::max(interval_seconds_up, 0.001),
interval_compact_micros / kMicrosInSec); interval_compact_micros / kMicrosInSec);
value->append(buf); value->append(buf);
if (is_periodic) {
cf_stats_snapshot_.compact_bytes_write = compact_bytes_write; cf_stats_snapshot_.compact_bytes_write = compact_bytes_write;
cf_stats_snapshot_.compact_bytes_read = compact_bytes_read; cf_stats_snapshot_.compact_bytes_read = compact_bytes_read;
cf_stats_snapshot_.compact_micros = compact_micros; cf_stats_snapshot_.compact_micros = compact_micros;
}
snprintf(buf, sizeof(buf), snprintf(buf, sizeof(buf),
"Stalls(count): %" PRIu64 "Stalls(count): %" PRIu64
@ -1897,6 +1940,7 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) {
total_stall_count - cf_stats_snapshot_.stall_count); total_stall_count - cf_stats_snapshot_.stall_count);
value->append(buf); value->append(buf);
if (is_periodic) {
cf_stats_snapshot_.seconds_up = seconds_up; cf_stats_snapshot_.seconds_up = seconds_up;
cf_stats_snapshot_.ingest_bytes_flush = flush_ingest; cf_stats_snapshot_.ingest_bytes_flush = flush_ingest;
cf_stats_snapshot_.ingest_bytes_addfile = add_file_ingest; cf_stats_snapshot_.ingest_bytes_addfile = add_file_ingest;
@ -1905,6 +1949,7 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) {
cf_stats_snapshot_.ingest_keys_addfile = ingest_keys_addfile; cf_stats_snapshot_.ingest_keys_addfile = ingest_keys_addfile;
cf_stats_snapshot_.comp_stats = compaction_stats_sum; cf_stats_snapshot_.comp_stats = compaction_stats_sum;
cf_stats_snapshot_.stall_count = total_stall_count; cf_stats_snapshot_.stall_count = total_stall_count;
}
// Do not gather cache entry stats during CFStats because DB // Do not gather cache entry stats during CFStats because DB
// mutex is held. Only dump last cached collection (rely on DB // mutex is held. Only dump last cached collection (rely on DB

@ -506,6 +506,7 @@ class InternalStats {
db_stats_snapshot_.Clear(); db_stats_snapshot_.Clear();
bg_error_count_ = 0; bg_error_count_ = 0;
started_at_ = clock_->NowMicros(); started_at_ = clock_->NowMicros();
has_cf_change_since_dump_ = true;
} }
void AddCompactionStats(int level, Env::Priority thread_pri, void AddCompactionStats(int level, Env::Priority thread_pri,
@ -528,6 +529,7 @@ class InternalStats {
} }
void AddCFStats(InternalCFStatsType type, uint64_t value) { void AddCFStats(InternalCFStatsType type, uint64_t value) {
has_cf_change_since_dump_ = true;
cf_stats_value_[type] += value; cf_stats_value_[type] += value;
++cf_stats_count_[type]; ++cf_stats_count_[type];
} }
@ -593,6 +595,8 @@ class InternalStats {
// DBPropertyInfo struct used internally for retrieving properties. // DBPropertyInfo struct used internally for retrieving properties.
static const UnorderedMap<std::string, DBPropertyInfo> ppt_name_to_info; static const UnorderedMap<std::string, DBPropertyInfo> ppt_name_to_info;
static const std::string kPeriodicCFStats;
private: private:
void DumpDBMapStats(std::map<std::string, std::string>* db_stats); void DumpDBMapStats(std::map<std::string, std::string>* db_stats);
void DumpDBStats(std::string* value); void DumpDBStats(std::string* value);
@ -605,7 +609,11 @@ class InternalStats {
std::map<int, std::map<LevelStatType, double>>* priorities_stats); std::map<int, std::map<LevelStatType, double>>* priorities_stats);
void DumpCFMapStatsIOStalls(std::map<std::string, std::string>* cf_stats); void DumpCFMapStatsIOStalls(std::map<std::string, std::string>* cf_stats);
void DumpCFStats(std::string* value); void DumpCFStats(std::string* value);
void DumpCFStatsNoFileHistogram(std::string* value); // if is_periodic = true, it is an internal call by RocksDB periodically to
// dump the status.
void DumpCFStatsNoFileHistogram(bool is_periodic, std::string* value);
// if is_periodic = true, it is an internal call by RocksDB periodically to
// dump the status.
void DumpCFFileHistogram(std::string* value); void DumpCFFileHistogram(std::string* value);
Cache* GetBlockCacheForStats(); Cache* GetBlockCacheForStats();
@ -629,6 +637,12 @@ class InternalStats {
CompactionStats per_key_placement_comp_stats_; CompactionStats per_key_placement_comp_stats_;
std::vector<HistogramImpl> file_read_latency_; std::vector<HistogramImpl> file_read_latency_;
HistogramImpl blob_file_read_latency_; HistogramImpl blob_file_read_latency_;
bool has_cf_change_since_dump_;
// How many periods of no change since the last time stats are dumped for
// a periodic dump.
int no_cf_change_period_since_dump_ = 0;
uint64_t last_histogram_num = std::numeric_limits<uint64_t>::max();
static const int kMaxNoChangePeriodSinceDump;
// Used to compute per-interval statistics // Used to compute per-interval statistics
struct CFStatsSnapshot { struct CFStatsSnapshot {
@ -729,6 +743,7 @@ class InternalStats {
bool HandleCFStats(std::string* value, Slice suffix); bool HandleCFStats(std::string* value, Slice suffix);
bool HandleCFStatsNoFileHistogram(std::string* value, Slice suffix); bool HandleCFStatsNoFileHistogram(std::string* value, Slice suffix);
bool HandleCFFileHistogram(std::string* value, Slice suffix); bool HandleCFFileHistogram(std::string* value, Slice suffix);
bool HandleCFStatsPeriodic(std::string* value, Slice suffix);
bool HandleDBMapStats(std::map<std::string, std::string>* compaction_stats, bool HandleDBMapStats(std::map<std::string, std::string>* compaction_stats,
Slice suffix); Slice suffix);
bool HandleDBStats(std::string* value, Slice suffix); bool HandleDBStats(std::string* value, Slice suffix);

Loading…
Cancel
Save