From 5e298f865be4df87b0065c0b3e19c565b5564abc Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Thu, 28 Feb 2019 10:14:19 -0800 Subject: [PATCH] Add two more StatsLevel (#5027) Summary: Statistics cost too much CPU for some use cases. Add two stats levels so that people can choose to skip two types of expensive stats, timers and histograms. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5027 Differential Revision: D14252765 Pulled By: siying fbshipit-source-id: 75ecec9eaa44c06118229df4f80c366115346592 --- HISTORY.md | 3 +++ db/compaction_job.cc | 18 +++++++++--------- db/compaction_picker_universal.cc | 4 ++-- db/db_impl.cc | 4 ++-- db/db_impl_compaction_flush.cc | 4 ++-- db/db_impl_readonly.cc | 2 +- db/db_impl_write.cc | 6 +++--- db/flush_job.cc | 2 +- db/merge_helper.cc | 4 ++-- include/rocksdb/statistics.h | 30 +++++++++++++++++++++++++++--- monitoring/statistics.cc | 7 +++++-- monitoring/statistics.h | 16 ++++++++++++---- table/block_based_table_builder.cc | 8 ++++---- table/format.cc | 7 ++++--- tools/db_bench_tool.cc | 5 +++++ util/stop_watch.h | 11 +++++++---- utilities/blob_db/blob_db_impl.cc | 4 ++-- 17 files changed, 91 insertions(+), 44 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 14502d37a..ed37c3355 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,7 @@ # Rocksdb Change Log +## Unreleased +### New Features +* Introduce two more stats levels, kExceptHistogramOrTimers and kExceptTimers. 
## 6.0.0 (2/19/2019) ### New Features diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 72380f1eb..2ecb960fd 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -414,11 +414,10 @@ void CompactionJob::Prepare() { bottommost_level_ = c->bottommost_level(); if (c->ShouldFormSubcompactions()) { - const uint64_t start_micros = env_->NowMicros(); - GenSubcompactionBoundaries(); - MeasureTime(stats_, SUBCOMPACTION_SETUP_TIME, - env_->NowMicros() - start_micros); - + { + StopWatch sw(env_, stats_, SUBCOMPACTION_SETUP_TIME); + GenSubcompactionBoundaries(); + } assert(sizes_.size() == boundaries_.size() + 1); for (size_t i = 0; i <= boundaries_.size(); i++) { @@ -426,8 +425,8 @@ void CompactionJob::Prepare() { Slice* end = i == boundaries_.size() ? nullptr : &boundaries_[i]; compact_->sub_compact_states.emplace_back(c, start, end, sizes_[i]); } - MeasureTime(stats_, NUM_SUBCOMPACTIONS_SCHEDULED, - compact_->sub_compact_states.size()); + RecordInHistogram(stats_, NUM_SUBCOMPACTIONS_SCHEDULED, + compact_->sub_compact_states.size()); } else { compact_->sub_compact_states.emplace_back(c, nullptr, nullptr); } @@ -607,8 +606,9 @@ Status CompactionJob::Run() { compact_->sub_compact_states[i].compaction_job_stats.cpu_micros; } - MeasureTime(stats_, COMPACTION_TIME, compaction_stats_.micros); - MeasureTime(stats_, COMPACTION_CPU_TIME, compaction_stats_.cpu_micros); + RecordTimeToHistogram(stats_, COMPACTION_TIME, compaction_stats_.micros); + RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME, + compaction_stats_.cpu_micros); TEST_SYNC_POINT("CompactionJob::Run:BeforeVerify"); diff --git a/db/compaction_picker_universal.cc b/db/compaction_picker_universal.cc index c0cf2b062..929117858 100644 --- a/db/compaction_picker_universal.cc +++ b/db/compaction_picker_universal.cc @@ -395,8 +395,8 @@ Compaction* UniversalCompactionPicker::PickCompaction( } #endif // update statistics - MeasureTime(ioptions_.statistics, NUM_FILES_IN_SINGLE_COMPACTION, - 
c->inputs(0)->size()); + RecordInHistogram(ioptions_.statistics, NUM_FILES_IN_SINGLE_COMPACTION, + c->inputs(0)->size()); RegisterCompaction(c); vstorage->ComputeCompactionScore(ioptions_, mutable_cf_options); diff --git a/db/db_impl.cc b/db/db_impl.cc index 821abba0e..558b76082 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1447,7 +1447,7 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, RecordTick(stats_, BYTES_READ, size); PERF_COUNTER_ADD(get_read_bytes, size); } - MeasureTime(stats_, BYTES_PER_READ, size); + RecordInHistogram(stats_, BYTES_PER_READ, size); } return s; } @@ -1632,7 +1632,7 @@ std::vector DBImpl::MultiGet( RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys); RecordTick(stats_, NUMBER_MULTIGET_KEYS_FOUND, num_found); RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read); - MeasureTime(stats_, BYTES_PER_MULTIGET, bytes_read); + RecordInHistogram(stats_, BYTES_PER_MULTIGET, bytes_read); PERF_COUNTER_ADD(multiget_read_bytes, bytes_read); PERF_TIMER_STOP(get_post_process_time); diff --git a/db/db_impl_compaction_flush.cc b/db/db_impl_compaction_flush.cc index 050b3f106..2298237a8 100644 --- a/db/db_impl_compaction_flush.cc +++ b/db/db_impl_compaction_flush.cc @@ -2418,8 +2418,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, status = Status::CompactionTooLarge(); } else { // update statistics - MeasureTime(stats_, NUM_FILES_IN_SINGLE_COMPACTION, - c->inputs(0)->size()); + RecordInHistogram(stats_, NUM_FILES_IN_SINGLE_COMPACTION, + c->inputs(0)->size()); // There are three things that can change compaction score: // 1) When flush or compaction finish. 
This case is covered by // InstallSuperVersionAndScheduleWork diff --git a/db/db_impl_readonly.cc b/db/db_impl_readonly.cc index bd7099f00..5d7515c28 100644 --- a/db/db_impl_readonly.cc +++ b/db/db_impl_readonly.cc @@ -60,7 +60,7 @@ Status DBImplReadOnly::Get(const ReadOptions& read_options, RecordTick(stats_, NUMBER_KEYS_READ); size_t size = pinnable_val->size(); RecordTick(stats_, BYTES_READ, size); - MeasureTime(stats_, BYTES_PER_READ, size); + RecordInHistogram(stats_, BYTES_PER_READ, size); PERF_COUNTER_ADD(get_read_bytes, size); return s; } diff --git a/db/db_impl_write.cc b/db/db_impl_write.cc index 40bd6e26c..9f84fe905 100644 --- a/db/db_impl_write.cc +++ b/db/db_impl_write.cc @@ -279,7 +279,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, concurrent_update); RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other); } - MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size); + RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size); if (write_options.disableWAL) { has_unpersisted_data_.store(true, std::memory_order_relaxed); @@ -471,7 +471,7 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options, RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count); stats->AddDBStats(InternalStats::BYTES_WRITTEN, total_byte_size); RecordTick(stats_, BYTES_WRITTEN, total_byte_size); - MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size); + RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size); PERF_TIMER_STOP(write_pre_and_post_process_time); @@ -602,7 +602,7 @@ Status DBImpl::WriteImplWALOnly(const WriteOptions& write_options, concurrent_update); RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other); } - MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size); + RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size); PERF_TIMER_STOP(write_pre_and_post_process_time); diff --git a/db/flush_job.cc b/db/flush_job.cc index 17c9b0df6..413fa1c99 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -415,7 +415,7 @@ Status 
FlushJob::WriteLevel0Table() { stats.micros = db_options_.env->NowMicros() - start_micros; stats.cpu_micros = db_options_.env->NowCPUNanos() / 1000 - start_cpu_micros; stats.bytes_written = meta_.fd.GetFileSize(); - MeasureTime(stats_, FLUSH_TIME, stats.micros); + RecordTimeToHistogram(stats_, FLUSH_TIME, stats.micros); cfd_->internal_stats()->AddCompactionStats(0 /* level */, stats); cfd_->internal_stats()->AddCFStats(InternalStats::BYTES_FLUSHED, meta_.fd.GetFileSize()); diff --git a/db/merge_helper.cc b/db/merge_helper.cc index b03e9dafe..4a4d2fb71 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -64,8 +64,8 @@ Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator, } if (update_num_ops_stats) { - MeasureTime(statistics, READ_NUM_MERGE_OPERANDS, - static_cast(operands.size())); + RecordInHistogram(statistics, READ_NUM_MERGE_OPERANDS, + static_cast(operands.size())); } bool success; diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 9119d8c93..254da1dc4 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -22,7 +22,8 @@ namespace rocksdb { * 1. Any ticker should be added before TICKER_ENUM_MAX. * 2. Add a readable string in TickersNameMap below for the newly added ticker. * 3. Add a corresponding enum value to TickerType.java in the java API - * 4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType and toCppTickers + * 4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType + * and toCppTickers */ enum Tickers : uint32_t { // total block cache misses @@ -443,7 +444,11 @@ struct HistogramData { double min = 0.0; }; -enum StatsLevel { +enum StatsLevel : uint8_t { + // Disable timer stats, and skip histogram stats + kExceptHistogramOrTimers, + // Skip timer stats + kExceptTimers, // Collect all stats except time inside mutex lock AND time spent on // compression. 
kExceptDetailedTimers, @@ -468,7 +473,26 @@ class Statistics { virtual void recordTick(uint32_t tickerType, uint64_t count = 0) = 0; virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0; virtual uint64_t getAndResetTickerCount(uint32_t tickerType) = 0; - virtual void measureTime(uint32_t histogramType, uint64_t time) = 0; + virtual void reportTimeToHistogram(uint32_t histogramType, uint64_t time) { + if (stats_level_ <= StatsLevel::kExceptTimers) { + return; + } + recordInHistogram(histogramType, time); + } + // The function is here only for backward compatibility reasons. + // Users implementing their own Statistics class should override + // recordInHistogram() instead and leave measureTime() as it is. + virtual void measureTime(uint32_t /*histogramType*/, uint64_t /*time*/) { + // This is not supposed to be called. + assert(false); + } + virtual void recordInHistogram(uint32_t histogramType, uint64_t time) { + // measureTime() is the old and inaccurate function name. + // To keep backward compatible. If users implement their own + // statistics, which overrides measureTime() but doesn't override + // this function. We forward to measureTime(). 
+ measureTime(histogramType, time); + } // Resets all ticker and histogram stats virtual Status Reset() { diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index 0501ee207..9c8c6ee8c 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -322,11 +322,14 @@ void StatisticsImpl::recordTick(uint32_t tickerType, uint64_t count) { } } -void StatisticsImpl::measureTime(uint32_t histogramType, uint64_t value) { +void StatisticsImpl::recordInHistogram(uint32_t histogramType, uint64_t value) { assert(histogramType < HISTOGRAM_ENUM_MAX); + if (stats_level_ <= StatsLevel::kExceptHistogramOrTimers) { + return; + } per_core_stats_.Access()->histograms_[histogramType].Add(value); if (stats_ && histogramType < HISTOGRAM_ENUM_MAX) { - stats_->measureTime(histogramType, value); + stats_->recordInHistogram(histogramType, value); } } diff --git a/monitoring/statistics.h b/monitoring/statistics.h index 55dd1fad7..fac01205f 100644 --- a/monitoring/statistics.h +++ b/monitoring/statistics.h @@ -53,7 +53,8 @@ class StatisticsImpl : public Statistics { virtual void setTickerCount(uint32_t ticker_type, uint64_t count) override; virtual uint64_t getAndResetTickerCount(uint32_t ticker_type) override; virtual void recordTick(uint32_t ticker_type, uint64_t count) override; - virtual void measureTime(uint32_t histogram_type, uint64_t value) override; + virtual void recordInHistogram(uint32_t histogram_type, + uint64_t value) override; virtual Status Reset() override; virtual std::string ToString() const override; @@ -99,10 +100,17 @@ class StatisticsImpl : public Statistics { }; // Utility functions -inline void MeasureTime(Statistics* statistics, uint32_t histogram_type, - uint64_t value) { +inline void RecordInHistogram(Statistics* statistics, uint32_t histogram_type, + uint64_t value) { if (statistics) { - statistics->measureTime(histogram_type, value); + statistics->recordInHistogram(histogram_type, value); + } +} + +inline void 
RecordTimeToHistogram(Statistics* statistics, + uint32_t histogram_type, uint64_t value) { + if (statistics) { + statistics->reportTimeToHistogram(histogram_type, value); } } diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index 5ac013462..1b6829623 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -634,11 +634,11 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents, block_contents = raw_block_contents; } else if (type != kNoCompression) { if (ShouldReportDetailedTime(r->ioptions.env, r->ioptions.statistics)) { - MeasureTime(r->ioptions.statistics, COMPRESSION_TIMES_NANOS, - timer.ElapsedNanos()); + RecordTimeToHistogram(r->ioptions.statistics, COMPRESSION_TIMES_NANOS, + timer.ElapsedNanos()); } - MeasureTime(r->ioptions.statistics, BYTES_COMPRESSED, - raw_block_contents.size()); + RecordInHistogram(r->ioptions.statistics, BYTES_COMPRESSED, + raw_block_contents.size()); RecordTick(r->ioptions.statistics, NUMBER_BLOCK_COMPRESSED); } else if (type != r->compression_type) { RecordTick(r->ioptions.statistics, NUMBER_BLOCK_NOT_COMPRESSED); diff --git a/table/format.cc b/table/format.cc index d37b54912..e0fbc3363 100644 --- a/table/format.cc +++ b/table/format.cc @@ -379,10 +379,11 @@ Status UncompressBlockContentsForCompressionType( } if(ShouldReportDetailedTime(ioptions.env, ioptions.statistics)){ - MeasureTime(ioptions.statistics, DECOMPRESSION_TIMES_NANOS, - timer.ElapsedNanos()); + RecordTimeToHistogram(ioptions.statistics, DECOMPRESSION_TIMES_NANOS, + timer.ElapsedNanos()); } - MeasureTime(ioptions.statistics, BYTES_DECOMPRESSED, contents->data.size()); + RecordTimeToHistogram(ioptions.statistics, BYTES_DECOMPRESSED, + contents->data.size()); RecordTick(ioptions.statistics, NUMBER_BLOCK_DECOMPRESSED); return Status::OK(); diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index ec9cdcab4..16ce30736 100644 --- a/tools/db_bench_tool.cc +++ 
b/tools/db_bench_tool.cc @@ -560,6 +560,8 @@ DEFINE_bool(verify_checksum, true, " from storage"); DEFINE_bool(statistics, false, "Database statistics"); +DEFINE_int32(stats_level, rocksdb::StatsLevel::kExceptDetailedTimers, + "stats level for statistics"); DEFINE_string(statistics_string, "", "Serialized statistics string"); static class std::shared_ptr dbstats; @@ -6079,6 +6081,9 @@ int db_bench_tool(int argc, char** argv) { if (FLAGS_statistics) { dbstats = rocksdb::CreateDBStatistics(); } + if (dbstats) { + dbstats->stats_level_ = static_cast(FLAGS_stats_level); + } FLAGS_compaction_pri_e = (rocksdb::CompactionPri)FLAGS_compaction_pri; std::vector fanout = rocksdb::StringSplit( diff --git a/util/stop_watch.h b/util/stop_watch.h index b018eb1d6..fe353b526 100644 --- a/util/stop_watch.h +++ b/util/stop_watch.h @@ -22,7 +22,9 @@ class StopWatch { hist_type_(hist_type), elapsed_(elapsed), overwrite_(overwrite), - stats_enabled_(statistics && statistics->HistEnabledForType(hist_type)), + stats_enabled_(statistics && + statistics->stats_level_ >= StatsLevel::kExceptTimers && + statistics->HistEnabledForType(hist_type)), delay_enabled_(delay_enabled), total_delay_(0), delay_start_time_(0), @@ -41,9 +43,10 @@ class StopWatch { *elapsed_ -= total_delay_; } if (stats_enabled_) { - statistics_->measureTime(hist_type_, - (elapsed_ != nullptr) ? *elapsed_ : - (env_->NowMicros() - start_time_)); + statistics_->reportTimeToHistogram( + hist_type_, (elapsed_ != nullptr) + ? 
*elapsed_ + : (env_->NowMicros() - start_time_)); } } diff --git a/utilities/blob_db/blob_db_impl.cc b/utilities/blob_db/blob_db_impl.cc index 7c938c16e..880e222ba 100644 --- a/utilities/blob_db/blob_db_impl.cc +++ b/utilities/blob_db/blob_db_impl.cc @@ -743,8 +743,8 @@ Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/, RecordTick(statistics_, BLOB_DB_NUM_KEYS_WRITTEN); RecordTick(statistics_, BLOB_DB_BYTES_WRITTEN, key.size() + value.size()); - MeasureTime(statistics_, BLOB_DB_KEY_SIZE, key.size()); - MeasureTime(statistics_, BLOB_DB_VALUE_SIZE, value.size()); + RecordInHistogram(statistics_, BLOB_DB_KEY_SIZE, key.size()); + RecordInHistogram(statistics_, BLOB_DB_VALUE_SIZE, value.size()); return s; }