Add two more StatsLevel (#5027)

Summary:
Statistics cost too much CPU for some use cases. Add two more stats levels
so that users can choose to skip two expensive kinds of stats: timers and
histograms.
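
A minimal usage sketch (not part of the diff; it assumes the public `stats_level_` member on Statistics that this PR reads in stop_watch.h and sets in db_bench):

```cpp
// Sketch only: pick a cheaper stats level on a stats object created by
// CreateDBStatistics(), mirroring how db_bench sets it in this PR.
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

int main() {
  rocksdb::Options options;
  options.statistics = rocksdb::CreateDBStatistics();

  // Skip timer stats but keep histograms:
  options.statistics->stats_level_ = rocksdb::StatsLevel::kExceptTimers;

  // Or skip both timers and histograms for the lowest statistics overhead:
  // options.statistics->stats_level_ =
  //     rocksdb::StatsLevel::kExceptHistogramOrTimers;
  return 0;
}
```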
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5027

Differential Revision: D14252765

Pulled By: siying

fbshipit-source-id: 75ecec9eaa44c06118229df4f80c366115346592
Branch: main
Author: Siying Dong (committed by Facebook Github Bot)
Parent: a661c0d208
Commit: 5e298f865b
17 changed files:

  HISTORY.md                          |  3
  db/compaction_job.cc                | 18
  db/compaction_picker_universal.cc   |  4
  db/db_impl.cc                       |  4
  db/db_impl_compaction_flush.cc      |  4
  db/db_impl_readonly.cc              |  2
  db/db_impl_write.cc                 |  6
  db/flush_job.cc                     |  2
  db/merge_helper.cc                  |  4
  include/rocksdb/statistics.h        | 30
  monitoring/statistics.cc            |  7
  monitoring/statistics.h             | 16
  table/block_based_table_builder.cc  |  8
  table/format.cc                     |  7
  tools/db_bench_tool.cc              |  5
  util/stop_watch.h                   | 11
  utilities/blob_db/blob_db_impl.cc   |  4

HISTORY.md

@@ -1,4 +1,7 @@
 # Rocksdb Change Log
+## Unreleased
+### New Features
+* Introduce two more stats levels, kExceptHistogramOrTimers and kExceptTimers.
 ## 6.0.0 (2/19/2019)
 ### New Features

db/compaction_job.cc

@@ -414,11 +414,10 @@ void CompactionJob::Prepare() {
   bottommost_level_ = c->bottommost_level();
   if (c->ShouldFormSubcompactions()) {
-    const uint64_t start_micros = env_->NowMicros();
-    GenSubcompactionBoundaries();
-    MeasureTime(stats_, SUBCOMPACTION_SETUP_TIME,
-                env_->NowMicros() - start_micros);
+    {
+      StopWatch sw(env_, stats_, SUBCOMPACTION_SETUP_TIME);
+      GenSubcompactionBoundaries();
+    }
     assert(sizes_.size() == boundaries_.size() + 1);
     for (size_t i = 0; i <= boundaries_.size(); i++) {
@@ -426,8 +425,8 @@ void CompactionJob::Prepare() {
       Slice* end = i == boundaries_.size() ? nullptr : &boundaries_[i];
       compact_->sub_compact_states.emplace_back(c, start, end, sizes_[i]);
     }
-    MeasureTime(stats_, NUM_SUBCOMPACTIONS_SCHEDULED,
-                compact_->sub_compact_states.size());
+    RecordInHistogram(stats_, NUM_SUBCOMPACTIONS_SCHEDULED,
+                      compact_->sub_compact_states.size());
   } else {
     compact_->sub_compact_states.emplace_back(c, nullptr, nullptr);
   }
@@ -607,8 +606,9 @@ Status CompactionJob::Run() {
         compact_->sub_compact_states[i].compaction_job_stats.cpu_micros;
   }
-  MeasureTime(stats_, COMPACTION_TIME, compaction_stats_.micros);
-  MeasureTime(stats_, COMPACTION_CPU_TIME, compaction_stats_.cpu_micros);
+  RecordTimeToHistogram(stats_, COMPACTION_TIME, compaction_stats_.micros);
+  RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME,
+                        compaction_stats_.cpu_micros);
   TEST_SYNC_POINT("CompactionJob::Run:BeforeVerify");

db/compaction_picker_universal.cc

@@ -395,8 +395,8 @@ Compaction* UniversalCompactionPicker::PickCompaction(
   }
 #endif
   // update statistics
-  MeasureTime(ioptions_.statistics, NUM_FILES_IN_SINGLE_COMPACTION,
-              c->inputs(0)->size());
+  RecordInHistogram(ioptions_.statistics, NUM_FILES_IN_SINGLE_COMPACTION,
+                    c->inputs(0)->size());
   RegisterCompaction(c);
   vstorage->ComputeCompactionScore(ioptions_, mutable_cf_options);

db/db_impl.cc

@@ -1447,7 +1447,7 @@ Status DBImpl::GetImpl(const ReadOptions& read_options,
       RecordTick(stats_, BYTES_READ, size);
       PERF_COUNTER_ADD(get_read_bytes, size);
     }
-    MeasureTime(stats_, BYTES_PER_READ, size);
+    RecordInHistogram(stats_, BYTES_PER_READ, size);
   }
   return s;
 }
@@ -1632,7 +1632,7 @@ std::vector<Status> DBImpl::MultiGet(
   RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys);
   RecordTick(stats_, NUMBER_MULTIGET_KEYS_FOUND, num_found);
   RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read);
-  MeasureTime(stats_, BYTES_PER_MULTIGET, bytes_read);
+  RecordInHistogram(stats_, BYTES_PER_MULTIGET, bytes_read);
   PERF_COUNTER_ADD(multiget_read_bytes, bytes_read);
   PERF_TIMER_STOP(get_post_process_time);

db/db_impl_compaction_flush.cc

@@ -2418,8 +2418,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
       status = Status::CompactionTooLarge();
     } else {
       // update statistics
-      MeasureTime(stats_, NUM_FILES_IN_SINGLE_COMPACTION,
-                  c->inputs(0)->size());
+      RecordInHistogram(stats_, NUM_FILES_IN_SINGLE_COMPACTION,
+                        c->inputs(0)->size());
       // There are three things that can change compaction score:
       // 1) When flush or compaction finish. This case is covered by
       //    InstallSuperVersionAndScheduleWork

db/db_impl_readonly.cc

@@ -60,7 +60,7 @@ Status DBImplReadOnly::Get(const ReadOptions& read_options,
     RecordTick(stats_, NUMBER_KEYS_READ);
     size_t size = pinnable_val->size();
     RecordTick(stats_, BYTES_READ, size);
-    MeasureTime(stats_, BYTES_PER_READ, size);
+    RecordInHistogram(stats_, BYTES_PER_READ, size);
     PERF_COUNTER_ADD(get_read_bytes, size);
   return s;
 }

db/db_impl_write.cc

@@ -279,7 +279,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
                    concurrent_update);
     RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
   }
-  MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
+  RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
   if (write_options.disableWAL) {
     has_unpersisted_data_.store(true, std::memory_order_relaxed);
@@ -471,7 +471,7 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
     RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
     stats->AddDBStats(InternalStats::BYTES_WRITTEN, total_byte_size);
     RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
-    MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
+    RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
   PERF_TIMER_STOP(write_pre_and_post_process_time);
@@ -602,7 +602,7 @@ Status DBImpl::WriteImplWALOnly(const WriteOptions& write_options,
                    concurrent_update);
     RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
   }
-  MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
+  RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
   PERF_TIMER_STOP(write_pre_and_post_process_time);

db/flush_job.cc

@@ -415,7 +415,7 @@ Status FlushJob::WriteLevel0Table() {
   stats.micros = db_options_.env->NowMicros() - start_micros;
   stats.cpu_micros = db_options_.env->NowCPUNanos() / 1000 - start_cpu_micros;
   stats.bytes_written = meta_.fd.GetFileSize();
-  MeasureTime(stats_, FLUSH_TIME, stats.micros);
+  RecordTimeToHistogram(stats_, FLUSH_TIME, stats.micros);
   cfd_->internal_stats()->AddCompactionStats(0 /* level */, stats);
   cfd_->internal_stats()->AddCFStats(InternalStats::BYTES_FLUSHED,
                                      meta_.fd.GetFileSize());

db/merge_helper.cc

@@ -64,8 +64,8 @@ Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
   }
   if (update_num_ops_stats) {
-    MeasureTime(statistics, READ_NUM_MERGE_OPERANDS,
-                static_cast<uint64_t>(operands.size()));
+    RecordInHistogram(statistics, READ_NUM_MERGE_OPERANDS,
+                      static_cast<uint64_t>(operands.size()));
   }
   bool success;

include/rocksdb/statistics.h

@@ -22,7 +22,8 @@ namespace rocksdb {
  * 1. Any ticker should be added before TICKER_ENUM_MAX.
  * 2. Add a readable string in TickersNameMap below for the newly added ticker.
  * 3. Add a corresponding enum value to TickerType.java in the java API
- * 4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType and toCppTickers
+ * 4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType
+ *    and toCppTickers
  */
 enum Tickers : uint32_t {
   // total block cache misses
@@ -443,7 +444,11 @@ struct HistogramData {
   double min = 0.0;
 };
-enum StatsLevel {
+enum StatsLevel : uint8_t {
+  // Disable timer stats, and skip histogram stats
+  kExceptHistogramOrTimers,
+  // Skip timer stats
+  kExceptTimers,
   // Collect all stats except time inside mutex lock AND time spent on
   // compression.
   kExceptDetailedTimers,
@@ -468,7 +473,26 @@ class Statistics {
   virtual void recordTick(uint32_t tickerType, uint64_t count = 0) = 0;
   virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0;
   virtual uint64_t getAndResetTickerCount(uint32_t tickerType) = 0;
-  virtual void measureTime(uint32_t histogramType, uint64_t time) = 0;
+  virtual void reportTimeToHistogram(uint32_t histogramType, uint64_t time) {
+    if (stats_level_ <= StatsLevel::kExceptTimers) {
+      return;
+    }
+    recordInHistogram(histogramType, time);
+  }
+  // This function is here only for backward compatibility. Users implementing
+  // their own Statistics class should override recordInHistogram() instead
+  // and leave measureTime() as it is.
+  virtual void measureTime(uint32_t /*histogramType*/, uint64_t /*time*/) {
+    // This is not supposed to be called.
+    assert(false);
+  }
+  virtual void recordInHistogram(uint32_t histogramType, uint64_t time) {
+    // measureTime() is the old, inaccurately named function. To stay backward
+    // compatible with user Statistics implementations that override
+    // measureTime() but not this function, forward to measureTime().
+    measureTime(histogramType, time);
+  }
   // Resets all ticker and histogram stats
   virtual Status Reset() {
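
The backward-compatibility scheme above means existing user-defined Statistics subclasses that only override measureTime() keep working, while new code should override recordInHistogram(). A hedged sketch of a new-style subclass (the class is illustrative only; the exact set of pure-virtual methods depends on the RocksDB version):

```cpp
// Illustrative only: a user Statistics that implements the new
// recordInHistogram() hook instead of the legacy measureTime().
#include <cstdint>
#include "rocksdb/statistics.h"

class MyStats : public rocksdb::Statistics {
 public:
  uint64_t getTickerCount(uint32_t /*ticker*/) const override { return 0; }
  void histogramData(uint32_t /*type*/,
                     rocksdb::HistogramData* const /*data*/) const override {}
  void recordTick(uint32_t /*ticker*/, uint64_t /*count*/) override {}
  void setTickerCount(uint32_t /*ticker*/, uint64_t /*count*/) override {}
  uint64_t getAndResetTickerCount(uint32_t /*ticker*/) override { return 0; }

  // Histogram values land here; timer values arrive through the default
  // reportTimeToHistogram() unless stats_level_ filters them out first.
  void recordInHistogram(uint32_t histogram_type, uint64_t value) override {
    // Forward to your own metrics sink here.
    (void)histogram_type;
    (void)value;
  }
};
```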

monitoring/statistics.cc

@@ -322,11 +322,14 @@ void StatisticsImpl::recordTick(uint32_t tickerType, uint64_t count) {
   }
 }
-void StatisticsImpl::measureTime(uint32_t histogramType, uint64_t value) {
+void StatisticsImpl::recordInHistogram(uint32_t histogramType, uint64_t value) {
   assert(histogramType < HISTOGRAM_ENUM_MAX);
+  if (stats_level_ <= StatsLevel::kExceptHistogramOrTimers) {
+    return;
+  }
   per_core_stats_.Access()->histograms_[histogramType].Add(value);
   if (stats_ && histogramType < HISTOGRAM_ENUM_MAX) {
-    stats_->measureTime(histogramType, value);
+    stats_->recordInHistogram(histogramType, value);
   }
 }

monitoring/statistics.h

@@ -53,7 +53,8 @@ class StatisticsImpl : public Statistics {
   virtual void setTickerCount(uint32_t ticker_type, uint64_t count) override;
   virtual uint64_t getAndResetTickerCount(uint32_t ticker_type) override;
   virtual void recordTick(uint32_t ticker_type, uint64_t count) override;
-  virtual void measureTime(uint32_t histogram_type, uint64_t value) override;
+  virtual void recordInHistogram(uint32_t histogram_type,
+                                 uint64_t value) override;
   virtual Status Reset() override;
   virtual std::string ToString() const override;
@@ -99,10 +100,17 @@ class StatisticsImpl : public Statistics {
 };
 // Utility functions
-inline void MeasureTime(Statistics* statistics, uint32_t histogram_type,
-                        uint64_t value) {
+inline void RecordInHistogram(Statistics* statistics, uint32_t histogram_type,
+                              uint64_t value) {
   if (statistics) {
-    statistics->measureTime(histogram_type, value);
+    statistics->recordInHistogram(histogram_type, value);
+  }
+}
+
+inline void RecordTimeToHistogram(Statistics* statistics,
+                                  uint32_t histogram_type, uint64_t value) {
+  if (statistics) {
+    statistics->reportTimeToHistogram(histogram_type, value);
   }
 }
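
Note the split in the utility helpers: RecordTimeToHistogram() routes through reportTimeToHistogram(), which is dropped at kExceptTimers and below, while RecordInHistogram() routes through recordInHistogram(), which StatisticsImpl drops only at kExceptHistogramOrTimers. A rough sketch of the effective gating, pieced together from this diff (assumption: `stats_level_` is the public level field read by Statistics and StatisticsImpl):

```cpp
#include "rocksdb/statistics.h"

// Sketch of the gating introduced by this PR; not library code.
bool TimerHistogramsEnabled(const rocksdb::Statistics& s) {
  // Timer-driven histograms (StopWatch, RecordTimeToHistogram) are recorded
  // only above kExceptTimers.
  return s.stats_level_ > rocksdb::StatsLevel::kExceptTimers;
}

bool PlainHistogramsEnabled(const rocksdb::Statistics& s) {
  // Non-timer histograms (RecordInHistogram) are recorded above
  // kExceptHistogramOrTimers.
  return s.stats_level_ > rocksdb::StatsLevel::kExceptHistogramOrTimers;
}
```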

table/block_based_table_builder.cc

@@ -634,11 +634,11 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents,
     block_contents = raw_block_contents;
   } else if (type != kNoCompression) {
     if (ShouldReportDetailedTime(r->ioptions.env, r->ioptions.statistics)) {
-      MeasureTime(r->ioptions.statistics, COMPRESSION_TIMES_NANOS,
-                  timer.ElapsedNanos());
+      RecordTimeToHistogram(r->ioptions.statistics, COMPRESSION_TIMES_NANOS,
+                            timer.ElapsedNanos());
     }
-    MeasureTime(r->ioptions.statistics, BYTES_COMPRESSED,
-                raw_block_contents.size());
+    RecordInHistogram(r->ioptions.statistics, BYTES_COMPRESSED,
+                      raw_block_contents.size());
     RecordTick(r->ioptions.statistics, NUMBER_BLOCK_COMPRESSED);
   } else if (type != r->compression_type) {
     RecordTick(r->ioptions.statistics, NUMBER_BLOCK_NOT_COMPRESSED);

table/format.cc

@@ -379,10 +379,11 @@ Status UncompressBlockContentsForCompressionType(
   }
   if(ShouldReportDetailedTime(ioptions.env, ioptions.statistics)){
-    MeasureTime(ioptions.statistics, DECOMPRESSION_TIMES_NANOS,
-                timer.ElapsedNanos());
+    RecordTimeToHistogram(ioptions.statistics, DECOMPRESSION_TIMES_NANOS,
+                          timer.ElapsedNanos());
   }
-  MeasureTime(ioptions.statistics, BYTES_DECOMPRESSED, contents->data.size());
+  RecordTimeToHistogram(ioptions.statistics, BYTES_DECOMPRESSED,
+                        contents->data.size());
   RecordTick(ioptions.statistics, NUMBER_BLOCK_DECOMPRESSED);
   return Status::OK();

tools/db_bench_tool.cc

@@ -560,6 +560,8 @@ DEFINE_bool(verify_checksum, true,
             " from storage");
 DEFINE_bool(statistics, false, "Database statistics");
+DEFINE_int32(stats_level, rocksdb::StatsLevel::kExceptDetailedTimers,
+             "stats level for statistics");
 DEFINE_string(statistics_string, "", "Serialized statistics string");
 static class std::shared_ptr<rocksdb::Statistics> dbstats;
@@ -6079,6 +6081,9 @@ int db_bench_tool(int argc, char** argv) {
   if (FLAGS_statistics) {
     dbstats = rocksdb::CreateDBStatistics();
   }
+  if (dbstats) {
+    dbstats->stats_level_ = static_cast<StatsLevel>(FLAGS_stats_level);
+  }
   FLAGS_compaction_pri_e = (rocksdb::CompactionPri)FLAGS_compaction_pri;
   std::vector<std::string> fanout = rocksdb::StringSplit(

util/stop_watch.h

@@ -22,7 +22,9 @@ class StopWatch {
         hist_type_(hist_type),
         elapsed_(elapsed),
         overwrite_(overwrite),
-        stats_enabled_(statistics && statistics->HistEnabledForType(hist_type)),
+        stats_enabled_(statistics &&
+                       statistics->stats_level_ >= StatsLevel::kExceptTimers &&
+                       statistics->HistEnabledForType(hist_type)),
         delay_enabled_(delay_enabled),
         total_delay_(0),
         delay_start_time_(0),
@@ -41,9 +43,10 @@ class StopWatch {
       *elapsed_ -= total_delay_;
     }
     if (stats_enabled_) {
-      statistics_->measureTime(hist_type_,
-                               (elapsed_ != nullptr) ? *elapsed_ :
-                               (env_->NowMicros() - start_time_));
+      statistics_->reportTimeToHistogram(
+          hist_type_, (elapsed_ != nullptr)
+                          ? *elapsed_
+                          : (env_->NowMicros() - start_time_));
     }
   }

utilities/blob_db/blob_db_impl.cc

@@ -743,8 +743,8 @@ Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/,
   RecordTick(statistics_, BLOB_DB_NUM_KEYS_WRITTEN);
   RecordTick(statistics_, BLOB_DB_BYTES_WRITTEN, key.size() + value.size());
-  MeasureTime(statistics_, BLOB_DB_KEY_SIZE, key.size());
-  MeasureTime(statistics_, BLOB_DB_VALUE_SIZE, value.size());
+  RecordInHistogram(statistics_, BLOB_DB_KEY_SIZE, key.size());
+  RecordInHistogram(statistics_, BLOB_DB_VALUE_SIZE, value.size());
   return s;
 }
