|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
|
|
|
#include "util/statistics.h"
|
|
|
|
|
|
|
|
#ifndef __STDC_FORMAT_MACROS
|
|
|
|
#define __STDC_FORMAT_MACROS
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <inttypes.h>
|
|
|
|
#include "rocksdb/statistics.h"
|
|
|
|
#include "port/likely.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cstdio>
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
std::shared_ptr<Statistics> CreateDBStatistics() {
|
|
|
|
return std::make_shared<StatisticsImpl>(nullptr, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
StatisticsImpl::StatisticsImpl(
|
|
|
|
std::shared_ptr<Statistics> stats,
|
|
|
|
bool enable_internal_stats)
|
|
|
|
: stats_shared_(stats),
|
|
|
|
stats_(stats.get()),
|
|
|
|
enable_internal_stats_(enable_internal_stats) {
|
|
|
|
}
|
|
|
|
|
|
|
|
StatisticsImpl::~StatisticsImpl() {}
|
|
|
|
|
|
|
|
uint64_t StatisticsImpl::getTickerCount(uint32_t tickerType) const {
|
|
|
|
MutexLock lock(&aggregate_lock_);
|
|
|
|
assert(
|
|
|
|
enable_internal_stats_ ?
|
|
|
|
tickerType < INTERNAL_TICKER_ENUM_MAX :
|
|
|
|
tickerType < TICKER_ENUM_MAX);
|
|
|
|
uint64_t thread_local_sum = 0;
|
|
|
|
tickers_[tickerType].thread_value->Fold(
|
|
|
|
[](void* curr_ptr, void* res) {
|
|
|
|
auto* sum_ptr = static_cast<uint64_t*>(res);
|
|
|
|
*sum_ptr += static_cast<std::atomic_uint_fast64_t*>(curr_ptr)->load(
|
|
|
|
std::memory_order_relaxed);
|
|
|
|
},
|
|
|
|
&thread_local_sum);
|
|
|
|
return thread_local_sum +
|
|
|
|
tickers_[tickerType].merged_sum.load(std::memory_order_relaxed);
|
|
|
|
}
|
|
|
|
|
Thread-specific histogram statistics
Summary:
To reduce contention for atomics when HistogramStats are shared across
threads, this diff makes them thread-specific so updates are faster. This comes
at the expense of slower reads (much less frequent), which now require merging
all histograms. In this diff,
- Thread-specific HistogramImpl is created upon the thread's first measureTime()
- Thread-specific HistogramImpl are merged and deleted upon thread termination or ThreadLocalPtr destruction, whichever comes first
- getHistogramString() and histogramData() merge all histograms, both thread-specific and previously merged ones
Test Plan:
unit tests, ran db_bench and verified histograms look similar
before:
$ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
...
+ 7.63% db_bench db_bench [.] rocksdb::HistogramStat::Add
after:
$ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
...
+ 0.98% db_bench db_bench [.] rocksdb::HistogramStat::Add
Reviewers: sdong, MarkCallaghan, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: andrewkr, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D62649
8 years ago
|
|
|
std::unique_ptr<HistogramImpl>
|
|
|
|
StatisticsImpl::HistogramInfo::getMergedHistogram() const {
|
|
|
|
MutexLock lock(&merge_lock);
|
|
|
|
std::unique_ptr<HistogramImpl> res_hist(new HistogramImpl());
|
|
|
|
res_hist->Merge(merged_hist);
|
|
|
|
thread_value->Fold(
|
|
|
|
[](void* curr_ptr, void* res) {
|
|
|
|
auto tmp_res_hist = static_cast<HistogramImpl*>(res);
|
|
|
|
auto curr_hist = static_cast<HistogramImpl*>(curr_ptr);
|
|
|
|
tmp_res_hist->Merge(*curr_hist);
|
|
|
|
},
|
|
|
|
res_hist.get());
|
|
|
|
return res_hist;
|
|
|
|
}
|
|
|
|
|
|
|
|
void StatisticsImpl::histogramData(uint32_t histogramType,
|
|
|
|
HistogramData* const data) const {
|
|
|
|
assert(
|
|
|
|
enable_internal_stats_ ?
|
|
|
|
histogramType < INTERNAL_HISTOGRAM_ENUM_MAX :
|
|
|
|
histogramType < HISTOGRAM_ENUM_MAX);
|
Thread-specific histogram statistics
Summary:
To reduce contention for atomics when HistogramStats are shared across
threads, this diff makes them thread-specific so updates are faster. This comes
at the expense of slower reads (much less frequent), which now require merging
all histograms. In this diff,
- Thread-specific HistogramImpl is created upon the thread's first measureTime()
- Thread-specific HistogramImpl are merged and deleted upon thread termination or ThreadLocalPtr destruction, whichever comes first
- getHistogramString() and histogramData() merge all histograms, both thread-specific and previously merged ones
Test Plan:
unit tests, ran db_bench and verified histograms look similar
before:
$ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
...
+ 7.63% db_bench db_bench [.] rocksdb::HistogramStat::Add
after:
$ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
...
+ 0.98% db_bench db_bench [.] rocksdb::HistogramStat::Add
Reviewers: sdong, MarkCallaghan, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: andrewkr, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D62649
8 years ago
|
|
|
histograms_[histogramType].getMergedHistogram()->Data(data);
|
|
|
|
}
|
|
|
|
|
Add Statistics.getHistogramString() to print more detailed outputs of a histogram
Summary:
Provide a way for users to know more detailed ditribution of a histogram metrics. Example outputs:
Manually add statement
fprintf(stdout, "%s\n", dbstats->getHistogramString(SST_READ_MICROS).c_str());
Will print out something like:
Count: 989151 Average: 1.7659 StdDev: 1.52
Min: 0.0000 Median: 1.2071 Max: 860.0000
Percentiles: P50: 1.21 P75: 1.70 P99: 5.12 P99.9: 13.67 P99.99: 21.70
------------------------------------------------------
[ 0, 1 ) 390839 39.513% 39.513% ########
[ 1, 2 ) 500918 50.641% 90.154% ##########
[ 2, 3 ) 79358 8.023% 98.177% ##
[ 3, 4 ) 6297 0.637% 98.813%
[ 4, 5 ) 1712 0.173% 98.986%
[ 5, 6 ) 1134 0.115% 99.101%
[ 6, 7 ) 1222 0.124% 99.224%
[ 7, 8 ) 1529 0.155% 99.379%
[ 8, 9 ) 1264 0.128% 99.507%
[ 9, 10 ) 988 0.100% 99.607%
[ 10, 12 ) 1378 0.139% 99.746%
[ 12, 14 ) 1828 0.185% 99.931%
[ 14, 16 ) 410 0.041% 99.972%
[ 16, 18 ) 72 0.007% 99.980%
[ 18, 20 ) 67 0.007% 99.986%
[ 20, 25 ) 106 0.011% 99.997%
[ 25, 30 ) 24 0.002% 99.999%
[ 30, 35 ) 1 0.000% 100.000%
[ 250, 300 ) 2 0.000% 100.000%
[ 300, 350 ) 1 0.000% 100.000%
[ 800, 900 ) 1 0.000% 100.000%
Test Plan: Manually add a print in db_bench and make sure it prints out as expected. Will add some codes to cover the function
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D43611
10 years ago
|
|
|
std::string StatisticsImpl::getHistogramString(uint32_t histogramType) const {
|
|
|
|
assert(enable_internal_stats_ ? histogramType < INTERNAL_HISTOGRAM_ENUM_MAX
|
|
|
|
: histogramType < HISTOGRAM_ENUM_MAX);
|
Thread-specific histogram statistics
Summary:
To reduce contention for atomics when HistogramStats are shared across
threads, this diff makes them thread-specific so updates are faster. This comes
at the expense of slower reads (much less frequent), which now require merging
all histograms. In this diff,
- Thread-specific HistogramImpl is created upon the thread's first measureTime()
- Thread-specific HistogramImpl are merged and deleted upon thread termination or ThreadLocalPtr destruction, whichever comes first
- getHistogramString() and histogramData() merge all histograms, both thread-specific and previously merged ones
Test Plan:
unit tests, ran db_bench and verified histograms look similar
before:
$ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
...
+ 7.63% db_bench db_bench [.] rocksdb::HistogramStat::Add
after:
$ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
...
+ 0.98% db_bench db_bench [.] rocksdb::HistogramStat::Add
Reviewers: sdong, MarkCallaghan, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: andrewkr, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D62649
8 years ago
|
|
|
return histograms_[histogramType].getMergedHistogram()->ToString();
|
Add Statistics.getHistogramString() to print more detailed outputs of a histogram
Summary:
Provide a way for users to know more detailed ditribution of a histogram metrics. Example outputs:
Manually add statement
fprintf(stdout, "%s\n", dbstats->getHistogramString(SST_READ_MICROS).c_str());
Will print out something like:
Count: 989151 Average: 1.7659 StdDev: 1.52
Min: 0.0000 Median: 1.2071 Max: 860.0000
Percentiles: P50: 1.21 P75: 1.70 P99: 5.12 P99.9: 13.67 P99.99: 21.70
------------------------------------------------------
[ 0, 1 ) 390839 39.513% 39.513% ########
[ 1, 2 ) 500918 50.641% 90.154% ##########
[ 2, 3 ) 79358 8.023% 98.177% ##
[ 3, 4 ) 6297 0.637% 98.813%
[ 4, 5 ) 1712 0.173% 98.986%
[ 5, 6 ) 1134 0.115% 99.101%
[ 6, 7 ) 1222 0.124% 99.224%
[ 7, 8 ) 1529 0.155% 99.379%
[ 8, 9 ) 1264 0.128% 99.507%
[ 9, 10 ) 988 0.100% 99.607%
[ 10, 12 ) 1378 0.139% 99.746%
[ 12, 14 ) 1828 0.185% 99.931%
[ 14, 16 ) 410 0.041% 99.972%
[ 16, 18 ) 72 0.007% 99.980%
[ 18, 20 ) 67 0.007% 99.986%
[ 20, 25 ) 106 0.011% 99.997%
[ 25, 30 ) 24 0.002% 99.999%
[ 30, 35 ) 1 0.000% 100.000%
[ 250, 300 ) 2 0.000% 100.000%
[ 300, 350 ) 1 0.000% 100.000%
[ 800, 900 ) 1 0.000% 100.000%
Test Plan: Manually add a print in db_bench and make sure it prints out as expected. Will add some codes to cover the function
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D43611
10 years ago
|
|
|
}
|
|
|
|
|
|
|
|
StatisticsImpl::ThreadTickerInfo* StatisticsImpl::getThreadTickerInfo(
|
|
|
|
uint32_t tickerType) {
|
|
|
|
auto info_ptr =
|
|
|
|
static_cast<ThreadTickerInfo*>(tickers_[tickerType].thread_value->Get());
|
|
|
|
if (info_ptr == nullptr) {
|
|
|
|
info_ptr =
|
|
|
|
new ThreadTickerInfo(0 /* value */, &tickers_[tickerType].merged_sum);
|
|
|
|
tickers_[tickerType].thread_value->Reset(info_ptr);
|
|
|
|
}
|
|
|
|
return info_ptr;
|
|
|
|
}
|
|
|
|
|
Thread-specific histogram statistics
Summary:
To reduce contention for atomics when HistogramStats are shared across
threads, this diff makes them thread-specific so updates are faster. This comes
at the expense of slower reads (much less frequent), which now require merging
all histograms. In this diff,
- Thread-specific HistogramImpl is created upon the thread's first measureTime()
- Thread-specific HistogramImpl are merged and deleted upon thread termination or ThreadLocalPtr destruction, whichever comes first
- getHistogramString() and histogramData() merge all histograms, both thread-specific and previously merged ones
Test Plan:
unit tests, ran db_bench and verified histograms look similar
before:
$ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
...
+ 7.63% db_bench db_bench [.] rocksdb::HistogramStat::Add
after:
$ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
...
+ 0.98% db_bench db_bench [.] rocksdb::HistogramStat::Add
Reviewers: sdong, MarkCallaghan, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: andrewkr, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D62649
8 years ago
|
|
|
StatisticsImpl::ThreadHistogramInfo* StatisticsImpl::getThreadHistogramInfo(
|
|
|
|
uint32_t histogram_type) {
|
|
|
|
auto info_ptr = static_cast<ThreadHistogramInfo*>(
|
|
|
|
histograms_[histogram_type].thread_value->Get());
|
|
|
|
if (info_ptr == nullptr) {
|
|
|
|
info_ptr = new ThreadHistogramInfo(&histograms_[histogram_type].merged_hist,
|
|
|
|
&histograms_[histogram_type].merge_lock);
|
|
|
|
histograms_[histogram_type].thread_value->Reset(info_ptr);
|
|
|
|
}
|
|
|
|
return info_ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
void StatisticsImpl::setTickerCount(uint32_t tickerType, uint64_t count) {
|
|
|
|
{
|
|
|
|
MutexLock lock(&aggregate_lock_);
|
|
|
|
assert(enable_internal_stats_ ? tickerType < INTERNAL_TICKER_ENUM_MAX
|
|
|
|
: tickerType < TICKER_ENUM_MAX);
|
|
|
|
if (tickerType < TICKER_ENUM_MAX || enable_internal_stats_) {
|
|
|
|
tickers_[tickerType].thread_value->Fold(
|
|
|
|
[](void* curr_ptr, void* res) {
|
|
|
|
static_cast<std::atomic<uint64_t>*>(curr_ptr)->store(
|
|
|
|
0, std::memory_order_relaxed);
|
|
|
|
},
|
|
|
|
nullptr /* res */);
|
|
|
|
tickers_[tickerType].merged_sum.store(count, std::memory_order_relaxed);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (stats_ && tickerType < TICKER_ENUM_MAX) {
|
|
|
|
stats_->setTickerCount(tickerType, count);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void StatisticsImpl::recordTick(uint32_t tickerType, uint64_t count) {
|
|
|
|
assert(
|
|
|
|
enable_internal_stats_ ?
|
|
|
|
tickerType < INTERNAL_TICKER_ENUM_MAX :
|
|
|
|
tickerType < TICKER_ENUM_MAX);
|
|
|
|
if (tickerType < TICKER_ENUM_MAX || enable_internal_stats_) {
|
|
|
|
auto info_ptr = getThreadTickerInfo(tickerType);
|
|
|
|
info_ptr->value.fetch_add(count, std::memory_order_relaxed);
|
|
|
|
}
|
|
|
|
if (stats_ && tickerType < TICKER_ENUM_MAX) {
|
|
|
|
stats_->recordTick(tickerType, count);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void StatisticsImpl::measureTime(uint32_t histogramType, uint64_t value) {
|
|
|
|
assert(
|
|
|
|
enable_internal_stats_ ?
|
|
|
|
histogramType < INTERNAL_HISTOGRAM_ENUM_MAX :
|
|
|
|
histogramType < HISTOGRAM_ENUM_MAX);
|
|
|
|
if (histogramType < HISTOGRAM_ENUM_MAX || enable_internal_stats_) {
|
Thread-specific histogram statistics
Summary:
To reduce contention for atomics when HistogramStats are shared across
threads, this diff makes them thread-specific so updates are faster. This comes
at the expense of slower reads (much less frequent), which now require merging
all histograms. In this diff,
- Thread-specific HistogramImpl is created upon the thread's first measureTime()
- Thread-specific HistogramImpl are merged and deleted upon thread termination or ThreadLocalPtr destruction, whichever comes first
- getHistogramString() and histogramData() merge all histograms, both thread-specific and previously merged ones
Test Plan:
unit tests, ran db_bench and verified histograms look similar
before:
$ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
...
+ 7.63% db_bench db_bench [.] rocksdb::HistogramStat::Add
after:
$ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
...
+ 0.98% db_bench db_bench [.] rocksdb::HistogramStat::Add
Reviewers: sdong, MarkCallaghan, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: andrewkr, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D62649
8 years ago
|
|
|
getThreadHistogramInfo(histogramType)->value.Add(value);
|
|
|
|
}
|
|
|
|
if (stats_ && histogramType < HISTOGRAM_ENUM_MAX) {
|
|
|
|
stats_->measureTime(histogramType, value);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
// a buffer size used for temp string buffers
|
|
|
|
const int kBufferSize = 200;
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
std::string StatisticsImpl::ToString() const {
|
|
|
|
std::string res;
|
|
|
|
res.reserve(20000);
|
|
|
|
for (const auto& t : TickersNameMap) {
|
|
|
|
if (t.first < TICKER_ENUM_MAX || enable_internal_stats_) {
|
|
|
|
char buffer[kBufferSize];
|
|
|
|
snprintf(buffer, kBufferSize, "%s COUNT : %" PRIu64 "\n",
|
|
|
|
t.second.c_str(), getTickerCount(t.first));
|
|
|
|
res.append(buffer);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (const auto& h : HistogramsNameMap) {
|
|
|
|
if (h.first < HISTOGRAM_ENUM_MAX || enable_internal_stats_) {
|
|
|
|
char buffer[kBufferSize];
|
|
|
|
HistogramData hData;
|
|
|
|
histogramData(h.first, &hData);
|
|
|
|
snprintf(
|
|
|
|
buffer,
|
|
|
|
kBufferSize,
|
|
|
|
"%s statistics Percentiles :=> 50 : %f 95 : %f 99 : %f\n",
|
|
|
|
h.second.c_str(),
|
|
|
|
hData.median,
|
|
|
|
hData.percentile95,
|
|
|
|
hData.percentile99);
|
|
|
|
res.append(buffer);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
res.shrink_to_fit();
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool StatisticsImpl::HistEnabledForType(uint32_t type) const {
|
|
|
|
if (LIKELY(!enable_internal_stats_)) {
|
|
|
|
return type < HISTOGRAM_ENUM_MAX;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace rocksdb
|