Scale histogram bucket size by constant factor

Summary:
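The goal is to reduce the number of histogram buckets, particularly now that we print these histograms for each column family. Instead of a hand-written table of bucket limits, each limit is now generated by multiplying the previous one by a constant factor; I chose 1.5 as that factor. We can adjust it later, either lowering it to make buckets more granular or raising it to make fewer buckets.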
Closes https://github.com/facebook/rocksdb/pull/2139

Differential Revision: D4872076

Pulled By: ajkr

fbshipit-source-id: 87790d782a605506c3d24190a028cecbd7aa564a
main
Andrew Kryczka 7 years ago committed by Facebook Github Bot
parent f004307e9b
commit 867fe92e5e
  1. 2
      HISTORY.md
  2. 53
      monitoring/histogram.cc
  3. 10
      monitoring/histogram.h
  4. 26
      monitoring/histogram_test.cc

@ -1,5 +1,7 @@
# Rocksdb Change Log # Rocksdb Change Log
## Unreleased ## Unreleased
### Public API Change
* Users of `Statistics::getHistogramString()` will see fewer histogram buckets and different bucket endpoints.
### New Features ### New Features
* Add Iterator::Refresh(), which allows users to update the iterator state so that they can avoid some initialization costs of recreating iterators. * Add Iterator::Refresh(), which allows users to update the iterator state so that they can avoid some initialization costs of recreating iterators.

@ -23,41 +23,26 @@
namespace rocksdb { namespace rocksdb {
HistogramBucketMapper::HistogramBucketMapper() HistogramBucketMapper::HistogramBucketMapper() {
: // If you change this, you also need to change
// Add newer bucket index here. // size of array buckets_ in HistogramImpl
// Should be always added in sorted order. bucketValues_ = {1, 2};
// If you change this, you also need to change valueIndexMap_ = {{1, 0}, {2, 1}};
// size of array buckets_ in HistogramImpl double bucket_val = static_cast<double>(bucketValues_.back());
bucketValues_( while ((bucket_val = 1.5 * bucket_val) <= static_cast<double>(port::kMaxUint64)) {
{1, 2, 3, 4, 5, 6, bucketValues_.push_back(static_cast<uint64_t>(bucket_val));
7, 8, 9, 10, 12, 14, // Extracts two most significant digits to make histogram buckets more
16, 18, 20, 25, 30, 35, // human-readable. E.g., 172 becomes 170.
40, 45, 50, 60, 70, 80, uint64_t pow_of_ten = 1;
90, 100, 120, 140, 160, 180, while (bucketValues_.back() / 10 > 10) {
200, 250, 300, 350, 400, 450, bucketValues_.back() /= 10;
500, 600, 700, 800, 900, 1000, pow_of_ten *= 10;
1200, 1400, 1600, 1800, 2000, 2500, }
3000, 3500, 4000, 4500, 5000, 6000, bucketValues_.back() *= pow_of_ten;
7000, 8000, 9000, 10000, 12000, 14000, valueIndexMap_[bucketValues_.back()] = bucketValues_.size() - 1;
16000, 18000, 20000, 25000, 30000, 35000,
40000, 45000, 50000, 60000, 70000, 80000,
90000, 100000, 120000, 140000, 160000, 180000,
200000, 250000, 300000, 350000, 400000, 450000,
500000, 600000, 700000, 800000, 900000, 1000000,
1200000, 1400000, 1600000, 1800000, 2000000, 2500000,
3000000, 3500000, 4000000, 4500000, 5000000, 6000000,
7000000, 8000000, 9000000, 10000000, 12000000, 14000000,
16000000, 18000000, 20000000, 25000000, 30000000, 35000000,
40000000, 45000000, 50000000, 60000000, 70000000, 80000000,
90000000, 100000000, 120000000, 140000000, 160000000, 180000000,
200000000, 250000000, 300000000, 350000000, 400000000, 450000000,
500000000, 600000000, 700000000, 800000000, 900000000, 1000000000}),
maxBucketValue_(bucketValues_.back()),
minBucketValue_(bucketValues_.front()) {
for (size_t i =0; i < bucketValues_.size(); ++i) {
valueIndexMap_[bucketValues_[i]] = i;
} }
maxBucketValue_ = bucketValues_.back();
minBucketValue_ = bucketValues_.front();
} }
size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const { size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const {

@ -45,9 +45,9 @@ class HistogramBucketMapper {
} }
private: private:
const std::vector<uint64_t> bucketValues_; std::vector<uint64_t> bucketValues_;
const uint64_t maxBucketValue_; uint64_t maxBucketValue_;
const uint64_t minBucketValue_; uint64_t minBucketValue_;
std::map<uint64_t, uint64_t> valueIndexMap_; std::map<uint64_t, uint64_t> valueIndexMap_;
}; };
@ -89,7 +89,7 @@ struct HistogramStat {
std::atomic_uint_fast64_t num_; std::atomic_uint_fast64_t num_;
std::atomic_uint_fast64_t sum_; std::atomic_uint_fast64_t sum_;
std::atomic_uint_fast64_t sum_squares_; std::atomic_uint_fast64_t sum_squares_;
std::atomic_uint_fast64_t buckets_[138]; // 138==BucketMapper::BucketCount() std::atomic_uint_fast64_t buckets_[109]; // 109==BucketMapper::BucketCount()
const uint64_t num_buckets_; const uint64_t num_buckets_;
}; };
@ -146,4 +146,4 @@ class HistogramImpl : public Histogram {
std::mutex mutex_; std::mutex mutex_;
}; };
} // namespace rocksdb } // namespace rocksdb

@ -29,33 +29,31 @@ void PopulateHistogram(Histogram& histogram,
} }
void BasicOperation(Histogram& histogram) { void BasicOperation(Histogram& histogram) {
PopulateHistogram(histogram, 1, 100, 10); PopulateHistogram(histogram, 1, 110, 10); // fill up to bucket [70, 110)
HistogramData data; HistogramData data;
histogram.Data(&data); histogram.Data(&data);
ASSERT_LE(fabs(histogram.Percentile(100.0) - 100.0), kIota); ASSERT_LE(fabs(histogram.Percentile(100.0) - 110.0), kIota);
ASSERT_LE(fabs(data.percentile99 - 99.0), kIota); ASSERT_LE(fabs(data.percentile99 - 108.9), kIota); // 99 * 110 / 100
ASSERT_LE(fabs(data.percentile95 - 95.0), kIota); ASSERT_LE(fabs(data.percentile95 - 104.5), kIota); // 95 * 110 / 100
ASSERT_LE(fabs(data.median - 50.0), kIota); ASSERT_LE(fabs(data.median - 55.0), kIota); // 50 * 110 / 100
ASSERT_EQ(data.average, 50.5); // avg is acurately calculated. ASSERT_EQ(data.average, 55.5); // (1 + 110) / 2
ASSERT_LT(fabs(data.standard_deviation- 28.86), kIota); //sd is ~= 28.86
} }
void MergeHistogram(Histogram& histogram, Histogram& other) { void MergeHistogram(Histogram& histogram, Histogram& other) {
PopulateHistogram(histogram, 1, 100); PopulateHistogram(histogram, 1, 100);
PopulateHistogram(other, 101, 200); PopulateHistogram(other, 101, 250);
histogram.Merge(other); histogram.Merge(other);
HistogramData data; HistogramData data;
histogram.Data(&data); histogram.Data(&data);
ASSERT_LE(fabs(histogram.Percentile(100.0) - 200.0), kIota); ASSERT_LE(fabs(histogram.Percentile(100.0) - 250.0), kIota);
ASSERT_LE(fabs(data.percentile99 - 198.0), kIota); ASSERT_LE(fabs(data.percentile99 - 247.5), kIota); // 99 * 250 / 100
ASSERT_LE(fabs(data.percentile95 - 190.0), kIota); ASSERT_LE(fabs(data.percentile95 - 237.5), kIota); // 95 * 250 / 100
ASSERT_LE(fabs(data.median - 100.0), kIota); ASSERT_LE(fabs(data.median - 125.0), kIota); // 50 * 250 / 100
ASSERT_EQ(data.average, 100.5); // avg is acurately calculated. ASSERT_EQ(data.average, 125.5); // (1 + 250) / 2
ASSERT_LT(fabs(data.standard_deviation - 57.73), kIota); //sd is ~= 57.73
} }
void EmptyHistogram(Histogram& histogram) { void EmptyHistogram(Histogram& histogram) {

Loading…
Cancel
Save