From 2af132c34166e235b1c85f672a39072e29406a23 Mon Sep 17 00:00:00 2001 From: Guido Tagliavini Ponce Date: Mon, 6 Jun 2022 13:53:47 -0700 Subject: [PATCH] Fix overflow bug in standard deviation computation. (#10100) Summary: There was an overflow bug when computing the variance in the HistogramStat class. This manifests, for instance, when running cache_bench with default arguments. This executes 32M lookups/inserts/deletes in a block cache, and then computes (among other things) the variance of the latencies. The variance is computed as ``variance = (cur_sum_squares * cur_num - cur_sum * cur_sum) / (cur_num * cur_num)``, where ``cum_sum_squares`` is the sum of the squares of the samples, ``cur_num`` is the number of samples, and ``cur_sum`` is the sum of the samples. Because the median latency in a typical run is around 3800 nanoseconds, both the ``cur_sum_squares * cur_num`` and ``cur_sum * cur_sum`` terms overflow as uint64_t. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10100 Test Plan: Added a unit test. Run ``make -j24 histogram_test && ./histogram_test``. Reviewed By: pdillinger Differential Revision: D36942738 Pulled By: guidotag fbshipit-source-id: 0af5fb9e2a297a284e8e74c24e604d302906006e --- monitoring/histogram.cc | 11 ++++++----- monitoring/histogram_test.cc | 6 ++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/monitoring/histogram.cc b/monitoring/histogram.cc index 323a08efb..331796e5b 100644 --- a/monitoring/histogram.cc +++ b/monitoring/histogram.cc @@ -164,15 +164,16 @@ double HistogramStat::Average() const { } double HistogramStat::StandardDeviation() const { - uint64_t cur_num = num(); - uint64_t cur_sum = sum(); - uint64_t cur_sum_squares = sum_squares(); + double cur_num = + static_cast(num()); // Use double to avoid integer overflow + double cur_sum = static_cast(sum()); + double cur_sum_squares = static_cast(sum_squares()); if (cur_num == 0) return 0; double variance = - static_cast(cur_sum_squares * cur_num - cur_sum * cur_sum) / - static_cast(cur_num * cur_num); + (cur_sum_squares * cur_num - cur_sum * cur_sum) / (cur_num * cur_num); return std::sqrt(variance); } + std::string HistogramStat::ToString() const { uint64_t cur_num = num(); std::string r; diff --git a/monitoring/histogram_test.cc b/monitoring/histogram_test.cc index a37289365..7b63dc93a 100644 --- a/monitoring/histogram_test.cc +++ b/monitoring/histogram_test.cc @@ -225,6 +225,12 @@ TEST_F(HistogramTest, HistogramWindowingMerge) { ASSERT_EQ(histogramWindowing.max(), 5); } +TEST_F(HistogramTest, LargeStandardDeviation) { + HistogramImpl histogram; + PopulateHistogram(histogram, 1, 1000000); + ASSERT_LT(fabs(histogram.StandardDeviation() - 288675), 1); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) {