Added benchmark functionality along the lines of folly/Benchmark.h

Summary: Added benchmark functionality along the lines of folly/Benchmark.h. Test Plan: Added unit tests. Reviewers: igor, haobo, sdong, ljin, yhchiang, dhruba. Reviewed By: igor. CC: leveldb. Differential Revision: https://reviews.facebook.net/D17973
12 years ago · ff1b5df4c6
parent c7076a7a05
commit ff1b5df4c6
10 changed files with 911 additions and 17 deletions
--- a/.gitignore
+++ b/.gitignore
@ -10,7 +10,7 @@ build_config.mk
 *.so
 *.so.*
 *_test
-*_bench
+*_benchmark
 *_stress
 *.out
 *.class
--- a/26
+++ b/26
@ -76,6 +76,7 @@ TESTS = \
 	table_properties_collector_test \
 	arena_test \
 	auto_roll_logger_test \
 	benchmarkharness_test \
 	block_test \
 	bloom_test \
 	dynamic_bloom_test \
@ -119,10 +120,10 @@ TOOLS = \
        db_stress \
        ldb \
 	db_repl_stress \
-	blob_store_bench
+	blob_store_benchmark
-PROGRAMS = db_bench signal_test table_reader_bench $(TOOLS)
+PROGRAMS = db_benchmark signal_test table_reader_benchmark $(TOOLS)
-BENCHMARKS = db_bench_sqlite3 db_bench_tree_db table_reader_bench
+BENCHMARKS = db_bench_sqlite3 db_bench_tree_db table_reader_benchmark
 # The library name is configurable since we are maintaining libraries of both
 # debug/release mode.
@ -245,8 +246,8 @@ $(LIBRARY): $(LIBOBJECTS)
 	rm -f $@
 	$(AR) -rs $@ $(LIBOBJECTS)
-db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
+db_benchmark: db/db_benchmark.o $(LIBOBJECTS) $(TESTUTIL)
-	$(CXX) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@  $(LDFLAGS) $(COVERAGEFLAGS)
+	$(CXX) db/db_benchmark.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@  $(LDFLAGS) $(COVERAGEFLAGS)
 block_hash_index_test: table/block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	 $(CXX) table/block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
@ -260,8 +261,8 @@ db_sanity_test: tools/db_sanity_test.o $(LIBOBJECTS) $(TESTUTIL)
 db_repl_stress: tools/db_repl_stress.o $(LIBOBJECTS) $(TESTUTIL)
 	$(CXX) tools/db_repl_stress.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@  $(LDFLAGS) $(COVERAGEFLAGS)
-blob_store_bench: tools/blob_store_bench.o $(LIBOBJECTS) $(TESTUTIL)
+blob_store_benchmark: tools/blob_store_benchmark.o $(LIBOBJECTS) $(TESTUTIL)
-	$(CXX) tools/blob_store_bench.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@  $(LDFLAGS) $(COVERAGEFLAGS)
+	$(CXX) tools/blob_store_benchmark.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@  $(LDFLAGS) $(COVERAGEFLAGS)
 db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL)
 	$(CXX) doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) -lsqlite3 $(COVERAGEFLAGS)
@ -308,6 +309,9 @@ stringappend_test: utilities/merge_operators/string_append/stringappend_test.o $
 redis_test: utilities/redis/redis_lists_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) utilities/redis/redis_lists_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
 benchmarkharness_test: util/benchmarkharness_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) util/benchmarkharness_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) $(COVERAGEFLAGS)
 histogram_test: util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) $(COVERAGEFLAGS)
@ -323,8 +327,8 @@ crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
 db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
-log_write_bench: util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS)
+log_write_benchmark: util/log_write_benchmark.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -pg
+	$(CXX) util/log_write_benchmark.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -pg
 plain_table_db_test: db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
@ -332,8 +336,8 @@ plain_table_db_test: db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
 simple_table_db_test: db/simple_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/simple_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
-table_reader_bench: table/table_reader_bench.o $(LIBOBJECTS) $(TESTHARNESS)
+table_reader_benchmark: table/table_reader_benchmark.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) table/table_reader_bench.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -pg
+	$(CXX) table/table_reader_benchmark.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -pg
 perf_context_test: db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS)
--- a/build_tools/build_detect_platform
+++ b/build_tools/build_detect_platform
@ -166,9 +166,9 @@ DIRS="util db table utilities"
 set -f # temporarily disable globbing so that our patterns arent expanded
 PRUNE_TEST="-name *test*.cc -prune"
-PRUNE_BENCH="-name *_bench.cc -prune"
+PRUNE_BENCHMARK="-name *_benchmark*.cc -prune"
-PORTABLE_FILES=`cd $ROCKSDB_ROOT; find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cc' -print | sort | tr "\n" " "`
+PORTABLE_FILES=`cd $ROCKSDB_ROOT; find $DIRS $PRUNE_TEST -o $PRUNE_BENCHMARK -o -name '*.cc' -print | sort | tr "\n" " "`
-PORTABLE_CPP=`cd $ROCKSDB_ROOT; find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cpp' -print | sort | tr "\n" " "`
+PORTABLE_CPP=`cd $ROCKSDB_ROOT; find $DIRS $PRUNE_TEST -o $PRUNE_BENCHMARK -o -name '*.cpp' -print | sort | tr "\n" " "`
 set +f # re-enable globbing
 # The sources consist of the portable files, plus the platform-specific port
--- a/db/db_benchmark.cc
+++ b/db/db_benchmark.cc
--- a/table/table_reader_benchmark.cc
+++ b/table/table_reader_benchmark.cc
--- a/tools/blob_store_benchmark.cc
+++ b/tools/blob_store_benchmark.cc
--- a/util/benchmarkharness.cc
+++ b/util/benchmarkharness.cc
@ -0,0 +1,414 @@
 //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 //  This source code is licensed under the BSD-style license found in the
 //  LICENSE file in the root directory of this source tree. An additional grant
 //  of patent rights can be found in the PATENTS file in the same directory.
 //
 // This code is derived from Benchmark.cpp implemented in Folly, the opensourced
 // Facebook C++ library available at https://github.com/facebook/folly
 // The code has removed any dependence on other folly and boost libraries
 #include "util/benchmarkharness.h"
 #include <algorithm>
 #include <cmath>
 #include <cstring>
 #include <iostream>
 #include <limits>
 #include <string>
 #include <utility>
 #include <vector>
 using std::function;
 using std::get;
 using std::make_pair;
 using std::max;
 using std::min;
 using std::pair;
 using std::sort;
 using std::string;
 using std::tuple;
 using std::vector;
 // Command-line flags (gflags) for the benchmark harness.
 // NOTE(review): FLAGS_benchmark is not read anywhere in the code shown in
 // this file; confirm whether a caller is expected to check it.
 DEFINE_bool(benchmark, false, "Run benchmarks.");
 // A single timed call must last at least this long before its result is
 // accepted (see minNanoseconds in RunBenchmarkGetNSPerIteration).
 DEFINE_int64(bm_min_usec, 100,
             "Minimum # of microseconds we'll accept for each benchmark.");
 // Iteration count each epoch starts from; it is doubled until the timing
 // is long enough.
 DEFINE_int64(bm_min_iters, 1,
             "Minimum # of iterations we'll try for each benchmark.");
 // Total time budget for measuring one benchmark across all of its epochs.
 DEFINE_int32(bm_max_secs, 1,
             "Maximum # of seconds we'll spend on each benchmark.");
 namespace rocksdb {
 namespace benchmark {
 // Definition of the static counter declared inside BenchmarkSuspender.
 BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
 // A registered benchmark: called with an iteration count, returns the
 // number of nanoseconds the run took.
 typedef function<uint64_t(unsigned int)> BenchmarkFun;
 // Registry of (file, name, function) triples, appended to by
 // detail::AddBenchmarkImpl and consumed by RunBenchmarks.
 static vector<tuple<const char*, const char*, BenchmarkFun>> benchmarks;
 // Add the global baseline. RunBenchmarks() measures benchmarks.front()
 // first and subtracts its per-iteration cost from every other benchmark.
 // NOTE(review): this relies on the registration below being the first
 // entry in `benchmarks`; registrations made from other translation units
 // during static initialization are presumably expected to come later --
 // confirm.
 BENCHMARK(globalBenchmarkBaseline) {
  // Intentionally empty apart from an empty volatile asm statement.
  asm volatile("");
 }
 void detail::AddBenchmarkImpl(const char* file, const char* name,
                              BenchmarkFun fun) {
  benchmarks.emplace_back(file, name, std::move(fun));
 }
 /**
 * Estimates the sample density at `where`.
 *
 * Every sample x in [begin, end) contributes
 * exp(-((x - where) / bandwidth)^2) and the contributions are averaged,
 * so the result is 1.0 when all samples equal `where` and falls toward
 * 0 as the samples move away from it.
 *
 * Requires a non-empty range and a strictly positive bandwidth.
 */
 static double Density(const double * begin, const double *const end,
                       const double where, const double bandwidth) {
  assert(begin < end);
  assert(bandwidth > 0.0);
  double total = 0.0;
  const double* sample = begin;
  while (sample < end) {
    const double scaled = (*sample - where) / bandwidth;
    total += exp(- scaled * scaled);
    ++sample;
  }
  const auto count = end - begin;
  return total / count;
 }
 /**
 * Computes the mean and the spread of the samples in [begin, end).
 *
 * Returns (mean, sigma). Despite the function's name, the second member
 * is the square root of the population variance, i.e. the standard
 * deviation. It is never negative and never NaN for finite input.
 *
 * Requires a non-empty range.
 */
 static std::pair<double, double>
 MeanVariance(const double * begin, const double *const end) {
  assert(begin < end);
  double sum = 0.0, sum2 = 0.0;
  for (auto i = begin; i < end; i++) {
    sum += *i;
    sum2 += *i * *i;
  }
  auto const n = end - begin;
  // For (nearly) identical samples the subtraction below can round to a
  // tiny negative number; clamp it so sqrt() cannot produce NaN.
  const double variance = std::max(0.0, (sum2 - sum * sum / n) / n);
  return std::make_pair(sum / n, std::sqrt(variance));
 }
 /**
 * Computes the mode of a sample set through brute force: every sample is
 * tried as a candidate and the one with the highest Density() wins.
 * Assumes the input is sorted in ascending order and non-empty.
 *
 * When the samples have no usable spread (sigma is zero, or not a
 * positive number at all), the first sample is returned.
 */
 static double Mode(const double * begin, const double *const end) {
  assert(begin < end);
  // Best candidate found so far and the density observed there.
  double result = 0.0;
  double bestDensity = 0.0;
  // Get the spread so we pass it down to Density()
  auto const sigma = MeanVariance(begin, end).second;
  if (!(sigma > 0.0)) {
    // No spread means constant signal. Written as a negated comparison so
    // that a NaN sigma also ends up here instead of reaching Density()
    // with an invalid bandwidth.
    return *begin;
  }
  // The bandwidth does not depend on the candidate; compute it once.
  const double bandwidth = sigma * sqrt(2.0);
  for (auto i = begin; i < end; i++) {
    assert(i == begin || *i >= i[-1]);
    auto candidate = Density(begin, end, *i, bandwidth);
    if (candidate > bestDensity) {
      // Found a new best
      bestDensity = candidate;
      result = *i;
    }
    // A decreasing density is not a reason to stop: the distribution is
    // not known to be unimodal.
  }
  return result;
 }
 /**
 * Given a bunch of benchmark samples, estimate the actual run time.
 *
 * The estimate currently returned is simply the smallest sample in
 * [begin, end). The range must be non-empty.
 *
 * NOTE(review): every statement after the first `return` is unreachable.
 * It is an alternative, mode-based estimator and is the only place in
 * this file that calls Mode(), so deleting it would leave Mode() and its
 * helpers unreferenced.
 */
 static double EstimateTime(double * begin, double * end) {
  assert(begin < end);
  // Current state of the art: get the minimum. After some
  // experimentation, it seems taking the minimum is the best.
  return *std::min_element(begin, end);
  // ---- Unreachable from here on ----
  // What follows after estimates the time as the mode of the
  // distribution.
  // Select the awesomest (i.e. most frequent) result. We do this by
  // sorting and then computing the longest run length.
  sort(begin, end);
  // Eliminate outliers. A time much larger than the minimum time is
  // considered an outlier.
  while (end[-1] > 2.0 * *begin) {
    --end;
    if (begin == end) {
 //      LOG(INFO) << *begin;
    }
    assert(begin < end);
  }
  double result = 0;
  /* Code used just for comparison purposes */ {
    // Longest run of identical values in the sorted samples.
    unsigned bestFrequency = 0;
    unsigned candidateFrequency = 1;
    double candidateValue = *begin;
    for (auto current = begin + 1; ; ++current) {
      if (current == end || *current != candidateValue) {
        // Done with the current run, see if it was best
        if (candidateFrequency > bestFrequency) {
          bestFrequency = candidateFrequency;
          result = candidateValue;
        }
        if (current == end) {
          break;
        }
        // Start a new run
        candidateValue = *current;
        candidateFrequency = 1;
      } else {
        // Cool, inside a run, increase the frequency
        ++candidateFrequency;
      }
    }
  }
  // The run-length result above is overwritten by the density-based mode.
  result = Mode(begin, end);
  return result;
 }
 /**
 * Measures one benchmark and returns its estimated cost in nanoseconds
 * per iteration, with `globalBaseline` (the per-iteration cost of the
 * empty baseline benchmark) subtracted and the result clamped at zero.
 *
 * `fun` is called with an iteration count and returns the nanoseconds the
 * run took. Up to 1000 epochs are measured, or fewer if the
 * FLAGS_bm_max_secs budget runs out; EstimateTime() condenses the
 * per-epoch results into the returned figure.
 */
 static double RunBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
                                             const double globalBaseline) {
  // The key here is accuracy; too low numbers means the accuracy was
  // coarse. We up the ante until we get to at least minNanoseconds
  // timings.
  static uint64_t resolutionInNs = 0;
  if (!resolutionInNs) {
    timespec ts;
    ASSERT_EQ(0, clock_getres(detail::DEFAULT_CLOCK_ID, &ts));
    ASSERT_EQ(0, ts.tv_sec);  // "Clock sucks.";
    ASSERT_LT(0, ts.tv_nsec);  // "Clock too fast for its own good.";
    ASSERT_EQ(1, ts.tv_nsec);  // "Clock too coarse, upgrade your kernel.";
    resolutionInNs = ts.tv_nsec;
  }
  // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
  // the clock resolution is worse than that, it will be larger. In
  // essence we're aiming at making the quantization noise 0.01%.
  static const auto minNanoseconds =
    max(FLAGS_bm_min_usec * 1000UL,
        min<uint64_t>(resolutionInNs * 100000, 1000000000ULL));
  // We do measurements in several epochs and take the minimum, to
  // account for jitter.
  static const unsigned int epochs = 1000;
  // We establish a total time budget as we don't want a measurement
  // to take too long. This will curtail the number of actual epochs.
  // The flag is a 32-bit int, so widen it before multiplying; otherwise
  // any budget of 3 seconds or more overflows.
  const uint64_t timeBudgetInNs =
    static_cast<uint64_t>(FLAGS_bm_max_secs) * 1000000000ULL;
  // An iteration count of 0 would never grow under `n *= 2`, spinning the
  // inner loop forever; start from 1 in that case.
  const unsigned int requestedIters =
    static_cast<unsigned int>(FLAGS_bm_min_iters);
  const unsigned int firstIters = requestedIters ? requestedIters : 1;
  timespec global;
  ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &global));
  double epochResults[epochs] = { 0 };
  size_t actualEpochs = 0;
  for (; actualEpochs < epochs; ++actualEpochs) {
    for (unsigned int n = firstIters; n < (1UL << 30); n *= 2) {
      auto const nsecs = fun(n);
      if (nsecs < minNanoseconds) {
        // Too short to be trusted; retry with twice the iterations.
        continue;
      }
      // We got an accurate enough timing, done. Record the per-iteration
      // cost net of the baseline, never below zero.
      epochResults[actualEpochs] = max(0.0,
          static_cast<double>(nsecs) / n - globalBaseline);
      // Done with the current epoch, we got a meaningful timing.
      break;
    }
    timespec now;
    ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &now));
    if (detail::TimespecDiff(now, global) >= timeBudgetInNs) {
      // No more time budget available. Count the epoch just finished,
      // since leaving via break skips the loop's own increment.
      ++actualEpochs;
      break;
    }
  }
  // If the benchmark was basically drowned in baseline noise, it's
  // possible it became negative.
  return max(0.0, EstimateTime(epochResults, epochResults + actualEpochs));
 }
 // One row of a unit table used by HumanReadable().
 struct ScaleInfo {
  // Smallest absolute value printed with this unit; also the divisor
  // applied to the value before printing.
  double boundary;
  // Text appended after the number; nullptr marks the end of a table.
  const char* suffix;
 };
 // Time units for values expressed in seconds, ordered from largest to
 // smallest, ending with the { 0, nullptr } terminator that
 // HumanReadable() looks for.
 static const ScaleInfo kTimeSuffixes[] {
  { 365.25 * 24 * 3600, "years" },
  { 24 * 3600, "days" },
  { 3600, "hr" },
  { 60, "min" },
  { 1, "s" },
  { 1E-3, "ms" },
  { 1E-6, "us" },
  { 1E-9, "ns" },
  { 1E-12, "ps" },
  { 1E-15, "fs" },
  { 0, nullptr },
 };
 // Metric prefixes for plain quantities, ordered from largest to smallest,
 // ending with the { 0, nullptr } terminator that HumanReadable() looks
 // for.
 static const ScaleInfo kMetricSuffixes[] {
  { 1E24, "Y" },  // yotta
  { 1E21, "Z" },  // zetta
  { 1E18, "X" },  // "exa" written with suffix 'X' so as to not create
                  //   confusion with scientific notation
  { 1E15, "P" },  // peta
  { 1E12, "T" },  // tera
  { 1E9, "G" },   // giga
  { 1E6, "M" },   // mega
  { 1E3, "K" },   // kilo
  { 1, "" },
  { 1E-3, "m" },  // milli
  { 1E-6, "u" },  // micro
  { 1E-9, "n" },  // nano
  { 1E-12, "p" },  // pico
  { 1E-15, "f" },  // femto
  { 1E-18, "a" },  // atto
  { 1E-21, "z" },  // zepto
  { 1E-24, "y" },  // yocto
  { 0, nullptr },
 };
 // Formats `n` with `decimals` fractional digits followed by a unit suffix
 // taken from `scales`, a table ordered from largest to smallest boundary
 // and terminated by an entry whose suffix is nullptr. The first entry
 // whose boundary does not exceed |n| is used; values smaller than every
 // boundary use the last real entry. Infinities and NaN are returned as
 // produced by std::to_string, without a suffix.
 static string HumanReadable(double n, unsigned int decimals,
                             const ScaleInfo* scales) {
  if (std::isinf(n) || std::isnan(n)) {
    return std::to_string(n);
  }
  const double magnitude = fabs(n);
  const ScaleInfo* chosen = scales;
  for (;;) {
    if (!(magnitude < chosen->boundary)) {
      break;  // this unit is small enough for the value
    }
    if (chosen[1].suffix == nullptr) {
      break;  // no smaller unit left; stay on the last real entry
    }
    ++chosen;
  }
  char text[80];
  snprintf(text, sizeof(text), "%.*f%s", decimals, n / chosen->boundary,
           chosen->suffix);
  return string(text);
 }
 // Formats `n` via HumanReadable() using the kTimeSuffixes table; given the
 // table's { 1, "s" } row, `n` is a duration in seconds.
 static string ReadableTime(double n, unsigned int decimals) {
  return HumanReadable(n, decimals, kTimeSuffixes);
 }
 // Formats `n` via HumanReadable() using the kMetricSuffixes table (K, M,
 // G, ... / m, u, n, ...).
 static string MetricReadable(double n, unsigned int decimals) {
  return HumanReadable(n, decimals, kMetricSuffixes);
 }
 /**
 * Prints the collected results as a fixed-width table on stdout.
 *
 * `data` holds one (file, name, nanoseconds-per-iteration) triple per
 * benchmark. A header is printed whenever the file changes from one entry
 * to the next. An entry named "-" draws a dashed rule. A name starting
 * with '%' marks a relative benchmark: the '%' is stripped and its speed
 * is additionally shown as a percentage of the most recent non-relative
 * entry, which acts as the baseline.
 */
 static void PrintBenchmarkResultsAsTable(
  const vector<tuple<const char*, const char*, double> >& data) {
  // Width available. Declared int because it is passed as a printf '*'
  // field width, which must be an int.
  static const int columns = 76;
  // Print a horizontal rule
  auto separator = [&](char pad) {
    puts(string(columns, pad).c_str());
  };
  // Print header for a file
  auto header = [&](const char* file) {
    separator('=');
    printf("%-*srelative  time/iter  iters/s\n",
           columns - 28, file);
    separator('=');
  };
  double baselineNsPerIter = std::numeric_limits<double>::max();
  const char* lastFile = "";
  for (const auto& datum : data) {
    auto file = get<0>(datum);
    if (strcmp(file, lastFile)) {
      // New file starting
      header(file);
      lastFile = file;
    }
    string s = get<1>(datum);
    if (s == "-") {
      separator('-');
      continue;
    }
    const auto nsPerIter = get<2>(datum);
    const bool useBaseline = (s[0] == '%');
    if (useBaseline) {
      // Relative entry: drop the marker, keep the current baseline.
      s.erase(0, 1);
    } else {
      // Plain entry: it becomes the baseline for the entries after it.
      baselineNsPerIter = nsPerIter;
    }
    // Pad or truncate the name to the width of the first column.
    s.resize(columns - 29, ' ');
    auto secPerIter = nsPerIter / 1E9;
    auto itersPerSec = 1 / secPerIter;
    if (!useBaseline) {
      // Print without baseline
      printf("%*s           %9s  %7s\n",
             static_cast<int>(s.size()), s.c_str(),
             ReadableTime(secPerIter, 2).c_str(),
             MetricReadable(itersPerSec, 2).c_str());
    } else {
      // Print with baseline
      auto rel = baselineNsPerIter / nsPerIter * 100.0;
      printf("%*s %7.2f%%  %9s  %7s\n",
             static_cast<int>(s.size()), s.c_str(),
             rel,
             ReadableTime(secPerIter, 2).c_str(),
             MetricReadable(itersPerSec, 2).c_str());
    }
  }
  separator('=');
 }
 // Runs every registered benchmark and prints the results as a table.
 // The first registry entry is measured on its own and used as the global
 // baseline subtracted from all others; it does not appear in the output.
 // Separator entries (name "-") are not measured and carry a time of 0.
 void RunBenchmarks() {
  ASSERT_TRUE(!benchmarks.empty());
  vector<tuple<const char*, const char*, double>> results;
  results.reserve(benchmarks.size() - 1);
  // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
  auto const globalBaseline = RunBenchmarkGetNSPerIteration(
    get<2>(benchmarks.front()), 0);
  // size_t index: matches the type of size() and avoids a signed/unsigned
  // comparison.
  for (size_t i = 1; i < benchmarks.size(); i++) {
    const auto& entry = benchmarks[i];
    double elapsed = 0.0;
    if (strcmp(get<1>(entry), "-") != 0) {  // skip separators
      elapsed = RunBenchmarkGetNSPerIteration(get<2>(entry),
                                              globalBaseline);
    }
    results.emplace_back(get<0>(entry), get<1>(entry), elapsed);
  }
  // PLEASE MAKE NOISE. MEASUREMENTS DONE.
  PrintBenchmarkResultsAsTable(results);
 }
 }  // namespace benchmark
 }  // namespace rocksdb
--- a/util/benchmarkharness.h
+++ b/util/benchmarkharness.h
@ -0,0 +1,407 @@
 //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 //  This source code is licensed under the BSD-style license found in the
 //  LICENSE file in the root directory of this source tree. An additional grant
 //  of patent rights can be found in the PATENTS file in the same directory.
 //
 // This code is derived from Benchmark.h implemented in Folly, the opensourced
 // Facebook C++ library available at https://github.com/facebook/folly
 // The code has removed any dependence on other folly and boost libraries
 #pragma once
 #include <gflags/gflags.h>
 #include <cassert>
 #include <ctime>
 #include <functional>
 #include <limits>
 #include "util/testharness.h"
 namespace rocksdb {
 namespace benchmark {
 /**
 * Runs all benchmarks defined. Usually put in main().
 */
 void RunBenchmarks();
 namespace detail {
 /**
 * This is the clock ID used for measuring time. On older kernels, the
 * resolution of this clock will be very coarse, which will cause the
 * benchmarks to fail.
 *
 * It is currently set to CLOCK_REALTIME and is passed to clock_gettime()
 * and clock_getres() by the harness.
 */
 enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
 /**
 * Adds a benchmark wrapped in a std::function. Only used
 * internally. Pass by value is intentional.
 */
 void AddBenchmarkImpl(const char* file,
                      const char* name,
                      std::function<uint64_t(unsigned int)>);
 /**
 * Takes the difference between two timespec values and returns it in
 * nanoseconds. end is assumed to occur after start; this is checked by
 * assertions only.
 */
 inline uint64_t TimespecDiff(timespec end, timespec start) {
  // Spelled as a 64-bit literal so the arithmetic below stays 64-bit even
  // where `unsigned long` is only 32 bits wide.
  const uint64_t kNsPerSec = 1000000000ULL;
  if (end.tv_sec == start.tv_sec) {
    assert(end.tv_nsec >= start.tv_nsec);
    return static_cast<uint64_t>(end.tv_nsec - start.tv_nsec);
  }
  assert(end.tv_sec > start.tv_sec);
  const uint64_t secs = static_cast<uint64_t>(end.tv_sec - start.tv_sec);
  assert(secs < std::numeric_limits<uint64_t>::max() / kNsPerSec);
  // end.tv_nsec may be smaller than start.tv_nsec; since secs >= 1 the
  // unsigned sum still comes out right (the subtraction borrows from the
  // seconds term).
  return secs * kNsPerSec
    + static_cast<uint64_t>(end.tv_nsec)
    - static_cast<uint64_t>(start.tv_nsec);
 }
 /**
 * Takes the difference between two sets of timespec values. The first
 * two come from a high-resolution clock whereas the other two come
 * from a low-resolution clock. The crux of the matter is that
 * high-res values may be bogus as documented in
 * http://linux.die.net/man/3/clock_gettime. The trouble is when the
 * running process migrates from one CPU to another, which is more
 * likely for long-running processes. Therefore we watch for high
 * differences between the two timings.
 *
 * Returns the high-resolution difference unless it disagrees with the
 * low-resolution one by a millisecond or more (in either direction), in
 * which case the low-resolution difference is returned. Both are in
 * nanoseconds.
 *
 * This function is subject to further improvements.
 */
 inline uint64_t TimespecDiff(timespec end, timespec start,
                              timespec endCoarse, timespec startCoarse) {
  auto fine = TimespecDiff(end, start);
  auto coarse = TimespecDiff(endCoarse, startCoarse);
  // Both values are unsigned, so take the absolute gap explicitly: a plain
  // `coarse - fine` wraps around to a huge number whenever fine is even
  // slightly larger, which would always discard the precise reading.
  const uint64_t gap = coarse > fine ? coarse - fine : fine - coarse;
  if (gap >= 1000000) {
    // The fine time is in all likelihood bogus
    return coarse;
  }
  return fine;
 }
 }  // namespace detail
 /**
 * Supporting type for BENCHMARK_SUSPEND defined below.
 *
 * While an instance is active, the time that passes is added to the
 * static counter nsSpent when the instance is dismissed or destroyed; the
 * harness subtracts that counter from the measured run time. An instance
 * is "active" when start_ is non-zero and "dismissed" when both fields
 * of start_ are zero. Instances can be moved but not copied.
 */
 struct BenchmarkSuspender {
  // Starts suspending: records the current time.
  BenchmarkSuspender() {
    ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start_));
  }
  BenchmarkSuspender(const BenchmarkSuspender &) = delete;
  // Takes over the suspension from rhs, leaving rhs dismissed.
  BenchmarkSuspender(BenchmarkSuspender && rhs) {
    start_ = rhs.start_;
    rhs.start_.tv_nsec = rhs.start_.tv_sec = 0;
  }
  BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
  // Accounts for this instance's own suspension first (if active), then
  // takes over the one from rhs, leaving rhs dismissed.
  BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
    if (start_.tv_nsec > 0 || start_.tv_sec > 0) {
      tally();
    }
    start_ = rhs.start_;
    rhs.start_.tv_nsec = rhs.start_.tv_sec = 0;
    return *this;
  }
  // Accounts for the suspended time if still active.
  ~BenchmarkSuspender() {
    if (start_.tv_nsec > 0 || start_.tv_sec > 0) {
      tally();
    }
  }
  // Ends the suspension early: accounts for the time so far and marks the
  // instance as dismissed. Must only be called on an active instance.
  void Dismiss() {
    assert(start_.tv_nsec > 0 || start_.tv_sec > 0);
    tally();
    start_.tv_nsec = start_.tv_sec = 0;
  }
  // Restarts a dismissed instance from the current time.
  void Rehire() {
    // Dismissed means BOTH fields are zero (see Dismiss()), so both must
    // be checked; with `||` an active instance could slip through.
    assert(start_.tv_nsec == 0 && start_.tv_sec == 0);
    ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start_));
  }
  /**
   * This helps the macro definition. To get around the dangers of
   * operator bool, returns a pointer to member (which allows no
   * arithmetic). The returned pointer is always null, so an instance
   * always tests as false in a condition.
   */
  /* implicit */
  operator int BenchmarkSuspender::*() const {
    return nullptr;
  }
  /**
   * Accumulates nanoseconds spent outside benchmark.
   */
  typedef uint64_t NanosecondsSpent;
  static NanosecondsSpent nsSpent;
 private:
  // Adds the time elapsed since start_ to nsSpent and restarts the
  // interval from now.
  void tally() {
    timespec end;
    ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
    nsSpent += detail::TimespecDiff(end, start_);
    start_ = end;
  }
  // Start of the current suspension; all-zero when dismissed.
  timespec start_;
 };
 /**
 * Registers a benchmark whose callable does its own iterating. Normally
 * reached through the BENCHMARK_N family of macros rather than called
 * directly.
 *
 * `lambda` must be callable with a single unsigned iteration count. It is
 * wrapped in a timing shim that resets the suspended-time counter, takes
 * a clock reading on either side of the call and reports the elapsed
 * nanoseconds minus whatever time was spent suspended.
 */
 template <typename Lambda>
 void
 AddBenchmark_n(const char* file, const char* name, Lambda&& lambda) {
  std::function<uint64_t(unsigned int)> timed =
    [=](unsigned int iterations) -> uint64_t {
      BenchmarkSuspender::nsSpent = 0;
      timespec before, after;
      // CORE MEASUREMENT STARTS
      auto const rcBefore = clock_gettime(detail::DEFAULT_CLOCK_ID, &before);
      lambda(iterations);
      auto const rcAfter = clock_gettime(detail::DEFAULT_CLOCK_ID, &after);
      // CORE MEASUREMENT ENDS
      // The return codes are checked only now so that the checks stay
      // outside the timed region.
      ASSERT_EQ(0, rcBefore);
      ASSERT_EQ(0, rcAfter);
      const uint64_t elapsed = detail::TimespecDiff(after, before);
      return elapsed - BenchmarkSuspender::nsSpent;
    };
  detail::AddBenchmarkImpl(file, name, timed);
 }
 /**
 * Registers a benchmark whose callable performs a single iteration.
 * Normally reached through the BENCHMARK family of macros rather than
 * called directly.
 *
 * `lambda` must be callable with no arguments; the harness supplies the
 * loop and invokes it once per requested iteration.
 */
 template <typename Lambda>
 void
 AddBenchmark(const char* file, const char* name, Lambda&& lambda) {
  AddBenchmark_n(file, name, [=](unsigned int times) {
      for (unsigned int done = 0; done < times; ++done) {
        lambda();
      }
    });
 }
 }  // namespace benchmark
 }  // namespace rocksdb
 /**
 * FB_ONE_OR_NONE(hello, world) expands to hello and
 * FB_ONE_OR_NONE(hello) expands to nothing. This macro is used to
 * insert or eliminate text based on the presence of another argument.
 *
 * It relies on the `, ## __VA_ARGS__` form, which drops the preceding
 * comma when no variadic arguments are given.
 */
 #define FB_ONE_OR_NONE(a, ...) FB_THIRD(a, ## __VA_ARGS__, a)
 // Discards its first two arguments and expands to whatever follows.
 #define FB_THIRD(a, b, ...) __VA_ARGS__
 // Token pasting in two steps, so that macro arguments such as __LINE__
 // are expanded before being pasted.
 #define FB_CONCATENATE_IMPL(s1, s2) s1##s2
 #define FB_CONCATENATE(s1, s2) FB_CONCATENATE_IMPL(s1, s2)
 // Builds an identifier from `str` followed by the current line number.
 #define FB_ANONYMOUS_VARIABLE(str) FB_CONCATENATE(str, __LINE__)
 // Turns its argument into a string literal (without expanding it first).
 #define FB_STRINGIZE(x) #x
 /**
 * Introduces a benchmark function. Used internally, see BENCHMARK and
 * friends below.
 *
 * The expansion (1) forward-declares funName, (2) defines a dummy static
 * bool whose initializer registers a lambda calling funName under the
 * name stringName, and (3) ends with the header of funName's definition,
 * so the braces written after the macro become the function body.
 *
 * BENCHMARK_IMPL_N is for functions taking one parameter (the iteration
 * count) and registers through AddBenchmark_n.
 */
 #define BENCHMARK_IMPL_N(funName, stringName, paramType, paramName)     \
  static void funName(paramType);                                       \
  static bool FB_ANONYMOUS_VARIABLE(rocksdbBenchmarkUnused) = (         \
    ::rocksdb::benchmark::AddBenchmark_n(__FILE__, stringName,          \
      [](paramType paramName) { funName(paramName); }),                 \
    true);                                                              \
  static void funName(paramType paramName)
 // Same as BENCHMARK_IMPL_N but for functions taking no parameters;
 // registers through AddBenchmark.
 #define BENCHMARK_IMPL(funName, stringName)                             \
  static void funName();                                                \
  static bool FB_ANONYMOUS_VARIABLE(rocksdbBenchmarkUnused) = (         \
    ::rocksdb::benchmark::AddBenchmark(__FILE__, stringName,            \
      []() { funName(); }),                                             \
    true);                                                              \
  static void funName()
 /**
 * Introduces a benchmark function.
 *
 * BENCHMARK(name) takes only the name of the benchmark; the harness
 * calls the body once per iteration. Use something descriptive, such as
 * insertVectorBegin.
 *
 * BENCHMARK_N(name, counter) additionally names an unsigned counter
 * variable; the body is called once and is expected to perform that
 * many iterations itself. Example:
 *
 * BENCHMARK(vectorPushBack) {
 *   vector<int> v;
 *   v.push_back(42);
 * }
 *
 * BENCHMARK_N(insertVectorBegin, n) {
 *   vector<int> v;
 *   for (unsigned i = 0; i < n; ++i) {
 *     v.insert(v.begin(), 42);
 *   }
 * }
 */
 #define BENCHMARK_N(name, ...)                                  \
  BENCHMARK_IMPL_N(                                             \
    name,                                                       \
    FB_STRINGIZE(name),                                         \
    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
    __VA_ARGS__)
 #define BENCHMARK(name)                                         \
  BENCHMARK_IMPL(                                               \
    name,                                                       \
    FB_STRINGIZE(name))
 /**
 * Defines a benchmark that passes a parameter to another one. This is
 * common for benchmarks that need a "problem size" in addition to
 * "number of iterations". Consider:
 *
 * void pushBack(uint n, size_t initialSize) {
 *   vector<int> v;
 *   BENCHMARK_SUSPEND {
 *     v.resize(initialSize);
 *   }
 *   for (uint i = 0; i < n; ++i) {
 *    v.push_back(i);
 *   }
 * }
 * BENCHMARK_PARAM(pushBack, 0)
 * BENCHMARK_PARAM(pushBack, 1000)
 * BENCHMARK_PARAM(pushBack, 1000000)
 *
 * The benchmark above estimates the speed of push_back at different
 * initial sizes of the vector. The framework will pass 0, 1000, and
 * 1000000 for initialSize, and the iteration count for n.
 *
 * The parameter is used both as the argument value and as part of the
 * benchmark's name (it simply forwards to BENCHMARK_NAMED_PARAM).
 */
 #define BENCHMARK_PARAM(name, param)                                    \
  BENCHMARK_NAMED_PARAM(name, param, param)
 /*
 * Like BENCHMARK_PARAM(), but allows a custom name to be specified for each
 * parameter, rather than using the parameter value.
 *
 * Useful when the parameter value is not a valid token for string pasting,
 * or when you want to specify multiple parameter arguments.
 *
 * For example:
 *
 * void addValue(uint n, int64_t bucketSize, int64_t min, int64_t max) {
 *   Histogram<int64_t> hist(bucketSize, min, max);
 *   int64_t num = min;
 *   for (uint i = 0; i < n; ++i) {
 *     hist.addValue(num);
 *     ++num;
 *     if (num > max) { num = min; }
 *   }
 * }
 *
 * BENCHMARK_NAMED_PARAM(addValue, 0_to_100, 1, 0, 100)
 * BENCHMARK_NAMED_PARAM(addValue, 0_to_1000, 10, 0, 1000)
 * BENCHMARK_NAMED_PARAM(addValue, 5k_to_20k, 250, 5000, 20000)
 *
 * The generated wrapper is named <name>_<param_name>, is listed as
 * "name(param_name)" and receives the iteration count, which it forwards
 * to `name` ahead of the extra arguments. It therefore has to be built
 * with BENCHMARK_IMPL_N, the variant that takes a parameter type and
 * parameter name.
 */
 #define BENCHMARK_NAMED_PARAM(name, param_name, ...)                    \
  BENCHMARK_IMPL_N(                                                     \
      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
      FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",              \
      unsigned,                                                         \
      iters) {                                                          \
    name(iters, ## __VA_ARGS__);                                        \
  }
 /**
 * Just like BENCHMARK, but prints the time relative to a
 * baseline. The baseline is the most recent BENCHMARK() seen in
 * lexical order. Example:
 *
 * // This is the baseline
 * BENCHMARK_N(insertVectorBegin, n) {
 *   vector<int> v;
 *   for (unsigned i = 0; i < n; ++i) {
 *     v.insert(v.begin(), 42);
 *   }
 * }
 *
 * BENCHMARK_RELATIVE_N(insertListBegin, n) {
 *   list<int> s;
 *   for (unsigned i = 0; i < n; ++i) {
 *     s.insert(s.begin(), 42);
 *   }
 * }
 *
 * Any number of relative benchmark can be associated with a
 * baseline. Another BENCHMARK() occurrence effectively establishes a
 * new baseline.
 *
 * A relative benchmark is marked by a leading '%' in its registered
 * name, which the result printer strips before display.
 */
 #define BENCHMARK_RELATIVE_N(name, ...)                         \
  BENCHMARK_IMPL_N(                                             \
    name,                                                       \
    "%" FB_STRINGIZE(name),                                     \
    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
    __VA_ARGS__)
 #define BENCHMARK_RELATIVE(name)                                \
  BENCHMARK_IMPL(                                               \
    name,                                                       \
    "%" FB_STRINGIZE(name))
 /**
 * A combination of BENCHMARK_RELATIVE and BENCHMARK_PARAM: the parameter
 * serves as both the argument value and the name suffix, and the result
 * is reported relative to the current baseline.
 */
 #define BENCHMARK_RELATIVE_PARAM(name, param)                           \
  BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)
 /**
 * A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM.
 *
 * The generated wrapper is named <name>_<param_name>, is registered as
 * "%name(param_name)" (the '%' marks it as relative) and receives the
 * iteration count, which it forwards to `name` ahead of the extra
 * arguments. It therefore has to be built with BENCHMARK_IMPL_N, the
 * variant that takes a parameter type and parameter name.
 */
 #define BENCHMARK_RELATIVE_NAMED_PARAM(name, param_name, ...)           \
  BENCHMARK_IMPL_N(                                                     \
      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
      "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",          \
      unsigned,                                                         \
      iters) {                                                          \
    name(iters, ## __VA_ARGS__);                                        \
  }
 /**
 * Draws a line of dashes.
 *
 * Implemented by registering an empty benchmark under the name "-",
 * which the runner skips when measuring and the result printer renders
 * as a dashed rule.
 */
 #define BENCHMARK_DRAW_LINE()                                       \
  static bool FB_ANONYMOUS_VARIABLE(rocksdbBenchmarkUnused) = (     \
    ::rocksdb::benchmark::AddBenchmark(__FILE__, "-", []() { }),               \
    true);
 /**
 * Allows execution of code that doesn't count toward the benchmark's
 * measured time. Example:
 *
 * BENCHMARK_N(insertVectorBegin, n) {
 *   vector<int> v;
 *   BENCHMARK_SUSPEND {
 *     v.reserve(n);
 *   }
 *   for (unsigned i = 0; i < n; ++i) {
 *     v.insert(v.begin(), 42);
 *   }
 * }
 *
 * How it works: the `if` declares a BenchmarkSuspender in its condition.
 * The suspender always converts to a null pointer-to-member, so the
 * condition is false and the user's block runs as the `else` branch
 * while the suspender is alive; its destructor then adds the elapsed
 * time to BenchmarkSuspender::nsSpent.
 */
 #define BENCHMARK_SUSPEND                               \
  if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) =   \
      ::rocksdb::benchmark::BenchmarkSuspender()) {}               \
  else
--- a/util/benchmarkharness_test.cc
+++ b/util/benchmarkharness_test.cc
@ -0,0 +1,69 @@
 //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 //  This source code is licensed under the BSD-style license found in the
 //  LICENSE file in the root directory of this source tree. An additional grant
 //  of patent rights can be found in the PATENTS file in the same directory.
 //
 #include "util/benchmarkharness.h"
 #include <vector>
 namespace rocksdb {
 namespace benchmark {
 // Baseline for the benchmark that follows: builds a 100-element vector by
 // always inserting at the front. The harness calls this body once per
 // iteration.
 BENCHMARK(insertFrontVector) {
  std::vector<int> v;
  for (int i = 0; i < 100; i++) {
    v.insert(v.begin(), i);
  }
 }
 // Same workload as insertFrontVector but inserting at the end; reported
 // relative to insertFrontVector.
 BENCHMARK_RELATIVE(insertBackVector) {
  std::vector<int> v;
  for (int i = 0; i < 100; i++) {
    v.insert(v.end(), i);
  }
 }
 // Baseline for the relative benchmark that follows: inserts n elements at
 // the front of a vector, where n is the iteration count supplied by the
 // harness.
 BENCHMARK_N(insertFrontVector_n, n) {
  std::vector<int> v;
  // n is unsigned, so the index is unsigned too (no signed/unsigned
  // comparison).
  for (unsigned int i = 0; i < n; i++) {
    v.insert(v.begin(), static_cast<int>(i));
  }
 }
 // Inserts n elements at the end of a vector, where n is the iteration
 // count supplied by the harness; reported relative to
 // insertFrontVector_n.
 BENCHMARK_RELATIVE_N(insertBackVector_n, n) {
  std::vector<int> v;
  // n is unsigned, so the index is unsigned too (no signed/unsigned
  // comparison).
  for (unsigned int i = 0; i < n; i++) {
    v.insert(v.end(), static_cast<int>(i));
  }
 }
 // Baseline for the relative benchmark that follows: inserts n elements at
 // the front of a vector and then n more at the end, all of it timed.
 BENCHMARK_N(insertFrontEnd_n, n) {
  std::vector<int> v;
  // n is unsigned, so the indices are unsigned too (no signed/unsigned
  // comparison).
  for (unsigned int i = 0; i < n; i++) {
    v.insert(v.begin(), static_cast<int>(i));
  }
  for (unsigned int i = 0; i < n; i++) {
    v.insert(v.end(), static_cast<int>(i));
  }
 }
 // Same work as insertFrontEnd_n, but the second phase (inserting at the
 // end) runs inside BENCHMARK_SUSPEND, so only the front insertions are
 // charged to the benchmark. Reported relative to insertFrontEnd_n.
 BENCHMARK_RELATIVE_N(insertFrontEndSuspend_n, n) {
  std::vector<int> v;
  // n is unsigned, so the indices are unsigned too (no signed/unsigned
  // comparison).
  for (unsigned int i = 0; i < n; i++) {
    v.insert(v.begin(), static_cast<int>(i));
  }
  BENCHMARK_SUSPEND {
    for (unsigned int i = 0; i < n; i++) {
      v.insert(v.end(), static_cast<int>(i));
    }
  }
 }
 }  // namespace benchmark
 }  // namespace rocksdb
 // Entry point of the test binary: runs every registered benchmark and
 // prints the result table.
 // NOTE(review): argc/argv are not handed to gflags here, so the bm_*
 // flags presumably keep their default values -- confirm whether
 // command-line parsing was intended.
 int main(int argc, char** argv) {
  rocksdb::benchmark::RunBenchmarks();
  return 0;
 }
--- a/util/log_write_benchmark.cc
+++ b/util/log_write_benchmark.cc
@ -12,7 +12,7 @@
 // A simple benchmark to simulate transactional logs
-DEFINE_int32(num_records, 6000, "Size of each record.");
+DEFINE_int32(num_records, 6000, "Number of records.");
 DEFINE_int32(record_size, 249, "Size of each record.");
 DEFINE_int32(record_interval, 10000, "Interval between records (microSec)");
 DEFINE_int32(bytes_per_sync, 0, "bytes_per_sync parameter in EnvOptions");
@ -20,7 +20,7 @@ DEFINE_bool(enable_sync, false, "sync after each write.");
 namespace rocksdb {
 void RunBenchmark() {
-  std::string file_name = test::TmpDir() + "/log_write_bench.log";
+  std::string file_name = test::TmpDir() + "/log_write_benchmark.log";
  Env* env = Env::Default();
  EnvOptions env_options;
  env_options.use_mmap_writes = false;