From e637470f64d0e6844c8ac0a748948e427aef4a0b Mon Sep 17 00:00:00 2001
From: Alan Paxton <alan.paxton@gmail.com>
Date: Mon, 25 Jul 2022 14:44:10 -0700
Subject: [PATCH] Run new benchmark script in branch. (#10303)

Summary:
Configure CI to run modernised benchmark script

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10303

Reviewed By: ramvadiv

Differential Revision: D37719116

Pulled By: jay-zhuang

fbshipit-source-id: 79ecb1cd0abd4d800c6906ba6673268c2adee10e
---
 .circleci/config.yml              |  17 +--
 build_tools/benchmark_log_tool.py |   4 +
 tools/benchmark_ci.py             | 165 ++++++++++++++++++++++++++++++
 tools/benchmark_compare.sh        |   8 +-
 4 files changed, 186 insertions(+), 8 deletions(-)
 create mode 100755 tools/benchmark_ci.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 8c2c7e931..d9bf25fdf 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -190,14 +190,17 @@ commands:
   perform-benchmarks:
     steps:
       - run:
-          name: "Run basic benchmark, 5 min"
-          command: ./tools/benchmark.sh fillseq_enable_wal
+          name: "Test low-variance benchmarks"
+          command: ./tools/benchmark_ci.py --db_dir /tmp/rocksdb-benchmark-datadir --output_dir /tmp/benchmark-results --num_keys 10000000
           environment:
             LD_LIBRARY_PATH: /usr/local/lib
-            DB_DIR: /tmp/rocksdb-benchmark-datadir
-            WAL_DIR: /tmp/rocksdb-benchmark-waldir
-            OUTPUT_DIR: /tmp/benchmark-results
-            NUM_KEYS: 10000000
+            # How long to run parts of the test(s)
+            DURATION_RO: 450
+            DURATION_RW: 450
+            # The benchmark host has 32GB memory
+            # The following values are tailored to work with that
+            # Note, tests may not exercise the targeted issues if the memory is increased on new test hosts.
+
 
   post-benchmarks:
     steps:
@@ -209,7 +212,7 @@ commands:
           command: |
             set +e
             set +o pipefail
-            ./build_tools/benchmark_log_tool.py --tsvfile /tmp/benchmark-results/report.tsv --esdocument https://search-rocksdb-bench-k2izhptfeap2hjfxteolsgsynm.us-west-2.es.amazonaws.com/bench_test3/_doc
+            ./build_tools/benchmark_log_tool.py --tsvfile /tmp/benchmark-results/report.tsv --esdocument https://search-rocksdb-bench-k2izhptfeap2hjfxteolsgsynm.us-west-2.es.amazonaws.com/bench_test3_rix/_doc
             true
 
 executors:
diff --git a/build_tools/benchmark_log_tool.py b/build_tools/benchmark_log_tool.py
index 572888eef..2d5f962e1 100755
--- a/build_tools/benchmark_log_tool.py
+++ b/build_tools/benchmark_log_tool.py
@@ -62,7 +62,11 @@ class BenchmarkUtils:
 
     def conform_opensearch(row):
         (dt, _) = parser.parse(row['date'], fuzzy_with_tokens=True)
+        # test_date carries the parsed date in ISO format (the field that was previously expected).
+        # date is rewritten the same way, because it can arrive in a WRONG ISO FORMAT (no leading 0
+        # on a single-digit day-of-month), e.g. 2022-07-1T00:14:55 should be 2022-07-01T00:14:55
         row['test_date'] = dt.isoformat()
+        row['date'] = dt.isoformat()
         return dict((key.replace('.', '_'), value)
                     for (key, value) in row.items())
 
diff --git a/tools/benchmark_ci.py b/tools/benchmark_ci.py
new file mode 100755
index 000000000..efe18963a
--- /dev/null
+++ b/tools/benchmark_ci.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+#  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+#  This source code is licensed under both the GPLv2 (found in the
+#  COPYING file in the root directory) and Apache 2.0 License
+#  (found in the LICENSE.Apache file in the root directory).
+
+'''Run benchmark_compare.sh on the most recent build, for CI
+'''
+
+import argparse
+import glob
+import os
+import re
+import shutil
+import subprocess
+import sys
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+
+class Config:
+    '''Locations used by one CI benchmark run.
+
+    args must provide db_dir and output_dir; a leading ~ in either one is
+    expanded. The benchmark script and its working directory are taken
+    relative to the current working directory at construction time.
+    '''
+
+    # Environment variables that get_benchmark_env() copies from this
+    # process's environment when they are set.
+    benchmark_env_keys = [
+        'LD_LIBRARY_PATH', 'NUM_KEYS', 'KEY_SIZE', 'VALUE_SIZE',
+        'CACHE_SIZE_MB', 'DURATION_RW', 'DURATION_RO', 'MB_WRITE_PER_SEC',
+        'NUM_THREADS', 'COMPRESSION_TYPE', 'MIN_LEVEL_TO_COMPRESS',
+        'WRITE_BUFFER_SIZE_MB', 'TARGET_FILE_SIZE_BASE_MB',
+        'MAX_BYTES_FOR_LEVEL_BASE_MB', 'MAX_BACKGROUND_JOBS',
+        'CACHE_INDEX_AND_FILTER_BLOCKS', 'USE_O_DIRECT',
+        'STATS_INTERVAL_SECONDS', 'SUBCOMPACTIONS', 'COMPACTION_STYLE',
+    ]
+
+    def __init__(self, args):
+        cwd = os.getcwd()
+        tools_dir = f"{cwd}/tools"
+        self.version_file = './include/rocksdb/version.h'
+        self.data_dir = os.path.expanduser(str(args.db_dir))
+        self.results_dir = os.path.expanduser(str(args.output_dir))
+        self.benchmark_script = f"{tools_dir}/benchmark_compare.sh"
+        self.benchmark_cwd = tools_dir
+
+
+def read_version(config):
+    '''Read the RocksDB version out of config.version_file.
+
+    The ROCKSDB_MAJOR, ROCKSDB_MINOR and ROCKSDB_PATCH #define lines are
+    looked for in that order, each at the start of a line and at most one
+    per line.
+
+    Returns (major, minor, patch) as strings, or None when the file ends
+    before all three have been found.
+    '''
+    patterns = [re.compile(r'#define ROCKSDB_MAJOR\s([0-9]+)'),
+                re.compile(r'#define ROCKSDB_MINOR\s([0-9]+)'),
+                re.compile(r'#define ROCKSDB_PATCH\s([0-9]+)')]
+    found = []
+    with open(config.version_file, 'r') as reader:
+        for line in reader:
+            # Only the next outstanding component is tried on each line
+            hit = patterns[len(found)].match(line)
+            if hit is not None:
+                found.append(hit.group(1))
+                if len(found) == len(patterns):
+                    return tuple(found)
+    # Didn't complete a match
+    return None
+
+
+def prepare(version_str, config):
+    '''Clear old results for version_str and symlink db_bench.<version_str>.
+
+    Files are removed first, then directories deepest-first: glob lists a
+    directory ahead of its contents and os.rmdir() needs it to be empty.
+    '''
+    old = glob.glob(f"{config.results_dir}/{version_str}/**", recursive=True)
+    for f in filter(os.path.isfile, old):
+        logging.debug(f"remove file {f}")
+        os.remove(f)
+    for f in filter(os.path.isdir, reversed(old)):
+        logging.debug(f"remove dir {f}")
+        os.rmdir(f)
+    # Create a symlink to the db_bench executable
+    db_bench_vers = f"{config.benchmark_cwd}/db_bench.{version_str}"
+    os.symlink(f"{os.getcwd()}/db_bench", db_bench_vers)
+
+
+def results(version_str, config):
+    '''Copy the version's report.tsv up to the top level of results_dir.'''
+    report = f"{config.results_dir}/{version_str}/report.tsv"
+    shutil.copyfile(report, f"{config.results_dir}/report.tsv")
+
+
+def cleanup(version_str, config):
+    '''Remove the db_bench.<version_str> symlink from benchmark_cwd.'''
+    link = f"{config.benchmark_cwd}/db_bench.{version_str}"
+    os.unlink(link)
+
+
+def get_benchmark_env():
+    '''List (key, value) pairs for the benchmark_env_keys that are set.
+
+    Unset variables are skipped; the order of benchmark_env_keys is kept.
+    '''
+    candidates = ((key, os.getenv(key)) for key in Config.benchmark_env_keys)
+    return [(key, value) for key, value in candidates if value is not None]
+
+
+def main():
+    '''Tool for running benchmark_compare.sh on the most recent build, for CI
+    This tool will
+
+    (1) Work out the current version of RocksDB
+    (2) Run benchmark_compare with that version alone
+    (3) Return 0, or benchmark_compare.sh's exit status if it is non-zero
+    '''
+    parser = argparse.ArgumentParser(
+        description='benchmark_compare.sh Python wrapper for CI.')
+    parser.add_argument('--db_dir', default='~/tmp/rocksdb-benchmark-datadir',
+                        help='Database directory hierarchy to use')
+    parser.add_argument('--output_dir', default='~/tmp/benchmark-results',
+                        help='Benchmark output goes here')
+    parser.add_argument('--num_keys', default='10000',
+                        help='Number of database keys to use in benchmark test(s) (determines size of test job)')
+    args = parser.parse_args()
+    config = Config(args)
+
+    version = read_version(config)
+    if version is None:
+        raise Exception(
+            f"Could not read RocksDB version from {config.version_file}")
+    version_str = f"{version[0]}.{version[1]}.{version[2]}"
+    logging.info(f"Run benchmark_ci with RocksDB version {version_str}")
+
+    prepare(version_str, config)
+
+    try:
+        # The child gets only these variables; --num_keys overrides NUM_KEYS
+        env = get_benchmark_env()
+        env.append(('NUM_KEYS', args.num_keys))
+        cmd = [config.benchmark_script,
+               config.data_dir, config.results_dir, version_str]
+        logging.info(f"Run {cmd} env={env} cwd={config.benchmark_cwd}")
+        status = subprocess.run(cmd, env=dict(env),
+                                cwd=config.benchmark_cwd).returncode
+        if status != 0:
+            # Do not publish a report from a run that failed
+            logging.error(f"{config.benchmark_script} exited with {status}")
+            return status
+        results(version_str, config)
+    finally:
+        cleanup(version_str, config)
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/tools/benchmark_compare.sh b/tools/benchmark_compare.sh
index 327f6b4f6..9adea432c 100755
--- a/tools/benchmark_compare.sh
+++ b/tools/benchmark_compare.sh
@@ -9,6 +9,9 @@ odir=$2
 K=1024
 M=$((1024 * K))
 
+# Dynamic loader configuration
+ld_library_path=${LD_LIBRARY_PATH:-""}
+
 # Benchmark configuration
 duration_rw=${DURATION_RW:-65}
 duration_ro=${DURATION_RO:-65}
@@ -64,8 +67,11 @@ blob_compression_type=${BLOB_COMPRESSION_TYPE:-${compression_type}}
 blob_gc_age_cutoff=${BLOB_GC_AGE_CUTOFF:-"0.25"}
 blob_gc_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1}
 
+# Arguments for dynamic loading
+base_args=( LD_LIBRARY_PATH="$ld_library_path" )
+
 # Arguments used for all tests
-base_args=( NUM_KEYS="$num_keys" )
+base_args+=( NUM_KEYS="$num_keys" )
 base_args+=( NUM_THREADS="$num_threads" )
 base_args+=( KEY_SIZE="$key_size" )
 base_args+=( VALUE_SIZE="$value_size" )