Allow regression_test.sh to specify OPTIONS_FILE. Add header comments.

Summary: This patch makes the following improvements to regression_test.sh: * Allow regression_test.sh to specify OPTIONS_FILE. * Add header comments that include examples of how to run the script and introduce all configurable parameters. * Bug fix. Test Plan: Run the example commands in the header comments of regression_test.sh. Reviewers: sdong, yiwu, gunnarku Reviewed By: gunnarku Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59175
10 years ago · fda098461b
parent 0fee896841
commit fda098461b
1 changed files with 111 additions and 19 deletions
--- a/tools/regression_test.sh
+++ b/tools/regression_test.sh
@ -1,5 +1,81 @@
 #!/bin/bash
-# REQUIRE: db_bench binary exists in the current directory
+# The RocksDB regression test script.
+# REQUIREMENT: must be able to run make db_bench in the current directory
+#
+# This script will do the following things in order:
+#
+# 1. check out the specified rocksdb commit.
+# 2. build db_bench using the specified commit
+# 3. setup test directory $TEST_PATH.  If not specified, then the test directory
+#    will be "/tmp/rocksdb/regression_test"
+# 4. run a set of benchmarks on the specified host
+#    (can be either locally or remotely)
+# 5. generate a report in $RESULT_PATH.  If RESULT_PATH is not specified,
+#    RESULT_PATH will be set to $TEST_PATH/results/current_time
+#
+# = Examples =
+# * Run the regression test using rocksdb commit abcdef, writing results
+#   and temp files to "/my/output/dir":
+#
+#   TEST_PATH=/my/output/dir COMMIT_ID=abcdef ./tools/regression_test.sh
+#
+# * Run the regression test on a remote host under "/my/output/dir" directory
+#   and store the result locally in "/my/benchmark/results" using commit
+#   abcdef and with the rocksdb options specified in /my/path/to/OPTIONS-012345
+#   with 1000000000 keys in each benchmark in the regression test where each
+#   key and value are 100 and 900 bytes respectively:
+#
+#   REMOTE_USER_AT_HOST=yhchiang@my.remote.host \
+#       TEST_PATH=/my/output/dir \
+#       RESULT_PATH=/my/benchmark/results \
+#       COMMIT_ID=abcdef \
+#       OPTIONS_FILE=/my/path/to/OPTIONS-012345 \
+#       NUM_KEYS=1000000000 \
+#       KEY_SIZE=100 \
+#       VALUE_SIZE=900 \
+#       ./tools/regression_test.sh
+#
+# = Regression test environmental parameters =
+#   TEST_PATH: the root directory of the regression test.
+#       Default: "/tmp/rocksdb/regression_test"
+#   RESULT_PATH: the directory where the regression results will be generated.
+#       Default: "$TEST_PATH/results/current_time"
+#   REMOTE_USER_AT_HOST: If set, then test will run on the specified host under
+#       TEST_PATH directory and outputs test results locally in RESULT_PATH
+#       The REMOTE_USER_AT_HOST should follow the format user-id@host.name
+#   DB_PATH: the path where the rocksdb database will be created during the
+#       regression test.  Default:  $TEST_PATH/db
+#   WAL_PATH: the path where the rocksdb WAL will be written.
+#       Default:  $TEST_PATH/wal
+#   OPTIONS_FILE:  If specified, then the regression test will use the specified
+#       file to initialize the RocksDB options in its benchmarks.  Note that
+#       this feature only works for commits after 88acd93 or rocksdb versions
+#       later than 4.9.
+#
+# = db_bench parameters =
+#   NUM_THREADS:  The number of concurrent foreground threads that will issue
+#       database operations in the benchmark.  Default: 16.
+#   NUM_KEYS:  The number of keys issued by each thread in the benchmark.
+#       Default: 1G.
+#   KEY_SIZE:  The size of each key in bytes in db_bench.  Default: 100.
+#   VALUE_SIZE:  The size of each value in bytes in db_bench.  Default: 900.
+#   CACHE_SIZE:  The size of RocksDB block cache used in db_bench.  Default: 1G
+#   STATISTICS:  If 1, then statistics is on in db_bench.  Default: 0.
+#   COMPRESSION_RATIO:  The compression ratio of the key generated in db_bench.
+#       Default: 0.5.
+#   HISTOGRAM:  If 1, then the histogram feature of db_bench is on.
+#   STATS_PER_INTERVAL:  If 1, then the statistics will be reported for every
+#       STATS_INTERVAL_SECONDS seconds.  Default: 1.
+#   STATS_INTERVAL_SECONDS:  If STATS_PER_INTERVAL is set to 1, then statistics
+#       will be reported every STATS_INTERVAL_SECONDS seconds.  Default: 60.
+#   MAX_BACKGROUND_FLUSHES:  The maximum number of concurrent flushes in
+#       db_bench.  Default: 4.
+#   MAX_BACKGROUND_COMPACTIONS:  The maximum number of concurrent compactions
+#       in db_bench.  Default: 16.
+#   SEEK_NEXTS:  Controls how many Next() will be called after seek.
+#       Default: 10.
+#   SEED:  random seed that controls the randomness of the benchmark.
+#       Default: $( date +%s )

 function main {
  commit=${1:-"origin/master"}
@ -32,18 +108,20 @@ function init_arguments {
  G=$((1024 * M))

  current_time=$(date +"%F-%H:%M:%S")
-  RESULT_PATH=${2:-"$1/results/$current_time"}
+  RESULT_PATH=${RESULT_PATH:-"$1/results/$current_time"}
  COMMIT_ID=`git log | head -n1 | cut -c 8-`
  SUMMARY_FILE="$RESULT_PATH/SUMMARY.csv"

  DB_PATH=${3:-"$1/db/"}
  WAL_PATH=${4:-"$1/wal/"}
-  if [ -z "$REMOTE_HOST_USER" ]; then
+  if [ -z "$REMOTE_USER_AT_HOST" ]; then
    DB_BENCH_DIR=${5:-"."}
  else
    DB_BENCH_DIR=${5:-"$1/db_bench"}
  fi

+  SCP=${SCP:-"scp"}
+  SSH=${SSH:-"ssh"}
  NUM_THREADS=${NUM_THREADS:-16}
  NUM_KEYS=${NUM_KEYS:-$((1 * G))}
  KEY_SIZE=${KEY_SIZE:-100}
@ -73,6 +151,7 @@ function run_db_bench {
  echo "======================================================================="
  echo ""
  db_bench_error=0
+  options_file_arg=$(setup_options_file)
  db_bench_cmd="$DB_BENCH_DIR/db_bench \
      --benchmarks=$1 --db=$DB_PATH --wal_dir=$WAL_PATH \
      --use_existing_db=$USE_EXISTING_DB \
@ -82,6 +161,7 @@ function run_db_bench {
      --value_size=$VALUE_SIZE \
      --cache_size=$CACHE_SIZE \
      --statistics=$STATISTICS \
+      $options_file_arg \
      --compression_ratio=$COMPRESSION_RATIO \
      --histogram=$HISTOGRAM \
      --seek_nexts=$SEEK_NEXTS \
@ -92,10 +172,11 @@ function run_db_bench {
      --seed=$SEED 2>&1"
  kill_db_bench_cmd="pkill db_bench"
  ps_cmd="ps aux"
-  if ! [ -z "$REMOTE_HOST_USER" ]; then
-    kill_db_bench_cmd="$SSH $REMOTE_HOST_USER $kill_db_bench_cmd"
-    db_bench_cmd="$SSH $REMOTE_HOST_USER $db_bench_cmd"
-    ps_cmd="$SSH $REMOTE_HOST_USER $ps_cmd"
+  if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
+    echo "Running benchmark remotely on $REMOTE_USER_AT_HOST"
+    kill_db_bench_cmd="$SSH $REMOTE_USER_AT_HOST $kill_db_bench_cmd"
+    db_bench_cmd="$SSH $REMOTE_USER_AT_HOST $db_bench_cmd"
+    ps_cmd="$SSH $REMOTE_USER_AT_HOST $ps_cmd"
  fi

  ## kill existing db_bench processes
@ -113,7 +194,7 @@ function run_db_bench {
  exit_on_error $? "$ps_cmd"

  # perform the actual command to check whether db_bench is running
-  grep_output="$(eval $ps_cmd | grep db_bench)"
+  grep_output="$(eval $ps_cmd | grep db_bench | grep -v grep)"
  if [ "$grep_output" != "" ]; then
    echo "Stopped regression_test.sh as there're still db_bench processes running:"
    echo $grep_output
@ -193,21 +274,32 @@ function build_db_bench {
 }

 function run_remote {
-  if ! [ -z "$REMOTE_HOST_USER" ]; then
-    cmd="$SSH $REMOTE_HOST_USER $1"
+  if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
+    cmd="$SSH $REMOTE_USER_AT_HOST $1"
  else
    cmd="$1"
  fi
  
-  result=0
-  eval "($cmd) || result=1"
-  exit_on_error $result "$cmd"
+  eval "$cmd"
+  exit_on_error $? "$cmd"
 }

 function run_local {
-  result=0
-  eval "($1 || result=1)"
-  exit_on_error $result
+  eval "$1"
+  exit_on_error $?
+}
+
+function setup_options_file {
+  if ! [ -z "$OPTIONS_FILE" ]; then
+    if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
+      options_file="$DB_BENCH_DIR/OPTIONS_FILE"
+      run_local "$SCP $OPTIONS_FILE $REMOTE_USER_AT_HOST:$options_file"
+    else
+      options_file="$OPTIONS_FILE"
+    fi
+    echo "--options_file=$options_file"
+  fi
+  echo ""
 }

 function setup_test_directory {
@ -215,14 +307,14 @@ function setup_test_directory {

  run_remote "rm -rf $DB_PATH"
  run_remote "rm -rf $WAL_PATH"
-  if ! [ -z "$REMOTE_HOST_USER" ]; then
+  if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
    run_remote "rm -rf $DB_BENCH_DIR"
  fi
  run_remote "mkdir -p $DB_PATH"
  run_remote "mkdir -p $WAL_PATH"
-  if ! [ -z "$REMOTE_HOST_USER" ]; then
+  if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
    run_remote "mkdir -p $DB_BENCH_DIR"
-    run_local "$SCP ./db_bench $REMOTE_HOST_USER:$DB_BENCH_DIR/db_bench"
+    run_local "$SCP ./db_bench $REMOTE_USER_AT_HOST:$DB_BENCH_DIR/db_bench"
  fi
  
  run_local "rm -rf $RESULT_PATH"