diff --git a/tools/regression_test.sh b/tools/regression_test.sh index 8bb7caaa9..2743c5aee 100755 --- a/tools/regression_test.sh +++ b/tools/regression_test.sh @@ -46,6 +46,7 @@ # Default: 1 # TEST_PATH: the root directory of the regression test. # Default: "/tmp/rocksdb/regression_test" +# !!! NOTE !!! - a DB will also be saved in $TEST_PATH/../db # RESULT_PATH: the directory where the regression results will be generated. # Default: "$TEST_PATH/current_time" # REMOTE_USER_AT_HOST: If set, then test will run on the specified host under @@ -125,15 +126,14 @@ function main { setup_test_directory if [ $TEST_MODE -le 1 ]; then - tmp=$DB_PATH - DB_PATH=$ORIGIN_PATH - test_remote "test -d $DB_PATH" + test_remote "test -d $ORIGIN_PATH" if [[ $? -ne 0 ]]; then echo "Building DB..." # compactall alone will not print ops or threads, which will fail update_report run_db_bench "fillseq,compactall" $NUM_KEYS 1 0 0 + # only save for future use on success + test_remote "mv $DB_PATH $ORIGIN_PATH" fi - DB_PATH=$tmp fi if [ $TEST_MODE -ge 1 ]; then build_checkpoint @@ -204,9 +204,32 @@ function init_arguments { # $4 --- use_existing_db. Default: 1 # $5 --- update_report. Default: 1 function run_db_bench { - # this will terminate all currently-running db_bench - find_db_bench_cmd="ps aux | grep db_bench | grep -v grep | grep -v aux | awk '{print \$2}'" + # Make sure no other db_bench is running. (Make sure command succeeds if pidof + # command exists but finds nothing.) + pids_cmd='pidof db_bench || pidof --version > /dev/null' + # But first, make best effort to kill any db_bench that have run for more + # than 12 hours, as that indicates a hung or runaway process. + kill_old_cmd='for PID in $(pidof db_bench); do [ "$(($(stat -c %Y /proc/$PID) + 43200))" -lt "$(date +%s)" ] && echo "Killing old db_bench $PID" && kill $PID && sleep 5 && kill -9 $PID && sleep 5; done; pidof --version > /dev/null' + if ! [ -z "$REMOTE_USER_AT_HOST" ]; then + pids_cmd="$SSH $REMOTE_USER_AT_HOST '$pids_cmd'" + kill_old_cmd="$SSH $REMOTE_USER_AT_HOST '$kill_old_cmd'" + fi + + eval $kill_old_cmd + exit_on_error $? "$kill_old_cmd" + + pids_output="$(eval $pids_cmd)" + exit_on_error $? "$pids_cmd" + + if [ "$pids_output" != "" ]; then + echo "Stopped regression_test.sh as there're still recent db_bench " + echo "processes running: $pids_output" + echo "Clean up test directory" + cleanup_test_directory $TEST_ROOT_DIR + exit 2 + fi + # Build db_bench command ops=${2:-$NUM_OPS} threads=${3:-$NUM_THREADS} USE_EXISTING_DB=${4:-1} @@ -220,7 +243,7 @@ function run_db_bench { options_file_arg=$(setup_options_file) echo "$options_file_arg" # use `which time` to avoid using bash's internal time command - db_bench_cmd="("'\$(which time)'" -p $DB_BENCH_DIR/db_bench \ + db_bench_cmd="\$(which time) -p $DB_BENCH_DIR/db_bench \ --benchmarks=$1 --db=$DB_PATH --wal_dir=$WAL_PATH \ --use_existing_db=$USE_EXISTING_DB \ --perf_level=$PERF_LEVEL \ @@ -248,38 +271,16 @@ function run_db_bench { --seed=$SEED \ --multiread_batched=true \ --batch_size=$MULTIREAD_BATCH_SIZE \ - --multiread_stride=$MULTIREAD_STRIDE) 2>&1" - ps_cmd="ps aux" + --multiread_stride=$MULTIREAD_STRIDE 2>&1" if ! [ -z "$REMOTE_USER_AT_HOST" ]; then echo "Running benchmark remotely on $REMOTE_USER_AT_HOST" - db_bench_cmd="$SSH $REMOTE_USER_AT_HOST \"$db_bench_cmd\"" - ps_cmd="$SSH $REMOTE_USER_AT_HOST $ps_cmd" - fi - - ## make sure no db_bench is running - # The following statement is necessary make sure "eval $ps_cmd" will success. - # Otherwise, if we simply check whether "$(eval $ps_cmd | grep db_bench)" is - # successful or not, then it will always be false since grep will return - # non-zero status when there's no matching output. - ps_output="$(eval $ps_cmd)" - exit_on_error $? "$ps_cmd" - - # perform the actual command to check whether db_bench is running - grep_output="$(eval $ps_cmd | grep db_bench | grep -v grep)" - if [ "$grep_output" != "" ]; then - echo "Stopped regression_test.sh as there're still db_bench processes running:" - echo $grep_output - echo "Clean up test directory" - cleanup_test_directory $TEST_ROOT_DIR - exit 2 + db_bench_cmd="$SSH $REMOTE_USER_AT_HOST '$db_bench_cmd'" fi + echo db_bench_cmd="$db_bench_cmd" - ## run the db_bench - cmd="($db_bench_cmd || db_bench_error=1) | tee -a $RESULT_PATH/$1" - exit_on_error $? - echo $cmd - eval $cmd - exit_on_error $db_bench_error + # Run the db_bench command + eval $db_bench_cmd | tee -a "$RESULT_PATH/$1" + exit_on_error ${PIPESTATUS[0]} db_bench if [ $UPDATE_REPORT -ne 0 ]; then update_report "$1" "$RESULT_PATH/$1" $ops $threads fi @@ -397,7 +398,7 @@ function test_remote { function run_local { eval "$1" - exit_on_error $? + exit_on_error $? "$1" } function setup_options_file { @@ -416,8 +417,14 @@ function setup_options_file { function setup_test_directory { echo "Deleting old regression test directories and creating new ones" + run_local 'test "$DB_PATH" != "."' run_remote "rm -rf $DB_PATH" - run_remote "rm -rf $DB_BENCH_DIR" + + if [ "$DB_BENCH_DIR" != "." ]; then + run_remote "rm -rf $DB_BENCH_DIR" + fi + + run_local 'test "$RESULT_PATH" != "."' run_local "rm -rf $RESULT_PATH" if ! [ -z "$WAL_PATH" ]; then