From e937d471801b3a9237a0b19cb3f4af43c20ed9a9 Mon Sep 17 00:00:00 2001
From: Mayank Agarwal
Date: Tue, 12 Mar 2013 23:20:14 -0700
Subject: [PATCH] Python script to periodically run and kill the db_stress test

Summary: The script runs and kills the stress test periodically. Default
values have been used in the script now. Should I make this a part of the
Makefile or automated rocksdb build? The values can be easily changed in the
script right now, but should I add some support for variable values or input
to the script? I believe the script achieves its objective of unsafe crashes
and reopening to expect sanity in the database.

Test Plan: python tools/db_crashtest.py

Reviewers: dhruba, vamsi, MarkCallaghan

Reviewed By: vamsi

CC: leveldb

Differential Revision: https://reviews.facebook.net/D9369
---
 tools/db_crashtest.py | 132 +++++++++++++++++++++++++++++++++++++++++++
 tools/db_stress.cc    |   7 ++--
 2 files changed, 135 insertions(+), 4 deletions(-)
 create mode 100644 tools/db_crashtest.py

diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py
new file mode 100644
index 000000000..af275d37d
--- /dev/null
+++ b/tools/db_crashtest.py
@@ -0,0 +1,132 @@
+import os
+import sys
+import time
+import shlex
+import getopt
+import logging
+import subprocess
+
+# This python script runs and kills db_stress multiple times with
+#   test-batches-snapshot ON,
+#   a small key space (--max_key=100), and
+#   reads making up half of the operations (--readpercent=50).
+# This checks consistency in case of unsafe crashes in Rocksdb
+
+ROCKSDB_DIR = os.path.expanduser("~/rocksdb")
+DB_DIR = "/tmp/rocksdb/crashtest"
+USAGE = ("db_crashtest.py -d <duration_sec> -t <#threads> "
+         "-i <interval_sec> -o <ops_per_thread> "
+         "-b <write_buffer_size>\n")
+
+
+def _parse_args(argv):
+    """Parse the command line into a dict of integer settings.
+
+    Prints the usage text and exits with status 2 on an unknown option or
+    a non-integer value; -h prints the usage text and exits with status 0.
+    """
+    # default values, will be overridden by cmdline args
+    settings = {
+        'duration': 6000,  # total seconds this script tests db_stress
+        'threads': 32,
+        'interval': 120,  # seconds one db_stress instance runs
+        'ops_per_thread': 500000,
+        'write_buf_size': 4 * 1024 * 1024,
+    }
+    names = {'-d': 'duration', '-t': 'threads', '-i': 'interval',
+             '-o': 'ops_per_thread', '-b': 'write_buf_size'}
+    try:
+        opts, _ = getopt.getopt(argv, "hd:t:i:o:b:")
+    except getopt.GetoptError:
+        print(USAGE)
+        sys.exit(2)
+
+    for opt, arg in opts:
+        if opt == '-h':
+            print(USAGE)
+            sys.exit()
+        try:
+            settings[names[opt]] = int(arg)
+        except ValueError:
+            print(USAGE)
+            sys.exit(2)
+    return settings
+
+
+def _build_db_stress():
+    """Build db_stress in ROCKSDB_DIR; exit with status 2 if make fails."""
+    if subprocess.call(['make', '-C', ROCKSDB_DIR, 'db_stress']) != 0:
+        print("make db_stress failed\n")
+        sys.exit(2)
+
+
+def _run_and_kill(interval, threads, ops_per_thread, write_buf_size):
+    """Run one db_stress instance and kill it after `interval` seconds.
+
+    Returns the non-blank lines db_stress wrote to stderr, stripped of
+    surrounding whitespace.  An empty list means the run reported no errors.
+    """
+    if not os.path.isdir(DB_DIR):
+        os.makedirs(DB_DIR)
+    cmd = [
+        os.path.join(ROCKSDB_DIR, 'db_stress'),
+        '--test_batches_snapshots=1',
+        '--ops_per_thread=' + str(ops_per_thread),
+        '--threads=' + str(threads),
+        '--write_buffer_size=' + str(write_buf_size),
+        '--reopen=10',
+        '--readpercent=50',
+        '--db=' + DB_DIR,
+        '--max_key=100',
+    ]
+    killtime = time.time() + interval
+    try:
+        # No shell in between, so kill() below hits db_stress itself.
+        child = subprocess.Popen(cmd, stderr=subprocess.PIPE,
+                                 universal_newlines=True)
+    except OSError as e:
+        print("could not start db_stress: " + str(e) + "\n")
+        sys.exit(2)
+
+    # Poll instead of sleeping for the whole interval so that a db_stress
+    # that exits early is noticed right away.
+    while time.time() < killtime and child.poll() is None:
+        time.sleep(1)
+
+    if child.poll() is not None:
+        logging.warning("WARNING: db_stress completed before kill\n")
+    else:
+        child.kill()
+        print("KILLED \n")
+        time.sleep(1)  # time to stabilize after a kill
+
+    # communicate() reads stderr up to EOF (a blank line does not end it)
+    # and reaps the child so no zombie is left behind.
+    _, err_output = child.communicate()
+    return [line.strip() for line in err_output.splitlines() if line.strip()]
+
+
+def main(argv):
+    """Run and kill db_stress repeatedly until the test duration elapses.
+
+    Exits with status 2 as soon as a run writes anything to stderr.
+    """
+    settings = _parse_args(argv)  # before the build, so -h does not run make
+    _build_db_stress()
+
+    exit_time = time.time() + settings['duration']
+    while time.time() < exit_time:
+        print("Running db_stress \n")
+        errors = _run_and_kill(settings['interval'], settings['threads'],
+                               settings['ops_per_thread'],
+                               settings['write_buf_size'])
+        for line in errors:
+            print('***' + line + '^')
+        if errors:
+            sys.exit(2)
+        time.sleep(1)  # time to stabilize before the next run
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
diff --git a/tools/db_stress.cc b/tools/db_stress.cc
index a915c2bf4..b5ad3ed1e 100644
--- a/tools/db_stress.cc
+++ b/tools/db_stress.cc
@@ -232,8 +232,7 @@ class Stats {
       double micros = now - last_op_finish_;
       hist_.Add(micros);
       if (micros > 20000) {
-        fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
-        fflush(stderr);
+        fprintf(stdout, "long op: %.1f micros%30s\r", micros, "");
       }
       last_op_finish_ = now;
     }
@@ -247,8 +246,7 @@ class Stats {
       else if (next_report_ < 100000) next_report_ += 10000;
       else if (next_report_ < 500000) next_report_ += 50000;
       else next_report_ += 100000;
-      fprintf(stderr, "... finished %ld ops%30s\r", done_, "");
-      fflush(stderr);
+      fprintf(stdout, "... finished %ld ops%30s\r", done_, "");
     }
   }
 
@@ -868,6 +866,7 @@ class StressTest {
     fprintf(stdout, "Number of threads : %d\n", FLAGS_threads);
     fprintf(stdout, "Ops per thread : %d\n", FLAGS_ops_per_thread);
     fprintf(stdout, "Read percentage : %d\n", FLAGS_readpercent);
+    fprintf(stdout, "Write-buffer-size : %d\n", FLAGS_write_buffer_size);
     fprintf(stdout, "Delete percentage : %d\n", FLAGS_delpercent);
     fprintf(stdout, "Max key : %ld\n", FLAGS_max_key);
     fprintf(stdout, "Ratio #ops/#keys : %ld\n",