#! /usr/bin/env python
import os
import re
import sys
import time
import random
import getopt
import logging
import tempfile
import subprocess
import shutil

# This script runs and kills db_stress multiple times. It checks consistency
# in case of unsafe crashes in RocksDB.

# Single source of truth for the command-line synopsis.
_USAGE = ("db_crashtest.py -d <duration_test> -t <#threads> "
          "-i <interval for one run> -o <ops_per_thread> "
          "-b <write_buffer_size> [-s (simple mode)]\n")

# db_stress command line used with -s (simple mode). The %s placeholders are,
# in order: ops_per_thread, threads, write_buffer_size, db path, mmap_read,
# filter_deletes.
_SIMPLE_CMD_TEMPLATE = """
    ./db_stress
    --column_families=1
    --test_batches_snapshots=0
    --ops_per_thread=%s
    --threads=%s
    --write_buffer_size=%s
    --destroy_db_initially=0
    --reopen=20
    --readpercent=50
    --prefixpercent=0
    --writepercent=35
    --delpercent=5
    --iterpercent=10
    --db=%s
    --max_key=100000000
    --mmap_read=%s
    --block_size=16384
    --cache_size=1048576
    --open_files=-1
    --verify_checksum=1
    --sync=0
    --progress_reports=0
    --disable_wal=0
    --disable_data_sync=1
    --target_file_size_base=16777216
    --target_file_size_multiplier=1
    --max_write_buffer_number=3
    --max_background_compactions=1
    --max_bytes_for_level_base=67108864
    --filter_deletes=%s
    --memtablerep=skip_list
    --prefix_size=0
    --set_options_one_in=0
    """

# db_stress command line used without -s. Same placeholder order as above.
_DEFAULT_CMD_TEMPLATE = """
    ./db_stress
    --test_batches_snapshots=1
    --ops_per_thread=%s
    --threads=%s
    --write_buffer_size=%s
    --destroy_db_initially=0
    --reopen=20
    --readpercent=45
    --prefixpercent=5
    --writepercent=35
    --delpercent=5
    --iterpercent=10
    --db=%s
    --max_key=100000000
    --mmap_read=%s
    --block_size=16384
    --cache_size=1048576
    --open_files=500000
    --verify_checksum=1
    --sync=0
    --progress_reports=0
    --disable_wal=0
    --disable_data_sync=1
    --target_file_size_base=2097152
    --target_file_size_multiplier=2
    --max_write_buffer_number=3
    --max_background_compactions=20
    --max_bytes_for_level_base=10485760
    --filter_deletes=%s
    --memtablerep=prefix_hash
    --prefix_size=7
    --set_options_one_in=10000
    """

_ENDED_EARLY_MSG = "WARNING: db_stress ended before kill: exitcode=%d\n"


def _usage_exit(status):
    """Print the usage synopsis and terminate with the given exit status."""
    print(_USAGE)
    sys.exit(status)


def _parse_int(opt, arg):
    """Convert an option value to int; on failure print usage and exit 2."""
    try:
        return int(arg)
    except ValueError:
        print("Invalid integer value for %s: %r\n" % (opt, arg))
        _usage_exit(2)


def _build_cmd(simple_mode, ops_per_thread, threads, write_buf_size, dbname):
    """Return the single-line db_stress shell command for one run."""
    template = _SIMPLE_CMD_TEMPLATE if simple_mode else _DEFAULT_CMD_TEMPLATE
    # mmap_read and filter_deletes are re-randomized for every run.
    mmap_read = random.randint(0, 1)
    filter_deletes = random.randint(0, 1)
    filled = template % (ops_per_thread, threads, write_buf_size, dbname,
                         mmap_read, filter_deletes)
    # Collapse the multi-line template into one space-separated line.
    return re.sub(r'\s+', ' ', filled)


def _wait_then_kill(child, killtime):
    """Poll the child once a second until killtime, then kill it.

    If the child exits on its own first, a warning with its exit code is
    printed and no kill is attempted.
    """
    while time.time() < killtime:
        if child.poll() is not None:
            print(_ENDED_EARLY_MSG % child.returncode)
            return
        time.sleep(1)

    # The child may have exited between the last poll and the deadline.
    if child.poll() is not None:
        print(_ENDED_EARLY_MSG % child.returncode)
        return

    child.kill()
    print("KILLED %d\n" % child.pid)
    time.sleep(1)  # time to stabilize after a kill


def _report_stderr(child):
    """Echo every non-blank stderr line of the child; return True if any.

    Reads until end-of-file, so a blank line in the middle of the output
    does not hide the lines that follow it.
    """
    had_errors = False
    # readline() returns '' only at EOF; blank lines still carry a newline.
    for raw in iter(child.stderr.readline, ''):
        line = raw.strip()
        if line:
            had_errors = True
            print('***' + line + '^')
    child.stderr.close()
    return had_errors


def main(argv):
    """Repeatedly run db_stress, kill it, and fail on any stderr output.

    Options (argv excludes the program name):
      -h        print usage and exit
      -s        simple mode (different db_stress flags; write buffer size
                defaults to 32 MB unless -b is given)
      -d <sec>  total duration of the test (default 6000)
      -t <n>    db_stress threads (default 32)
      -i <sec>  how long each db_stress instance runs before being killed
                (default 120)
      -o <n>    ops per thread (default 100000000)
      -b <n>    write buffer size in bytes (default 4 MB)

    The database directory is a fresh temporary directory, or
    $TEST_TMPDIR/rocksdb_crashtest when TEST_TMPDIR is set. It is removed
    only when the whole test finishes without errors.

    Exits with status 2 on a usage error or as soon as a run produced any
    stderr output; returns None on success.
    """
    try:
        opts, args = getopt.getopt(argv, "hsd:t:i:o:b:")
    except getopt.GetoptError:
        _usage_exit(2)

    # default values, will be overridden by cmdline args
    interval = 120  # time for one db_stress instance to run
    duration = 6000  # total time for this script to test db_stress
    threads = 32
    # since we will be killing anyway, use large value for ops_per_thread
    ops_per_thread = 100000000
    write_buf_size = 4 * 1024 * 1024
    simple_mode = False
    write_buf_size_set = False

    for opt, arg in opts:
        if opt == '-h':
            print(_USAGE)
            sys.exit()
        elif opt == '-s':
            simple_mode = True
            # An explicit -b seen earlier wins over the simple-mode default.
            if not write_buf_size_set:
                write_buf_size = 32 * 1024 * 1024
        elif opt == "-d":
            duration = _parse_int(opt, arg)
        elif opt == "-t":
            threads = _parse_int(opt, arg)
        elif opt == "-i":
            interval = _parse_int(opt, arg)
        elif opt == "-o":
            ops_per_thread = _parse_int(opt, arg)
        elif opt == "-b":
            write_buf_size = _parse_int(opt, arg)
            write_buf_size_set = True
        else:
            # getopt only yields declared options; kept as a safety net.
            _usage_exit(2)

    exit_time = time.time() + duration

    print("Running blackbox-crash-test with \ninterval_between_crash="
          + str(interval) + "\ntotal-duration=" + str(duration)
          + "\nthreads=" + str(threads) + "\nops_per_thread="
          + str(ops_per_thread) + "\nwrite_buffer_size="
          + str(write_buf_size) + "\n")

    test_tmpdir = os.environ.get("TEST_TMPDIR")
    if test_tmpdir is None or test_tmpdir == "":
        dbname = tempfile.mkdtemp(prefix='rocksdb_crashtest_')
    else:
        dbname = test_tmpdir + "/rocksdb_crashtest"
        # Start from a clean directory; errors (e.g. missing dir) are ignored.
        shutil.rmtree(dbname, True)

    while time.time() < exit_time:
        killtime = time.time() + interval
        cmd = _build_cmd(simple_mode, ops_per_thread, threads,
                         write_buf_size, dbname)

        # universal_newlines=True opens stderr in text mode so the lines are
        # str on both Python 2 and Python 3.
        child = subprocess.Popen([cmd], stderr=subprocess.PIPE, shell=True,
                                 universal_newlines=True)
        print("Running db_stress with pid=%d: %s\n\n" % (child.pid, cmd))

        _wait_then_kill(child, killtime)
        run_had_errors = _report_stderr(child)
        child.wait()  # reap the child so it does not linger as a zombie

        if run_had_errors:
            # Leave the database directory in place for inspection.
            sys.exit(2)

        time.sleep(1)  # time to stabilize before the next run

    # we need to clean up after ourselves -- only do this on test success
    shutil.rmtree(dbname, True)


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))