#7916298: merge tools/db_crashtest2.py into tools/db_crashtest.py
Summary: merge tools/db_crashtest2.py into tools/db_crashtest.py python tools/db_crashtest.py -h # show help message, ALL parameters can be overwrite by arguments Example usages: python tools/db_crashtest.py blackbox # run blackbox with default parameters python tools/db_crashtest.py blackbox --simple python tools/db_crashtest.py whitebox # run whitebox with default parameters python tools/db_crashtest.py whitebox --simple all default parameters are identical to previous version. Test Plan: `make crash_test` and make sure it can run with expected parameters pased to db_stress. Reviewers: igor, rven, anthony, IslamAbdelRahman, yhchiang, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D48567main
parent
ec1f8354a9
commit
4575de5b9e
@ -1,248 +0,0 @@ |
|||||||
#! /usr/bin/env python |
|
||||||
import os |
|
||||||
import re |
|
||||||
import sys |
|
||||||
import time |
|
||||||
import random |
|
||||||
import getopt |
|
||||||
import logging |
|
||||||
import tempfile |
|
||||||
import subprocess |
|
||||||
import shutil |
|
||||||
|
|
||||||
# This python script runs db_stress multiple times. Some runs with |
|
||||||
# kill_random_test that causes rocksdb to crash at various points in code. |
|
||||||
|
|
||||||
def main(argv): |
|
||||||
try: |
|
||||||
opts, args = getopt.getopt(argv, "hsd:t:k:o:b:") |
|
||||||
except getopt.GetoptError: |
|
||||||
print str(getopt.GetoptError) |
|
||||||
print "db_crashtest2.py -d <duration_test> -t <#threads> " \ |
|
||||||
"-k <kills with prob 1/k> -o <ops_per_thread> "\ |
|
||||||
"-b <write_buffer_size> [-s (simple mode)]\n" |
|
||||||
sys.exit(2) |
|
||||||
|
|
||||||
# default values, will be overridden by cmdline args |
|
||||||
kill_random_test = 97 # kill with probability 1/97 by default |
|
||||||
duration = 10000 # total time for this script to test db_stress |
|
||||||
threads = 32 |
|
||||||
ops_per_thread = 200000 |
|
||||||
write_buf_size = 4 * 1024 * 1024 |
|
||||||
simple_mode = False |
|
||||||
write_buf_size_set = False |
|
||||||
|
|
||||||
for opt, arg in opts: |
|
||||||
if opt == '-h': |
|
||||||
print "db_crashtest2.py -d <duration_test> -t <#threads> " \ |
|
||||||
"-k <kills with prob 1/k> -o <ops_per_thread> " \ |
|
||||||
"-b <write_buffer_size> [-s (simple mode)]\n" |
|
||||||
sys.exit() |
|
||||||
elif opt == '-s': |
|
||||||
simple_mode = True |
|
||||||
if not write_buf_size_set: |
|
||||||
write_buf_size = 32 * 1024 * 1024 |
|
||||||
elif opt == "-d": |
|
||||||
duration = int(arg) |
|
||||||
elif opt == "-t": |
|
||||||
threads = int(arg) |
|
||||||
elif opt == "-k": |
|
||||||
kill_random_test = int(arg) |
|
||||||
elif opt == "-o": |
|
||||||
ops_per_thread = int(arg) |
|
||||||
elif opt == "-b": |
|
||||||
write_buf_size = int(arg) |
|
||||||
write_buf_size_set = True |
|
||||||
else: |
|
||||||
print "unrecognized option " + str(opt) + "\n" |
|
||||||
print "db_crashtest2.py -d <duration_test> -t <#threads> " \ |
|
||||||
"-k <kills with prob 1/k> -o <ops_per_thread> " \ |
|
||||||
"-b <write_buffer_size>\n" |
|
||||||
sys.exit(2) |
|
||||||
|
|
||||||
cur_time = time.time() |
|
||||||
exit_time = cur_time + duration |
|
||||||
half_time = cur_time + duration / 2 |
|
||||||
|
|
||||||
print "Running whitebox-crash-test with \ntotal-duration=" + str(duration) \ |
|
||||||
+ "\nthreads=" + str(threads) + "\nops_per_thread=" \ |
|
||||||
+ str(ops_per_thread) + "\nwrite_buffer_size=" \ |
|
||||||
+ str(write_buf_size) + "\n" |
|
||||||
|
|
||||||
total_check_mode = 4 |
|
||||||
check_mode = 0 |
|
||||||
kill_mode = 0 |
|
||||||
|
|
||||||
test_tmpdir = os.environ.get("TEST_TMPDIR") |
|
||||||
if test_tmpdir is None or test_tmpdir == "": |
|
||||||
dbname = tempfile.mkdtemp(prefix='rocksdb_crashtest2_') |
|
||||||
else: |
|
||||||
dbname = test_tmpdir + "/rocksdb_crashtest2" |
|
||||||
shutil.rmtree(dbname, True) |
|
||||||
|
|
||||||
while time.time() < exit_time: |
|
||||||
killoption = "" |
|
||||||
if check_mode == 0: |
|
||||||
# run with kill_random_test |
|
||||||
if kill_mode == 0: |
|
||||||
killoption = " --kill_random_test=" + str(kill_random_test) |
|
||||||
elif kill_mode == 1: |
|
||||||
# Remove kill point for normal reads and reduce kill odds |
|
||||||
# by 3, so that it still runs about one minutes in average |
|
||||||
# before hitting a crash point. |
|
||||||
killoption = " --kill_random_test=" + \ |
|
||||||
str(kill_random_test / 3 + 1) |
|
||||||
killoption += \ |
|
||||||
" --kill_prefix_blacklist=WritableFileWriter::Append," \ |
|
||||||
"WritableFileWriter::WriteBuffered" |
|
||||||
# Run kill mode 0 and 1 by turn. |
|
||||||
kill_mode = (kill_mode + 1) % 2 |
|
||||||
# use large ops per thread since we will kill it anyway |
|
||||||
additional_opts = "--ops_per_thread=" + \ |
|
||||||
str(100 * ops_per_thread) + killoption |
|
||||||
elif check_mode == 1: |
|
||||||
# normal run with universal compaction mode |
|
||||||
additional_opts = "--ops_per_thread=" + str(ops_per_thread) + \ |
|
||||||
" --compaction_style=1" |
|
||||||
elif check_mode == 2: |
|
||||||
# normal run with FIFO compaction mode |
|
||||||
# ops_per_thread is divided by 5 because FIFO compaction |
|
||||||
# style is quite a bit slower on reads with lot of files |
|
||||||
additional_opts = "--ops_per_thread=" + str(ops_per_thread / 5) + \ |
|
||||||
" --compaction_style=2" |
|
||||||
else: |
|
||||||
# normal run |
|
||||||
additional_opts = "--ops_per_thread=" + str(ops_per_thread) |
|
||||||
|
|
||||||
if simple_mode: |
|
||||||
cmd = re.sub('\s+', ' ', """ |
|
||||||
./db_stress |
|
||||||
--column_families=1 |
|
||||||
--threads=%s |
|
||||||
--write_buffer_size=%s |
|
||||||
--destroy_db_initially=0 |
|
||||||
--reopen=20 |
|
||||||
--prefixpercent=0 |
|
||||||
--readpercent=50 |
|
||||||
--writepercent=35 |
|
||||||
--delpercent=5 |
|
||||||
--iterpercent=10 |
|
||||||
--db=%s |
|
||||||
--max_key=100000000 |
|
||||||
--mmap_read=%s |
|
||||||
--block_size=16384 |
|
||||||
--cache_size=1048576 |
|
||||||
--open_files=500000 |
|
||||||
--verify_checksum=1 |
|
||||||
--sync=0 |
|
||||||
--progress_reports=0 |
|
||||||
--disable_wal=0 |
|
||||||
--disable_data_sync=0 |
|
||||||
--target_file_size_base=16777216 |
|
||||||
--target_file_size_multiplier=1 |
|
||||||
--max_write_buffer_number=3 |
|
||||||
--max_background_compactions=1 |
|
||||||
--max_bytes_for_level_base=67108864 |
|
||||||
--filter_deletes=%s |
|
||||||
--memtablerep=skip_list |
|
||||||
--prefix_size=0 |
|
||||||
--nooverwritepercent=1 |
|
||||||
--log2_keys_per_lock=10 |
|
||||||
%s |
|
||||||
""" % (threads, |
|
||||||
write_buf_size, |
|
||||||
dbname, |
|
||||||
random.randint(0, 1), |
|
||||||
random.randint(0, 1), |
|
||||||
additional_opts)) |
|
||||||
else: |
|
||||||
cmd = re.sub('\s+', ' ', """ |
|
||||||
./db_stress |
|
||||||
--test_batches_snapshots=%s |
|
||||||
--threads=%s |
|
||||||
--write_buffer_size=%s |
|
||||||
--destroy_db_initially=0 |
|
||||||
--reopen=20 |
|
||||||
--readpercent=45 |
|
||||||
--prefixpercent=5 |
|
||||||
--writepercent=35 |
|
||||||
--delpercent=5 |
|
||||||
--iterpercent=10 |
|
||||||
--db=%s |
|
||||||
--max_key=100000000 |
|
||||||
--mmap_read=%s |
|
||||||
--block_size=16384 |
|
||||||
--cache_size=1048576 |
|
||||||
--open_files=500000 |
|
||||||
--verify_checksum=1 |
|
||||||
--sync=0 |
|
||||||
--progress_reports=0 |
|
||||||
--disable_wal=0 |
|
||||||
--disable_data_sync=0 |
|
||||||
--target_file_size_base=2097152 |
|
||||||
--target_file_size_multiplier=2 |
|
||||||
--max_write_buffer_number=3 |
|
||||||
--max_background_compactions=20 |
|
||||||
--max_bytes_for_level_base=10485760 |
|
||||||
--filter_deletes=%s |
|
||||||
--memtablerep=prefix_hash |
|
||||||
--prefix_size=7 |
|
||||||
--nooverwritepercent=1 |
|
||||||
--log2_keys_per_lock=10 |
|
||||||
%s |
|
||||||
""" % (random.randint(0, 1), |
|
||||||
threads, |
|
||||||
write_buf_size, |
|
||||||
dbname, |
|
||||||
random.randint(0, 1), |
|
||||||
random.randint(0, 1), |
|
||||||
additional_opts)) |
|
||||||
|
|
||||||
print "Running:" + cmd + "\n" |
|
||||||
|
|
||||||
popen = subprocess.Popen([cmd], stdout=subprocess.PIPE, |
|
||||||
stderr=subprocess.STDOUT, |
|
||||||
shell=True) |
|
||||||
stdoutdata, stderrdata = popen.communicate() |
|
||||||
retncode = popen.returncode |
|
||||||
msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format( |
|
||||||
check_mode, killoption, retncode)) |
|
||||||
print msg |
|
||||||
print stdoutdata |
|
||||||
|
|
||||||
expected = False |
|
||||||
if (killoption == '') and (retncode == 0): |
|
||||||
# we expect zero retncode if no kill option |
|
||||||
expected = True |
|
||||||
elif killoption != '' and retncode < 0: |
|
||||||
# we expect negative retncode if kill option was given |
|
||||||
expected = True |
|
||||||
|
|
||||||
if not expected: |
|
||||||
print "TEST FAILED. See kill option and exit code above!!!\n" |
|
||||||
sys.exit(1) |
|
||||||
|
|
||||||
stdoutdata = stdoutdata.lower() |
|
||||||
errorcount = (stdoutdata.count('error') - |
|
||||||
stdoutdata.count('got errors 0 times')) |
|
||||||
print "#times error occurred in output is " + str(errorcount) + "\n" |
|
||||||
|
|
||||||
if (errorcount > 0): |
|
||||||
print "TEST FAILED. Output has 'error'!!!\n" |
|
||||||
sys.exit(2) |
|
||||||
if (stdoutdata.find('fail') >= 0): |
|
||||||
print "TEST FAILED. Output has 'fail'!!!\n" |
|
||||||
sys.exit(2) |
|
||||||
|
|
||||||
# First half of the duration, keep doing kill test. For the next half, |
|
||||||
# try different modes. |
|
||||||
if time.time() > half_time: |
|
||||||
# we need to clean up after ourselves -- only do this on test |
|
||||||
# success |
|
||||||
shutil.rmtree(dbname, True) |
|
||||||
check_mode = (check_mode + 1) % total_check_mode |
|
||||||
|
|
||||||
time.sleep(1) # time to stabilize after a kill |
|
||||||
|
|
||||||
if __name__ == "__main__": |
|
||||||
sys.exit(main(sys.argv[1:])) |
|
Loading…
Reference in new issue