Summary:
This is the initial version. A few ways in which this could be extended in the future:
(a) Killing from more places in the source code.
(b) Hashing the stack and using that hash in determining whether to crash, to avoid crashing more often at source lines that are executed more often.
(c) Raising exceptions or returning errors instead of killing.

Test Plan:
This whole thing is for testing. Here is part of the output:

python2.7 tools/db_crashtest2.py -d 600
Running db_stress
db_stress retncode -15 output LevelDB version : 1.5
Number of threads    : 32
Ops per thread       : 10000000
Read percentage      : 50
Write-buffer-size    : 4194304
Delete percentage    : 30
Max key              : 1000
Ratio #ops/#keys     : 320000
Num times DB reopens : 0
Batches/snapshots    : 1
Purge redundant %    : 50
Num keys per lock    : 4
Compression          : snappy
------------------------------------------------
No lock creation because test_batches_snapshots set
2013/04/26-17:55:17 Starting database operations
Created bg thread 0x7fc1f07ff700
... finished 60000 ops
Running db_stress
db_stress retncode -15 output LevelDB version : 1.5
Number of threads    : 32
Ops per thread       : 10000000
Read percentage      : 50
Write-buffer-size    : 4194304
Delete percentage    : 30
Max key              : 1000
Ratio #ops/#keys     : 320000
Num times DB reopens : 0
Batches/snapshots    : 1
Purge redundant %    : 50
Num keys per lock    : 4
Compression          : snappy
------------------------------------------------
Created bg thread 0x7ff0137ff700
No lock creation because test_batches_snapshots set
2013/04/26-17:56:15 Starting database operations
... finished 90000 ops

Revert Plan: OK

Task ID: #2252691

Reviewers: dhruba, emayanke

Reviewed By: emayanke

CC: leveldb, haobo

Differential Revision: https://reviews.facebook.net/D10581
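Extension (b) is only described in prose above. Below is a minimal, purely illustrative Python sketch of that idea; the real kill hook lives in db_stress's C++ code, and the names maybe_kill and stack_counts are hypothetical, not part of this commit:

    import hashlib
    import random
    import traceback
    from collections import defaultdict

    # How many times each distinct call stack has reached the kill point.
    stack_counts = defaultdict(int)

    def maybe_kill(one_in):
        # Hash the current call stack so each code path gets its own counter.
        stack_hash = hashlib.md5(''.join(traceback.format_stack())).hexdigest()
        stack_counts[stack_hash] += 1
        # Scale the crash probability down for stacks seen more often, so hot
        # code paths do not swamp rarely executed ones.
        if random.randrange(one_in * stack_counts[stack_hash]) == 0:
            raise SystemExit('simulated crash at stack ' + stack_hash[:8])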
parent 87d0af15d8
commit 760dd4750f
@@ -0,0 +1,108 @@
#! /usr/bin/env python
import os
import sys
import time
import shlex
import getopt
import logging
import tempfile
import subprocess

# This python script runs db_stress multiple times with kill_random_test
# that causes leveldb to crash at various points in code.
# It also has test-batches-snapshot ON so that basic atomic/consistency
# checks can be performed.
#
def main(argv):
    os.system("make -C ~/rocksdb db_stress")
    try:
        opts, args = getopt.getopt(argv, "hd:t:k:o:b:")
    except getopt.GetoptError as err:
        print str(err)
        print "db_crashtest2.py -d <duration_test> -t <#threads> " \
              "-k <kills with prob 1/k> -o <ops_per_thread> " \
              "-b <write_buffer_size>\n"
        sys.exit(2)

    # default values, will be overridden by cmdline args
    kill_random_test = 97   # kill with probability 1/97 by default
    duration = 6000         # total time for this script to test db_stress
    threads = 32
    ops_per_thread = 200000
    write_buf_size = 4 * 1024 * 1024

    for opt, arg in opts:
        if opt == '-h':
            print "db_crashtest2.py -d <duration_test> -t <#threads> " \
                  "-k <kills with prob 1/k> -o <ops_per_thread> " \
                  "-b <write_buffer_size>\n"
            sys.exit()
        elif opt == '-d':
            duration = int(arg)
        elif opt == '-t':
            threads = int(arg)
        elif opt == '-k':
            kill_random_test = int(arg)
        elif opt == '-i':
            interval = int(arg)
        elif opt == '-o':
            ops_per_thread = int(arg)
        elif opt == '-b':
            write_buf_size = int(arg)
        else:
            print "unrecognized option " + str(opt) + "\n"
            print "db_crashtest2.py -d <duration_test> -t <#threads> " \
                  "-k <kills with prob 1/k> -o <ops_per_thread> " \
                  "-b <write_buffer_size>\n"
            sys.exit(2)

    exit_time = time.time() + duration

    dirpath = tempfile.mkdtemp()

    # kill in every alternate run. toggle tracks which run we are doing.
    toggle = True

    while time.time() < exit_time:
        run_had_errors = False
        print "Running db_stress \n"

        if toggle:
            # since we are going to kill anyway, use more ops per thread
            new_ops_per_thread = 100 * ops_per_thread
            killoption = '--kill_random_test=' + str(kill_random_test)
        else:
            new_ops_per_thread = ops_per_thread
            killoption = ''

        toggle = not toggle

        cmd = ['~/rocksdb/db_stress \
               --test_batches_snapshots=1 \
               --ops_per_thread=' + str(new_ops_per_thread) + ' \
               --threads=' + str(threads) + ' \
               --write_buffer_size=' + str(write_buf_size) + ' \
               --destroy_db_initially=0 ' + killoption + ' \
               --reopen=0 \
               --readpercent=50 \
               --db=' + dirpath + ' \
               --max_key=10000']
        try:
            subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
            if killoption != '':
                # a clean exit on a kill run means the kill point never fired
                logging.warn("WARNING: db_stress did not kill itself\n")
            continue
        except subprocess.CalledProcessError as e:
            # a non-zero (or signal) exit is expected on kill runs; scan the
            # output for genuine errors either way
            msg = "db_stress retncode {0} output {1}".format(e.returncode,
                                                             e.output)
            logging.info(msg)
            print msg
            msglower = msg.lower()
            if ('error' in msglower) or ('fail' in msglower):
                print "TEST FAILED!!!\n"
                sys.exit(2)
        time.sleep(1)  # time to stabilize after a kill


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))