fork of https://github.com/oxigraph/rocksdb and https://github.com/facebook/rocksdb for nextgraph and oxigraph
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
185 lines
5.1 KiB
185 lines
5.1 KiB
#!/bin/bash
|
|
# REQUIRE: db_bench binary exists in the current directory
|
|
#
|
|
# This should be used with the LevelDB fork listed here to use additional test options.
|
|
# For more details on the changes see the blog post listed below.
|
|
# https://github.com/mdcallag/leveldb-1
|
|
# http://smalldatum.blogspot.com/2015/04/comparing-leveldb-and-rocksdb-take-2.html
|
|
|
|
if [ $# -ne 1 ]; then
|
|
echo -n "./benchmark.sh [fillseq/overwrite/readrandom/readwhilewriting]"
|
|
exit 0
|
|
fi
|
|
|
|
# size constants
|
|
K=1024
|
|
M=$((1024 * K))
|
|
G=$((1024 * M))
|
|
|
|
if [ -z $DB_DIR ]; then
|
|
echo "DB_DIR is not defined"
|
|
exit 0
|
|
fi
|
|
|
|
output_dir=${OUTPUT_DIR:-/tmp/}
|
|
if [ ! -d $output_dir ]; then
|
|
mkdir -p $output_dir
|
|
fi
|
|
|
|
# all multithreaded tests run with sync=1 unless
|
|
# $DB_BENCH_NO_SYNC is defined
|
|
syncval="1"
|
|
if [ ! -z $DB_BENCH_NO_SYNC ]; then
|
|
echo "Turning sync off for all multithreaded tests"
|
|
syncval="0";
|
|
fi
|
|
|
|
num_threads=${NUM_THREADS:-16}
|
|
# Only for *whilewriting, *whilemerging
|
|
writes_per_second=${WRITES_PER_SECOND:-$((10 * K))}
|
|
cache_size=${CACHE_SIZE:-$((1 * G))}
|
|
|
|
num_keys=${NUM_KEYS:-$((1 * G))}
|
|
key_size=20
|
|
value_size=${VALUE_SIZE:-400}
|
|
block_size=${BLOCK_SIZE:-4096}
|
|
|
|
const_params="
|
|
--db=$DB_DIR \
|
|
\
|
|
--num=$num_keys \
|
|
--value_size=$value_size \
|
|
--cache_size=$cache_size \
|
|
--compression_ratio=0.5 \
|
|
\
|
|
--write_buffer_size=$((2 * M)) \
|
|
\
|
|
--histogram=1 \
|
|
\
|
|
--bloom_bits=10 \
|
|
--open_files=$((20 * K))"
|
|
|
|
params_w="$const_params "
|
|
|
|
function summarize_result {
|
|
test_out=$1
|
|
test_name=$2
|
|
bench_name=$3
|
|
nthr=$4
|
|
|
|
usecs_op=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $3 }' )
|
|
mb_sec=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $5 }' )
|
|
ops=$( grep "^Count:" $test_out | awk '{ print $2 }' )
|
|
ops_sec=$( echo "scale=0; (1000000.0 * $nthr) / $usecs_op" | bc )
|
|
avg=$( grep "^Count:" $test_out | awk '{ printf "%.1f", $4 }' )
|
|
p50=$( grep "^Min:" $test_out | awk '{ printf "%.1f", $4 }' )
|
|
echo -e "$ops_sec\t$mb_sec\t$usecs_op\t$avg\t$p50\t$test_name" \
|
|
>> $output_dir/report.txt
|
|
}
|
|
|
|
function run_fillseq {
|
|
# This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the
|
|
# client can discover where to restart a load after a crash. I think this is a good way to load.
|
|
echo "Loading $num_keys keys sequentially"
|
|
cmd="./db_bench --benchmarks=fillseq \
|
|
--use_existing_db=0 \
|
|
--sync=0 \
|
|
$params_w \
|
|
--threads=1 \
|
|
--seed=$( date +%s ) \
|
|
2>&1 | tee -a $output_dir/benchmark_fillseq.v${value_size}.log"
|
|
echo $cmd | tee $output_dir/benchmark_fillseq.v${value_size}.log
|
|
eval $cmd
|
|
summarize_result $output_dir/benchmark_fillseq.v${value_size}.log fillseq.v${value_size} fillseq 1
|
|
}
|
|
|
|
function run_change {
|
|
operation=$1
|
|
echo "Do $num_keys random $operation"
|
|
out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"
|
|
cmd="./db_bench --benchmarks=$operation \
|
|
--use_existing_db=1 \
|
|
--sync=$syncval \
|
|
$params_w \
|
|
--threads=$num_threads \
|
|
--seed=$( date +%s ) \
|
|
2>&1 | tee -a $output_dir/${out_name}"
|
|
echo $cmd | tee $output_dir/${out_name}
|
|
eval $cmd
|
|
summarize_result $output_dir/${out_name} ${operation}.t${num_threads}.s${syncval} $operation $num_threads
|
|
}
|
|
|
|
function run_readrandom {
|
|
echo "Reading $num_keys random keys"
|
|
out_name="benchmark_readrandom.t${num_threads}.log"
|
|
cmd="./db_bench --benchmarks=readrandom \
|
|
--use_existing_db=1 \
|
|
$params_w \
|
|
--threads=$num_threads \
|
|
--seed=$( date +%s ) \
|
|
2>&1 | tee -a $output_dir/${out_name}"
|
|
echo $cmd | tee $output_dir/${out_name}
|
|
eval $cmd
|
|
summarize_result $output_dir/${out_name} readrandom.t${num_threads} readrandom $num_threads
|
|
}
|
|
|
|
function run_readwhile {
|
|
operation=$1
|
|
echo "Reading $num_keys random keys while $operation"
|
|
out_name="benchmark_readwhile${operation}.t${num_threads}.log"
|
|
cmd="./db_bench --benchmarks=readwhile${operation} \
|
|
--use_existing_db=1 \
|
|
--sync=$syncval \
|
|
$params_w \
|
|
--threads=$num_threads \
|
|
--writes_per_second=$writes_per_second \
|
|
--seed=$( date +%s ) \
|
|
2>&1 | tee -a $output_dir/${out_name}"
|
|
echo $cmd | tee $output_dir/${out_name}
|
|
eval $cmd
|
|
summarize_result $output_dir/${out_name} readwhile${operation}.t${num_threads} readwhile${operation} $num_threads
|
|
}
|
|
|
|
function now() {
|
|
echo `date +"%s"`
|
|
}
|
|
|
|
report="$output_dir/report.txt"
|
|
schedule="$output_dir/schedule.txt"
|
|
|
|
echo "===== Benchmark ====="
|
|
|
|
# Run!!!
|
|
IFS=',' read -a jobs <<< $1
|
|
for job in ${jobs[@]}; do
|
|
|
|
if [ $job != debug ]; then
|
|
echo "Start $job at `date`" | tee -a $schedule
|
|
fi
|
|
|
|
start=$(now)
|
|
if [ $job = fillseq ]; then
|
|
run_fillseq
|
|
elif [ $job = overwrite ]; then
|
|
run_change overwrite
|
|
elif [ $job = readrandom ]; then
|
|
run_readrandom
|
|
elif [ $job = readwhilewriting ]; then
|
|
run_readwhile writing
|
|
elif [ $job = debug ]; then
|
|
num_keys=1000; # debug
|
|
echo "Setting num_keys to $num_keys"
|
|
else
|
|
echo "unknown job $job"
|
|
exit
|
|
fi
|
|
end=$(now)
|
|
|
|
if [ $job != debug ]; then
|
|
echo "Complete $job in $((end-start)) seconds" | tee -a $schedule
|
|
fi
|
|
|
|
echo -e "ops/sec\tmb/sec\tusec/op\tavg\tp50\tTest"
|
|
tail -1 $output_dir/report.txt
|
|
|
|
done
|
|
|