Make the benchmark scripts configurable and add tests

Summary:
This makes run_flash_bench.sh configurable. Previously it was hardwired for 1B keys and tests
ran for 12 hours each. That kept me from using it. This makes it configuable, adds more tests,
makes the duration per-test configurable and refactors the test scripts.

Adds the seekrandomwhilemerging test to db_bench which is the same as seekrandomwhilewriting except
the writer thread does Merge rather than Put.

Forces the stall-time column in compaction IO stats to use a fixed format (H:M:S) which makes
it easier to scrape and parse. Also adds an option to AppendHumanMicros to force a fixed format.
Sometimes automation and humans want different format.

Calls thread->stats.AddBytes(bytes); in db_bench for more tests to get the MB/sec summary
stats in the output at test end.

Adds the average ingest rate to compaction IO stats. Output now looks like:
https://gist.github.com/mdcallag/2bd64d18be1b93adc494

More information on the benchmark output is at https://gist.github.com/mdcallag/db43a58bd5ac624f01e1

For benchmark.sh changes default RocksDB configuration to reduce stalls:
* min_level_to_compress from 2 to 3
* hard_rate_limit from 2 to 3
* max_grandparent_overlap_factor and max_bytes_for_level_multiplier from 10 to 8
* L0 file count triggers from 4,8,12 to 4,12,20 for (start,stall,stop)

Task ID: #6596829

Blame Rev:

Test Plan:
run tools/run_flash_bench.sh

Revert Plan:

Database Impact:

Memcache Impact:

Other Notes:

EImportant:

- begin *PUBLIC* platform impact section -
Bugzilla: #
- end platform impact -

Reviewers: igor

Reviewed By: igor

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D36075
main
Mark Callaghan 10 years ago
parent 2158e0f831
commit 99ec2412e5
  1. 44
      db/db_bench.cc
  2. 72
      db/internal_stats.cc
  3. 266
      tools/benchmark.sh
  4. 232
      tools/run_flash_bench.sh
  5. 11
      util/logging.cc
  6. 3
      util/logging.h
  7. 2
      util/thread_status_impl.cc

@ -71,6 +71,7 @@ DEFINE_string(benchmarks,
"newiteratorwhilewriting,"
"seekrandom,"
"seekrandomwhilewriting,"
"seekrandomwhilemerging,"
"readseq,"
"readreverse,"
"compact,"
@ -130,8 +131,12 @@ DEFINE_string(benchmarks,
"\treadrandommergerandom -- perform N random read-or-merge "
"operations. Must be used with merge_operator\n"
"\tnewiterator -- repeated iterator creation\n"
"\tseekrandom -- N random seeks\n"
"\tseekrandom -- 1 writer, N threads doing random seeks\n"
"\tseekrandom -- N random seeks, call Next seek_nexts times "
"per seek\n"
"\tseekrandomwhilewriting -- seekrandom and 1 thread doing "
"overwrite\n"
"\tseekrandomwhilemerging -- seekrandom and 1 thread doing "
"merge\n"
"\tcrc32c -- repeated crc32c of 4K of data\n"
"\txxhash -- repeated xxHash of 4K of data\n"
"\tacquireload -- load N*1000 times\n"
@ -182,7 +187,8 @@ DEFINE_int32(value_size, 100, "Size of each value");
DEFINE_int32(seek_nexts, 0,
"How many times to call Next() after Seek() in "
"fillseekseq and seekrandom");
"fillseekseq, seekrandom, seekrandomwhilewriting and "
"seekrandomwhilemerging");
DEFINE_bool(reverse_iterator, false,
"When true use Prev rather than Next for iterators that do "
@ -1590,6 +1596,9 @@ class Benchmark {
} else if (name == Slice("seekrandomwhilewriting")) {
num_threads++; // Add extra thread for writing
method = &Benchmark::SeekRandomWhileWriting;
} else if (name == Slice("seekrandomwhilemerging")) {
num_threads++; // Add extra thread for merging
method = &Benchmark::SeekRandomWhileMerging;
} else if (name == Slice("readrandomsmall")) {
reads_ /= 1000;
method = &Benchmark::ReadRandom;
@ -2535,6 +2544,7 @@ class Benchmark {
void ReadRandom(ThreadState* thread) {
int64_t read = 0;
int64_t found = 0;
int64_t bytes = 0;
ReadOptions options(FLAGS_verify_checksum, true);
std::unique_ptr<const char[]> key_guard;
Slice key = AllocateKey(&key_guard);
@ -2558,6 +2568,7 @@ class Benchmark {
}
if (s.ok()) {
found++;
bytes += key.size() + value.size();
} else if (!s.IsNotFound()) {
fprintf(stderr, "Get returned an error: %s\n", s.ToString().c_str());
abort();
@ -2569,6 +2580,7 @@ class Benchmark {
snprintf(msg, sizeof(msg), "(%" PRIu64 " of %" PRIu64 " found)\n",
found, read);
thread->stats.AddBytes(bytes);
thread->stats.AddMessage(msg);
if (FLAGS_perf_level > 0) {
@ -2640,6 +2652,7 @@ class Benchmark {
void SeekRandom(ThreadState* thread) {
int64_t read = 0;
int64_t found = 0;
int64_t bytes = 0;
ReadOptions options(FLAGS_verify_checksum, true);
options.tailing = FLAGS_use_tailing_iterator;
@ -2696,6 +2709,7 @@ class Benchmark {
Slice value = iter_to_use->value();
memcpy(value_buffer, value.data(),
std::min(value.size(), sizeof(value_buffer)));
bytes += iter_to_use->key().size() + iter_to_use->value().size();
if (!FLAGS_reverse_iterator) {
iter_to_use->Next();
@ -2715,6 +2729,7 @@ class Benchmark {
char msg[100];
snprintf(msg, sizeof(msg), "(%" PRIu64 " of %" PRIu64 " found)\n",
found, read);
thread->stats.AddBytes(bytes);
thread->stats.AddMessage(msg);
if (FLAGS_perf_level > 0) {
thread->stats.AddMessage(perf_context.ToString());
@ -2729,6 +2744,14 @@ class Benchmark {
}
}
void SeekRandomWhileMerging(ThreadState* thread) {
if (thread->tid > 0) {
SeekRandom(thread);
} else {
BGWriter(thread, kMerge);
}
}
void DoDelete(ThreadState* thread, bool seq) {
WriteBatch batch;
Duration duration(seq ? 0 : FLAGS_duration, num_);
@ -2784,6 +2807,7 @@ class Benchmark {
double last = FLAGS_env->NowMicros();
int writes_per_second_by_10 = 0;
int num_writes = 0;
int64_t bytes = 0;
// --writes_per_second rate limit is enforced per 100 milliseconds
// intervals to avoid a burst of writes at the start of each second.
@ -2820,6 +2844,7 @@ class Benchmark {
fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str());
exit(1);
}
bytes += key.size() + value_size_;
thread->stats.FinishedOps(&db_, db_.db, 1);
++num_writes;
@ -2836,6 +2861,7 @@ class Benchmark {
}
}
}
thread->stats.AddBytes(bytes);
}
// Given a key K and value V, this puts (K+"0", V), (K+"1", V), (K+"2", V)
@ -3054,6 +3080,7 @@ class Benchmark {
RandomGenerator gen;
std::string value;
int64_t found = 0;
int64_t bytes = 0;
Duration duration(FLAGS_duration, readwrites_);
std::unique_ptr<const char[]> key_guard;
@ -3066,6 +3093,7 @@ class Benchmark {
auto status = db->Get(options, key, &value);
if (status.ok()) {
++found;
bytes += key.size() + value.size();
} else if (!status.IsNotFound()) {
fprintf(stderr, "Get returned an error: %s\n",
status.ToString().c_str());
@ -3077,11 +3105,13 @@ class Benchmark {
fprintf(stderr, "put error: %s\n", s.ToString().c_str());
exit(1);
}
bytes += key.size() + value_size_;
thread->stats.FinishedOps(nullptr, db, 1);
}
char msg[100];
snprintf(msg, sizeof(msg),
"( updates:%" PRIu64 " found:%" PRIu64 ")", readwrites_, found);
thread->stats.AddBytes(bytes);
thread->stats.AddMessage(msg);
}
@ -3093,6 +3123,7 @@ class Benchmark {
RandomGenerator gen;
std::string value;
int64_t found = 0;
int64_t bytes = 0;
std::unique_ptr<const char[]> key_guard;
Slice key = AllocateKey(&key_guard);
@ -3105,6 +3136,7 @@ class Benchmark {
auto status = db->Get(options, key, &value);
if (status.ok()) {
++found;
bytes += key.size() + value.size();
} else if (!status.IsNotFound()) {
fprintf(stderr, "Get returned an error: %s\n",
status.ToString().c_str());
@ -3128,12 +3160,14 @@ class Benchmark {
fprintf(stderr, "put error: %s\n", s.ToString().c_str());
exit(1);
}
bytes += key.size() + value.size();
thread->stats.FinishedOps(nullptr, db, 1);
}
char msg[100];
snprintf(msg, sizeof(msg), "( updates:%" PRIu64 " found:%" PRIu64 ")",
readwrites_, found);
thread->stats.AddBytes(bytes);
thread->stats.AddMessage(msg);
}
@ -3149,7 +3183,7 @@ class Benchmark {
// FLAGS_merge_keys.
void MergeRandom(ThreadState* thread) {
RandomGenerator gen;
int64_t bytes = 0;
std::unique_ptr<const char[]> key_guard;
Slice key = AllocateKey(&key_guard);
// The number of iterations is the larger of read_ or write_
@ -3164,12 +3198,14 @@ class Benchmark {
fprintf(stderr, "merge error: %s\n", s.ToString().c_str());
exit(1);
}
bytes += key.size() + value_size_;
thread->stats.FinishedOps(nullptr, db, 1);
}
// Print some statistics
char msg[100];
snprintf(msg, sizeof(msg), "( updates:%" PRIu64 ")", readwrites_);
thread->stats.AddBytes(bytes);
thread->stats.AddMessage(msg);
}

@ -13,6 +13,7 @@
#endif
#include <inttypes.h>
#include <algorithm>
#include <vector>
#include "db/column_family.h"
@ -408,22 +409,31 @@ void InternalStats::DumpDBStats(std::string* value) {
// writes/batches is the average group commit size.
//
// The format is the same for interval stats.
AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen);
snprintf(buf, sizeof(buf),
"Cumulative writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64
" batches, %.1f writes per batch, %.2f GB user ingest, "
"stall time: %s\n",
write_other + write_self, num_keys_written, write_self,
"Cumulative writes: %s writes, %s keys, %s batches, "
"%.1f writes per batch, ingest: %.2f GB, %.2f MB/s\n",
NumberToHumanString(write_other + write_self).c_str(),
NumberToHumanString(num_keys_written).c_str(),
NumberToHumanString(write_self).c_str(),
(write_other + write_self) / static_cast<double>(write_self + 1),
user_bytes_written / kGB, human_micros);
user_bytes_written / kGB, user_bytes_written / kMB / seconds_up);
value->append(buf);
// WAL
snprintf(buf, sizeof(buf),
"Cumulative WAL: %" PRIu64 " writes, %" PRIu64 " syncs, "
"%.2f writes per sync, %.2f GB written\n",
write_with_wal, wal_synced,
"Cumulative WAL: %s writes, %s syncs, "
"%.2f writes per sync, written: %.2f GB, %.2f MB/s\n",
NumberToHumanString(write_with_wal).c_str(),
NumberToHumanString(wal_synced).c_str(),
write_with_wal / static_cast<double>(wal_synced + 1),
wal_bytes / kGB);
wal_bytes / kGB, wal_bytes / kMB / seconds_up);
value->append(buf);
// Stall
AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen, true);
snprintf(buf, sizeof(buf),
"Cumulative stall: %s, %.1f percent\n",
human_micros,
// 10000 = divide by 1M to get secs, then multiply by 100 for pct
write_stall_micros / 10000.0 / std::max(seconds_up, 0.001));
value->append(buf);
// Interval
@ -431,19 +441,18 @@ void InternalStats::DumpDBStats(std::string* value) {
uint64_t interval_write_self = write_self - db_stats_snapshot_.write_self;
uint64_t interval_num_keys_written =
num_keys_written - db_stats_snapshot_.num_keys_written;
AppendHumanMicros(
write_stall_micros - db_stats_snapshot_.write_stall_micros,
human_micros, kHumanMicrosLen);
snprintf(buf, sizeof(buf),
"Interval writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64
" batches, %.1f writes per batch, %.1f MB user ingest, "
"stall time: %s\n",
interval_write_other + interval_write_self,
interval_num_keys_written, interval_write_self,
"Interval writes: %s writes, %s keys, %s batches, "
"%.1f writes per batch, ingest: %.2f MB, %.2f MB/s\n",
NumberToHumanString(
interval_write_other + interval_write_self).c_str(),
NumberToHumanString(interval_num_keys_written).c_str(),
NumberToHumanString(interval_write_self).c_str(),
static_cast<double>(interval_write_other + interval_write_self) /
(interval_write_self + 1),
(user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB,
human_micros);
(user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB /
std::max(interval_seconds_up, 0.001)),
value->append(buf);
uint64_t interval_write_with_wal =
@ -452,13 +461,26 @@ void InternalStats::DumpDBStats(std::string* value) {
uint64_t interval_wal_bytes = wal_bytes - db_stats_snapshot_.wal_bytes;
snprintf(buf, sizeof(buf),
"Interval WAL: %" PRIu64 " writes, %" PRIu64 " syncs, "
"%.2f writes per sync, %.2f MB written\n",
interval_write_with_wal,
interval_wal_synced,
"Interval WAL: %s writes, %s syncs, "
"%.2f writes per sync, written: %.2f MB, %.2f MB/s\n",
NumberToHumanString(interval_write_with_wal).c_str(),
NumberToHumanString(interval_wal_synced).c_str(),
interval_write_with_wal /
static_cast<double>(interval_wal_synced + 1),
interval_wal_bytes / kGB);
interval_wal_bytes / kGB,
interval_wal_bytes / kMB / std::max(interval_seconds_up, 0.001));
value->append(buf);
// Stall
AppendHumanMicros(
write_stall_micros - db_stats_snapshot_.write_stall_micros,
human_micros, kHumanMicrosLen, true);
snprintf(buf, sizeof(buf),
"Interval stall: %s, %.1f percent\n",
human_micros,
// 10000 = divide by 1M to get secs, then multiply by 100 for pct
(write_stall_micros - db_stats_snapshot_.write_stall_micros) /
10000.0 / std::max(interval_seconds_up, 0.001));
value->append(buf);
db_stats_snapshot_.seconds_up = seconds_up;
@ -557,7 +579,7 @@ void InternalStats::DumpCFStats(std::string* value) {
value->append(buf);
snprintf(buf, sizeof(buf),
"Flush(GB): accumulative %.3f, interval %.3f\n",
"Flush(GB): cumulative %.3f, interval %.3f\n",
curr_ingest / kGB, interval_ingest / kGB);
value->append(buf);

@ -35,21 +35,24 @@ if [ ! -z $DB_BENCH_NO_SYNC ]; then
syncval="0";
fi
num_read_threads=${NUM_READ_THREADS:-16}
# Only for readwhilewriting, readwhilemerging
writes_per_second=${WRITES_PER_SEC:-$((80 * K))}
num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10} # (only for rangescanwhilewriting)
cache_size=$((1 * G))
num_threads=${NUM_THREADS:-16}
# Only for *whilewriting, *whilemerging
writes_per_second=${WRITES_PER_SECOND:-$((10 * K))}
# Only for tests that do range scans
num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10}
cache_size=${CACHE_SIZE:-$((1 * G))}
duration=${DURATION:-0}
num_keys=${NUM_KEYS:-$((1 * G))}
key_size=20
value_size=800
value_size=${VALUE_SIZE:-400}
const_params="
--db=$DB_DIR \
--wal_dir=$WAL_DIR \
--disable_data_sync=0 \
\
--num=$num_keys \
--num_levels=6 \
--key_size=$key_size \
--value_size=$value_size \
@ -57,10 +60,10 @@ const_params="
--cache_size=$cache_size \
--cache_numshardbits=6 \
--compression_type=zlib \
--min_level_to_compress=2 \
--min_level_to_compress=3 \
--compression_ratio=0.5 \
\
--hard_rate_limit=2 \
--hard_rate_limit=3 \
--rate_limit_delay_max_milliseconds=1000000 \
--write_buffer_size=$((128 * M)) \
--max_write_buffer_number=3 \
@ -69,7 +72,8 @@ const_params="
\
--verify_checksum=1 \
--delete_obsolete_files_period_micros=$((60 * M)) \
--max_grandparent_overlap_factor=10 \
--max_grandparent_overlap_factor=8 \
--max_bytes_for_level_multiplier=8 \
\
--statistics=1 \
--stats_per_interval=1 \
@ -82,158 +86,173 @@ const_params="
l0_config="
--level0_file_num_compaction_trigger=4 \
--level0_slowdown_writes_trigger=8 \
--level0_stop_writes_trigger=12"
--level0_slowdown_writes_trigger=12 \
--level0_stop_writes_trigger=20"
if [ $duration -gt 0 ]; then
const_params="$const_params --duration=$duration"
fi
params_r="$const_params $l0_config --max_background_compactions=4 --max_background_flushes=1"
params_w="$const_params $l0_config --max_background_compactions=16 --max_background_flushes=16"
params_bulkload="$const_params --max_background_compactions=16 --max_background_flushes=16 \
--level0_file_num_compaction_trigger=$((10 * M)) \
--level0_slowdown_writes_trigger=$((10 * M)) \
--level0_stop_writes_trigger=$((10 * M))"
function summarize_result {
test_out=$1
test_name=$2
bench_name=$3
uptime=$( grep ^Uptime\(secs $test_out | tail -1 | awk '{ printf "%.0f", $2 }' )
stall_time=$( grep "^Cumulative stall" $test_out | tail -1 | awk '{ print $3 }' )
stall_pct=$( grep "^Cumulative stall" $test_out| tail -1 | awk '{ print $5 }' )
ops_sec=$( grep ^${bench_name} $test_out | awk '{ print $5 }' )
mb_sec=$( grep ^${bench_name} $test_out | awk '{ print $7 }' )
lo_wgb=$( grep "^ L0" $test_out | tail -1 | awk '{ print $8 }' )
sum_wgb=$( grep "^ Sum" $test_out | tail -1 | awk '{ print $8 }' )
wamp=$( echo "scale=1; $sum_wgb / $lo_wgb" | bc )
wmb_ps=$( echo "scale=1; ( $sum_wgb * 1024.0 ) / $uptime" | bc )
usecs_op=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $3 }' )
p50=$( grep "^Percentiles:" $test_out | awk '{ printf "%.1f", $3 }' )
p75=$( grep "^Percentiles:" $test_out | awk '{ printf "%.1f", $5 }' )
p99=$( grep "^Percentiles:" $test_out | awk '{ printf "%.1f", $7 }' )
echo -e "$ops_sec\t$mb_sec\t$lo_wgb\t$sum_wgb\t$wamp\t$wmb_ps\t$usecs_op\t$p50\t$p75\t$p99\t$uptime\t$stall_time\t$stall_pct\t$test_name" \
>> $output_dir/report.txt
}
function run_bulkload {
echo "Bulk loading $num_keys random keys into database..."
cmd="./db_bench $params_bulkload --benchmarks=fillrandom \
echo "Bulk loading $num_keys random keys"
cmd="./db_bench --benchmarks=fillrandom \
--use_existing_db=0 \
--num=$num_keys \
--disable_auto_compactions=1 \
--sync=0 \
--disable_data_sync=0 \
--threads=1 2>&1 | tee -a $output_dir/benchmark_bulkload_fillrandom.log"
$params_bulkload \
--threads=1 \
2>&1 | tee -a $output_dir/benchmark_bulkload_fillrandom.log"
echo $cmd | tee $output_dir/benchmark_bulkload_fillrandom.log
eval $cmd
summarize_result $output_dir/benchmark_bulkload_fillrandom.log bulkload fillrandom
echo "Compacting..."
cmd="./db_bench $params_w --benchmarks=compact \
cmd="./db_bench --benchmarks=compact \
--use_existing_db=1 \
--num=$num_keys \
--disable_auto_compactions=1 \
--sync=0 \
--disable_data_sync=0 \
--threads=1 2>&1 | tee -a $output_dir/benchmark_bulkload_compact.log"
$params_w \
--threads=1 \
2>&1 | tee -a $output_dir/benchmark_bulkload_compact.log"
echo $cmd | tee $output_dir/benchmark_bulkload_compact.log
eval $cmd
}
function run_fillseq {
echo "Loading $num_keys keys sequentially into database..."
cmd="./db_bench $params_w --benchmarks=fillseq \
echo "Loading $num_keys keys sequentially"
cmd="./db_bench --benchmarks=fillseq \
--use_existing_db=0 \
--num=$num_keys \
--sync=0 \
--disable_data_sync=0 \
--threads=1 2>&1 | tee -a $output_dir/benchmark_fillseq.log"
$params_w \
--threads=1 \
2>&1 | tee -a $output_dir/benchmark_fillseq.log"
echo $cmd | tee $output_dir/benchmark_fillseq.log
eval $cmd
summarize_result $output_dir/benchmark_fillseq.log fillseq fillseq
}
function run_overwrite {
echo "Loading $num_keys keys sequentially into database..."
cmd="./db_bench $params_w --benchmarks=overwrite \
function run_change {
operation=$1
echo "Do $num_keys random $operation"
out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"
cmd="./db_bench --benchmarks=$operation \
--use_existing_db=1 \
--num=$num_keys \
--sync=0 \
--disable_data_sync=0 \
--threads=1 2>&1 | tee -a $output_dir/benchmark_overwrite.log"
echo $cmd | tee $output_dir/benchmark_overwrite.log
--sync=$syncval \
$params_w \
--threads=$num_threads \
--merge_operator=\"put\" \
2>&1 | tee -a $output_dir/${out_name}"
echo $cmd | tee $output_dir/${out_name}
eval $cmd
summarize_result $output_dir/${out_name} ${operation}.t${num_threads}.s${syncval} $operation
}
function run_filluniquerandom {
echo "Loading $num_keys unique keys randomly into database..."
cmd="./db_bench $params_w --benchmarks=filluniquerandom \
echo "Loading $num_keys unique keys randomly"
cmd="./db_bench --benchmarks=filluniquerandom \
--use_existing_db=0 \
--num=$num_keys \
--sync=0 \
--disable_data_sync=0 \
--threads=1 2>&1 | tee -a $output_dir/benchmark_filluniquerandom.log"
$params_w \
--threads=1 \
2>&1 | tee -a $output_dir/benchmark_filluniquerandom.log"
echo $cmd | tee $output_dir/benchmark_filluniquerandom.log
eval $cmd
summarize_result $output_dir/benchmark_filluniquerandom.log filluniquerandom filluniquerandom
}
function run_readrandom {
echo "Reading $num_keys random keys from database..."
cmd="./db_bench $params_r --benchmarks=readrandom \
echo "Reading $num_keys random keys"
out_name="benchmark_readrandom.t${num_threads}.log"
cmd="./db_bench --benchmarks=readrandom \
--use_existing_db=1 \
--num=$num_keys \
--threads=$num_read_threads \
--disable_auto_compactions=1 \
2>&1 | tee -a $output_dir/benchmark_readrandom.log"
echo $cmd | tee $output_dir/benchmark_readrandom.log
eval $cmd
}
function run_readwhilewriting {
echo "Reading $num_keys random keys from database while writing.."
cmd="./db_bench $params_r --benchmarks=readwhilewriting \
--use_existing_db=1 \
--num=$num_keys \
--sync=$syncval \
--disable_data_sync=0 \
--threads=$num_read_threads \
--writes_per_second=$writes_per_second \
2>&1 | tee -a $output_dir/benchmark_readwhilewriting.log"
echo $cmd | tee $output_dir/benchmark_readwhilewriting.log
$params_w \
--threads=$num_threads \
2>&1 | tee -a $output_dir/${out_name}"
echo $cmd | tee $output_dir/${out_name}
eval $cmd
summarize_result $output_dir/${out_name} readrandom.t${num_threads} readrandom
}
function run_readwhilemerging {
echo "Reading $num_keys random keys from database while merging.."
cmd="./db_bench $params_r --benchmarks=readwhilemerging \
function run_readwhile {
operation=$1
echo "Reading $num_keys random keys while $operation"
out_name="benchmark_readwhile${operation}.t${num_threads}.log"
cmd="./db_bench --benchmarks=readwhile${operation} \
--use_existing_db=1 \
--num=$num_keys \
--sync=$syncval \
--disable_data_sync=0 \
--threads=$num_read_threads \
$params_w \
--threads=$num_threads \
--writes_per_second=$writes_per_second \
--merge_operator=\"put\" \
2>&1 | tee -a $output_dir/benchmark_readwhilemerging.log"
echo $cmd | tee $output_dir/benchmark_readwhilemerging.log
2>&1 | tee -a $output_dir/${out_name}"
echo $cmd | tee $output_dir/${out_name}
eval $cmd
summarize_result $output_dir/${out_name} readwhile${operation}.t${num_threads} readwhile${operation}
}
function run_rangescanwhilewriting {
echo "Range scan $num_keys random keys from database while writing.."
cmd="./db_bench $params_r --benchmarks=seekrandomwhilewriting \
function run_rangewhile {
operation=$1
full_name=$2
reverse_arg=$3
out_name="benchmark_${full_name}.t${num_threads}.log"
echo "Range scan $num_keys random keys while ${operation} for reverse_iter=${reverse_arg}"
cmd="./db_bench --benchmarks=seekrandomwhile${operation} \
--use_existing_db=1 \
--num=$num_keys \
--sync=$syncval \
--disable_data_sync=0 \
--threads=$num_read_threads \
$params_w \
--threads=$num_threads \
--writes_per_second=$writes_per_second \
--merge_operator=\"put\" \
--seek_nexts=$num_nexts_per_seek \
2>&1 | tee -a $output_dir/benchmark_rangescanwhilewriting.log"
echo $cmd | tee $output_dir/benchmark_rangescanwhilewriting.log
eval $cmd
}
function run_updaterandom {
echo "Read/Modify/Write $num_keys random keys (not using merge).."
cmd="./db_bench $params_w --benchmarks=updaterandom \
--use_existing_db=1 \
--num=$num_keys \
--sync=$syncval \
--disable_data_sync=0 \
--threads=$num_read_threads 2>&1 | tee -a $output_dir/benchmark_updaterandom.log"
echo $cmd | tee $output_dir/benchmark_updaterandom.log
--reverse_iterator=$reverse_arg \
2>&1 | tee -a $output_dir/${out_name}"
echo $cmd | tee $output_dir/${out_name}
eval $cmd
summarize_result $output_dir/${out_name} ${full_name}.t${num_threads} seekrandomwhile${operation}
}
function run_mergerandom {
echo "Read/Modify/Write $num_keys random keys (using merge operator).."
cmd="./db_bench $params_w --benchmarks=mergerandom \
function run_range {
full_name=$1
reverse_arg=$2
out_name="benchmark_${full_name}.t${num_threads}.log"
echo "Range scan $num_keys random keys for reverse_iter=${reverse_arg}"
cmd="./db_bench --benchmarks=seekrandom \
--use_existing_db=1 \
--num=$num_keys \
--sync=$syncval \
--disable_data_sync=0 \
--merge_operator=\"put\" \
--threads=$num_read_threads 2>&1 | tee -a $output_dir/benchmark_mergerandom.log"
echo $cmd | tee $output_dir/benchmark_mergerandom.log
$params_w \
--threads=$num_threads \
--seek_nexts=$num_nexts_per_seek \
--reverse_iterator=$reverse_arg \
2>&1 | tee -a $output_dir/${out_name}"
echo $cmd | tee $output_dir/${out_name}
eval $cmd
summarize_result $output_dir/${out_name} ${full_name}.t${num_threads} seekrandom
}
function now() {
@ -241,6 +260,7 @@ function now() {
}
report="$output_dir/report.txt"
schedule="$output_dir/schedule.txt"
echo "===== Benchmark ====="
@ -249,7 +269,7 @@ IFS=',' read -a jobs <<< $1
for job in ${jobs[@]}; do
if [ $job != debug ]; then
echo "Start $job at `date`" | tee -a $report
echo "Start $job at `date`" | tee -a $schedule
fi
start=$(now)
@ -258,21 +278,31 @@ for job in ${jobs[@]}; do
elif [ $job = fillseq ]; then
run_fillseq
elif [ $job = overwrite ]; then
run_overwrite
run_change overwrite
elif [ $job = updaterandom ]; then
run_change updaterandom
elif [ $job = mergerandom ]; then
run_change mergerandom
elif [ $job = filluniquerandom ]; then
run_filluniquerandom
elif [ $job = readrandom ]; then
run_readrandom
elif [ $job = fwdrange ]; then
run_range $job false
elif [ $job = revrange ]; then
run_range $job true
elif [ $job = readwhilewriting ]; then
run_readwhilewriting
run_readwhile writing
elif [ $job = readwhilemerging ]; then
run_readwhilemerging
elif [ $job = rangescanwhilewriting ]; then
run_rangescanwhilewriting
elif [ $job = updaterandom ]; then
run_updaterandom
elif [ $job = mergerandom ]; then
run_mergerandom
run_readwhile merging
elif [ $job = fwdrangewhilewriting ]; then
run_rangewhile writing $job false
elif [ $job = revrangewhilewriting ]; then
run_rangewhile writing $job true
elif [ $job = fwdrangewhilemerging ]; then
run_rangewhile merging $job false
elif [ $job = revrangewhilemerging ]; then
run_rangewhile merging $job true
elif [ $job = debug ]; then
num_keys=1000; # debug
echo "Setting num_keys to $num_keys"
@ -283,24 +313,10 @@ for job in ${jobs[@]}; do
end=$(now)
if [ $job != debug ]; then
echo "Complete $job in $((end-start)) seconds" | tee -a $report
echo "Complete $job in $((end-start)) seconds" | tee -a $schedule
fi
if [[ $job == readrandom || $job == readwhilewriting \
|| $job == rangescanwhilewriting || $job == updaterandom \
|| $job == mergerandom || $job == readwhilemerging ]]; then
lat=$(grep "micros\/op" "$output_dir/benchmark_$job.log" \
| grep "ops\/sec" | awk '{print $3}')
qps=$(grep "micros\/op" "$output_dir/benchmark_$job.log" \
| grep "ops\/sec" | awk '{print $5}')
line=$(grep "rocksdb.db.get.micros" "$output_dir/benchmark_$job.log")
p50=$(echo $line | awk '{print $7}')
p99=$(echo $line | awk '{print $13}')
print_percentile=$(echo "$p50 != 0 || $p99 != 0" | bc);
if [ "$print_percentile" == "1" ]; then
echo "Read latency p50 = $p50 us, p99 = $p99 us" | tee -a $report
fi
echo "QPS = $qps ops/sec" | tee -a $report
echo "Avg Latency = $lat micros/op " | tee -a $report
fi
echo -e "ops/sec\tmb/sec\tL0_MB\tSum_GB\tW-Amp\tW-MB/s\tusec/op\tp50\tp75\tp99\tUptime\tStall-time\tStall%\tTest"
tail -1 $output_dir/report.txt
done

@ -3,50 +3,220 @@
# After execution of this script, log files are generated in $output_dir.
# report.txt provides a high level statistics
# This should be run from the parent of the tools directory. The command line is:
# [$env_vars] tools/run_flash_bench.sh [list-of-threads]
#
# This runs a sequence of tests in the following sequence:
# step 1) load - bulkload, compact, fillseq, overwrite
# step 2) read-only for each number of threads
# step 3) read-write for each number of threads
# step 4) merge for each number of threads
#
# The list of threads is optional and when not set is equivalent to "24".
# Were list-of-threads specified as "1 2 4" then the tests in steps 2, 3 and
# 4 above would be repeated for 1, 2 and 4 threads. The tests in step 1 are
# only run for 1 thread.
# Test output is written to $OUTPUT_DIR, currently /tmp/output. The performance
# summary is in $OUTPUT_DIR/report.txt. There is one file in $OUTPUT_DIR per
# test and the tests are listed below.
#
# The environment variables are also optional. The variables are:
# NKEYS - number of key/value pairs to load
# NWRITESPERSEC - the writes/second rate limit for the *whilewriting* tests.
# If this is too large then the non-writer threads can get
# starved.
# NSECONDS - number of seconds for which to run each test in steps 2,
# 3 and 4. There are currently 15 tests in those steps and
# they are repeated for each entry in list-of-threads so
# this variable lets you control the total duration to
# finish the benchmark.
# RANGE_LIMIT - the number of rows to read per range query for tests that
# do range queries.
# VAL_SIZE - the length of the value in the key/value pairs loaded.
# You can estimate the size of the test database from this,
# NKEYS and the compression rate (--compression_ratio) set
# in tools/benchmark.sh
# CACHE_BYTES - the size of the RocksDB block cache in bytes
# DATA_DIR - directory in which to create database files
# LOG_DIR - directory in which to create WAL files, may be the same
# as DATA_DIR
# DO_SETUP - when set to 0 then a backup of the database is copied from
# $DATA_DIR.bak to $DATA_DIR and the load tests from step 1
# The WAL directory is also copied from a backup if
# DATA_DIR != LOG_DIR. This allows tests from steps 2, 3, 4
# to be repeated faster.
# SAVE_SETUP - saves a copy of the database at the end of step 1 to
# $DATA_DIR.bak. When LOG_DIR != DATA_DIR then it is copied
# to $LOG_DIR.bak.
# Size constants
K=1024
M=$((1024 * K))
G=$((1024 * M))
n=$((1 * G))
wps=$((80 * K))
duration=$((12 * 60 * 60))
num_read_threads=24
num_keys=${NKEYS:-$((1 * G))}
wps=${NWRITESPERSEC:-$((10 * K))}
duration=${NSECONDS:-$((60 * 60))}
nps=${RANGE_LIMIT:-10}
vs=${VAL_SIZE:-400}
cs=${CACHE_BYTES:-$(( 1 * G ))}
# If no command line arguments then run for 24 threads.
if [[ $# -eq 0 ]]; then
nthreads=( 24 )
else
nthreads=( "$@" )
fi
for num_thr in "${nthreads[@]}" ; do
echo Will run for $num_thr threads
done
# Update these parameters before execution !!!
db_dir="/tmp/rocksdb/"
wal_dir="/tmp/rocksdb/"
db_dir=${DATA_DIR:-"/tmp/rocksdb/"}
wal_dir=${LOG_DIR:-"/tmp/rocksdb/"}
do_setup=${DO_SETUP:-1}
save_setup=${SAVE_SETUP:-0}
output_dir="/tmp/output"
ARGS="\
OUTPUT_DIR=$output_dir \
NUM_KEYS=$num_keys \
DB_DIR=$db_dir \
WAL_DIR=$wal_dir \
VALUE_SIZE=$vs \
CACHE_SIZE=$cs"
mkdir -p $output_dir
echo -e "ops/sec\tmb/sec\tL0_GB\tSum_GB\tW-Amp\tW-MB/s\tusec/op\tp50\tp75\tp99\tUptime\tStall-time\tStall%\tTest" \
> $output_dir/report.txt
# Notes on test sequence:
# step 1) Setup database via sequential fill followed by overwrite to fragment it.
# Done without setting DURATION to make sure that overwrite does $num_keys writes
# step 2) read-only tests for all levels of concurrency requested
# step 3) non read-only tests for all levels of concurrency requested
# step 4) merge tests for all levels of concurrency requested. These must come last.
###### Setup the database
if [[ $do_setup != 0 ]]; then
echo Doing setup
# Test 1: bulk load
env $ARGS ./tools/benchmark.sh bulkload
# Test 2: sequential fill
env $ARGS ./tools/benchmark.sh fillseq
# Test 3: single-threaded overwrite
env $ARGS NUM_THREADS=1 DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh overwrite
else
echo Restoring from backup
rm -rf $db_dir
if [ ! -d ${db_dir}.bak ]; then
echo Database backup does not exist at ${db_dir}.bak
exit -1
fi
echo Restore database from ${db_dir}.bak
cp -p -r ${db_dir}.bak $db_dir
if [[ $db_dir != $wal_dir ]]; then
rm -rf $wal_dir
if [ ! -d ${wal_dir}.bak ]; then
echo WAL backup does not exist at ${wal_dir}.bak
exit -1
fi
echo Restore WAL from ${wal_dir}.bak
cp -p -r ${wal_dir}.bak $wal_dir
fi
fi
if [[ $save_setup != 0 ]]; then
echo Save database to ${db_dir}.bak
cp -p -r $db_dir ${db_dir}.bak
if [[ $db_dir != $wal_dir ]]; then
echo Save WAL to ${wal_dir}.bak
cp -p -r $wal_dir ${wal_dir}.bak
fi
fi
###### Read-only tests
for num_thr in "${nthreads[@]}" ; do
# Test 4: random read
env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh readrandom
# Test 5: random range scans
env $ARGS DURATION=$duration NUM_THREADS=$num_thr NUM_NEXTS_PER_SEEK=$nps \
./tools/benchmark.sh fwdrange
# Test 6: random reverse range scans
env $ARGS DURATION=$duration NUM_THREADS=$num_thr NUM_NEXTS_PER_SEEK=$nps \
./tools/benchmark.sh revrange
done
###### Non read-only tests
for num_thr in "${nthreads[@]}" ; do
# Test 7: overwrite with sync=0
env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \
./tools/benchmark.sh overwrite
# Test 8: overwrite with sync=1
env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh overwrite
# Test 9: random update with sync=0
env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \
./tools/benchmark.sh updaterandom
# Test 10: random update with sync=1
env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh updaterandom
# Test 11: random read while writing
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
./tools/benchmark.sh readwhilewriting
# Test 12: range scan while writing
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilewriting
# Test 13: reverse range scan while writing
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilewriting
done
# Test 1: bulk load
OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
./benchmark.sh bulkload
###### Merge tests
# Test 2: sequential fill
OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
./benchmark.sh fillseq
for num_thr in "${nthreads[@]}" ; do
# Test 14: random merge with sync=0
env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \
./tools/benchmark.sh mergerandom
# Test 3: overwrite
OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
./benchmark.sh overwrite
# Test 15: random merge with sync=1
env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh mergerandom
# Prepare: populate DB with random data
OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
./benchmark.sh filluniquerandom
# Test 16: random read while merging
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
./tools/benchmark.sh readwhilemerging
# Test 4: random read
OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
DURATION=$duration NUM_READ_THREADS=$num_read_threads \
./benchmark.sh readrandom
# Test 17: range scan while merging
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilemerging
# Test 5: random read while writing
OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
DURATION=$duration NUM_READ_THREADS=$num_read_threads WRITES_PER_SECOND=$wps \
./benchmark.sh readwhilewriting
# Test 18: reverse range scan while merging
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilemerging
done
# Test 6: random seek + next()'s while writing
OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
DURATION=$duration NUM_READ_THREADS=$num_read_threads WRITES_PER_SECOND=$wps \
NUM_NEXTS_PER_SEEK=10 \
./benchmark.sh rangescanwhilewriting
cat $output_dir/report.txt

@ -29,16 +29,17 @@ namespace rocksdb {
// for micros >= 10 sec, print "XX sec".
// for micros <= 1 hour, print Y:X M:S".
// for micros > 1 hour, print Z:Y:X H:M:S".
int AppendHumanMicros(uint64_t micros, char* output, int len) {
if (micros < 10000) {
int AppendHumanMicros(uint64_t micros, char* output, int len,
bool fixed_format) {
if (micros < 10000 && !fixed_format) {
return snprintf(output, len, "%" PRIu64 " us", micros);
} else if (micros < 10000000) {
} else if (micros < 10000000 && !fixed_format) {
return snprintf(output, len, "%.3lf ms",
static_cast<double>(micros) / 1000);
} else if (micros < 1000000l * 60) {
} else if (micros < 1000000l * 60 && !fixed_format) {
return snprintf(output, len, "%.3lf sec",
static_cast<double>(micros) / 1000000);
} else if (micros < 1000000ll * 60 * 60) {
} else if (micros < 1000000ll * 60 * 60 && !fixed_format) {
return snprintf(output, len, "%02" PRIu64 ":%05.3f M:S",
micros / 1000000 / 60,
static_cast<double>(micros % 60000000) / 1000000);

@ -21,7 +21,8 @@ namespace rocksdb {
class Slice;
// Append a human-readable time in micros.
int AppendHumanMicros(uint64_t micros, char* output, int len);
int AppendHumanMicros(uint64_t micros, char* output, int len,
bool fixed_format);
// Append a human-readable size in bytes
int AppendHumanBytes(uint64_t bytes, char* output, int len);

@ -40,7 +40,7 @@ const std::string ThreadStatus::MicrosToString(uint64_t micros) {
}
const int kBufferLen = 100;
char buffer[kBufferLen];
AppendHumanMicros(micros, buffer, kBufferLen);
AppendHumanMicros(micros, buffer, kBufferLen, false);
return std::string(buffer);
}

Loading…
Cancel
Save