Enhance db_bench write rate limit

Summary:
1) changes tools/{benchmark,run_flash_bench}.sh to optionally use the write rate limit
2) removes the code for --writes_per_second (a per-thread limit in writes/second) and
switches the 'background' write rate limit to use --benchmark_write_rate_limit
(a limit in bytes/second)

Replaces https://reviews.facebook.net/D49113

Task ID: #9555881

Blame Rev:

Test Plan:
tools/run_flash_bench.sh

Revert Plan:

Database Impact:

Memcache Impact:

Other Notes:

EImportant:

- begin *PUBLIC* platform impact section -
Bugzilla: #
- end platform impact -

Reviewers: igor

Reviewed By: igor

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D52485
main
Mark Callaghan 9 years ago
parent 3993432050
commit 4041903ecd
  1. 6
      build_tools/regression_build_test.sh
  2. 60
      db/db_bench.cc
  3. 6
      tools/benchmark.sh
  4. 41
      tools/run_flash_bench.sh

@ -243,7 +243,7 @@ make release
--bloom_bits=10 \ --bloom_bits=10 \
--num=$((NUM / 4)) \ --num=$((NUM / 4)) \
--reads=$((NUM / 4)) \ --reads=$((NUM / 4)) \
--writes_per_second=1000 \ --benchmark_write_rate_limit=$(( 110 * 1024 )) \
--write_buffer_size=100000000 \ --write_buffer_size=100000000 \
--cache_size=6442450944 \ --cache_size=6442450944 \
--cache_numshardbits=6 \ --cache_numshardbits=6 \
@ -329,7 +329,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--use_existing_db=1 \ --use_existing_db=1 \
--duration=600 \ --duration=600 \
--threads=32 \ --threads=32 \
--writes_per_second=81920 > ${STAT_FILE}.readwhilewriting_in_ram --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.readwhilewriting_in_ram
# Seekrandomwhilewriting # Seekrandomwhilewriting
./db_bench \ ./db_bench \
@ -342,7 +342,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--use_tailing_iterator=1 \ --use_tailing_iterator=1 \
--duration=600 \ --duration=600 \
--threads=32 \ --threads=32 \
--writes_per_second=81920 > ${STAT_FILE}.seekwhilewriting_in_ram --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.seekwhilewriting_in_ram
# measure fillseq with bunch of column families # measure fillseq with bunch of column families
./db_bench \ ./db_bench \

@ -420,10 +420,6 @@ static class std::shared_ptr<rocksdb::Statistics> dbstats;
DEFINE_int64(writes, -1, "Number of write operations to do. If negative, do" DEFINE_int64(writes, -1, "Number of write operations to do. If negative, do"
" --num reads."); " --num reads.");
DEFINE_int32(writes_per_second, 0, "Per-thread rate limit on writes and merges "
" per second. No limit when <= 0. Only for the readwhilewriting "
" and readwhilemerging tests.");
DEFINE_bool(sync, false, "Sync all writes to disk"); DEFINE_bool(sync, false, "Sync all writes to disk");
DEFINE_bool(disable_data_sync, false, "If true, do not wait until data is" DEFINE_bool(disable_data_sync, false, "If true, do not wait until data is"
@ -668,7 +664,8 @@ DEFINE_uint64(rate_limiter_bytes_per_sec, 0, "Set options.rate_limiter value.");
DEFINE_uint64( DEFINE_uint64(
benchmark_write_rate_limit, 0, benchmark_write_rate_limit, 0,
"If non-zero, db_bench will rate-limit the writes going into RocksDB"); "If non-zero, db_bench will rate-limit the writes going into RocksDB. This "
"is the global rate in bytes/second.");
DEFINE_int32(max_grandparent_overlap_factor, 10, "Control maximum bytes of " DEFINE_int32(max_grandparent_overlap_factor, 10, "Control maximum bytes of "
"overlaps in grandparent (i.e., level+2) before we stop building a" "overlaps in grandparent (i.e., level+2) before we stop building a"
@ -1312,6 +1309,11 @@ class Stats {
} }
} }
void ResetLastOpTime() {
// Set to now to avoid latency from calls to SleepForMicroseconds
last_op_finish_ = FLAGS_env->NowMicros();
}
void FinishedOps(DBWithColumnFamilies* db_with_cfh, DB* db, int64_t num_ops, void FinishedOps(DBWithColumnFamilies* db_with_cfh, DB* db, int64_t num_ops,
enum OperationType op_type = kOthers) { enum OperationType op_type = kOthers) {
if (reporter_agent_) { if (reporter_agent_) {
@ -1633,7 +1635,8 @@ class Benchmark {
(((FLAGS_key_size + FLAGS_value_size * FLAGS_compression_ratio) (((FLAGS_key_size + FLAGS_value_size * FLAGS_compression_ratio)
* num_) * num_)
/ 1048576.0)); / 1048576.0));
fprintf(stdout, "Writes per second: %d\n", FLAGS_writes_per_second); fprintf(stdout, "Write rate: %" PRIu64 " bytes/second\n",
FLAGS_benchmark_write_rate_limit);
if (FLAGS_enable_numa) { if (FLAGS_enable_numa) {
fprintf(stderr, "Running in NUMA enabled mode.\n"); fprintf(stderr, "Running in NUMA enabled mode.\n");
#ifndef NUMA #ifndef NUMA
@ -2845,14 +2848,22 @@ class Benchmark {
} }
} }
} }
size_t id = thread->rand.Next() % num_key_gens; size_t id = thread->rand.Next() % num_key_gens;
DBWithColumnFamilies* db_with_cfh = SelectDBWithCfh(id); DBWithColumnFamilies* db_with_cfh = SelectDBWithCfh(id);
batch.Clear(); batch.Clear();
if (thread->shared->write_rate_limiter.get() != nullptr) {
thread->shared->write_rate_limiter->Request(
entries_per_batch_ * (value_size_ + key_size_),
Env::IO_HIGH);
// Set time at which last op finished to Now() to hide latency and
// sleep from rate limiter. Also, do the check once per batch, not
// once per write.
thread->stats.ResetLastOpTime();
}
for (int64_t j = 0; j < entries_per_batch_; j++) { for (int64_t j = 0; j < entries_per_batch_; j++) {
if (thread->shared->write_rate_limiter.get() != nullptr) {
thread->shared->write_rate_limiter->Request(value_size_ + key_size_,
Env::IO_HIGH);
}
int64_t rand_num = key_gens[id]->Next(); int64_t rand_num = key_gens[id]->Next();
GenerateKeyFromInt(rand_num, FLAGS_num, &key); GenerateKeyFromInt(rand_num, FLAGS_num, &key);
if (FLAGS_num_column_families <= 1) { if (FLAGS_num_column_families <= 1) {
@ -3253,16 +3264,13 @@ class Benchmark {
void BGWriter(ThreadState* thread, enum OperationType write_merge) { void BGWriter(ThreadState* thread, enum OperationType write_merge) {
// Special thread that keeps writing until other threads are done. // Special thread that keeps writing until other threads are done.
RandomGenerator gen; RandomGenerator gen;
uint64_t last = FLAGS_env->NowMicros();
int writes_per_second_by_10 = 0;
int num_writes = 0;
int64_t bytes = 0; int64_t bytes = 0;
// --writes_per_second rate limit is enforced per 100 milliseconds std::unique_ptr<RateLimiter> write_rate_limiter;
// intervals to avoid a burst of writes at the start of each second. if (FLAGS_benchmark_write_rate_limit > 0) {
write_rate_limiter.reset(
if (FLAGS_writes_per_second > 0) NewGenericRateLimiter(FLAGS_benchmark_write_rate_limit));
writes_per_second_by_10 = FLAGS_writes_per_second / 10; }
// Don't merge stats from this thread with the readers. // Don't merge stats from this thread with the readers.
thread->stats.SetExcludeFromMerge(); thread->stats.SetExcludeFromMerge();
@ -3296,18 +3304,10 @@ class Benchmark {
bytes += key.size() + value_size_; bytes += key.size() + value_size_;
thread->stats.FinishedOps(&db_, db_.db, 1, kWrite); thread->stats.FinishedOps(&db_, db_.db, 1, kWrite);
++num_writes; if (FLAGS_benchmark_write_rate_limit > 0) {
if (writes_per_second_by_10 && num_writes >= writes_per_second_by_10) { write_rate_limiter->Request(
uint64_t now = FLAGS_env->NowMicros(); entries_per_batch_ * (value_size_ + key_size_),
uint64_t usecs_since_last = now - last; Env::IO_HIGH);
num_writes = 0;
last = now;
if (usecs_since_last < 100000) {
FLAGS_env->SleepForMicroseconds(static_cast<int>(100000 - usecs_since_last));
last = FLAGS_env->NowMicros();
}
} }
} }
thread->stats.AddBytes(bytes); thread->stats.AddBytes(bytes);

@ -37,8 +37,7 @@ if [ ! -z $DB_BENCH_NO_SYNC ]; then
fi fi
num_threads=${NUM_THREADS:-16} num_threads=${NUM_THREADS:-16}
# Only for *whilewriting, *whilemerging mb_written_per_sec=${MB_WRITE_PER_SEC:-0}
writes_per_second=${WRITES_PER_SECOND:-$((10 * K))}
# Only for tests that do range scans # Only for tests that do range scans
num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10} num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10}
cache_size=${CACHE_SIZE:-$((1 * G))} cache_size=${CACHE_SIZE:-$((1 * G))}
@ -67,6 +66,7 @@ const_params="
--level_compaction_dynamic_level_bytes=true \ --level_compaction_dynamic_level_bytes=true \
--bytes_per_sync=$((8 * M)) \ --bytes_per_sync=$((8 * M)) \
--cache_index_and_filter_blocks=0 \ --cache_index_and_filter_blocks=0 \
--benchmark_write_rate_limit=$(( 1024 * 1024 * $mb_written_per_sec )) \
\ \
--hard_rate_limit=3 \ --hard_rate_limit=3 \
--rate_limit_delay_max_milliseconds=1000000 \ --rate_limit_delay_max_milliseconds=1000000 \
@ -231,7 +231,6 @@ function run_readwhile {
--sync=$syncval \ --sync=$syncval \
$params_w \ $params_w \
--threads=$num_threads \ --threads=$num_threads \
--writes_per_second=$writes_per_second \
--merge_operator=\"put\" \ --merge_operator=\"put\" \
--seed=$( date +%s ) \ --seed=$( date +%s ) \
2>&1 | tee -a $output_dir/${out_name}" 2>&1 | tee -a $output_dir/${out_name}"
@ -251,7 +250,6 @@ function run_rangewhile {
--sync=$syncval \ --sync=$syncval \
$params_w \ $params_w \
--threads=$num_threads \ --threads=$num_threads \
--writes_per_second=$writes_per_second \
--merge_operator=\"put\" \ --merge_operator=\"put\" \
--seek_nexts=$num_nexts_per_seek \ --seek_nexts=$num_nexts_per_seek \
--reverse_iterator=$reverse_arg \ --reverse_iterator=$reverse_arg \

@ -23,9 +23,13 @@
# #
# The environment variables are also optional. The variables are: # The environment variables are also optional. The variables are:
# NKEYS - number of key/value pairs to load # NKEYS - number of key/value pairs to load
# NWRITESPERSEC - the writes/second rate limit for the *whilewriting* tests. # BG_MBWRITEPERSEC - write rate limit in MB/second for tests in which
# there is one thread doing writes and stats are
# reported for read threads. "BG" stands for background.
# If this is too large then the non-writer threads can get # If this is too large then the non-writer threads can get
# starved. # starved. This is used for the "readwhile" tests.
# FG_MBWRITEPERSEC - write rate limit in MB/second for tests like overwrite
# where stats are reported for the write threads.
# NSECONDS - number of seconds for which to run each test in steps 2, # NSECONDS - number of seconds for which to run each test in steps 2,
# 3 and 4. There are currently 15 tests in those steps and # 3 and 4. There are currently 15 tests in those steps and
# they are repeated for each entry in list-of-threads so # they are repeated for each entry in list-of-threads so
@ -57,7 +61,10 @@ M=$((1024 * K))
G=$((1024 * M)) G=$((1024 * M))
num_keys=${NKEYS:-$((1 * G))} num_keys=${NKEYS:-$((1 * G))}
wps=${NWRITESPERSEC:-$((10 * K))} # write rate for readwhile... tests
bg_mbwps=${BG_MBWRITEPERSEC:-4}
# write rate for tests other than readwhile, 0 means no limit
fg_mbwps=${FG_MBWRITEPERSEC:-0}
duration=${NSECONDS:-$((60 * 60))} duration=${NSECONDS:-$((60 * 60))}
nps=${RANGE_LIMIT:-10} nps=${RANGE_LIMIT:-10}
vs=${VAL_SIZE:-400} vs=${VAL_SIZE:-400}
@ -178,29 +185,30 @@ done
for num_thr in "${nthreads[@]}" ; do for num_thr in "${nthreads[@]}" ; do
# Test 7: overwrite with sync=0 # Test 7: overwrite with sync=0
env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \ env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \
./tools/benchmark.sh overwrite DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh overwrite
# Test 8: overwrite with sync=1 # Test 8: overwrite with sync=1
env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh overwrite env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \
./tools/benchmark.sh overwrite
# Test 9: random update with sync=0 # Test 9: random update with sync=0
env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \ env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \
./tools/benchmark.sh updaterandom ./tools/benchmark.sh updaterandom
# Test 10: random update with sync=1 # Test 10: random update with sync=1
env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh updaterandom env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh updaterandom
# Test 11: random read while writing # Test 11: random read while writing
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh readwhilewriting DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh readwhilewriting
# Test 12: range scan while writing # Test 12: range scan while writing
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilewriting DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilewriting
# Test 13: reverse range scan while writing # Test 13: reverse range scan while writing
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilewriting DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilewriting
done done
@ -208,22 +216,23 @@ done
for num_thr in "${nthreads[@]}" ; do for num_thr in "${nthreads[@]}" ; do
# Test 14: random merge with sync=0 # Test 14: random merge with sync=0
env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \ env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \
./tools/benchmark.sh mergerandom DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh mergerandom
# Test 15: random merge with sync=1 # Test 15: random merge with sync=1
env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh mergerandom env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \
./tools/benchmark.sh mergerandom
# Test 16: random read while merging # Test 16: random read while merging
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh readwhilemerging DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh readwhilemerging
# Test 17: range scan while merging # Test 17: range scan while merging
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilemerging DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilemerging
# Test 18: reverse range scan while merging # Test 18: reverse range scan while merging
env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilemerging DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilemerging
done done

Loading…
Cancel
Save