From 4041903ecd97cb7a956c12b0b97971f159e2bc59 Mon Sep 17 00:00:00 2001 From: Mark Callaghan Date: Mon, 4 Jan 2016 12:01:27 -0800 Subject: [PATCH] Enhance db_bench write rate limit Summary: 1) changes tools/{benchmark,run_flash_bench}.sh to optionally use the write rate limit 2) removes code for --writes_per_second and switches the 'background' write rate limit to use --benchmark_write_rate_limit Replaces https://reviews.facebook.net/D49113 Task ID: #9555881 Blame Rev: Test Plan: tools/run_flash_bench.sh Revert Plan: Database Impact: Memcache Impact: Other Notes: EImportant: - begin *PUBLIC* platform impact section - Bugzilla: # - end platform impact - Reviewers: igor Reviewed By: igor Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D52485 --- build_tools/regression_build_test.sh | 6 +-- db/db_bench.cc | 60 ++++++++++++++-------------- tools/benchmark.sh | 6 +-- tools/run_flash_bench.sh | 41 +++++++++++-------- 4 files changed, 60 insertions(+), 53 deletions(-) diff --git a/build_tools/regression_build_test.sh b/build_tools/regression_build_test.sh index ee2d334f0..8ac1ceece 100755 --- a/build_tools/regression_build_test.sh +++ b/build_tools/regression_build_test.sh @@ -243,7 +243,7 @@ make release --bloom_bits=10 \ --num=$((NUM / 4)) \ --reads=$((NUM / 4)) \ - --writes_per_second=1000 \ + --benchmark_write_rate_limit=$(( 110 * 1024 )) \ --write_buffer_size=100000000 \ --cache_size=6442450944 \ --cache_numshardbits=6 \ @@ -329,7 +329,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \ --use_existing_db=1 \ --duration=600 \ --threads=32 \ - --writes_per_second=81920 > ${STAT_FILE}.readwhilewriting_in_ram + --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.readwhilewriting_in_ram # Seekrandomwhilewriting ./db_bench \ @@ -342,7 +342,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \ --use_tailing_iterator=1 \ --duration=600 \ --threads=32 \ - --writes_per_second=81920 > ${STAT_FILE}.seekwhilewriting_in_ram + --benchmark_write_rate_limit=9502720 > 
${STAT_FILE}.seekwhilewriting_in_ram # measure fillseq with bunch of column families ./db_bench \ diff --git a/db/db_bench.cc b/db/db_bench.cc index 7718d7391..a07798b41 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -420,10 +420,6 @@ static class std::shared_ptr<rocksdb::Statistics> dbstats; DEFINE_int64(writes, -1, "Number of write operations to do. If negative, do" " --num reads."); -DEFINE_int32(writes_per_second, 0, "Per-thread rate limit on writes and merges " - " per second. No limit when <= 0. Only for the readwhilewriting " - " and readwhilemerging tests."); - DEFINE_bool(sync, false, "Sync all writes to disk"); DEFINE_bool(disable_data_sync, false, "If true, do not wait until data is" @@ -668,7 +664,8 @@ DEFINE_uint64(rate_limiter_bytes_per_sec, 0, "Set options.rate_limiter value."); DEFINE_uint64( benchmark_write_rate_limit, 0, - "If non-zero, db_bench will rate-limit the writes going into RocksDB"); + "If non-zero, db_bench will rate-limit the writes going into RocksDB. This " + "is the global rate in bytes/second."); DEFINE_int32(max_grandparent_overlap_factor, 10, "Control maximum bytes of " "overlaps in grandparent (i.e., level+2) before we stop building a" @@ -1312,6 +1309,11 @@ class Stats { } } + void ResetLastOpTime() { + // Set to now to avoid latency from calls to SleepForMicroseconds + last_op_finish_ = FLAGS_env->NowMicros(); + } + void FinishedOps(DBWithColumnFamilies* db_with_cfh, DB* db, int64_t num_ops, enum OperationType op_type = kOthers) { if (reporter_agent_) { @@ -1633,7 +1635,8 @@ class Benchmark { (((FLAGS_key_size + FLAGS_value_size * FLAGS_compression_ratio) * num_) / 1048576.0)); - fprintf(stdout, "Writes per second: %d\n", FLAGS_writes_per_second); + fprintf(stdout, "Write rate: %" PRIu64 " bytes/second\n", + FLAGS_benchmark_write_rate_limit); if (FLAGS_enable_numa) { fprintf(stderr, "Running in NUMA enabled mode.\n"); #ifndef NUMA @@ -2845,14 +2848,22 @@ class Benchmark { } } } + size_t id = thread->rand.Next() % num_key_gens; 
DBWithColumnFamilies* db_with_cfh = SelectDBWithCfh(id); batch.Clear(); + + if (thread->shared->write_rate_limiter.get() != nullptr) { + thread->shared->write_rate_limiter->Request( + entries_per_batch_ * (value_size_ + key_size_), + Env::IO_HIGH); + // Set time at which last op finished to Now() to hide latency and + // sleep from rate limiter. Also, do the check once per batch, not + // once per write. + thread->stats.ResetLastOpTime(); + } + for (int64_t j = 0; j < entries_per_batch_; j++) { - if (thread->shared->write_rate_limiter.get() != nullptr) { - thread->shared->write_rate_limiter->Request(value_size_ + key_size_, - Env::IO_HIGH); - } int64_t rand_num = key_gens[id]->Next(); GenerateKeyFromInt(rand_num, FLAGS_num, &key); if (FLAGS_num_column_families <= 1) { @@ -3253,16 +3264,13 @@ class Benchmark { void BGWriter(ThreadState* thread, enum OperationType write_merge) { // Special thread that keeps writing until other threads are done. RandomGenerator gen; - uint64_t last = FLAGS_env->NowMicros(); - int writes_per_second_by_10 = 0; - int num_writes = 0; int64_t bytes = 0; - // --writes_per_second rate limit is enforced per 100 milliseconds - // intervals to avoid a burst of writes at the start of each second. - - if (FLAGS_writes_per_second > 0) - writes_per_second_by_10 = FLAGS_writes_per_second / 10; + std::unique_ptr<RateLimiter> write_rate_limiter; + if (FLAGS_benchmark_write_rate_limit > 0) { + write_rate_limiter.reset( + NewGenericRateLimiter(FLAGS_benchmark_write_rate_limit)); + } // Don't merge stats from this thread with the readers. 
thread->stats.SetExcludeFromMerge(); @@ -3296,18 +3304,10 @@ class Benchmark { bytes += key.size() + value_size_; thread->stats.FinishedOps(&db_, db_.db, 1, kWrite); - ++num_writes; - if (writes_per_second_by_10 && num_writes >= writes_per_second_by_10) { - uint64_t now = FLAGS_env->NowMicros(); - uint64_t usecs_since_last = now - last; - - num_writes = 0; - last = now; - - if (usecs_since_last < 100000) { - FLAGS_env->SleepForMicroseconds(static_cast<int>(100000 - usecs_since_last)); - last = FLAGS_env->NowMicros(); - } + if (FLAGS_benchmark_write_rate_limit > 0) { + write_rate_limiter->Request( + entries_per_batch_ * (value_size_ + key_size_), + Env::IO_HIGH); } } thread->stats.AddBytes(bytes); diff --git a/tools/benchmark.sh b/tools/benchmark.sh index 3c862fd15..b0d1babd9 100755 --- a/tools/benchmark.sh +++ b/tools/benchmark.sh @@ -37,8 +37,7 @@ if [ ! -z $DB_BENCH_NO_SYNC ]; then fi num_threads=${NUM_THREADS:-16} -# Only for *whilewriting, *whilemerging -writes_per_second=${WRITES_PER_SECOND:-$((10 * K))} +mb_written_per_sec=${MB_WRITE_PER_SEC:-0} # Only for tests that do range scans num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10} cache_size=${CACHE_SIZE:-$((1 * G))} @@ -67,6 +66,7 @@ const_params=" --level_compaction_dynamic_level_bytes=true \ --bytes_per_sync=$((8 * M)) \ --cache_index_and_filter_blocks=0 \ + --benchmark_write_rate_limit=$(( 1024 * 1024 * $mb_written_per_sec )) \ \ --hard_rate_limit=3 \ --rate_limit_delay_max_milliseconds=1000000 \ @@ -231,7 +231,6 @@ function run_readwhile { --sync=$syncval \ $params_w \ --threads=$num_threads \ - --writes_per_second=$writes_per_second \ --merge_operator=\"put\" \ --seed=$( date +%s ) \ 2>&1 | tee -a $output_dir/${out_name}" @@ -251,7 +250,6 @@ function run_rangewhile { --sync=$syncval \ $params_w \ --threads=$num_threads \ - --writes_per_second=$writes_per_second \ --merge_operator=\"put\" \ --seek_nexts=$num_nexts_per_seek \ --reverse_iterator=$reverse_arg \ diff --git a/tools/run_flash_bench.sh 
b/tools/run_flash_bench.sh index b80eee6e6..1f59a5ada 100755 --- a/tools/run_flash_bench.sh +++ b/tools/run_flash_bench.sh @@ -23,9 +23,13 @@ # # The environment variables are also optional. The variables are: # NKEYS - number of key/value pairs to load -# NWRITESPERSEC - the writes/second rate limit for the *whilewriting* tests. +# BG_MBWRITEPERSEC - write rate limit in MB/second for tests in which +# there is one thread doing writes and stats are +# reported for read threads. "BG" stands for background. # If this is too large then the non-writer threads can get -# starved. +# starved. This is used for the "readwhile" tests. +# FG_MBWRITEPERSEC - write rate limit in MB/second for tests like overwrite +# where stats are reported for the write threads. # NSECONDS - number of seconds for which to run each test in steps 2, # 3 and 4. There are currently 15 tests in those steps and # they are repeated for each entry in list-of-threads so @@ -57,7 +61,10 @@ M=$((1024 * K)) G=$((1024 * M)) num_keys=${NKEYS:-$((1 * G))} -wps=${NWRITESPERSEC:-$((10 * K))} +# write rate for readwhile... 
tests +bg_mbwps=${BG_MBWRITEPERSEC:-4} +# write rate for tests other than readwhile, 0 means no limit +fg_mbwps=${FG_MBWRITEPERSEC:-0} duration=${NSECONDS:-$((60 * 60))} nps=${RANGE_LIMIT:-10} vs=${VAL_SIZE:-400} @@ -178,29 +185,30 @@ done for num_thr in "${nthreads[@]}" ; do # Test 7: overwrite with sync=0 - env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \ - ./tools/benchmark.sh overwrite + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \ + DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh overwrite # Test 8: overwrite with sync=1 - env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh overwrite + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \ + ./tools/benchmark.sh overwrite # Test 9: random update with sync=0 env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \ - ./tools/benchmark.sh updaterandom + ./tools/benchmark.sh updaterandom # Test 10: random update with sync=1 env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh updaterandom # Test 11: random read while writing - env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh readwhilewriting # Test 12: range scan while writing - env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilewriting # Test 13: reverse range scan while writing - env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilewriting done @@ -208,22 +216,23 @@ done for num_thr in "${nthreads[@]}" ; do # Test 14: random merge with sync=0 
- env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \ - ./tools/benchmark.sh mergerandom + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \ + DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh mergerandom # Test 15: random merge with sync=1 - env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh mergerandom + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \ + ./tools/benchmark.sh mergerandom # Test 16: random read while merging - env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh readwhilemerging # Test 17: range scan while merging - env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilemerging # Test 18: reverse range scan while merging - env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \ + env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \ DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilemerging done