diff --git a/db/db_bench.cc b/db/db_bench.cc
index b39cc40de..6341ac733 100644
--- a/db/db_bench.cc
+++ b/db/db_bench.cc
@@ -71,6 +71,7 @@ DEFINE_string(benchmarks,
               "newiteratorwhilewriting,"
               "seekrandom,"
               "seekrandomwhilewriting,"
+              "seekrandomwhilemerging,"
               "readseq,"
               "readreverse,"
               "compact,"
@@ -130,8 +131,12 @@ DEFINE_string(benchmarks,
               "\treadrandommergerandom -- perform N random read-or-merge "
               "operations. Must be used with merge_operator\n"
               "\tnewiterator -- repeated iterator creation\n"
-              "\tseekrandom -- N random seeks\n"
-              "\tseekrandom -- 1 writer, N threads doing random seeks\n"
+              "\tseekrandom -- N random seeks, call Next seek_nexts times "
+              "per seek\n"
+              "\tseekrandomwhilewriting -- seekrandom and 1 thread doing "
+              "overwrite\n"
+              "\tseekrandomwhilemerging -- seekrandom and 1 thread doing "
+              "merge\n"
               "\tcrc32c -- repeated crc32c of 4K of data\n"
               "\txxhash -- repeated xxHash of 4K of data\n"
               "\tacquireload -- load N*1000 times\n"
@@ -182,7 +187,8 @@ DEFINE_int32(value_size, 100, "Size of each value");
 
 DEFINE_int32(seek_nexts, 0,
              "How many times to call Next() after Seek() in "
-             "fillseekseq and seekrandom");
+             "fillseekseq, seekrandom, seekrandomwhilewriting and "
+             "seekrandomwhilemerging");
 
 DEFINE_bool(reverse_iterator, false,
             "When true use Prev rather than Next for iterators that do "
@@ -1590,6 +1596,9 @@ class Benchmark {
       } else if (name == Slice("seekrandomwhilewriting")) {
         num_threads++;  // Add extra thread for writing
         method = &Benchmark::SeekRandomWhileWriting;
+      } else if (name == Slice("seekrandomwhilemerging")) {
+        num_threads++;  // Add extra thread for merging
+        method = &Benchmark::SeekRandomWhileMerging;
       } else if (name == Slice("readrandomsmall")) {
         reads_ /= 1000;
         method = &Benchmark::ReadRandom;
@@ -2535,6 +2544,7 @@ class Benchmark {
   void ReadRandom(ThreadState* thread) {
     int64_t read = 0;
     int64_t found = 0;
+    int64_t bytes = 0;
     ReadOptions options(FLAGS_verify_checksum, true);
     std::unique_ptr<const char[]> key_guard;
     Slice key = AllocateKey(&key_guard);
@@ -2558,6 +2568,7 @@ class Benchmark {
       }
       if (s.ok()) {
         found++;
+        bytes += key.size() + value.size();
       } else if (!s.IsNotFound()) {
         fprintf(stderr, "Get returned an error: %s\n", s.ToString().c_str());
         abort();
@@ -2569,6 +2580,7 @@ class Benchmark {
     snprintf(msg, sizeof(msg), "(%" PRIu64 " of %" PRIu64 " found)\n",
              found, read);
 
+    thread->stats.AddBytes(bytes);
     thread->stats.AddMessage(msg);
 
     if (FLAGS_perf_level > 0) {
@@ -2640,6 +2652,7 @@ class Benchmark {
   void SeekRandom(ThreadState* thread) {
     int64_t read = 0;
     int64_t found = 0;
+    int64_t bytes = 0;
     ReadOptions options(FLAGS_verify_checksum, true);
     options.tailing = FLAGS_use_tailing_iterator;
 
@@ -2696,6 +2709,7 @@ class Benchmark {
         Slice value = iter_to_use->value();
         memcpy(value_buffer, value.data(),
                std::min(value.size(), sizeof(value_buffer)));
+        bytes += iter_to_use->key().size() + iter_to_use->value().size();
 
         if (!FLAGS_reverse_iterator) {
           iter_to_use->Next();
@@ -2715,6 +2729,7 @@ class Benchmark {
     char msg[100];
     snprintf(msg, sizeof(msg), "(%" PRIu64 " of %" PRIu64 " found)\n",
              found, read);
+    thread->stats.AddBytes(bytes);
     thread->stats.AddMessage(msg);
     if (FLAGS_perf_level > 0) {
       thread->stats.AddMessage(perf_context.ToString());
@@ -2729,6 +2744,14 @@ class Benchmark {
     }
   }
 
+  void SeekRandomWhileMerging(ThreadState* thread) {
+    if (thread->tid > 0) {
+      SeekRandom(thread);
+    } else {
+      BGWriter(thread, kMerge);
+    }
+  }
+
   void DoDelete(ThreadState* thread, bool seq) {
     WriteBatch batch;
     Duration duration(seq ? 0 : FLAGS_duration, num_);
@@ -2784,6 +2807,7 @@ class Benchmark {
     double last = FLAGS_env->NowMicros();
     int writes_per_second_by_10 = 0;
     int num_writes = 0;
+    int64_t bytes = 0;
 
     // --writes_per_second rate limit is enforced per 100 milliseconds
     // intervals to avoid a burst of writes at the start of each second.
@@ -2820,6 +2844,7 @@ class Benchmark {
         fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str());
         exit(1);
       }
+      bytes += key.size() + value_size_;
      thread->stats.FinishedOps(&db_, db_.db, 1);
       ++num_writes;
 
@@ -2836,6 +2861,7 @@ class Benchmark {
         }
       }
     }
+    thread->stats.AddBytes(bytes);
   }
 
   // Given a key K and value V, this puts (K+"0", V), (K+"1", V), (K+"2", V)
@@ -3054,6 +3080,7 @@ class Benchmark {
     RandomGenerator gen;
     std::string value;
     int64_t found = 0;
+    int64_t bytes = 0;
     Duration duration(FLAGS_duration, readwrites_);
 
     std::unique_ptr<const char[]> key_guard;
@@ -3066,6 +3093,7 @@ class Benchmark {
       auto status = db->Get(options, key, &value);
       if (status.ok()) {
         ++found;
+        bytes += key.size() + value.size();
       } else if (!status.IsNotFound()) {
         fprintf(stderr, "Get returned an error: %s\n",
                 status.ToString().c_str());
@@ -3077,11 +3105,13 @@ class Benchmark {
         fprintf(stderr, "put error: %s\n", s.ToString().c_str());
         exit(1);
       }
+      bytes += key.size() + value_size_;
       thread->stats.FinishedOps(nullptr, db, 1);
     }
     char msg[100];
     snprintf(msg, sizeof(msg),
              "( updates:%" PRIu64 " found:%" PRIu64 ")", readwrites_, found);
+    thread->stats.AddBytes(bytes);
     thread->stats.AddMessage(msg);
   }
 
@@ -3093,6 +3123,7 @@ class Benchmark {
     RandomGenerator gen;
     std::string value;
     int64_t found = 0;
+    int64_t bytes = 0;
 
     std::unique_ptr<const char[]> key_guard;
     Slice key = AllocateKey(&key_guard);
@@ -3105,6 +3136,7 @@ class Benchmark {
       auto status = db->Get(options, key, &value);
       if (status.ok()) {
         ++found;
+        bytes += key.size() + value.size();
       } else if (!status.IsNotFound()) {
         fprintf(stderr, "Get returned an error: %s\n",
                 status.ToString().c_str());
@@ -3128,12 +3160,14 @@ class Benchmark {
         fprintf(stderr, "put error: %s\n", s.ToString().c_str());
         exit(1);
       }
+      bytes += key.size() + value.size();
       thread->stats.FinishedOps(nullptr, db, 1);
     }
 
     char msg[100];
     snprintf(msg, sizeof(msg),
              "( updates:%" PRIu64 " found:%" PRIu64 ")", readwrites_, found);
+    thread->stats.AddBytes(bytes);
     thread->stats.AddMessage(msg);
   }
 
@@ -3149,7 +3183,7 @@ class Benchmark {
   // FLAGS_merge_keys.
   void MergeRandom(ThreadState* thread) {
     RandomGenerator gen;
-
+    int64_t bytes = 0;
     std::unique_ptr<const char[]> key_guard;
     Slice key = AllocateKey(&key_guard);
     // The number of iterations is the larger of read_ or write_
@@ -3164,12 +3198,14 @@ class Benchmark {
        fprintf(stderr, "merge error: %s\n", s.ToString().c_str());
         exit(1);
       }
+      bytes += key.size() + value_size_;
       thread->stats.FinishedOps(nullptr, db, 1);
     }
 
     // Print some statistics
     char msg[100];
     snprintf(msg, sizeof(msg), "( updates:%" PRIu64 ")", readwrites_);
+    thread->stats.AddBytes(bytes);
     thread->stats.AddMessage(msg);
   }
 
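The new seekrandomwhilemerging benchmark reuses the pattern of the other *whilewriting/*whilemerging tests: thread 0 becomes the single background writer (here BGWriter with kMerge) while the remaining threads run SeekRandom, so --threads=N yields N seek threads plus one merge thread. A rough invocation sketch (flag values are illustrative, not recommendations):

  ./db_bench --benchmarks=seekrandomwhilemerging --merge_operator=put \
      --threads=16 --seek_nexts=10 --writes_per_second=10000
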
diff --git a/db/internal_stats.cc b/db/internal_stats.cc
index 98d0e0f5c..868c988e9 100644
--- a/db/internal_stats.cc
+++ b/db/internal_stats.cc
@@ -13,6 +13,7 @@
 #endif
 
 #include <inttypes.h>
+#include <algorithm>
 #include <vector>
 
 #include "db/column_family.h"
@@ -408,22 +409,31 @@ void InternalStats::DumpDBStats(std::string* value) {
   //   writes/batches is the average group commit size.
   //
   // The format is the same for interval stats.
-  AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen);
   snprintf(buf, sizeof(buf),
-           "Cumulative writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64
-           " batches, %.1f writes per batch, %.2f GB user ingest, "
-           "stall time: %s\n",
-           write_other + write_self, num_keys_written, write_self,
+           "Cumulative writes: %s writes, %s keys, %s batches, "
+           "%.1f writes per batch, ingest: %.2f GB, %.2f MB/s\n",
+           NumberToHumanString(write_other + write_self).c_str(),
+           NumberToHumanString(num_keys_written).c_str(),
+           NumberToHumanString(write_self).c_str(),
            (write_other + write_self) / static_cast<double>(write_self + 1),
-           user_bytes_written / kGB, human_micros);
+           user_bytes_written / kGB, user_bytes_written / kMB / seconds_up);
   value->append(buf);
   // WAL
   snprintf(buf, sizeof(buf),
-           "Cumulative WAL: %" PRIu64 " writes, %" PRIu64 " syncs, "
-           "%.2f writes per sync, %.2f GB written\n",
-           write_with_wal, wal_synced,
+           "Cumulative WAL: %s writes, %s syncs, "
+           "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n",
+           NumberToHumanString(write_with_wal).c_str(),
+           NumberToHumanString(wal_synced).c_str(),
            write_with_wal / static_cast<double>(wal_synced + 1),
-           wal_bytes / kGB);
+           wal_bytes / kGB, wal_bytes / kMB / seconds_up);
+  value->append(buf);
+  // Stall
+  AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen, true);
+  snprintf(buf, sizeof(buf),
+           "Cumulative stall: %s, %.1f percent\n",
+           human_micros,
+           // 10000 = divide by 1M to get secs, then multiply by 100 for pct
+           write_stall_micros / 10000.0 / std::max(seconds_up, 0.001));
   value->append(buf);
 
   // Interval
@@ -431,19 +441,18 @@ void InternalStats::DumpDBStats(std::string* value) {
   uint64_t interval_write_self = write_self - db_stats_snapshot_.write_self;
   uint64_t interval_num_keys_written =
       num_keys_written - db_stats_snapshot_.num_keys_written;
-  AppendHumanMicros(
-      write_stall_micros - db_stats_snapshot_.write_stall_micros,
-      human_micros, kHumanMicrosLen);
   snprintf(buf, sizeof(buf),
-           "Interval writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64
-           " batches, %.1f writes per batch, %.1f MB user ingest, "
-           "stall time: %s\n",
-           interval_write_other + interval_write_self,
-           interval_num_keys_written, interval_write_self,
+           "Interval writes: %s writes, %s keys, %s batches, "
+           "%.1f writes per batch, ingest: %.2f MB, %.2f MB/s\n",
+           NumberToHumanString(
+               interval_write_other + interval_write_self).c_str(),
+           NumberToHumanString(interval_num_keys_written).c_str(),
+           NumberToHumanString(interval_write_self).c_str(),
            static_cast<double>(interval_write_other + interval_write_self) /
                (interval_write_self + 1),
            (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB,
-           human_micros);
+           (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB /
+               std::max(interval_seconds_up, 0.001));
   value->append(buf);
 
   uint64_t interval_write_with_wal =
@@ -452,13 +461,26 @@ void InternalStats::DumpDBStats(std::string* value) {
   uint64_t interval_wal_synced = wal_synced - db_stats_snapshot_.wal_synced;
   uint64_t interval_wal_bytes = wal_bytes - db_stats_snapshot_.wal_bytes;
 
   snprintf(buf, sizeof(buf),
-           "Interval WAL: %" PRIu64 " writes, %" PRIu64 " syncs, "
-           "%.2f writes per sync, %.2f MB written\n",
-           interval_write_with_wal,
-           interval_wal_synced,
+           "Interval WAL: %s writes, %s syncs, "
+           "%.2f writes per sync, written: %.2f MB, %.2f MB/s\n",
+           NumberToHumanString(interval_write_with_wal).c_str(),
+           NumberToHumanString(interval_wal_synced).c_str(),
            interval_write_with_wal /
               static_cast<double>(interval_wal_synced + 1),
-           interval_wal_bytes / kGB);
+           interval_wal_bytes / kMB,
+           interval_wal_bytes / kMB /
+               std::max(interval_seconds_up, 0.001));
   value->append(buf);
+
+  // Stall
+  AppendHumanMicros(
+      write_stall_micros - db_stats_snapshot_.write_stall_micros,
+      human_micros, kHumanMicrosLen, true);
+  snprintf(buf, sizeof(buf),
+           "Interval stall: %s, %.1f percent\n",
+           human_micros,
+           // 10000 = divide by 1M to get secs, then multiply by 100 for pct
+           (write_stall_micros - db_stats_snapshot_.write_stall_micros) /
+               10000.0 / std::max(interval_seconds_up, 0.001));
+  value->append(buf);
 
   db_stats_snapshot_.seconds_up = seconds_up;
@@ -557,7 +579,7 @@ void InternalStats::DumpCFStats(std::string* value) {
   value->append(buf);
 
   snprintf(buf, sizeof(buf),
-           "Flush(GB): accumulative %.3f, interval %.3f\n",
+           "Flush(GB): cumulative %.3f, interval %.3f\n",
            curr_ingest / kGB, interval_ingest / kGB);
   value->append(buf);
 
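The stall-percent arithmetic above folds two conversions into the single 10000.0 divisor: dividing micros by 1000000 yields seconds, and multiplying the resulting ratio by 100 yields a percentage. For example, 3000000 stall micros over seconds_up = 60 gives 3000000 / 10000.0 / 60 = 5.0 percent. The std::max(..., 0.001) guards (supplied by the new <algorithm> include) prevent division by zero when stats are dumped immediately after startup.
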
diff --git a/tools/benchmark.sh b/tools/benchmark.sh
index 07850069c..2055ec898 100755
--- a/tools/benchmark.sh
+++ b/tools/benchmark.sh
@@ -35,21 +35,24 @@ if [ ! -z $DB_BENCH_NO_SYNC ]; then
   syncval="0";
 fi
 
-num_read_threads=${NUM_READ_THREADS:-16}
-# Only for readwhilewriting, readwhilemerging
-writes_per_second=${WRITES_PER_SEC:-$((80 * K))}
-num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10} # (only for rangescanwhilewriting)
-cache_size=$((1 * G))
+num_threads=${NUM_THREADS:-16}
+# Only for *whilewriting, *whilemerging
+writes_per_second=${WRITES_PER_SECOND:-$((10 * K))}
+# Only for tests that do range scans
+num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10}
+cache_size=${CACHE_SIZE:-$((1 * G))}
 duration=${DURATION:-0}
 num_keys=${NUM_KEYS:-$((1 * G))}
 key_size=20
-value_size=800
+value_size=${VALUE_SIZE:-400}
 
 const_params="
   --db=$DB_DIR \
   --wal_dir=$WAL_DIR \
+  --disable_data_sync=0 \
   \
+  --num=$num_keys \
   --num_levels=6 \
   --key_size=$key_size \
   --value_size=$value_size \
@@ -57,10 +60,10 @@ const_params="
   --cache_size=$cache_size \
   --cache_numshardbits=6 \
   --compression_type=zlib \
-  --min_level_to_compress=2 \
+  --min_level_to_compress=3 \
   --compression_ratio=0.5 \
   \
-  --hard_rate_limit=2 \
+  --hard_rate_limit=3 \
   --rate_limit_delay_max_milliseconds=1000000 \
   --write_buffer_size=$((128 * M)) \
   --max_write_buffer_number=3 \
@@ -69,7 +72,8 @@ const_params="
   \
   --verify_checksum=1 \
   --delete_obsolete_files_period_micros=$((60 * M)) \
-  --max_grandparent_overlap_factor=10 \
+  --max_grandparent_overlap_factor=8 \
+  --max_bytes_for_level_multiplier=8 \
   \
   --statistics=1 \
   --stats_per_interval=1 \
@@ -82,158 +86,173 @@ const_params="
 
 l0_config="
   --level0_file_num_compaction_trigger=4 \
-  --level0_slowdown_writes_trigger=8 \
-  --level0_stop_writes_trigger=12"
+  --level0_slowdown_writes_trigger=12 \
+  --level0_stop_writes_trigger=20"
 
 if [ $duration -gt 0 ]; then
   const_params="$const_params --duration=$duration"
 fi
 
-params_r="$const_params $l0_config --max_background_compactions=4 --max_background_flushes=1"
 params_w="$const_params $l0_config --max_background_compactions=16 --max_background_flushes=16"
 params_bulkload="$const_params --max_background_compactions=16 --max_background_flushes=16 \
    --level0_file_num_compaction_trigger=$((10 * M)) \
    --level0_slowdown_writes_trigger=$((10 * M)) \
    --level0_stop_writes_trigger=$((10 * M))"
 
+function summarize_result {
+  test_out=$1
+  test_name=$2
+  bench_name=$3
+
+  uptime=$( grep ^Uptime\(secs $test_out | tail -1 | awk '{ printf "%.0f", $2 }' )
+  stall_time=$( grep "^Cumulative stall" $test_out | tail -1 | awk '{ print $3 }' )
+  stall_pct=$( grep "^Cumulative stall" $test_out| tail -1  | awk '{ print $5 }' )
+  ops_sec=$( grep ^${bench_name} $test_out | awk '{ print $5 }' )
+  mb_sec=$( grep ^${bench_name} $test_out | awk '{ print $7 }' )
+  lo_wgb=$( grep "^ L0" $test_out | tail -1 | awk '{ print $8 }' )
+  sum_wgb=$( grep "^ Sum" $test_out | tail -1 | awk '{ print $8 }' )
+  wamp=$( echo "scale=1; $sum_wgb / $lo_wgb" | bc )
+  wmb_ps=$( echo "scale=1; ( $sum_wgb * 1024.0 ) / $uptime" | bc )
+  usecs_op=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $3 }' )
+  p50=$( grep "^Percentiles:" $test_out | awk '{ printf "%.1f", $3 }' )
+  p75=$( grep "^Percentiles:" $test_out | awk '{ printf "%.1f", $5 }' )
+  p99=$( grep "^Percentiles:" $test_out | awk '{ printf "%.1f", $7 }' )
+  echo -e "$ops_sec\t$mb_sec\t$lo_wgb\t$sum_wgb\t$wamp\t$wmb_ps\t$usecs_op\t$p50\t$p75\t$p99\t$uptime\t$stall_time\t$stall_pct\t$test_name" \
+    >> $output_dir/report.txt
+}
+
 function run_bulkload {
-  echo "Bulk loading $num_keys random keys into database..."
-  cmd="./db_bench $params_bulkload --benchmarks=fillrandom \
+  echo "Bulk loading $num_keys random keys"
+  cmd="./db_bench --benchmarks=fillrandom \
        --use_existing_db=0 \
-       --num=$num_keys \
        --disable_auto_compactions=1 \
        --sync=0 \
-       --disable_data_sync=0 \
-       --threads=1 2>&1 | tee -a $output_dir/benchmark_bulkload_fillrandom.log"
+       $params_bulkload \
+       --threads=1 \
+       2>&1 | tee -a $output_dir/benchmark_bulkload_fillrandom.log"
   echo $cmd | tee $output_dir/benchmark_bulkload_fillrandom.log
   eval $cmd
+  summarize_result $output_dir/benchmark_bulkload_fillrandom.log bulkload fillrandom
   echo "Compacting..."
-  cmd="./db_bench $params_w --benchmarks=compact \
+  cmd="./db_bench --benchmarks=compact \
        --use_existing_db=1 \
-       --num=$num_keys \
        --disable_auto_compactions=1 \
        --sync=0 \
-       --disable_data_sync=0 \
-       --threads=1 2>&1 | tee -a $output_dir/benchmark_bulkload_compact.log"
+       $params_w \
+       --threads=1 \
+       2>&1 | tee -a $output_dir/benchmark_bulkload_compact.log"
   echo $cmd | tee $output_dir/benchmark_bulkload_compact.log
   eval $cmd
 }
 
 function run_fillseq {
-  echo "Loading $num_keys keys sequentially into database..."
-  cmd="./db_bench $params_w --benchmarks=fillseq \
+  echo "Loading $num_keys keys sequentially"
+  cmd="./db_bench --benchmarks=fillseq \
        --use_existing_db=0 \
-       --num=$num_keys \
        --sync=0 \
-       --disable_data_sync=0 \
-       --threads=1 2>&1 | tee -a $output_dir/benchmark_fillseq.log"
+       $params_w \
+       --threads=1 \
+       2>&1 | tee -a $output_dir/benchmark_fillseq.log"
   echo $cmd | tee $output_dir/benchmark_fillseq.log
   eval $cmd
+  summarize_result $output_dir/benchmark_fillseq.log fillseq fillseq
 }
 
-function run_overwrite {
-  echo "Loading $num_keys keys sequentially into database..."
-  cmd="./db_bench $params_w --benchmarks=overwrite \
+function run_change {
+  operation=$1
+  echo "Do $num_keys random $operation"
+  out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"
+  cmd="./db_bench --benchmarks=$operation \
        --use_existing_db=1 \
-       --num=$num_keys \
-       --sync=0 \
-       --disable_data_sync=0 \
-       --threads=1 2>&1 | tee -a $output_dir/benchmark_overwrite.log"
-  echo $cmd | tee $output_dir/benchmark_overwrite.log
+       --sync=$syncval \
+       $params_w \
+       --threads=$num_threads \
+       --merge_operator=\"put\" \
+       2>&1 | tee -a $output_dir/${out_name}"
+  echo $cmd | tee $output_dir/${out_name}
   eval $cmd
+  summarize_result $output_dir/${out_name} ${operation}.t${num_threads}.s${syncval} $operation
 }
 
 function run_filluniquerandom {
- cmd="./db_bench $params_w --benchmarks=filluniquerandom \ + echo "Loading $num_keys unique keys randomly" + cmd="./db_bench --benchmarks=filluniquerandom \ --use_existing_db=0 \ - --num=$num_keys \ --sync=0 \ - --disable_data_sync=0 \ - --threads=1 2>&1 | tee -a $output_dir/benchmark_filluniquerandom.log" + $params_w \ + --threads=1 \ + 2>&1 | tee -a $output_dir/benchmark_filluniquerandom.log" echo $cmd | tee $output_dir/benchmark_filluniquerandom.log eval $cmd + summarize_result $output_dir/benchmark_filluniquerandom.log filluniquerandom filluniquerandom } function run_readrandom { - echo "Reading $num_keys random keys from database..." - cmd="./db_bench $params_r --benchmarks=readrandom \ - --use_existing_db=1 \ - --num=$num_keys \ - --threads=$num_read_threads \ - --disable_auto_compactions=1 \ - 2>&1 | tee -a $output_dir/benchmark_readrandom.log" - echo $cmd | tee $output_dir/benchmark_readrandom.log - eval $cmd -} - -function run_readwhilewriting { - echo "Reading $num_keys random keys from database while writing.." - cmd="./db_bench $params_r --benchmarks=readwhilewriting \ + echo "Reading $num_keys random keys" + out_name="benchmark_readrandom.t${num_threads}.log" + cmd="./db_bench --benchmarks=readrandom \ --use_existing_db=1 \ - --num=$num_keys \ - --sync=$syncval \ - --disable_data_sync=0 \ - --threads=$num_read_threads \ - --writes_per_second=$writes_per_second \ - 2>&1 | tee -a $output_dir/benchmark_readwhilewriting.log" - echo $cmd | tee $output_dir/benchmark_readwhilewriting.log + $params_w \ + --threads=$num_threads \ + 2>&1 | tee -a $output_dir/${out_name}" + echo $cmd | tee $output_dir/${out_name} eval $cmd + summarize_result $output_dir/${out_name} readrandom.t${num_threads} readrandom } -function run_readwhilemerging { - echo "Reading $num_keys random keys from database while merging.." - cmd="./db_bench $params_r --benchmarks=readwhilemerging \ +function run_readwhile { + operation=$1 + echo "Reading $num_keys random keys while $operation" + out_name="benchmark_readwhile${operation}.t${num_threads}.log" + cmd="./db_bench --benchmarks=readwhile${operation} \ --use_existing_db=1 \ - --num=$num_keys \ --sync=$syncval \ - --disable_data_sync=0 \ - --threads=$num_read_threads \ + $params_w \ + --threads=$num_threads \ --writes_per_second=$writes_per_second \ --merge_operator=\"put\" \ - 2>&1 | tee -a $output_dir/benchmark_readwhilemerging.log" - echo $cmd | tee $output_dir/benchmark_readwhilemerging.log + 2>&1 | tee -a $output_dir/${out_name}" + echo $cmd | tee $output_dir/${out_name} eval $cmd + summarize_result $output_dir/${out_name} readwhile${operation}.t${num_threads} readwhile${operation} } -function run_rangescanwhilewriting { - echo "Range scan $num_keys random keys from database while writing.." 
- cmd="./db_bench $params_r --benchmarks=seekrandomwhilewriting \ +function run_rangewhile { + operation=$1 + full_name=$2 + reverse_arg=$3 + out_name="benchmark_${full_name}.t${num_threads}.log" + echo "Range scan $num_keys random keys while ${operation} for reverse_iter=${reverse_arg}" + cmd="./db_bench --benchmarks=seekrandomwhile${operation} \ --use_existing_db=1 \ - --num=$num_keys \ --sync=$syncval \ - --disable_data_sync=0 \ - --threads=$num_read_threads \ + $params_w \ + --threads=$num_threads \ --writes_per_second=$writes_per_second \ + --merge_operator=\"put\" \ --seek_nexts=$num_nexts_per_seek \ - 2>&1 | tee -a $output_dir/benchmark_rangescanwhilewriting.log" - echo $cmd | tee $output_dir/benchmark_rangescanwhilewriting.log + --reverse_iterator=$reverse_arg \ + 2>&1 | tee -a $output_dir/${out_name}" + echo $cmd | tee $output_dir/${out_name} eval $cmd + summarize_result $output_dir/${out_name} ${full_name}.t${num_threads} seekrandomwhile${operation} } -function run_updaterandom { - echo "Read/Modify/Write $num_keys random keys (not using merge).." - cmd="./db_bench $params_w --benchmarks=updaterandom \ +function run_range { + full_name=$1 + reverse_arg=$2 + out_name="benchmark_${full_name}.t${num_threads}.log" + echo "Range scan $num_keys random keys for reverse_iter=${reverse_arg}" + cmd="./db_bench --benchmarks=seekrandom \ --use_existing_db=1 \ - --num=$num_keys \ - --sync=$syncval \ - --disable_data_sync=0 \ - --threads=$num_read_threads 2>&1 | tee -a $output_dir/benchmark_updaterandom.log" - echo $cmd | tee $output_dir/benchmark_updaterandom.log - eval $cmd -} - -function run_mergerandom { - echo "Read/Modify/Write $num_keys random keys (using merge operator).." - cmd="./db_bench $params_w --benchmarks=mergerandom \ - --use_existing_db=1 \ - --num=$num_keys \ - --sync=$syncval \ - --disable_data_sync=0 \ - --merge_operator=\"put\" \ - --threads=$num_read_threads 2>&1 | tee -a $output_dir/benchmark_mergerandom.log" - echo $cmd | tee $output_dir/benchmark_mergerandom.log + $params_w \ + --threads=$num_threads \ + --seek_nexts=$num_nexts_per_seek \ + --reverse_iterator=$reverse_arg \ + 2>&1 | tee -a $output_dir/${out_name}" + echo $cmd | tee $output_dir/${out_name} eval $cmd + summarize_result $output_dir/${out_name} ${full_name}.t${num_threads} seekrandom } function now() { @@ -241,6 +260,7 @@ function now() { } report="$output_dir/report.txt" +schedule="$output_dir/schedule.txt" echo "===== Benchmark =====" @@ -249,7 +269,7 @@ IFS=',' read -a jobs <<< $1 for job in ${jobs[@]}; do if [ $job != debug ]; then - echo "Start $job at `date`" | tee -a $report + echo "Start $job at `date`" | tee -a $schedule fi start=$(now) @@ -258,21 +278,31 @@ for job in ${jobs[@]}; do elif [ $job = fillseq ]; then run_fillseq elif [ $job = overwrite ]; then - run_overwrite + run_change overwrite + elif [ $job = updaterandom ]; then + run_change updaterandom + elif [ $job = mergerandom ]; then + run_change mergerandom elif [ $job = filluniquerandom ]; then run_filluniquerandom elif [ $job = readrandom ]; then run_readrandom + elif [ $job = fwdrange ]; then + run_range $job false + elif [ $job = revrange ]; then + run_range $job true elif [ $job = readwhilewriting ]; then - run_readwhilewriting + run_readwhile writing elif [ $job = readwhilemerging ]; then - run_readwhilemerging - elif [ $job = rangescanwhilewriting ]; then - run_rangescanwhilewriting - elif [ $job = updaterandom ]; then - run_updaterandom - elif [ $job = mergerandom ]; then - run_mergerandom + run_readwhile merging + elif [ $job = 
diff --git a/tools/run_flash_bench.sh b/tools/run_flash_bench.sh
index 2d2fd2ade..224e1f6f0 100755
--- a/tools/run_flash_bench.sh
+++ b/tools/run_flash_bench.sh
@@ -3,50 +3,220 @@
 # After execution of this script, log files are generated in $output_dir.
 # report.txt provides high-level statistics
 
+# This should be run from the parent of the tools directory. The command line is:
+#   [$env_vars] tools/run_flash_bench.sh [list-of-threads]
+#
+# This runs a sequence of tests in the following sequence:
+#   step 1) load - bulkload, compact, fillseq, overwrite
+#   step 2) read-only for each number of threads
+#   step 3) read-write for each number of threads
+#   step 4) merge for each number of threads
+#
+# The list of threads is optional and when not set is equivalent to "24".
+# Were list-of-threads specified as "1 2 4" then the tests in steps 2, 3 and
+# 4 above would be repeated for 1, 2 and 4 threads. The tests in step 1 are
+# only run for 1 thread.
+
+# Test output is written to $OUTPUT_DIR, currently /tmp/output. The performance
+# summary is in $OUTPUT_DIR/report.txt. There is one file in $OUTPUT_DIR per
+# test and the tests are listed below.
+#
+# The environment variables are also optional. The variables are:
+#   NKEYS - number of key/value pairs to load
+#   NWRITESPERSEC - the writes/second rate limit for the *whilewriting* tests.
+#       If this is too large then the non-writer threads can get starved.
+#   NSECONDS - number of seconds for which to run each test in steps 2,
+#       3 and 4. There are currently 15 tests in those steps and they are
+#       repeated for each entry in list-of-threads so this variable lets
+#       you control the total duration to finish the benchmark.
+#   RANGE_LIMIT - the number of rows to read per range query for tests that
+#       do range queries.
+#   VAL_SIZE - the length of the value in the key/value pairs loaded.
+#       You can estimate the size of the test database from this, NKEYS
+#       and the compression rate (--compression_ratio) set in
+#       tools/benchmark.sh
+#   CACHE_BYTES - the size of the RocksDB block cache in bytes
+#   DATA_DIR - directory in which to create database files
+#   LOG_DIR - directory in which to create WAL files, may be the same as
+#       DATA_DIR
+#   DO_SETUP - when set to 0 then a backup of the database is copied from
+#       $DATA_DIR.bak to $DATA_DIR and the load tests from step 1 are
+#       skipped. The WAL directory is also copied from a backup if
+#       DATA_DIR != LOG_DIR. This allows tests from steps 2, 3, 4 to be
+#       repeated faster.
+#   SAVE_SETUP - saves a copy of the database at the end of step 1 to
+#       $DATA_DIR.bak. When LOG_DIR != DATA_DIR then it is copied to
+#       $LOG_DIR.bak.
+
 # Size constants
 K=1024
 M=$((1024 * K))
 G=$((1024 * M))
 
-n=$((1 * G))
-wps=$((80 * K))
-duration=$((12 * 60 * 60))
-num_read_threads=24
+num_keys=${NKEYS:-$((1 * G))}
+wps=${NWRITESPERSEC:-$((10 * K))}
+duration=${NSECONDS:-$((60 * 60))}
+nps=${RANGE_LIMIT:-10}
+vs=${VAL_SIZE:-400}
+cs=${CACHE_BYTES:-$(( 1 * G ))}
+
+# If no command line arguments then run for 24 threads.
+if [[ $# -eq 0 ]]; then
+  nthreads=( 24 )
+else
+  nthreads=( "$@" )
+fi
+
+for num_thr in "${nthreads[@]}" ; do
+  echo Will run for $num_thr threads
+done
 
 # Update these parameters before execution !!!
-db_dir="/tmp/rocksdb/"
-wal_dir="/tmp/rocksdb/"
+db_dir=${DATA_DIR:-"/tmp/rocksdb/"}
+wal_dir=${LOG_DIR:-"/tmp/rocksdb/"}
+
+do_setup=${DO_SETUP:-1}
+save_setup=${SAVE_SETUP:-0}
+
 output_dir="/tmp/output"
 
+ARGS="\
+OUTPUT_DIR=$output_dir \
+NUM_KEYS=$num_keys \
+DB_DIR=$db_dir \
+WAL_DIR=$wal_dir \
+VALUE_SIZE=$vs \
+CACHE_SIZE=$cs"
+
+mkdir -p $output_dir
+echo -e "ops/sec\tmb/sec\tL0_GB\tSum_GB\tW-Amp\tW-MB/s\tusec/op\tp50\tp75\tp99\tUptime\tStall-time\tStall%\tTest" \
+    > $output_dir/report.txt
+
+# Notes on test sequence:
+#   step 1) Setup database via sequential fill followed by overwrite to fragment it.
+#       Done without setting DURATION to make sure that overwrite does $num_keys writes
+#   step 2) read-only tests for all levels of concurrency requested
+#   step 3) non read-only tests for all levels of concurrency requested
+#   step 4) merge tests for all levels of concurrency requested. These must come last.
+
+###### Setup the database
+
+if [[ $do_setup != 0 ]]; then
+  echo Doing setup
+
+  # Test 1: bulk load
+  env $ARGS ./tools/benchmark.sh bulkload
+
+  # Test 2: sequential fill
+  env $ARGS ./tools/benchmark.sh fillseq
+
+  # Test 3: single-threaded overwrite
+  env $ARGS NUM_THREADS=1 DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh overwrite
+
+else
+  echo Restoring from backup
+
+  rm -rf $db_dir
+
+  if [ ! -d ${db_dir}.bak ]; then
+    echo Database backup does not exist at ${db_dir}.bak
+    exit -1
+  fi
+
+  echo Restore database from ${db_dir}.bak
+  cp -p -r ${db_dir}.bak $db_dir
+
+  if [[ $db_dir != $wal_dir ]]; then
+    rm -rf $wal_dir
+
+    if [ ! -d ${wal_dir}.bak ]; then
+      echo WAL backup does not exist at ${wal_dir}.bak
+      exit -1
+    fi
+
+    echo Restore WAL from ${wal_dir}.bak
+    cp -p -r ${wal_dir}.bak $wal_dir
+  fi
+fi
+
+if [[ $save_setup != 0 ]]; then
+  echo Save database to ${db_dir}.bak
+  cp -p -r $db_dir ${db_dir}.bak
+
+  if [[ $db_dir != $wal_dir ]]; then
+    echo Save WAL to ${wal_dir}.bak
+    cp -p -r $wal_dir ${wal_dir}.bak
+  fi
+fi
+
+###### Read-only tests
+
+for num_thr in "${nthreads[@]}" ; do
+  # Test 4: random read
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh readrandom
+
+  # Test 5: random range scans
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr NUM_NEXTS_PER_SEEK=$nps \
+    ./tools/benchmark.sh fwdrange
+
+  # Test 6: random reverse range scans
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr NUM_NEXTS_PER_SEEK=$nps \
+    ./tools/benchmark.sh revrange
+done
+
+###### Non read-only tests
+
+for num_thr in "${nthreads[@]}" ; do
+  # Test 7: overwrite with sync=0
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \
+    ./tools/benchmark.sh overwrite
+
+  # Test 8: overwrite with sync=1
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh overwrite
+
+  # Test 9: random update with sync=0
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \
+    ./tools/benchmark.sh updaterandom
+
+  # Test 10: random update with sync=1
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh updaterandom
+
+  # Test 11: random read while writing
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
+    ./tools/benchmark.sh readwhilewriting
+
+  # Test 12: range scan while writing
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
+    NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilewriting
+
+  # Test 13: reverse range scan while writing
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
+    NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilewriting
+done
 
-# Test 1: bulk load
-OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
-  ./benchmark.sh bulkload
+###### Merge tests
 
-# Test 2: sequential fill
-OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
-  ./benchmark.sh fillseq
+for num_thr in "${nthreads[@]}" ; do
+  # Test 14: random merge with sync=0
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \
+    ./tools/benchmark.sh mergerandom
 
-# Test 3: overwrite
-OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
-  ./benchmark.sh overwrite
+  # Test 15: random merge with sync=1
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh mergerandom
 
-# Prepare: populate DB with random data
-OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
-  ./benchmark.sh filluniquerandom
+  # Test 16: random read while merging
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
+    ./tools/benchmark.sh readwhilemerging
 
-# Test 4: random read
-OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
-  DURATION=$duration NUM_READ_THREADS=$num_read_threads \
-  ./benchmark.sh readrandom
+  # Test 17: range scan while merging
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
+    NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilemerging
 
-# Test 5: random read while writing
-OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
-  DURATION=$duration NUM_READ_THREADS=$num_read_threads WRITES_PER_SECOND=$wps \
-  ./benchmark.sh readwhilewriting
+  # Test 18: reverse range scan while merging
+  env $ARGS DURATION=$duration NUM_THREADS=$num_thr WRITES_PER_SECOND=$wps \
+    NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilemerging
+done
 
-# Test 6: random seek + next()'s while writing
-OUTPUT_DIR=$output_dir NUM_KEYS=$n DB_DIR=$db_dir WAL_DIR=$wal_dir \
-  DURATION=$duration NUM_READ_THREADS=$num_read_threads WRITES_PER_SECOND=$wps \
-  NUM_NEXTS_PER_SEEK=10 \
-  ./benchmark.sh rangescanwhilewriting
+cat $output_dir/report.txt
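For illustration, a shorter run than the defaults (1G keys, one hour per test, 24 threads) could use the environment variables documented in the header:

  NKEYS=$((100 * 1024 * 1024)) NSECONDS=300 DATA_DIR=/data/rocksdb \
      LOG_DIR=/data/rocksdb tools/run_flash_bench.sh 1 8 16

This repeats the read-only, read-write and merge steps for 1, 8 and 16 threads, runs the load step once single-threaded, and leaves the combined summary in /tmp/output/report.txt.
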
diff --git a/util/logging.cc b/util/logging.cc
index c8cd1217e..8917d099a 100644
--- a/util/logging.cc
+++ b/util/logging.cc
@@ -29,16 +29,17 @@ namespace rocksdb {
 // for micros >= 10 sec, print "XX sec".
 // for micros <= 1 hour, print Y:X M:S".
 // for micros > 1 hour, print Z:Y:X H:M:S".
-int AppendHumanMicros(uint64_t micros, char* output, int len) {
-  if (micros < 10000) {
+int AppendHumanMicros(uint64_t micros, char* output, int len,
+                      bool fixed_format) {
+  if (micros < 10000 && !fixed_format) {
     return snprintf(output, len, "%" PRIu64 " us", micros);
-  } else if (micros < 10000000) {
+  } else if (micros < 10000000 && !fixed_format) {
     return snprintf(output, len, "%.3lf ms",
                     static_cast<double>(micros) / 1000);
-  } else if (micros < 1000000l * 60) {
+  } else if (micros < 1000000l * 60 && !fixed_format) {
     return snprintf(output, len, "%.3lf sec",
                     static_cast<double>(micros) / 1000000);
-  } else if (micros < 1000000ll * 60 * 60) {
+  } else if (micros < 1000000ll * 60 * 60 && !fixed_format) {
     return snprintf(output, len, "%02" PRIu64 ":%05.3f M:S",
                     micros / 1000000 / 60,
                     static_cast<double>(micros % 60000000) / 1000000);
diff --git a/util/logging.h b/util/logging.h
index 63c88debf..10801bb88 100644
--- a/util/logging.h
+++ b/util/logging.h
@@ -21,7 +21,8 @@ namespace rocksdb {
 class Slice;
 
 // Append a human-readable time in micros.
-int AppendHumanMicros(uint64_t micros, char* output, int len);
+int AppendHumanMicros(uint64_t micros, char* output, int len,
+                      bool fixed_format);
 
 // Append a human-readable size in bytes
 int AppendHumanBytes(uint64_t bytes, char* output, int len);
diff --git a/util/thread_status_impl.cc b/util/thread_status_impl.cc
index 6d0778b8e..4725cbe49 100644
--- a/util/thread_status_impl.cc
+++ b/util/thread_status_impl.cc
@@ -40,7 +40,7 @@ const std::string ThreadStatus::MicrosToString(uint64_t micros) {
   }
   const int kBufferLen = 100;
   char buffer[kBufferLen];
-  AppendHumanMicros(micros, buffer, kBufferLen);
+  AppendHumanMicros(micros, buffer, kBufferLen, false);
   return std::string(buffer);
 }
 
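Passing fixed_format=true makes AppendHumanMicros skip the adaptive us/ms/sec branches, so the stall lines in the DB stats always use the same fixed-width layout and stay easy to grep/awk in summarize_result, while ThreadStatus::MicrosToString passes false to keep its human-friendly output. A minimal sketch of the two modes (buffer size arbitrary; the fixed-width branch itself is outside this hunk):

  char buf[100];
  AppendHumanMicros(1500, buf, sizeof(buf), false);  // adaptive: "1500 us"
  AppendHumanMicros(1500, buf, sizeof(buf), true);   // falls through to the
                                                     // final fixed-width branch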