From 1ca1562e3565ac3d9ccfeeec2e206a21791f3aa3 Mon Sep 17 00:00:00 2001 From: Mark Callaghan Date: Mon, 21 Mar 2022 17:30:51 -0700 Subject: [PATCH] Make mixgraph easier to use (#9711) Summary: Changes: * improves monitoring by displaying average size of a Put value and average scan length * forces the minimum value size to be 10. Before this it was 0 if you didn't set the distribution parameters. * uses reasonable defaults for the distribution parameters that determine value size and scan length * includes seeks in "reads ... found" message, before this they were missing This is for https://github.com/facebook/rocksdb/issues/9672 Pull Request resolved: https://github.com/facebook/rocksdb/pull/9711 Test Plan: Before this change: ./db_bench --benchmarks=fillseq,mixgraph --mix_get_ratio=50 --mix_put_ratio=25 --mix_seek_ratio=25 --num=100000 --value_k=0.2615 --value_sigma=25.45 --iter_k=2.517 --iter_sigma=14.236 fillseq : 4.289 micros/op 233138 ops/sec; 25.8 MB/s mixgraph : 18.461 micros/op 54166 ops/sec; 755.0 MB/s ( Gets:50164 Puts:24919 Seek:24917 of 50164 in 75081 found) After this change: ./db_bench --benchmarks=fillseq,mixgraph --mix_get_ratio=50 --mix_put_ratio=25 --mix_seek_ratio=25 --num=100000 --value_k=0.2615 --value_sigma=25.45 --iter_k=2.517 --iter_sigma=14.236 fillseq : 3.974 micros/op 251553 ops/sec; 27.8 MB/s mixgraph : 16.722 micros/op 59795 ops/sec; 833.5 MB/s ( Gets:50164 Puts:24919 Seek:24917, reads 75081 in 75081 found, avg size: 36.0 value, 504.9 scan) Reviewed By: jay-zhuang Differential Revision: D35030190 Pulled By: mdcallag fbshipit-source-id: d8f555f28d869f752ddb674a524108884511b151 --- tools/db_bench_tool.cc | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index a405685a2..71070120d 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -1357,19 +1357,23 @@ DEFINE_double(key_dist_b, 0.0, DEFINE_double(value_theta, 0.0, "The parameter 'theta' of Generized Pareto Distribution " "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)"); -DEFINE_double(value_k, 0.0, +// Use reasonable defaults based on the mixgraph paper +DEFINE_double(value_k, 0.2615, "The parameter 'k' of Generized Pareto Distribution " "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)"); -DEFINE_double(value_sigma, 0.0, +// Use reasonable defaults based on the mixgraph paper +DEFINE_double(value_sigma, 25.45, "The parameter 'theta' of Generized Pareto Distribution " "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)"); DEFINE_double(iter_theta, 0.0, "The parameter 'theta' of Generized Pareto Distribution " "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)"); -DEFINE_double(iter_k, 0.0, +// Use reasonable defaults based on the mixgraph paper +DEFINE_double(iter_k, 2.517, "The parameter 'k' of Generized Pareto Distribution " "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)"); -DEFINE_double(iter_sigma, 0.0, +// Use reasonable defaults based on the mixgraph paper +DEFINE_double(iter_sigma, 14.236, "The parameter 'sigma' of Generized Pareto Distribution " "f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)"); DEFINE_double(mix_get_ratio, 1.0, @@ -6026,13 +6030,14 @@ class Benchmark { // needs to decide the ratio between Get, Put, Iterator queries before // starting the benchmark. void MixGraph(ThreadState* thread) { - int64_t read = 0; // including single gets and Next of iterators int64_t gets = 0; int64_t puts = 0; - int64_t found = 0; + int64_t get_found = 0; int64_t seek = 0; int64_t seek_found = 0; int64_t bytes = 0; + double total_scan_length = 0; + double total_val_size = 0; const int64_t default_value_max = 1 * 1024 * 1024; int64_t value_max = default_value_max; int64_t scan_len_max = FLAGS_mix_max_scan_len; @@ -6131,7 +6136,6 @@ class Benchmark { if (query_type == 0) { // the Get query gets++; - read++; if (FLAGS_num_column_families > 1) { s = db_with_cfh->db->Get(read_options_, db_with_cfh->GetCfh(key_rand), key, &pinnable_val); @@ -6143,14 +6147,14 @@ class Benchmark { } if (s.ok()) { - found++; + get_found++; bytes += key.size() + pinnable_val.size(); } else if (!s.IsNotFound()) { fprintf(stderr, "Get returned an error: %s\n", s.ToString().c_str()); abort(); } - if (thread->shared->read_rate_limiter && read % 100 == 0) { + if (thread->shared->read_rate_limiter && (gets + seek) % 100 == 0) { thread->shared->read_rate_limiter->Request(100, Env::IO_HIGH, nullptr /*stats*/); } @@ -6160,11 +6164,13 @@ class Benchmark { puts++; int64_t val_size = ParetoCdfInversion( u, FLAGS_value_theta, FLAGS_value_k, FLAGS_value_sigma); - if (val_size < 0) { + if (val_size < 10) { val_size = 10; } else if (val_size > value_max) { val_size = val_size % value_max; } + total_val_size += val_size; + s = db_with_cfh->db->Put( write_options_, key, gen.Generate(static_cast(val_size))); @@ -6186,7 +6192,6 @@ class Benchmark { if (single_iter != nullptr) { single_iter->Seek(key); seek++; - read++; if (single_iter->Valid() && single_iter->key().compare(key) == 0) { seek_found++; } @@ -6201,6 +6206,7 @@ class Benchmark { bytes += single_iter->key().size() + single_iter->value().size(); single_iter->Next(); assert(single_iter->status().ok()); + total_scan_length++; } } delete single_iter; @@ -6210,9 +6216,12 @@ class Benchmark { } char msg[256]; snprintf(msg, sizeof(msg), - "( Gets:%" PRIu64 " Puts:%" PRIu64 " Seek:%" PRIu64 " of %" PRIu64 - " in %" PRIu64 " found)\n", - gets, puts, seek, found, read); + "( Gets:%" PRIu64 " Puts:%" PRIu64 " Seek:%" PRIu64 + ", reads %" PRIu64 " in %" PRIu64 + " found, " + "avg size: %.1f value, %.1f scan)\n", + gets, puts, seek, get_found + seek_found, gets + seek, + total_val_size / puts, total_scan_length / seek); thread->stats.AddBytes(bytes); thread->stats.AddMessage(msg);