From 1560bb913eb7ab5b4ca77989f84372d34d6acf1b Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Fri, 7 Feb 2014 09:47:47 -0800 Subject: [PATCH] Readrandom with tailing iterator Summary: Added an option for the readrandom benchmark to run with a tailing iterator instead of Get. The benefit of a tailing iterator is that it doesn't require locking the DB mutex on access. I also have some results from running on my machine. The results depend heavily on the number of cache shards. With our current benchmark setting of 4 table cache shards and 6 block cache shards, I don't see much improvement from using the tailing iterator. In that case, we're probably seeing cache mutex contention. Here are the results for different numbers of cache shards (cache shards | tailing iterator | get): 6 | 1.38M | 1.16M; 10 | 1.58M | 1.15M. As soon as we get rid of cache mutex contention, we see big improvements from using the tailing iterator vs. an ordinary get. Test Plan: ran regression test Reviewers: dhruba, haobo, ljin, kailiu, sding Reviewed By: haobo CC: tnovak Differential Revision: https://reviews.facebook.net/D15867 --- build_tools/regression_build_test.sh | 22 ++++++++++++++++++++++ db/db_bench.cc | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/build_tools/regression_build_test.sh b/build_tools/regression_build_test.sh index d38b67c3c..58766f5df 100755 --- a/build_tools/regression_build_test.sh +++ b/build_tools/regression_build_test.sh @@ -117,6 +117,27 @@ make release --sync=0 \ --threads=16 > ${STAT_FILE}.readrandom +# measure readrandom with 6GB block cache and tailing iterator +./db_bench \ + --benchmarks=readrandom \ + --db=$DATA_DIR \ + --use_existing_db=1 \ + --bloom_bits=10 \ + --num=$NUM \ + --reads=$((NUM / 5)) \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --table_cache_numshardbits=4 \ + --open_files=55000 \ + --disable_seek_compaction=1 \ + --use_tailing_iterator=1 \ + --statistics=1 \ + --histogram=1 \ + --disable_data_sync=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=16 > 
${STAT_FILE}.readrandomtailing + # measure readrandom with 100MB block cache ./db_bench \ --benchmarks=readrandom \ @@ -300,6 +321,7 @@ function send_benchmark_to_ods { send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom +send_benchmark_to_ods readrandom readrandom_tailing $STAT_FILE.readrandomtailing send_benchmark_to_ods readrandom readrandom_smallblockcache $STAT_FILE.readrandomsmallblockcache send_benchmark_to_ods readrandom readrandom_memtable_sst $STAT_FILE.readrandom_mem_sst send_benchmark_to_ods readrandom readrandom_fillunique_random $STAT_FILE.readrandom_filluniquerandom diff --git a/db/db_bench.cc b/db/db_bench.cc index bdf842375..19938e0c1 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -447,6 +447,9 @@ static auto FLAGS_compaction_fadvice_e = DEFINE_bool(use_multiget, false, "Use multiget to access a series of keys instead of get"); +DEFINE_bool(use_tailing_iterator, false, + "Use tailing iterator to access a series of keys instead of get"); + DEFINE_int64(keys_per_multiget, 90, "If use_multiget is true, determines number" " of keys to group per call Arbitrary default is good because it" " agrees with readwritepercent"); @@ -1729,6 +1732,21 @@ class Benchmark { thread->stats.FinishedSingleOp(db_); keys_left -= num_keys; } + } else if (FLAGS_use_tailing_iterator) { // use tailing iterator for gets + options.tailing = true; + Iterator* iter = db_->NewIterator(options); + while (!duration.Done(1)) { + const long long k = thread->rand.Next() % FLAGS_num; + unique_ptr key = GenerateKeyFromInt(k); + + iter->Seek(key.get()); + if (iter->Valid() && iter->key().compare(Slice(key.get())) == 0) { + ++found; + } + + thread->stats.FinishedSingleOp(db_); + } + delete iter; } else { // Regular case. Do one "get" at a time Get Iterator* iter = db_->NewIterator(options); std::string value;