From 1560bb913eb7ab5b4ca77989f84372d34d6acf1b Mon Sep 17 00:00:00 2001
From: Igor Canadi <icanadi@fb.com>
Date: Fri, 7 Feb 2014 09:47:47 -0800
Subject: [PATCH] Readrandom with tailing iterator

Summary:
Added an option for the readrandom benchmark to run with a tailing iterator instead of Get. The benefit of a tailing iterator is that it doesn't require locking the DB mutex on access.

I also have some results from running on my machine. The results depend heavily on the number of cache shards. With our current benchmark setting of 4 table cache shards and 6 block cache shards, I don't see much improvement from using the tailing iterator. In that case, we're probably seeing cache mutex contention.

Here are the results for different numbers of shards:

    cache shards       tailing iterator        get
       6                      1.38M           1.16M
      10                      1.58M           1.15M

As soon as we get rid of cache mutex contention, we see big improvements from using the tailing iterator vs. an ordinary Get.

Test Plan: ran regression test

Reviewers: dhruba, haobo, ljin, kailiu, sding

Reviewed By: haobo

CC: tnovak

Differential Revision: https://reviews.facebook.net/D15867
---
 build_tools/regression_build_test.sh | 22 ++++++++++++++++++++++
 db/db_bench.cc                       | 18 ++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/build_tools/regression_build_test.sh b/build_tools/regression_build_test.sh
index d38b67c3c..58766f5df 100755
--- a/build_tools/regression_build_test.sh
+++ b/build_tools/regression_build_test.sh
@@ -117,6 +117,27 @@ make release
     --sync=0 \
     --threads=16 > ${STAT_FILE}.readrandom
 
+# measure readrandom with 6GB block cache and tailing iterator
+./db_bench \
+    --benchmarks=readrandom \
+    --db=$DATA_DIR \
+    --use_existing_db=1 \
+    --bloom_bits=10 \
+    --num=$NUM \
+    --reads=$((NUM / 5)) \
+    --cache_size=6442450944 \
+    --cache_numshardbits=6 \
+    --table_cache_numshardbits=4 \
+    --open_files=55000 \
+    --disable_seek_compaction=1 \
+    --use_tailing_iterator=1 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_data_sync=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=16 > ${STAT_FILE}.readrandomtailing
+
 # measure readrandom with 100MB block cache
 ./db_bench \
     --benchmarks=readrandom \
@@ -300,6 +321,7 @@ function send_benchmark_to_ods {
 send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite
 send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq
 send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom
+send_benchmark_to_ods readrandom readrandom_tailing $STAT_FILE.readrandomtailing
 send_benchmark_to_ods readrandom readrandom_smallblockcache $STAT_FILE.readrandomsmallblockcache
 send_benchmark_to_ods readrandom readrandom_memtable_sst $STAT_FILE.readrandom_mem_sst
 send_benchmark_to_ods readrandom readrandom_fillunique_random $STAT_FILE.readrandom_filluniquerandom
diff --git a/db/db_bench.cc b/db/db_bench.cc
index bdf842375..19938e0c1 100644
--- a/db/db_bench.cc
+++ b/db/db_bench.cc
@@ -447,6 +447,9 @@ static auto FLAGS_compaction_fadvice_e =
 DEFINE_bool(use_multiget, false,
             "Use multiget to access a series of keys instead of get");
 
+DEFINE_bool(use_tailing_iterator, false,
+            "Use tailing iterator to access a series of keys instead of get");
+
 DEFINE_int64(keys_per_multiget, 90, "If use_multiget is true, determines number"
              " of keys to group per call Arbitrary default is good because it"
              " agrees with readwritepercent");
@@ -1729,6 +1732,21 @@ class Benchmark {
         thread->stats.FinishedSingleOp(db_);
         keys_left -= num_keys;
       }
+    } else if (FLAGS_use_tailing_iterator) {  // use tailing iterator for gets
+      options.tailing = true;
+      Iterator* iter = db_->NewIterator(options);
+      while (!duration.Done(1)) {
+        const long long k = thread->rand.Next() % FLAGS_num;
+        unique_ptr<char[]> key = GenerateKeyFromInt(k);
+
+        iter->Seek(key.get());
+        if (iter->Valid() && iter->key().compare(Slice(key.get())) == 0) {
+          ++found;
+        }
+
+        thread->stats.FinishedSingleOp(db_);
+      }
+      delete iter;
     } else {    // Regular case. Do one "get" at a time Get
       Iterator* iter = db_->NewIterator(options);
       std::string value;