From 792762c42c231b037a6dc402511c5e591c9878e0 Mon Sep 17 00:00:00 2001
From: Yi Wu <yiwu@fb.com>
Date: Mon, 18 Apr 2016 09:42:50 -0700
Subject: [PATCH] Split db_test.cc

Summary: Split db_test.cc into several files. Moving several helper functions into DBTestBase.

Test Plan: make check

Reviewers: sdong, yhchiang, IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: dhruba, andrewkr, kradhakrishnan, yhchiang, leveldb, sdong

Differential Revision: https://reviews.facebook.net/D56715
---
 CMakeLists.txt                     |     3 +
 Makefile                           |    12 +
 db/db_block_cache_test.cc          |   221 +-
 db/db_bloom_filter_test.cc         |  1047 +++
 db/db_compaction_test.cc           |    28 -
 db/db_iterator_test.cc             |  1319 +++
 db/db_sst_test.cc                  |  1300 +++
 db/db_test.cc                      | 13031 +++++++++------------------
 db/db_test2.cc                     |     5 -
 db/db_test_util.cc                 |    30 +
 db/db_test_util.h                  |    37 +
 db/db_universal_compaction_test.cc |     5 -
 db/db_wal_test.cc                  |   628 ++
 src.mk                             |     6 +-
 14 files changed, 8844 insertions(+), 8828 deletions(-)
 create mode 100644 db/db_bloom_filter_test.cc
 create mode 100644 db/db_iterator_test.cc
 create mode 100644 db/db_sst_test.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2490d8208..0de146867 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -342,6 +342,9 @@ set(TESTS
         db/db_test.cc
         db/db_test2.cc
         db/db_block_cache_test.cc
+        db/db_bloom_filter_test.cc
+        db/db_iterator_test.cc
+        db/db_sst_test.cc
         db/db_universal_compaction_test.cc
         db/db_wal_test.cc
         db/dbformat_test.cc
diff --git a/Makefile b/Makefile
index 8325ba36a..cd5e3a5dd 100644
--- a/Makefile
+++ b/Makefile
@@ -258,12 +258,15 @@ TESTS = \
 	db_test \
 	db_test2 \
 	db_block_cache_test \
+	db_bloom_filter_test \
 	db_iter_test \
 	db_log_iter_test \
 	db_compaction_filter_test \
 	db_compaction_test \
 	db_dynamic_level_test \
 	db_inplace_update_test \
+	db_iterator_test \
+	db_sst_test \
 	db_tailing_iter_test \
 	db_universal_compaction_test \
 	db_wal_test \
@@ -872,6 +875,9 @@ db_test2: db/db_test2.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
 db_block_cache_test: db/db_block_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(AM_LINK)
 
+db_bloom_filter_test: db/db_bloom_filter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(AM_LINK)
+
 db_log_iter_test: db/db_log_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(AM_LINK)
 
@@ -887,6 +893,12 @@ db_dynamic_level_test: db/db_dynamic_level_test.o db/db_test_util.o $(LIBOBJECTS
 db_inplace_update_test: db/db_inplace_update_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(AM_LINK)
 
+db_iterator_test: db/db_iterator_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(AM_LINK)
+
+db_sst_test: db/db_sst_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(AM_LINK)
+
 db_tailing_iter_test: db/db_tailing_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(AM_LINK)
 
diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc
index 939cf44e0..1e2831e5b 100644
--- a/db/db_block_cache_test.cc
+++ b/db/db_block_cache_test.cc
@@ -12,11 +12,6 @@
 
 namespace rocksdb {
 
-static uint64_t TestGetTickerCount(const Options& options,
-                                   Tickers ticker_type) {
-  return options.statistics->getTickerCount(ticker_type);
-}
-
 class DBBlockCacheTest : public DBTestBase {
  private:
   size_t miss_count_ = 0;
@@ -229,6 +224,222 @@ TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) {
   delete iter;
   iter = nullptr;
 }
+
+// Make sure that when options.block_cache is set, after a new table is
+// created its index/filter blocks are added to block cache.
+TEST_F(DBBlockCacheTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  options.statistics = rocksdb::CreateDBStatistics();
+  BlockBasedTableOptions table_options;
+  table_options.cache_index_and_filter_blocks = true;
+  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
+  options.table_factory.reset(new BlockBasedTableFactory(table_options));
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  ASSERT_OK(Put(1, "key", "val"));
+  // Create a new table.
+  ASSERT_OK(Flush(1));
+
+  // index/filter blocks added to block cache right after table creation.
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(2, /* only index/filter were added */
+            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
+  uint64_t int_num;
+  ASSERT_TRUE(
+      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
+  ASSERT_EQ(int_num, 0U);
+
+  // Make sure filter block is in cache.
+  std::string value;
+  ReadOptions ropt;
+  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
+
+  // Miss count should remain the same.
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+
+  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+
+  // Make sure index block is in cache.
+  auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
+  value = Get(1, "key");
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(index_block_hit + 1,
+            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+
+  value = Get(1, "key");
+  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(index_block_hit + 2,
+            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+}
+
+TEST_F(DBBlockCacheTest, ParanoidFileChecks) {
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  options.statistics = rocksdb::CreateDBStatistics();
+  options.level0_file_num_compaction_trigger = 2;
+  options.paranoid_file_checks = true;
+  BlockBasedTableOptions table_options;
+  table_options.cache_index_and_filter_blocks = false;
+  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
+  options.table_factory.reset(new BlockBasedTableFactory(table_options));
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  ASSERT_OK(Put(1, "1_key", "val"));
+  ASSERT_OK(Put(1, "9_key", "val"));
+  // Create a new table.
+  ASSERT_OK(Flush(1));
+  ASSERT_EQ(1, /* read and cache data block */
+            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+  ASSERT_OK(Put(1, "1_key2", "val2"));
+  ASSERT_OK(Put(1, "9_key2", "val2"));
+  // Create a new SST file. This will further trigger a compaction
+  // and generate another file.
+  ASSERT_OK(Flush(1));
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ(3, /* Totally 3 files created up to now */
+            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+  // After disabling options.paranoid_file_checks. NO further block
+  // is added after generating a new file.
+  ASSERT_OK(
+      dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}}));
+
+  ASSERT_OK(Put(1, "1_key3", "val3"));
+  ASSERT_OK(Put(1, "9_key3", "val3"));
+  ASSERT_OK(Flush(1));
+  ASSERT_OK(Put(1, "1_key4", "val4"));
+  ASSERT_OK(Put(1, "9_key4", "val4"));
+  ASSERT_OK(Flush(1));
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ(3, /* Totally 3 files created up to now */
+            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+}
+
+TEST_F(DBBlockCacheTest, CompressedCache) {
+  if (!Snappy_Supported()) {
+    return;
+  }
+  int num_iter = 80;
+
+  // Run this test three iterations.
+  // Iteration 1: only a uncompressed block cache
+  // Iteration 2: only a compressed block cache
+  // Iteration 3: both block cache and compressed cache
+  // Iteration 4: both block cache and compressed cache, but DB is not
+  // compressed
+  for (int iter = 0; iter < 4; iter++) {
+    Options options = CurrentOptions();
+    options.write_buffer_size = 64 * 1024;  // small write buffer
+    options.statistics = rocksdb::CreateDBStatistics();
+
+    BlockBasedTableOptions table_options;
+    switch (iter) {
+      case 0:
+        // only uncompressed block cache
+        table_options.block_cache = NewLRUCache(8 * 1024);
+        table_options.block_cache_compressed = nullptr;
+        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+        break;
+      case 1:
+        // no block cache, only compressed cache
+        table_options.no_block_cache = true;
+        table_options.block_cache = nullptr;
+        table_options.block_cache_compressed = NewLRUCache(8 * 1024);
+        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+        break;
+      case 2:
+        // both compressed and uncompressed block cache
+        table_options.block_cache = NewLRUCache(1024);
+        table_options.block_cache_compressed = NewLRUCache(8 * 1024);
+        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+        break;
+      case 3:
+        // both block cache and compressed cache, but DB is not compressed
+        // also, make block cache sizes bigger, to trigger block cache hits
+        table_options.block_cache = NewLRUCache(1024 * 1024);
+        table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024);
+        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+        options.compression = kNoCompression;
+        break;
+      default:
+        ASSERT_TRUE(false);
+    }
+    CreateAndReopenWithCF({"pikachu"}, options);
+    // default column family doesn't have block cache
+    Options no_block_cache_opts;
+    no_block_cache_opts.statistics = options.statistics;
+    no_block_cache_opts = CurrentOptions(no_block_cache_opts);
+    BlockBasedTableOptions table_options_no_bc;
+    table_options_no_bc.no_block_cache = true;
+    no_block_cache_opts.table_factory.reset(
+        NewBlockBasedTableFactory(table_options_no_bc));
+    ReopenWithColumnFamilies(
+        {"default", "pikachu"},
+        std::vector<Options>({no_block_cache_opts, options}));
+
+    Random rnd(301);
+
+    // Write 8MB (80 values, each 100K)
+    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
+    std::vector<std::string> values;
+    std::string str;
+    for (int i = 0; i < num_iter; i++) {
+      if (i % 4 == 0) {  // high compression ratio
+        str = RandomString(&rnd, 1000);
+      }
+      values.push_back(str);
+      ASSERT_OK(Put(1, Key(i), values[i]));
+    }
+
+    // flush all data from memtable so that reads are from block cache
+    ASSERT_OK(Flush(1));
+
+    for (int i = 0; i < num_iter; i++) {
+      ASSERT_EQ(Get(1, Key(i)), values[i]);
+    }
+
+    // check that we triggered the appropriate code paths in the cache
+    switch (iter) {
+      case 0:
+        // only uncompressed block cache
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
+        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
+        break;
+      case 1:
+        // no block cache, only compressed cache
+        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
+        break;
+      case 2:
+        // both compressed and uncompressed block cache
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
+        break;
+      case 3:
+        // both compressed and uncompressed block cache
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_HIT), 0);
+        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
+        // compressed doesn't have any hits since blocks are not compressed on
+        // storage
+        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT), 0);
+        break;
+      default:
+        ASSERT_TRUE(false);
+    }
+
+    options.create_if_missing = true;
+    DestroyAndReopen(options);
+  }
+}
+
 #endif
 
 }  // namespace rocksdb
diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc
new file mode 100644
index 000000000..b9a86c31f
--- /dev/null
+++ b/db/db_bloom_filter_test.cc
@@ -0,0 +1,1047 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/db_test_util.h"
+#include "port/stack_trace.h"
+#include "rocksdb/perf_context.h"
+
+namespace rocksdb {
+
+// DB tests related to bloom filter.
+
+class DBBloomFilterTest : public DBTestBase {
+ public:
+  DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {}
+};
+
+// KeyMayExist can lead to a few false positives, but not false negatives.
+// To make test deterministic, use a much larger number of bits per key-20 than
+// bits in the key, so that false positives are eliminated
+TEST_F(DBBloomFilterTest, KeyMayExist) {
+  do {
+    ReadOptions ropts;
+    std::string value;
+    anon::OptionsOverride options_override;
+    options_override.filter_policy.reset(NewBloomFilterPolicy(20));
+    Options options = CurrentOptions(options_override);
+    options.statistics = rocksdb::CreateDBStatistics();
+    CreateAndReopenWithCF({"pikachu"}, options);
+
+    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
+
+    ASSERT_OK(Put(1, "a", "b"));
+    bool value_found = false;
+    ASSERT_TRUE(
+        db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
+    ASSERT_TRUE(value_found);
+    ASSERT_EQ("b", value);
+
+    ASSERT_OK(Flush(1));
+    value.clear();
+
+    uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    ASSERT_TRUE(
+        db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
+    ASSERT_TRUE(!value_found);
+    // assert that no new files were opened and no new blocks were
+    // read into block cache.
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+    ASSERT_OK(Delete(1, "a"));
+
+    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+    ASSERT_OK(Flush(1));
+    dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
+                                true /* disallow trivial move */);
+
+    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+    ASSERT_OK(Delete(1, "c"));
+
+    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value));
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+    // KeyMayExist function only checks data in block caches, which is not used
+    // by plain table format.
+  } while (
+      ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction));
+}
+
+// A delete is skipped for key if KeyMayExist(key) returns False
+// Tests Writebatch consistency and proper delete behaviour
+TEST_F(DBBloomFilterTest, FilterDeletes) {
+  do {
+    anon::OptionsOverride options_override;
+    options_override.filter_policy.reset(NewBloomFilterPolicy(20));
+    Options options = CurrentOptions(options_override);
+    options.filter_deletes = true;
+    CreateAndReopenWithCF({"pikachu"}, options);
+    WriteBatch batch;
+
+    batch.Delete(handles_[1], "a");
+    dbfull()->Write(WriteOptions(), &batch);
+    ASSERT_EQ(AllEntriesFor("a", 1), "[ ]");  // Delete skipped
+    batch.Clear();
+
+    batch.Put(handles_[1], "a", "b");
+    batch.Delete(handles_[1], "a");
+    dbfull()->Write(WriteOptions(), &batch);
+    ASSERT_EQ(Get(1, "a"), "NOT_FOUND");
+    ASSERT_EQ(AllEntriesFor("a", 1), "[ DEL, b ]");  // Delete issued
+    batch.Clear();
+
+    batch.Delete(handles_[1], "c");
+    batch.Put(handles_[1], "c", "d");
+    dbfull()->Write(WriteOptions(), &batch);
+    ASSERT_EQ(Get(1, "c"), "d");
+    ASSERT_EQ(AllEntriesFor("c", 1), "[ d ]");  // Delete skipped
+    batch.Clear();
+
+    ASSERT_OK(Flush(1));  // A stray Flush
+
+    batch.Delete(handles_[1], "c");
+    dbfull()->Write(WriteOptions(), &batch);
+    ASSERT_EQ(AllEntriesFor("c", 1), "[ DEL, d ]");  // Delete issued
+    batch.Clear();
+  } while (ChangeCompactOptions());
+}
+
+TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
+  Options options = last_options_;
+  options.prefix_extractor.reset(NewFixedPrefixTransform(8));
+  options.statistics = rocksdb::CreateDBStatistics();
+  BlockBasedTableOptions bbto;
+  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
+  bbto.whole_key_filtering = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  DestroyAndReopen(options);
+
+  WriteOptions wo;
+  ReadOptions ro;
+  FlushOptions fo;
+  fo.wait = true;
+  std::string value;
+
+  ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo"));
+  ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
+  ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));
+
+  dbfull()->Flush(fo);
+
+  ASSERT_EQ("foo", Get("barbarbar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+  ASSERT_EQ("foo2", Get("barbarbar2"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+  ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+
+  ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+
+  ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
+}
+
+TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
+  Options options = last_options_;
+  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+  options.statistics = rocksdb::CreateDBStatistics();
+
+  BlockBasedTableOptions bbto;
+  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
+  bbto.whole_key_filtering = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  DestroyAndReopen(options);
+
+  WriteOptions wo;
+  ReadOptions ro;
+  FlushOptions fo;
+  fo.wait = true;
+  std::string value;
+
+  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
+  // Needs insert some keys to make sure files are not filtered out by key
+  // ranges.
+  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+  dbfull()->Flush(fo);
+
+  Reopen(options);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+
+  // Reopen with whole key filtering enabled and prefix extractor
+  // NULL. Bloom filter should be off for both of whole key and
+  // prefix bloom.
+  bbto.whole_key_filtering = true;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  options.prefix_extractor.reset();
+  Reopen(options);
+
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  // Write DB with only full key filtering.
+  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
+  // Needs insert some keys to make sure files are not filtered out by key
+  // ranges.
+  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+
+  // Reopen with both of whole key off and prefix extractor enabled.
+  // Still no bloom filter should be used.
+  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+  bbto.whole_key_filtering = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  Reopen(options);
+
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+
+  // Try to create a DB with mixed files:
+  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
+  // Needs insert some keys to make sure files are not filtered out by key
+  // ranges.
+  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+
+  options.prefix_extractor.reset();
+  bbto.whole_key_filtering = true;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  Reopen(options);
+
+  // Try to create a DB with mixed files.
+  ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar"));
+  // In this case needs insert some keys to make sure files are
+  // not filtered out by key ranges.
+  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
+  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
+  Flush();
+
+  // Now we have two files:
+  // File 1: An older file with prefix bloom.
+  // File 2: A newer file with whole bloom filter.
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
+  ASSERT_EQ("bar", Get("barfoo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
+
+  // Reopen with the same setting: only whole key is used
+  Reopen(options);
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
+  ASSERT_EQ("bar", Get("barfoo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
+
+  // Restart with both filters are allowed
+  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+  bbto.whole_key_filtering = true;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  Reopen(options);
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
+  // File 1 will has it filtered out.
+  // File 2 will not, as prefix `foo` exists in the file.
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+  ASSERT_EQ("bar", Get("barfoo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+
+  // Restart with only prefix bloom is allowed.
+  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
+  bbto.whole_key_filtering = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  Reopen(options);
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+  ASSERT_EQ("NOT_FOUND", Get("foo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
+  ASSERT_EQ("NOT_FOUND", Get("bar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
+  ASSERT_EQ("foo", Get("foobar"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
+  ASSERT_EQ("bar", Get("barfoo"));
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
+}
+
+TEST_F(DBBloomFilterTest, BloomFilter) {
+  do {
+    Options options = CurrentOptions();
+    env_->count_random_reads_ = true;
+    options.env = env_;
+    // ChangeCompactOptions() only changes compaction style, which does not
+    // trigger reset of table_factory
+    BlockBasedTableOptions table_options;
+    table_options.no_block_cache = true;
+    table_options.filter_policy.reset(NewBloomFilterPolicy(10));
+    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+    CreateAndReopenWithCF({"pikachu"}, options);
+
+    // Populate multiple layers
+    const int N = 10000;
+    for (int i = 0; i < N; i++) {
+      ASSERT_OK(Put(1, Key(i), Key(i)));
+    }
+    Compact(1, "a", "z");
+    for (int i = 0; i < N; i += 100) {
+      ASSERT_OK(Put(1, Key(i), Key(i)));
+    }
+    Flush(1);
+
+    // Prevent auto compactions triggered by seeks
+    env_->delay_sstable_sync_.store(true, std::memory_order_release);
+
+    // Lookup present keys.  Should rarely read from small sstable.
+    env_->random_read_counter_.Reset();
+    for (int i = 0; i < N; i++) {
+      ASSERT_EQ(Key(i), Get(1, Key(i)));
+    }
+    int reads = env_->random_read_counter_.Read();
+    fprintf(stderr, "%d present => %d reads\n", N, reads);
+    ASSERT_GE(reads, N);
+    ASSERT_LE(reads, N + 2 * N / 100);
+
+    // Lookup present keys.  Should rarely read from either sstable.
+    env_->random_read_counter_.Reset();
+    for (int i = 0; i < N; i++) {
+      ASSERT_EQ("NOT_FOUND", Get(1, Key(i) + ".missing"));
+    }
+    reads = env_->random_read_counter_.Read();
+    fprintf(stderr, "%d missing => %d reads\n", N, reads);
+    ASSERT_LE(reads, 3 * N / 100);
+
+    env_->delay_sstable_sync_.store(false, std::memory_order_release);
+    Close();
+  } while (ChangeCompactOptions());
+}
+
+TEST_F(DBBloomFilterTest, BloomFilterRate) {
+  while (ChangeFilterOptions()) {
+    Options options = CurrentOptions();
+    options.statistics = rocksdb::CreateDBStatistics();
+    CreateAndReopenWithCF({"pikachu"}, options);
+
+    const int maxKey = 10000;
+    for (int i = 0; i < maxKey; i++) {
+      ASSERT_OK(Put(1, Key(i), Key(i)));
+    }
+    // Add a large key to make the file contain wide range
+    ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
+    Flush(1);
+
+    // Check if they can be found
+    for (int i = 0; i < maxKey; i++) {
+      ASSERT_EQ(Key(i), Get(1, Key(i)));
+    }
+    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+
+    // Check if filter is useful
+    for (int i = 0; i < maxKey; i++) {
+      ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333)));
+    }
+    ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98);
+  }
+}
+
+TEST_F(DBBloomFilterTest, BloomFilterCompatibility) {
+  Options options = CurrentOptions();
+  options.statistics = rocksdb::CreateDBStatistics();
+  BlockBasedTableOptions table_options;
+  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+  // Create with block based filter
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  const int maxKey = 10000;
+  for (int i = 0; i < maxKey; i++) {
+    ASSERT_OK(Put(1, Key(i), Key(i)));
+  }
+  ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
+  Flush(1);
+
+  // Check db with full filter
+  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  ReopenWithColumnFamilies({"default", "pikachu"}, options);
+
+  // Check if they can be found
+  for (int i = 0; i < maxKey; i++) {
+    ASSERT_EQ(Key(i), Get(1, Key(i)));
+  }
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+}
+
+TEST_F(DBBloomFilterTest, BloomFilterReverseCompatibility) {
+  Options options = CurrentOptions();
+  options.statistics = rocksdb::CreateDBStatistics();
+  BlockBasedTableOptions table_options;
+  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+  // Create with full filter
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  const int maxKey = 10000;
+  for (int i = 0; i < maxKey; i++) {
+    ASSERT_OK(Put(1, Key(i), Key(i)));
+  }
+  ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
+  Flush(1);
+
+  // Check db with block_based filter
+  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  ReopenWithColumnFamilies({"default", "pikachu"}, options);
+
+  // Check if they can be found
+  for (int i = 0; i < maxKey; i++) {
+    ASSERT_EQ(Key(i), Get(1, Key(i)));
+  }
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+}
+
+namespace {
+// A wrapped bloom over default FilterPolicy
+class WrappedBloom : public FilterPolicy {
+ public:
+  explicit WrappedBloom(int bits_per_key)
+      : filter_(NewBloomFilterPolicy(bits_per_key)), counter_(0) {}
+
+  ~WrappedBloom() { delete filter_; }
+
+  const char* Name() const override { return "WrappedRocksDbFilterPolicy"; }
+
+  void CreateFilter(const rocksdb::Slice* keys, int n,
+                    std::string* dst) const override {
+    std::unique_ptr<rocksdb::Slice[]> user_keys(new rocksdb::Slice[n]);
+    for (int i = 0; i < n; ++i) {
+      user_keys[i] = convertKey(keys[i]);
+    }
+    return filter_->CreateFilter(user_keys.get(), n, dst);
+  }
+
+  bool KeyMayMatch(const rocksdb::Slice& key,
+                   const rocksdb::Slice& filter) const override {
+    counter_++;
+    return filter_->KeyMayMatch(convertKey(key), filter);
+  }
+
+  uint32_t GetCounter() { return counter_; }
+
+ private:
+  const FilterPolicy* filter_;
+  mutable uint32_t counter_;
+
+  rocksdb::Slice convertKey(const rocksdb::Slice& key) const { return key; }
+};
+}  // namespace
+
+TEST_F(DBBloomFilterTest, BloomFilterWrapper) {
+  Options options = CurrentOptions();
+  options.statistics = rocksdb::CreateDBStatistics();
+
+  BlockBasedTableOptions table_options;
+  WrappedBloom* policy = new WrappedBloom(10);
+  table_options.filter_policy.reset(policy);
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  const int maxKey = 10000;
+  for (int i = 0; i < maxKey; i++) {
+    ASSERT_OK(Put(1, Key(i), Key(i)));
+  }
+  // Add a large key to make the file contain wide range
+  ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
+  ASSERT_EQ(0U, policy->GetCounter());
+  Flush(1);
+
+  // Check if they can be found
+  for (int i = 0; i < maxKey; i++) {
+    ASSERT_EQ(Key(i), Get(1, Key(i)));
+  }
+  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+  ASSERT_EQ(1U * maxKey, policy->GetCounter());
+
+  // Check if filter is useful
+  for (int i = 0; i < maxKey; i++) {
+    ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333)));
+  }
+  ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98);
+  ASSERT_EQ(2U * maxKey, policy->GetCounter());
+}
+
+class SliceTransformLimitedDomain : public SliceTransform {
+  const char* Name() const override { return "SliceTransformLimitedDomain"; }
+
+  Slice Transform(const Slice& src) const override {
+    return Slice(src.data(), 5);
+  }
+
+  bool InDomain(const Slice& src) const override {
+    // prefix will be x????
+    return src.size() >= 5 && src[0] == 'x';
+  }
+
+  bool InRange(const Slice& dst) const override {
+    // prefix will be x????
+    return dst.size() == 5 && dst[0] == 'x';
+  }
+};
+
+TEST_F(DBBloomFilterTest, PrefixExtractorFullFilter) {
+  BlockBasedTableOptions bbto;
+  // Full Filter Block
+  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
+  bbto.whole_key_filtering = false;
+
+  Options options = CurrentOptions();
+  options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  DestroyAndReopen(options);
+
+  ASSERT_OK(Put("x1111_AAAA", "val1"));
+  ASSERT_OK(Put("x1112_AAAA", "val2"));
+  ASSERT_OK(Put("x1113_AAAA", "val3"));
+  ASSERT_OK(Put("x1114_AAAA", "val4"));
+  // Not in domain, wont be added to filter
+  ASSERT_OK(Put("zzzzz_AAAA", "val5"));
+
+  ASSERT_OK(Flush());
+
+  ASSERT_EQ(Get("x1111_AAAA"), "val1");
+  ASSERT_EQ(Get("x1112_AAAA"), "val2");
+  ASSERT_EQ(Get("x1113_AAAA"), "val3");
+  ASSERT_EQ(Get("x1114_AAAA"), "val4");
+  // Was not added to filter but rocksdb will try to read it from the filter
+  ASSERT_EQ(Get("zzzzz_AAAA"), "val5");
+}
+
+TEST_F(DBBloomFilterTest, PrefixExtractorBlockFilter) {
+  BlockBasedTableOptions bbto;
+  // Block Filter Block
+  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
+
+  Options options = CurrentOptions();
+  options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  DestroyAndReopen(options);
+
+  ASSERT_OK(Put("x1113_AAAA", "val3"));
+  ASSERT_OK(Put("x1114_AAAA", "val4"));
+  // Not in domain, wont be added to filter
+  ASSERT_OK(Put("zzzzz_AAAA", "val1"));
+  ASSERT_OK(Put("zzzzz_AAAB", "val2"));
+  ASSERT_OK(Put("zzzzz_AAAC", "val3"));
+  ASSERT_OK(Put("zzzzz_AAAD", "val4"));
+
+  ASSERT_OK(Flush());
+
+  std::vector<std::string> iter_res;
+  auto iter = db_->NewIterator(ReadOptions());
+  // Seek to a key that was not in Domain
+  for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) {
+    iter_res.emplace_back(iter->value().ToString());
+  }
+
+  std::vector<std::string> expected_res = {"val1", "val2", "val3", "val4"};
+  ASSERT_EQ(iter_res, expected_res);
+  delete iter;
+}
+
+#ifndef ROCKSDB_LITE
+class BloomStatsTestWithParam
+    : public DBBloomFilterTest,
+      public testing::WithParamInterface<std::tuple<bool, bool>> {
+ public:
+  BloomStatsTestWithParam() {
+    use_block_table_ = std::get<0>(GetParam());
+    use_block_based_builder_ = std::get<1>(GetParam());
+
+    options_.create_if_missing = true;
+    options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
+    options_.memtable_prefix_bloom_bits = 8 * 1024;
+    if (use_block_table_) {
+      BlockBasedTableOptions table_options;
+      table_options.hash_index_allow_collision = false;
+      table_options.filter_policy.reset(
+          NewBloomFilterPolicy(10, use_block_based_builder_));
+      options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
+    } else {
+      PlainTableOptions table_options;
+      options_.table_factory.reset(NewPlainTableFactory(table_options));
+    }
+
+    perf_context.Reset();
+    DestroyAndReopen(options_);
+  }
+
+  ~BloomStatsTestWithParam() {
+    perf_context.Reset();
+    Destroy(options_);
+  }
+
+  // Required if inheriting from testing::WithParamInterface<>
+  static void SetUpTestCase() {}
+  static void TearDownTestCase() {}
+
+  bool use_block_table_;
+  bool use_block_based_builder_;
+  Options options_;
+};
+
+// 1 Insert 2 K-V pairs into DB
+// 2 Call Get() for both keys - expext memtable bloom hit stat to be 2
+// 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1
+// 4 Call Flush() to create SST
+// 5 Call Get() for both keys - expext SST bloom hit stat to be 2
+// 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1
+// Test both: block and plain SST
+TEST_P(BloomStatsTestWithParam, BloomStatsTest) {
+  std::string key1("AAAA");
+  std::string key2("RXDB");  // not in DB
+  std::string key3("ZBRA");
+  std::string value1("Value1");
+  std::string value3("Value3");
+
+  ASSERT_OK(Put(key1, value1, WriteOptions()));
+  ASSERT_OK(Put(key3, value3, WriteOptions()));
+
+  // check memtable bloom stats
+  ASSERT_EQ(value1, Get(key1));
+  ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
+  ASSERT_EQ(value3, Get(key3));
+  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
+  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
+
+  ASSERT_EQ("NOT_FOUND", Get(key2));
+  ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
+  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
+
+  // sanity checks
+  ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
+  ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
+
+  Flush();
+
+  // sanity checks
+  ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
+  ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
+
+  // check SST bloom stats
+  // NOTE: hits per get differs because of code paths differences
+  // in BlockBasedTable::Get()
+  int hits_per_get = use_block_table_ && !use_block_based_builder_ ? 2 : 1;
+  ASSERT_EQ(value1, Get(key1));
+  ASSERT_EQ(hits_per_get, perf_context.bloom_sst_hit_count);
+  ASSERT_EQ(value3, Get(key3));
+  ASSERT_EQ(2 * hits_per_get, perf_context.bloom_sst_hit_count);
+
+  ASSERT_EQ("NOT_FOUND", Get(key2));
+  ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
+}
+
+// Same scenario as in BloomStatsTest but using an iterator
+TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
+  std::string key1("AAAA");
+  std::string key2("RXDB");  // not in DB
+  std::string key3("ZBRA");
+  std::string value1("Value1");
+  std::string value3("Value3");
+
+  ASSERT_OK(Put(key1, value1, WriteOptions()));
+  ASSERT_OK(Put(key3, value3, WriteOptions()));
+
+  unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions()));
+
+  // check memtable bloom stats
+  iter->Seek(key1);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ(value1, iter->value().ToString());
+  ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
+  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
+
+  iter->Seek(key3);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ(value3, iter->value().ToString());
+  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
+  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
+
+  iter->Seek(key2);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(!iter->Valid());
+  ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
+  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
+
+  Flush();
+
+  iter.reset(dbfull()->NewIterator(ReadOptions()));
+
+  // Check SST bloom stats
+  iter->Seek(key1);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ(value1, iter->value().ToString());
+  ASSERT_EQ(1, perf_context.bloom_sst_hit_count);
+
+  iter->Seek(key3);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(iter->Valid());
+  ASSERT_EQ(value3, iter->value().ToString());
+  ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
+
+  iter->Seek(key2);
+  ASSERT_OK(iter->status());
+  ASSERT_TRUE(!iter->Valid());
+  ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
+  ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
+}
+
+INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam,
+                        ::testing::Values(std::make_tuple(true, true),
+                                          std::make_tuple(true, false),
+                                          std::make_tuple(false, false)));
+
+namespace {
+void PrefixScanInit(DBBloomFilterTest* dbtest) {
+  char buf[100];
+  std::string keystr;
+  const int small_range_sstfiles = 5;
+  const int big_range_sstfiles = 5;
+
+  // Generate 11 sst files with the following prefix ranges.
+  // GROUP 0: [0,10]                              (level 1)
+  // GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6]  (level 0)
+  // GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10]  (level 0)
+  //
+  // A seek with the previous API would do 11 random I/Os (to all the
+  // files).  With the new API and a prefix filter enabled, we should
+  // only do 2 random I/O, to the 2 files containing the key.
+
+  // GROUP 0
+  snprintf(buf, sizeof(buf), "%02d______:start", 0);
+  keystr = std::string(buf);
+  ASSERT_OK(dbtest->Put(keystr, keystr));
+  snprintf(buf, sizeof(buf), "%02d______:end", 10);
+  keystr = std::string(buf);
+  ASSERT_OK(dbtest->Put(keystr, keystr));
+  dbtest->Flush();
+  dbtest->dbfull()->CompactRange(CompactRangeOptions(), nullptr,
+                                 nullptr);  // move to level 1
+
+  // GROUP 1
+  for (int i = 1; i <= small_range_sstfiles; i++) {
+    snprintf(buf, sizeof(buf), "%02d______:start", i);
+    keystr = std::string(buf);
+    ASSERT_OK(dbtest->Put(keystr, keystr));
+    snprintf(buf, sizeof(buf), "%02d______:end", i + 1);
+    keystr = std::string(buf);
+    ASSERT_OK(dbtest->Put(keystr, keystr));
+    dbtest->Flush();
+  }
+
+  // GROUP 2
+  for (int i = 1; i <= big_range_sstfiles; i++) {
+    snprintf(buf, sizeof(buf), "%02d______:start", 0);
+    keystr = std::string(buf);
+    ASSERT_OK(dbtest->Put(keystr, keystr));
+    snprintf(buf, sizeof(buf), "%02d______:end", small_range_sstfiles + i + 1);
+    keystr = std::string(buf);
+    ASSERT_OK(dbtest->Put(keystr, keystr));
+    dbtest->Flush();
+  }
+}
+}  // namespace
+
+TEST_F(DBBloomFilterTest, PrefixScan) {
+  XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip,
+             kSkipNoPrefix);
+  while (ChangeFilterOptions()) {
+    int count;
+    Slice prefix;
+    Slice key;
+    char buf[100];
+    Iterator* iter;
+    snprintf(buf, sizeof(buf), "03______:");
+    prefix = Slice(buf, 8);
+    key = Slice(buf, 9);
+    ASSERT_EQ(key.difference_offset(prefix), 8);
+    ASSERT_EQ(prefix.difference_offset(key), 8);
+    // db configs
+    env_->count_random_reads_ = true;
+    Options options = CurrentOptions();
+    options.env = env_;
+    options.prefix_extractor.reset(NewFixedPrefixTransform(8));
+    options.disable_auto_compactions = true;
+    options.max_background_compactions = 2;
+    options.create_if_missing = true;
+    options.memtable_factory.reset(NewHashSkipListRepFactory(16));
+
+    BlockBasedTableOptions table_options;
+    table_options.no_block_cache = true;
+    table_options.filter_policy.reset(NewBloomFilterPolicy(10));
+    table_options.whole_key_filtering = false;
+    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+    // 11 RAND I/Os
+    DestroyAndReopen(options);
+    PrefixScanInit(this);
+    count = 0;
+    env_->random_read_counter_.Reset();
+    iter = db_->NewIterator(ReadOptions());
+    for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
+      if (!iter->key().starts_with(prefix)) {
+        break;
+      }
+      count++;
+    }
+    ASSERT_OK(iter->status());
+    delete iter;
+    ASSERT_EQ(count, 2);
+    ASSERT_EQ(env_->random_read_counter_.Read(), 2);
+    Close();
+  }  // end of while
+  XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, 0);
+}
+
+TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 64 * 1024;
+  options.arena_block_size = 4 * 1024;
+  options.target_file_size_base = 64 * 1024;
+  options.level0_file_num_compaction_trigger = 2;
+  options.level0_slowdown_writes_trigger = 2;
+  options.level0_stop_writes_trigger = 4;
+  options.max_bytes_for_level_base = 256 * 1024;
+  options.max_write_buffer_number = 2;
+  options.max_background_compactions = 8;
+  options.max_background_flushes = 8;
+  options.compression = kNoCompression;
+  options.compaction_style = kCompactionStyleLevel;
+  options.level_compaction_dynamic_level_bytes = true;
+  BlockBasedTableOptions bbto;
+  bbto.cache_index_and_filter_blocks = true;
+  bbto.filter_policy.reset(NewBloomFilterPolicy(10, true));
+  bbto.whole_key_filtering = true;
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  options.optimize_filters_for_hits = true;
+  options.statistics = rocksdb::CreateDBStatistics();
+  CreateAndReopenWithCF({"mypikachu"}, options);
+
+  int numkeys = 200000;
+
+  // Generate randomly shuffled keys, so the updates are almost
+  // random.
+  std::vector<int> keys;
+  keys.reserve(numkeys);
+  for (int i = 0; i < numkeys; i += 2) {
+    keys.push_back(i);
+  }
+  std::random_shuffle(std::begin(keys), std::end(keys));
+
+  int num_inserted = 0;
+  for (int key : keys) {
+    ASSERT_OK(Put(1, Key(key), "val"));
+    if (++num_inserted % 1000 == 0) {
+      dbfull()->TEST_WaitForFlushMemTable();
+      dbfull()->TEST_WaitForCompact();
+    }
+  }
+  ASSERT_OK(Put(1, Key(0), "val"));
+  ASSERT_OK(Put(1, Key(numkeys), "val"));
+  ASSERT_OK(Flush(1));
+  dbfull()->TEST_WaitForCompact();
+
+  if (NumTableFilesAtLevel(0, 1) == 0) {
+    // No Level 0 file. Create one.
+    ASSERT_OK(Put(1, Key(0), "val"));
+    ASSERT_OK(Put(1, Key(numkeys), "val"));
+    ASSERT_OK(Flush(1));
+    dbfull()->TEST_WaitForCompact();
+  }
+
+  for (int i = 1; i < numkeys; i += 2) {
+    ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND");
+  }
+
+  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
+  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
+  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
+
+  // Now we have three sorted run, L0, L5 and L6 with most files in L6 have
+  // no bloom filter. Most keys be checked bloom filters twice.
+  ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2);
+  ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2);
+
+  for (int i = 0; i < numkeys; i += 2) {
+    ASSERT_EQ(Get(1, Key(i)), "val");
+  }
+
+  // Part 2 (read path): rewrite last level with blooms, then verify they get
+  // cached only if !optimize_filters_for_hits
+  options.disable_auto_compactions = true;
+  options.num_levels = 9;
+  options.optimize_filters_for_hits = false;
+  options.statistics = CreateDBStatistics();
+  bbto.block_cache.reset();
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+  MoveFilesToLevel(7 /* level */, 1 /* column family index */);
+
+  std::string value = Get(1, Key(0));
+  uint64_t prev_cache_filter_hits =
+      TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
+  value = Get(1, Key(0));
+  ASSERT_EQ(prev_cache_filter_hits + 1,
+            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+
+  // Now that we know the filter blocks exist in the last level files, see if
+  // filter caching is skipped for this optimization
+  options.optimize_filters_for_hits = true;
+  options.statistics = CreateDBStatistics();
+  bbto.block_cache.reset();
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+
+  value = Get(1, Key(0));
+  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+  ASSERT_EQ(2 /* index and data block */,
+            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+
+  // Check filter block ignored for files preloaded during DB::Open()
+  options.max_open_files = -1;
+  options.statistics = CreateDBStatistics();
+  bbto.block_cache.reset();
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+
+  uint64_t prev_cache_filter_misses =
+      TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
+  prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
+  Get(1, Key(0));
+  ASSERT_EQ(prev_cache_filter_misses,
+            TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(prev_cache_filter_hits,
+            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+
+  // Check filter block ignored for file trivially-moved to bottom level
+  bbto.block_cache.reset();
+  options.max_open_files = 100;  // setting > -1 makes it not preload all files
+  options.statistics = CreateDBStatistics();
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+
+  ASSERT_OK(Put(1, Key(numkeys + 1), "val"));
+  ASSERT_OK(Flush(1));
+
+  int32_t trivial_move = 0;
+  int32_t non_trivial_move = 0;
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::BackgroundCompaction:TrivialMove",
+      [&](void* arg) { trivial_move++; });
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::BackgroundCompaction:NonTrivial",
+      [&](void* arg) { non_trivial_move++; });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  CompactRangeOptions compact_options;
+  compact_options.bottommost_level_compaction =
+      BottommostLevelCompaction::kSkip;
+  compact_options.change_level = true;
+  compact_options.target_level = 7;
+  db_->CompactRange(compact_options, handles_[1], nullptr, nullptr);
+
+  ASSERT_EQ(trivial_move, 1);
+  ASSERT_EQ(non_trivial_move, 0);
+
+  prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
+  prev_cache_filter_misses =
+      TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
+  value = Get(1, Key(numkeys + 1));
+  ASSERT_EQ(prev_cache_filter_hits,
+            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+  ASSERT_EQ(prev_cache_filter_misses,
+            TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+
+  // Check filter block not cached for iterator
+  bbto.block_cache.reset();
+  options.statistics = CreateDBStatistics();
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
+
+  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions(), handles_[1]));
+  iter->SeekToFirst();
+  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
+  ASSERT_EQ(2 /* index and data block */,
+            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+}
+
+#endif  // ROCKSDB_LITE
+
+}  // namespace rocksdb
+
+int main(int argc, char** argv) {
+  rocksdb::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc
index 677cacb22..c794407e9 100644
--- a/db/db_compaction_test.cc
+++ b/db/db_compaction_test.cc
@@ -40,34 +40,6 @@ class DBCompactionTestWithParam
 };
 
 namespace {
-class OnFileDeletionListener : public EventListener {
- public:
-  OnFileDeletionListener() :
-      matched_count_(0),
-      expected_file_name_("") {}
-
-  void SetExpectedFileName(
-      const std::string file_name) {
-    expected_file_name_ = file_name;
-  }
-
-  void VerifyMatchedCount(size_t expected_value) {
-    ASSERT_EQ(matched_count_, expected_value);
-  }
-
-  void OnTableFileDeleted(
-      const TableFileDeletionInfo& info) override {
-    if (expected_file_name_ != "") {
-      ASSERT_EQ(expected_file_name_, info.file_path);
-      expected_file_name_ = "";
-      matched_count_++;
-    }
-  }
-
- private:
-  size_t matched_count_;
-  std::string expected_file_name_;
-};
 
 class FlushedFileCollector : public EventListener {
  public:
diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc
new file mode 100644
index 000000000..94bdbcd13
--- /dev/null
+++ b/db/db_iterator_test.cc
@@ -0,0 +1,1319 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/db_test_util.h"
+#include "port/stack_trace.h"
+#include "rocksdb/perf_context.h"
+
+namespace rocksdb {
+
+class DBIteratorTest : public DBTestBase {
+ public:
+  DBIteratorTest() : DBTestBase("/db_iterator_test") {}
+};
+
+TEST_F(DBIteratorTest, IteratorProperty) {
+  // The test needs to be changed if kPersistedTier is supported in iterator.
+  Options options = CurrentOptions();
+  CreateAndReopenWithCF({"pikachu"}, options);
+  Put(1, "1", "2");
+  ReadOptions ropt;
+  ropt.pin_data = false;
+  {
+    unique_ptr<Iterator> iter(db_->NewIterator(ropt, handles_[1]));
+    iter->SeekToFirst();
+    std::string prop_value;
+    ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
+    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+    ASSERT_EQ("0", prop_value);
+    iter->Next();
+    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+    ASSERT_EQ("Iterator is not valid.", prop_value);
+  }
+  Close();
+}
+
+TEST_F(DBIteratorTest, PersistedTierOnIterator) {
+  // The test needs to be changed if kPersistedTier is supported in iterator.
+  Options options = CurrentOptions();
+  CreateAndReopenWithCF({"pikachu"}, options);
+  ReadOptions ropt;
+  ropt.read_tier = kPersistedTier;
+
+  auto* iter = db_->NewIterator(ropt, handles_[1]);
+  ASSERT_TRUE(iter->status().IsNotSupported());
+  delete iter;
+
+  std::vector<Iterator*> iters;
+  ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
+  Close();
+}
+
+TEST_F(DBIteratorTest, NonBlockingIteration) {
+  do {
+    ReadOptions non_blocking_opts, regular_opts;
+    Options options = CurrentOptions();
+    options.statistics = rocksdb::CreateDBStatistics();
+    non_blocking_opts.read_tier = kBlockCacheTier;
+    CreateAndReopenWithCF({"pikachu"}, options);
+    // write one kv to the database.
+    ASSERT_OK(Put(1, "a", "b"));
+
+    // scan using non-blocking iterator. We should find it because
+    // it is in memtable.
+    Iterator* iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    int count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_OK(iter->status());
+      count++;
+    }
+    ASSERT_EQ(count, 1);
+    delete iter;
+
+    // flush memtable to storage. Now, the key should not be in the
+    // memtable neither in the block cache.
+    ASSERT_OK(Flush(1));
+
+    // verify that a non-blocking iterator does not find any
+    // kvs. Neither does it do any IOs to storage.
+    uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      count++;
+    }
+    ASSERT_EQ(count, 0);
+    ASSERT_TRUE(iter->status().IsIncomplete());
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+    delete iter;
+
+    // read in the specified block via a regular get
+    ASSERT_EQ(Get(1, "a"), "b");
+
+    // verify that we can find it via a non-blocking scan
+    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_OK(iter->status());
+      count++;
+    }
+    ASSERT_EQ(count, 1);
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+    delete iter;
+
+    // This test verifies block cache behaviors, which is not used by plain
+    // table format.
+    // Exclude kHashCuckoo as it does not support iteration currently
+  } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo |
+                         kSkipMmapReads));
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBIteratorTest, ManagedNonBlockingIteration) {
+  do {
+    ReadOptions non_blocking_opts, regular_opts;
+    Options options = CurrentOptions();
+    options.statistics = rocksdb::CreateDBStatistics();
+    non_blocking_opts.read_tier = kBlockCacheTier;
+    non_blocking_opts.managed = true;
+    CreateAndReopenWithCF({"pikachu"}, options);
+    // write one kv to the database.
+    ASSERT_OK(Put(1, "a", "b"));
+
+    // scan using non-blocking iterator. We should find it because
+    // it is in memtable.
+    Iterator* iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    int count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_OK(iter->status());
+      count++;
+    }
+    ASSERT_EQ(count, 1);
+    delete iter;
+
+    // flush memtable to storage. Now, the key should not be in the
+    // memtable neither in the block cache.
+    ASSERT_OK(Flush(1));
+
+    // verify that a non-blocking iterator does not find any
+    // kvs. Neither does it do any IOs to storage.
+    int64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    int64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      count++;
+    }
+    ASSERT_EQ(count, 0);
+    ASSERT_TRUE(iter->status().IsIncomplete());
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+    delete iter;
+
+    // read in the specified block via a regular get
+    ASSERT_EQ(Get(1, "a"), "b");
+
+    // verify that we can find it via a non-blocking scan
+    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
+    count = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_OK(iter->status());
+      count++;
+    }
+    ASSERT_EQ(count, 1);
+    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+    delete iter;
+
+    // This test verifies block cache behaviors, which is not used by plain
+    // table format.
+    // Exclude kHashCuckoo as it does not support iteration currently
+  } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo |
+                         kSkipMmapReads));
+}
+#endif  // ROCKSDB_LITE
+
+TEST_F(DBIteratorTest, IterSeekBeforePrev) {
+  ASSERT_OK(Put("a", "b"));
+  ASSERT_OK(Put("c", "d"));
+  dbfull()->Flush(FlushOptions());
+  ASSERT_OK(Put("0", "f"));
+  ASSERT_OK(Put("1", "h"));
+  dbfull()->Flush(FlushOptions());
+  ASSERT_OK(Put("2", "j"));
+  auto iter = db_->NewIterator(ReadOptions());
+  iter->Seek(Slice("c"));
+  iter->Prev();
+  iter->Seek(Slice("a"));
+  iter->Prev();
+  delete iter;
+}
+
+namespace {
+std::string MakeLongKey(size_t length, char c) {
+  return std::string(length, c);
+}
+}  // namespace
+
+TEST_F(DBIteratorTest, IterLongKeys) {
+  ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
+  ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
+  ASSERT_OK(Put("a", "b"));
+  dbfull()->Flush(FlushOptions());
+  ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
+  ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
+  ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
+  auto iter = db_->NewIterator(ReadOptions());
+
+  // Create a key that needs to be skipped for Seq too new
+  iter->Seek(MakeLongKey(20, 0));
+  ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0");
+  iter->Next();
+  ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
+  iter->Next();
+  ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
+  iter->Next();
+  ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
+  iter->Next();
+  ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4");
+  delete iter;
+
+  iter = db_->NewIterator(ReadOptions());
+  iter->Seek(MakeLongKey(50, 1));
+  ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
+  iter->Next();
+  ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
+  iter->Next();
+  ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
+  delete iter;
+}
+
+TEST_F(DBIteratorTest, IterNextWithNewerSeq) {
+  ASSERT_OK(Put("0", "0"));
+  dbfull()->Flush(FlushOptions());
+  ASSERT_OK(Put("a", "b"));
+  ASSERT_OK(Put("c", "d"));
+  ASSERT_OK(Put("d", "e"));
+  auto iter = db_->NewIterator(ReadOptions());
+
+  // Create a key that needs to be skipped for Seq too new
+  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
+       i++) {
+    ASSERT_OK(Put("b", "f"));
+  }
+
+  iter->Seek(Slice("a"));
+  ASSERT_EQ(IterStatus(iter), "a->b");
+  iter->Next();
+  ASSERT_EQ(IterStatus(iter), "c->d");
+  delete iter;
+}
+
+TEST_F(DBIteratorTest, IterPrevWithNewerSeq) {
+  ASSERT_OK(Put("0", "0"));
+  dbfull()->Flush(FlushOptions());
+  ASSERT_OK(Put("a", "b"));
+  ASSERT_OK(Put("c", "d"));
+  ASSERT_OK(Put("d", "e"));
+  auto iter = db_->NewIterator(ReadOptions());
+
+  // Create a key that needs to be skipped for Seq too new
+  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
+       i++) {
+    ASSERT_OK(Put("b", "f"));
+  }
+
+  iter->Seek(Slice("d"));
+  ASSERT_EQ(IterStatus(iter), "d->e");
+  iter->Prev();
+  ASSERT_EQ(IterStatus(iter), "c->d");
+  iter->Prev();
+  ASSERT_EQ(IterStatus(iter), "a->b");
+
+  iter->Prev();
+  delete iter;
+}
+
+TEST_F(DBIteratorTest, IterPrevWithNewerSeq2) {
+  ASSERT_OK(Put("0", "0"));
+  dbfull()->Flush(FlushOptions());
+  ASSERT_OK(Put("a", "b"));
+  ASSERT_OK(Put("c", "d"));
+  ASSERT_OK(Put("d", "e"));
+  auto iter = db_->NewIterator(ReadOptions());
+  iter->Seek(Slice("c"));
+  ASSERT_EQ(IterStatus(iter), "c->d");
+
+  // Create a key that needs to be skipped for Seq too new
+  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
+       i++) {
+    ASSERT_OK(Put("b", "f"));
+  }
+
+  iter->Prev();
+  ASSERT_EQ(IterStatus(iter), "a->b");
+
+  iter->Prev();
+  delete iter;
+}
+
+TEST_F(DBIteratorTest, IterEmpty) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+
+    iter->SeekToFirst();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    iter->SeekToLast();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    iter->Seek("foo");
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    delete iter;
+  } while (ChangeCompactOptions());
+}
+
+TEST_F(DBIteratorTest, IterSingle) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    ASSERT_OK(Put(1, "a", "va"));
+    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+
+    iter->SeekToFirst();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+    iter->SeekToFirst();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    iter->SeekToLast();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+    iter->SeekToLast();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    iter->Seek("");
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    iter->Seek("a");
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    iter->Seek("b");
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    delete iter;
+  } while (ChangeCompactOptions());
+}
+
+TEST_F(DBIteratorTest, IterMulti) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    ASSERT_OK(Put(1, "a", "va"));
+    ASSERT_OK(Put(1, "b", "vb"));
+    ASSERT_OK(Put(1, "c", "vc"));
+    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+
+    iter->SeekToFirst();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "b->vb");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "c->vc");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+    iter->SeekToFirst();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    iter->SeekToLast();
+    ASSERT_EQ(IterStatus(iter), "c->vc");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "b->vb");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+    iter->SeekToLast();
+    ASSERT_EQ(IterStatus(iter), "c->vc");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    iter->Seek("");
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Seek("a");
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Seek("ax");
+    ASSERT_EQ(IterStatus(iter), "b->vb");
+
+    iter->Seek("b");
+    ASSERT_EQ(IterStatus(iter), "b->vb");
+    iter->Seek("z");
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    // Switch from reverse to forward
+    iter->SeekToLast();
+    iter->Prev();
+    iter->Prev();
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "b->vb");
+
+    // Switch from forward to reverse
+    iter->SeekToFirst();
+    iter->Next();
+    iter->Next();
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "b->vb");
+
+    // Make sure iter stays at snapshot
+    ASSERT_OK(Put(1, "a", "va2"));
+    ASSERT_OK(Put(1, "a2", "va3"));
+    ASSERT_OK(Put(1, "b", "vb2"));
+    ASSERT_OK(Put(1, "c", "vc2"));
+    ASSERT_OK(Delete(1, "b"));
+    iter->SeekToFirst();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "b->vb");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "c->vc");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+    iter->SeekToLast();
+    ASSERT_EQ(IterStatus(iter), "c->vc");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "b->vb");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    delete iter;
+  } while (ChangeCompactOptions());
+}
+
+// Check that we can skip over a run of user keys
+// by using reseek rather than sequential scan
+TEST_F(DBIteratorTest, IterReseek) {
+  anon::OptionsOverride options_override;
+  options_override.skip_policy = kSkipNoSnapshot;
+  Options options = CurrentOptions(options_override);
+  options.max_sequential_skip_in_iterations = 3;
+  options.create_if_missing = true;
+  options.statistics = rocksdb::CreateDBStatistics();
+  DestroyAndReopen(options);
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  // insert three keys with same userkey and verify that
+  // reseek is not invoked. For each of these test cases,
+  // verify that we can find the next key "b".
+  ASSERT_OK(Put(1, "a", "zero"));
+  ASSERT_OK(Put(1, "a", "one"));
+  ASSERT_OK(Put(1, "a", "two"));
+  ASSERT_OK(Put(1, "b", "bone"));
+  Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+  iter->SeekToFirst();
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+  ASSERT_EQ(IterStatus(iter), "a->two");
+  iter->Next();
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+  ASSERT_EQ(IterStatus(iter), "b->bone");
+  delete iter;
+
+  // insert a total of three keys with same userkey and verify
+  // that reseek is still not invoked.
+  ASSERT_OK(Put(1, "a", "three"));
+  iter = db_->NewIterator(ReadOptions(), handles_[1]);
+  iter->SeekToFirst();
+  ASSERT_EQ(IterStatus(iter), "a->three");
+  iter->Next();
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+  ASSERT_EQ(IterStatus(iter), "b->bone");
+  delete iter;
+
+  // insert a total of four keys with same userkey and verify
+  // that reseek is invoked.
+  ASSERT_OK(Put(1, "a", "four"));
+  iter = db_->NewIterator(ReadOptions(), handles_[1]);
+  iter->SeekToFirst();
+  ASSERT_EQ(IterStatus(iter), "a->four");
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+  iter->Next();
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
+  ASSERT_EQ(IterStatus(iter), "b->bone");
+  delete iter;
+
+  // Testing reverse iterator
+  // At this point, we have three versions of "a" and one version of "b".
+  // The reseek statistics is already at 1.
+  int num_reseeks = static_cast<int>(
+      TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
+
+  // Insert another version of b and assert that reseek is not invoked
+  ASSERT_OK(Put(1, "b", "btwo"));
+  iter = db_->NewIterator(ReadOptions(), handles_[1]);
+  iter->SeekToLast();
+  ASSERT_EQ(IterStatus(iter), "b->btwo");
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
+            num_reseeks);
+  iter->Prev();
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
+            num_reseeks + 1);
+  ASSERT_EQ(IterStatus(iter), "a->four");
+  delete iter;
+
+  // insert two more versions of b. This makes a total of 4 versions
+  // of b and 4 versions of a.
+  ASSERT_OK(Put(1, "b", "bthree"));
+  ASSERT_OK(Put(1, "b", "bfour"));
+  iter = db_->NewIterator(ReadOptions(), handles_[1]);
+  iter->SeekToLast();
+  ASSERT_EQ(IterStatus(iter), "b->bfour");
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
+            num_reseeks + 2);
+  iter->Prev();
+
+  // the previous Prev call should have invoked reseek
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
+            num_reseeks + 3);
+  ASSERT_EQ(IterStatus(iter), "a->four");
+  delete iter;
+}
+
+TEST_F(DBIteratorTest, IterSmallAndLargeMix) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    ASSERT_OK(Put(1, "a", "va"));
+    ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
+    ASSERT_OK(Put(1, "c", "vc"));
+    ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
+    ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
+
+    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+
+    iter->SeekToFirst();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "c->vc");
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
+    iter->Next();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    iter->SeekToLast();
+    ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "c->vc");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "a->va");
+    iter->Prev();
+    ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+    delete iter;
+  } while (ChangeCompactOptions());
+}
+
+TEST_F(DBIteratorTest, IterMultiWithDelete) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    ASSERT_OK(Put(1, "ka", "va"));
+    ASSERT_OK(Put(1, "kb", "vb"));
+    ASSERT_OK(Put(1, "kc", "vc"));
+    ASSERT_OK(Delete(1, "kb"));
+    ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
+
+    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+    iter->Seek("kc");
+    ASSERT_EQ(IterStatus(iter), "kc->vc");
+    if (!CurrentOptions().merge_operator) {
+      // TODO: merge operator does not support backward iteration yet
+      if (kPlainTableAllBytesPrefix != option_config_ &&
+          kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
+          kHashLinkList != option_config_) {
+        iter->Prev();
+        ASSERT_EQ(IterStatus(iter), "ka->va");
+      }
+    }
+    delete iter;
+  } while (ChangeOptions());
+}
+
+TEST_F(DBIteratorTest, IterPrevMaxSkip) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    for (int i = 0; i < 2; i++) {
+      ASSERT_OK(Put(1, "key1", "v1"));
+      ASSERT_OK(Put(1, "key2", "v2"));
+      ASSERT_OK(Put(1, "key3", "v3"));
+      ASSERT_OK(Put(1, "key4", "v4"));
+      ASSERT_OK(Put(1, "key5", "v5"));
+    }
+
+    VerifyIterLast("key5->v5", 1);
+
+    ASSERT_OK(Delete(1, "key5"));
+    VerifyIterLast("key4->v4", 1);
+
+    ASSERT_OK(Delete(1, "key4"));
+    VerifyIterLast("key3->v3", 1);
+
+    ASSERT_OK(Delete(1, "key3"));
+    VerifyIterLast("key2->v2", 1);
+
+    ASSERT_OK(Delete(1, "key2"));
+    VerifyIterLast("key1->v1", 1);
+
+    ASSERT_OK(Delete(1, "key1"));
+    VerifyIterLast("(invalid)", 1);
+  } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
+}
+
+TEST_F(DBIteratorTest, IterWithSnapshot) {
+  anon::OptionsOverride options_override;
+  options_override.skip_policy = kSkipNoSnapshot;
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
+    ASSERT_OK(Put(1, "key1", "val1"));
+    ASSERT_OK(Put(1, "key2", "val2"));
+    ASSERT_OK(Put(1, "key3", "val3"));
+    ASSERT_OK(Put(1, "key4", "val4"));
+    ASSERT_OK(Put(1, "key5", "val5"));
+
+    const Snapshot* snapshot = db_->GetSnapshot();
+    ReadOptions options;
+    options.snapshot = snapshot;
+    Iterator* iter = db_->NewIterator(options, handles_[1]);
+
+    // Put more values after the snapshot
+    ASSERT_OK(Put(1, "key100", "val100"));
+    ASSERT_OK(Put(1, "key101", "val101"));
+
+    iter->Seek("key5");
+    ASSERT_EQ(IterStatus(iter), "key5->val5");
+    if (!CurrentOptions().merge_operator) {
+      // TODO: merge operator does not support backward iteration yet
+      if (kPlainTableAllBytesPrefix != option_config_ &&
+          kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
+          kHashLinkList != option_config_) {
+        iter->Prev();
+        ASSERT_EQ(IterStatus(iter), "key4->val4");
+        iter->Prev();
+        ASSERT_EQ(IterStatus(iter), "key3->val3");
+
+        iter->Next();
+        ASSERT_EQ(IterStatus(iter), "key4->val4");
+        iter->Next();
+        ASSERT_EQ(IterStatus(iter), "key5->val5");
+      }
+      iter->Next();
+      ASSERT_TRUE(!iter->Valid());
+    }
+    db_->ReleaseSnapshot(snapshot);
+    delete iter;
+    // skip as HashCuckooRep does not support snapshot
+  } while (ChangeOptions(kSkipHashCuckoo));
+}
+
+TEST_F(DBIteratorTest, IteratorPinsRef) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    Put(1, "foo", "hello");
+
+    // Get iterator that will yield the current contents of the DB.
+    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+
+    // Write to force compactions
+    Put(1, "foo", "newvalue1");
+    for (int i = 0; i < 100; i++) {
+      // 100K values
+      ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
+    }
+    Put(1, "foo", "newvalue2");
+
+    iter->SeekToFirst();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("foo", iter->key().ToString());
+    ASSERT_EQ("hello", iter->value().ToString());
+    iter->Next();
+    ASSERT_TRUE(!iter->Valid());
+    delete iter;
+  } while (ChangeCompactOptions());
+}
+
+TEST_F(DBIteratorTest, DBIteratorBoundTest) {
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.create_if_missing = true;
+
+  options.prefix_extractor = nullptr;
+  DestroyAndReopen(options);
+  ASSERT_OK(Put("a", "0"));
+  ASSERT_OK(Put("foo", "bar"));
+  ASSERT_OK(Put("foo1", "bar1"));
+  ASSERT_OK(Put("g1", "0"));
+
+  // testing basic case with no iterate_upper_bound and no prefix_extractor
+  {
+    ReadOptions ro;
+    ro.iterate_upper_bound = nullptr;
+
+    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
+
+    iter->Seek("foo");
+
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
+
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
+
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
+  }
+
+  // testing iterate_upper_bound and forward iterator
+  // to make sure it stops at bound
+  {
+    ReadOptions ro;
+    // iterate_upper_bound points beyond the last expected entry
+    Slice prefix("foo2");
+    ro.iterate_upper_bound = &prefix;
+
+    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
+
+    iter->Seek("foo");
+
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
+
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(("foo1")), 0);
+
+    iter->Next();
+    // should stop here...
+    ASSERT_TRUE(!iter->Valid());
+  }
+  // Testing SeekToLast with iterate_upper_bound set
+  {
+    ReadOptions ro;
+
+    Slice prefix("foo");
+    ro.iterate_upper_bound = &prefix;
+
+    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
+
+    iter->SeekToLast();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("a")), 0);
+  }
+
+  // prefix is the first letter of the key
+  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
+
+  DestroyAndReopen(options);
+  ASSERT_OK(Put("a", "0"));
+  ASSERT_OK(Put("foo", "bar"));
+  ASSERT_OK(Put("foo1", "bar1"));
+  ASSERT_OK(Put("g1", "0"));
+
+  // testing with iterate_upper_bound and prefix_extractor
+  // Seek target and iterate_upper_bound are not is same prefix
+  // This should be an error
+  {
+    ReadOptions ro;
+    Slice upper_bound("g");
+    ro.iterate_upper_bound = &upper_bound;
+
+    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
+
+    iter->Seek("foo");
+
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("foo", iter->key().ToString());
+
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("foo1", iter->key().ToString());
+
+    iter->Next();
+    ASSERT_TRUE(!iter->Valid());
+  }
+
+  // testing that iterate_upper_bound prevents iterating over deleted items
+  // if the bound has already reached
+  {
+    options.prefix_extractor = nullptr;
+    DestroyAndReopen(options);
+    ASSERT_OK(Put("a", "0"));
+    ASSERT_OK(Put("b", "0"));
+    ASSERT_OK(Put("b1", "0"));
+    ASSERT_OK(Put("c", "0"));
+    ASSERT_OK(Put("d", "0"));
+    ASSERT_OK(Put("e", "0"));
+    ASSERT_OK(Delete("c"));
+    ASSERT_OK(Delete("d"));
+
+    // base case with no bound
+    ReadOptions ro;
+    ro.iterate_upper_bound = nullptr;
+
+    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
+
+    iter->Seek("b");
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("b")), 0);
+
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(("b1")), 0);
+
+    perf_context.Reset();
+    iter->Next();
+
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 2);
+
+    // now testing with iterate_bound
+    Slice prefix("c");
+    ro.iterate_upper_bound = &prefix;
+
+    iter.reset(db_->NewIterator(ro));
+
+    perf_context.Reset();
+
+    iter->Seek("b");
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Slice("b")), 0);
+
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(("b1")), 0);
+
+    iter->Next();
+    // the iteration should stop as soon as the the bound key is reached
+    // even though the key is deleted
+    // hence internal_delete_skipped_count should be 0
+    ASSERT_TRUE(!iter->Valid());
+    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 0);
+  }
+}
+
+// TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
+//             return the biggest key which is smaller than the seek key.
+TEST_F(DBIteratorTest, PrevAfterMerge) {
+  Options options;
+  options.create_if_missing = true;
+  options.merge_operator = MergeOperators::CreatePutOperator();
+  DestroyAndReopen(options);
+
+  // write three entries with different keys using Merge()
+  WriteOptions wopts;
+  db_->Merge(wopts, "1", "data1");
+  db_->Merge(wopts, "2", "data2");
+  db_->Merge(wopts, "3", "data3");
+
+  std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
+
+  it->Seek("2");
+  ASSERT_TRUE(it->Valid());
+  ASSERT_EQ("2", it->key().ToString());
+
+  it->Prev();
+  ASSERT_TRUE(it->Valid());
+  ASSERT_EQ("1", it->key().ToString());
+}
+
+TEST_F(DBIteratorTest, PinnedDataIteratorRandomized) {
+  enum TestConfig {
+    NORMAL,
+    CLOSE_AND_OPEN,
+    COMPACT_BEFORE_READ,
+    FLUSH_EVERY_1000,
+    MAX
+  };
+
+  // Generate Random data
+  Random rnd(301);
+
+  int puts = 100000;
+  int key_pool = static_cast<int>(puts * 0.7);
+  int key_size = 100;
+  int val_size = 1000;
+  int seeks_percentage = 20;   // 20% of keys will be used to test seek()
+  int delete_percentage = 20;  // 20% of keys will be deleted
+  int merge_percentage = 20;   // 20% of keys will be added using Merge()
+
+  for (int run_config = 0; run_config < TestConfig::MAX; run_config++) {
+    Options options = CurrentOptions();
+    BlockBasedTableOptions table_options;
+    table_options.use_delta_encoding = false;
+    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+    options.merge_operator = MergeOperators::CreatePutOperator();
+    DestroyAndReopen(options);
+
+    std::vector<std::string> generated_keys(key_pool);
+    for (int i = 0; i < key_pool; i++) {
+      generated_keys[i] = RandomString(&rnd, key_size);
+    }
+
+    std::map<std::string, std::string> true_data;
+    std::vector<std::string> random_keys;
+    std::vector<std::string> deleted_keys;
+    for (int i = 0; i < puts; i++) {
+      auto& k = generated_keys[rnd.Next() % key_pool];
+      auto v = RandomString(&rnd, val_size);
+
+      // Insert data to true_data map and to DB
+      true_data[k] = v;
+      if (rnd.OneIn(static_cast<int>(100.0 / merge_percentage))) {
+        ASSERT_OK(db_->Merge(WriteOptions(), k, v));
+      } else {
+        ASSERT_OK(Put(k, v));
+      }
+
+      // Pick random keys to be used to test Seek()
+      if (rnd.OneIn(static_cast<int>(100.0 / seeks_percentage))) {
+        random_keys.push_back(k);
+      }
+
+      // Delete some random keys
+      if (rnd.OneIn(static_cast<int>(100.0 / delete_percentage))) {
+        deleted_keys.push_back(k);
+        true_data.erase(k);
+        ASSERT_OK(Delete(k));
+      }
+
+      if (run_config == TestConfig::FLUSH_EVERY_1000) {
+        if (i && i % 1000 == 0) {
+          Flush();
+        }
+      }
+    }
+
+    if (run_config == TestConfig::CLOSE_AND_OPEN) {
+      Close();
+      Reopen(options);
+    } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
+      db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+    }
+
+    ReadOptions ro;
+    ro.pin_data = true;
+    auto iter = db_->NewIterator(ro);
+
+    {
+      // Test Seek to random keys
+      printf("Testing seek on %zu keys\n", random_keys.size());
+      std::vector<Slice> keys_slices;
+      std::vector<std::string> true_keys;
+      for (auto& k : random_keys) {
+        iter->Seek(k);
+        if (!iter->Valid()) {
+          ASSERT_EQ(true_data.lower_bound(k), true_data.end());
+          continue;
+        }
+        std::string prop_value;
+        ASSERT_OK(
+            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+        ASSERT_EQ("1", prop_value);
+        keys_slices.push_back(iter->key());
+        true_keys.push_back(true_data.lower_bound(k)->first);
+      }
+
+      for (size_t i = 0; i < keys_slices.size(); i++) {
+        ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
+      }
+    }
+
+    {
+      // Test iterating all data forward
+      printf("Testing iterating forward on all keys\n");
+      std::vector<Slice> all_keys;
+      for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+        std::string prop_value;
+        ASSERT_OK(
+            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+        ASSERT_EQ("1", prop_value);
+        all_keys.push_back(iter->key());
+      }
+      ASSERT_EQ(all_keys.size(), true_data.size());
+
+      // Verify that all keys slices are valid
+      auto data_iter = true_data.begin();
+      for (size_t i = 0; i < all_keys.size(); i++) {
+        ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
+        data_iter++;
+      }
+    }
+
+    {
+      // Test iterating all data backward
+      printf("Testing iterating backward on all keys\n");
+      std::vector<Slice> all_keys;
+      for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
+        std::string prop_value;
+        ASSERT_OK(
+            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+        ASSERT_EQ("1", prop_value);
+        all_keys.push_back(iter->key());
+      }
+      ASSERT_EQ(all_keys.size(), true_data.size());
+
+      // Verify that all keys slices are valid (backward)
+      auto data_iter = true_data.rbegin();
+      for (size_t i = 0; i < all_keys.size(); i++) {
+        ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
+        data_iter++;
+      }
+    }
+
+    delete iter;
+  }
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBIteratorTest, PinnedDataIteratorMultipleFiles) {
+  Options options = CurrentOptions();
+  BlockBasedTableOptions table_options;
+  table_options.use_delta_encoding = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  options.disable_auto_compactions = true;
+  options.write_buffer_size = 1024 * 1024 * 10;  // 10 Mb
+  DestroyAndReopen(options);
+
+  std::map<std::string, std::string> true_data;
+
+  // Generate 4 sst files in L2
+  Random rnd(301);
+  for (int i = 1; i <= 1000; i++) {
+    std::string k = Key(i * 3);
+    std::string v = RandomString(&rnd, 100);
+    ASSERT_OK(Put(k, v));
+    true_data[k] = v;
+    if (i % 250 == 0) {
+      ASSERT_OK(Flush());
+    }
+  }
+  ASSERT_EQ(FilesPerLevel(0), "4");
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+  ASSERT_EQ(FilesPerLevel(0), "0,4");
+
+  // Generate 4 sst files in L0
+  for (int i = 1; i <= 1000; i++) {
+    std::string k = Key(i * 2);
+    std::string v = RandomString(&rnd, 100);
+    ASSERT_OK(Put(k, v));
+    true_data[k] = v;
+    if (i % 250 == 0) {
+      ASSERT_OK(Flush());
+    }
+  }
+  ASSERT_EQ(FilesPerLevel(0), "4,4");
+
+  // Add some keys/values in memtables
+  for (int i = 1; i <= 1000; i++) {
+    std::string k = Key(i);
+    std::string v = RandomString(&rnd, 100);
+    ASSERT_OK(Put(k, v));
+    true_data[k] = v;
+  }
+  ASSERT_EQ(FilesPerLevel(0), "4,4");
+
+  ReadOptions ro;
+  ro.pin_data = true;
+  auto iter = db_->NewIterator(ro);
+
+  std::vector<std::pair<Slice, std::string>> results;
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    std::string prop_value;
+    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+    ASSERT_EQ("1", prop_value);
+    results.emplace_back(iter->key(), iter->value().ToString());
+  }
+
+  ASSERT_EQ(results.size(), true_data.size());
+  auto data_iter = true_data.begin();
+  for (size_t i = 0; i < results.size(); i++, data_iter++) {
+    auto& kv = results[i];
+    ASSERT_EQ(kv.first, data_iter->first);
+    ASSERT_EQ(kv.second, data_iter->second);
+  }
+
+  delete iter;
+}
+#endif
+
+TEST_F(DBIteratorTest, PinnedDataIteratorMergeOperator) {
+  Options options = CurrentOptions();
+  BlockBasedTableOptions table_options;
+  table_options.use_delta_encoding = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  options.merge_operator = MergeOperators::CreateUInt64AddOperator();
+  DestroyAndReopen(options);
+
+  std::string numbers[7];
+  for (int val = 0; val <= 6; val++) {
+    PutFixed64(numbers + val, val);
+  }
+
+  // +1 all keys in range [ 0 => 999]
+  for (int i = 0; i < 1000; i++) {
+    WriteOptions wo;
+    ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
+  }
+
+  // +2 all keys divisible by 2 in range [ 0 => 999]
+  for (int i = 0; i < 1000; i += 2) {
+    WriteOptions wo;
+    ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
+  }
+
+  // +3 all keys divisible by 5 in range [ 0 => 999]
+  for (int i = 0; i < 1000; i += 5) {
+    WriteOptions wo;
+    ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
+  }
+
+  ReadOptions ro;
+  ro.pin_data = true;
+  auto iter = db_->NewIterator(ro);
+
+  std::vector<std::pair<Slice, std::string>> results;
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    std::string prop_value;
+    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+    ASSERT_EQ("1", prop_value);
+    results.emplace_back(iter->key(), iter->value().ToString());
+  }
+
+  ASSERT_EQ(results.size(), 1000);
+  for (size_t i = 0; i < results.size(); i++) {
+    auto& kv = results[i];
+    ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
+    int expected_val = 1;
+    if (i % 2 == 0) {
+      expected_val += 2;
+    }
+    if (i % 5 == 0) {
+      expected_val += 3;
+    }
+    ASSERT_EQ(kv.second, numbers[expected_val]);
+  }
+
+  delete iter;
+}
+
+TEST_F(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
+  Options options = CurrentOptions();
+  BlockBasedTableOptions table_options;
+  table_options.use_delta_encoding = false;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  options.write_buffer_size = 100000;
+  DestroyAndReopen(options);
+
+  Random rnd(301);
+
+  std::map<std::string, std::string> true_data;
+  for (int i = 0; i < 1000; i++) {
+    std::string k = RandomString(&rnd, 10);
+    std::string v = RandomString(&rnd, 1000);
+    ASSERT_OK(Put(k, v));
+    true_data[k] = v;
+  }
+
+  ReadOptions ro;
+  ro.pin_data = true;
+  auto iter = db_->NewIterator(ro);
+
+  // Delete 50% of the keys and update the other 50%
+  for (auto& kv : true_data) {
+    if (rnd.OneIn(2)) {
+      ASSERT_OK(Delete(kv.first));
+    } else {
+      std::string new_val = RandomString(&rnd, 1000);
+      ASSERT_OK(Put(kv.first, new_val));
+    }
+  }
+
+  std::vector<std::pair<Slice, std::string>> results;
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    std::string prop_value;
+    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+    ASSERT_EQ("1", prop_value);
+    results.emplace_back(iter->key(), iter->value().ToString());
+  }
+
+  auto data_iter = true_data.begin();
+  for (size_t i = 0; i < results.size(); i++, data_iter++) {
+    auto& kv = results[i];
+    ASSERT_EQ(kv.first, data_iter->first);
+    ASSERT_EQ(kv.second, data_iter->second);
+  }
+
+  delete iter;
+}
+
+TEST_F(DBIteratorTest, IteratorWithLocalStatistics) {
+  Options options = CurrentOptions();
+  options.statistics = rocksdb::CreateDBStatistics();
+  DestroyAndReopen(options);
+
+  Random rnd(301);
+  for (int i = 0; i < 1000; i++) {
+    // Key 10 bytes / Value 10 bytes
+    ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10)));
+  }
+
+  std::atomic<uint64_t> total_next(0);
+  std::atomic<uint64_t> total_next_found(0);
+  std::atomic<uint64_t> total_prev(0);
+  std::atomic<uint64_t> total_prev_found(0);
+  std::atomic<uint64_t> total_bytes(0);
+
+  std::vector<std::thread> threads;
+  std::function<void()> reader_func_next = [&]() {
+    Iterator* iter = db_->NewIterator(ReadOptions());
+
+    iter->SeekToFirst();
+    // Seek will bump ITER_BYTES_READ
+    total_bytes += iter->key().size();
+    total_bytes += iter->value().size();
+    while (true) {
+      iter->Next();
+      total_next++;
+
+      if (!iter->Valid()) {
+        break;
+      }
+      total_next_found++;
+      total_bytes += iter->key().size();
+      total_bytes += iter->value().size();
+    }
+
+    delete iter;
+  };
+
+  std::function<void()> reader_func_prev = [&]() {
+    Iterator* iter = db_->NewIterator(ReadOptions());
+
+    iter->SeekToLast();
+    // Seek will bump ITER_BYTES_READ
+    total_bytes += iter->key().size();
+    total_bytes += iter->value().size();
+    while (true) {
+      iter->Prev();
+      total_prev++;
+
+      if (!iter->Valid()) {
+        break;
+      }
+      total_prev_found++;
+      total_bytes += iter->key().size();
+      total_bytes += iter->value().size();
+    }
+
+    delete iter;
+  };
+
+  for (int i = 0; i < 10; i++) {
+    threads.emplace_back(reader_func_next);
+  }
+  for (int i = 0; i < 15; i++) {
+    threads.emplace_back(reader_func_prev);
+  }
+
+  for (auto& t : threads) {
+    t.join();
+  }
+
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), total_next);
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
+            total_next_found);
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), total_prev);
+  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
+            total_prev_found);
+  ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), total_bytes);
+}
+
+}  // namespace rocksdb
+
+int main(int argc, char** argv) {
+  rocksdb::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc
new file mode 100644
index 000000000..1ec125cb1
--- /dev/null
+++ b/db/db_sst_test.cc
@@ -0,0 +1,1300 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/db_test_util.h"
+#include "port/stack_trace.h"
+#include "rocksdb/sst_file_manager.h"
+#include "rocksdb/sst_file_writer.h"
+#include "util/sst_file_manager_impl.h"
+
+namespace rocksdb {
+
+class DBSSTTest : public DBTestBase {
+ public:
+  DBSSTTest() : DBTestBase("/db_sst_test") {}
+};
+
+TEST_F(DBSSTTest, DontDeletePendingOutputs) {
+  Options options;
+  options.env = env_;
+  options.create_if_missing = true;
+  DestroyAndReopen(options);
+
+  // Every time we write to a table file, call FOF/POF with full DB scan. This
+  // will make sure our pending_outputs_ protection work correctly
+  std::function<void()> purge_obsolete_files_function = [&]() {
+    JobContext job_context(0);
+    dbfull()->TEST_LockMutex();
+    dbfull()->FindObsoleteFiles(&job_context, true /*force*/);
+    dbfull()->TEST_UnlockMutex();
+    dbfull()->PurgeObsoleteFiles(job_context);
+    job_context.Clean();
+  };
+
+  env_->table_write_callback_ = &purge_obsolete_files_function;
+
+  for (int i = 0; i < 2; ++i) {
+    ASSERT_OK(Put("a", "begin"));
+    ASSERT_OK(Put("z", "end"));
+    ASSERT_OK(Flush());
+  }
+
+  // If pending output guard does not work correctly, PurgeObsoleteFiles() will
+  // delete the file that Compaction is trying to create, causing this: error
+  // db/db_test.cc:975: IO error:
+  // /tmp/rocksdbtest-1552237650/db_test/000009.sst: No such file or directory
+  Compact("a", "b");
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBSSTTest, DontDeleteMovedFile) {
+  // This test triggers move compaction and verifies that the file is not
+  // deleted when it's part of move compaction
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.create_if_missing = true;
+  options.max_bytes_for_level_base = 1024 * 1024;  // 1 MB
+  options.level0_file_num_compaction_trigger =
+      2;  // trigger compaction when we have 2 files
+  DestroyAndReopen(options);
+
+  Random rnd(301);
+  // Create two 1MB sst files
+  for (int i = 0; i < 2; ++i) {
+    // Create 1MB sst file
+    for (int j = 0; j < 100; ++j) {
+      ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024)));
+    }
+    ASSERT_OK(Flush());
+  }
+  // this should execute both L0->L1 and L1->(move)->L2 compactions
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ("0,0,1", FilesPerLevel(0));
+
+  // If the moved file is actually deleted (the move-safeguard in
+  // ~Version::Version() is not there), we get this failure:
+  // Corruption: Can't access /000009.sst
+  Reopen(options);
+}
+
+// This reproduces a bug where we don't delete a file because when it was
+// supposed to be deleted, it was blocked by pending_outputs
+// Consider:
+// 1. current file_number is 13
+// 2. compaction (1) starts, blocks deletion of all files starting with 13
+// (pending outputs)
+// 3. file 13 is created by compaction (2)
+// 4. file 13 is consumed by compaction (3) and file 15 was created. Since file
+// 13 has no references, it is put into VersionSet::obsolete_files_
+// 5. FindObsoleteFiles() gets file 13 from VersionSet::obsolete_files_. File 13
+// is deleted from obsolete_files_ set.
+// 6. PurgeObsoleteFiles() tries to delete file 13, but this file is blocked by
+// pending outputs since compaction (1) is still running. It is not deleted and
+// it is not present in obsolete_files_ anymore. Therefore, we never delete it.
+TEST_F(DBSSTTest, DeleteObsoleteFilesPendingOutputs) {
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.write_buffer_size = 2 * 1024 * 1024;     // 2 MB
+  options.max_bytes_for_level_base = 1024 * 1024;  // 1 MB
+  options.level0_file_num_compaction_trigger =
+      2;  // trigger compaction when we have 2 files
+  options.max_background_flushes = 2;
+  options.max_background_compactions = 2;
+
+  OnFileDeletionListener* listener = new OnFileDeletionListener();
+  options.listeners.emplace_back(listener);
+
+  Reopen(options);
+
+  Random rnd(301);
+  // Create two 1MB sst files
+  for (int i = 0; i < 2; ++i) {
+    // Create 1MB sst file
+    for (int j = 0; j < 100; ++j) {
+      ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024)));
+    }
+    ASSERT_OK(Flush());
+  }
+  // this should execute both L0->L1 and L1->(move)->L2 compactions
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ("0,0,1", FilesPerLevel(0));
+
+  test::SleepingBackgroundTask blocking_thread;
+  port::Mutex mutex_;
+  bool already_blocked(false);
+
+  // block the flush
+  std::function<void()> block_first_time = [&]() {
+    bool blocking = false;
+    {
+      MutexLock l(&mutex_);
+      if (!already_blocked) {
+        blocking = true;
+        already_blocked = true;
+      }
+    }
+    if (blocking) {
+      blocking_thread.DoSleep();
+    }
+  };
+  env_->table_write_callback_ = &block_first_time;
+  // Create 1MB sst file
+  for (int j = 0; j < 256; ++j) {
+    ASSERT_OK(Put(Key(j), RandomString(&rnd, 10 * 1024)));
+  }
+  // this should trigger a flush, which is blocked with block_first_time
+  // pending_file is protecting all the files created after
+
+  ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr));
+
+  ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
+  std::vector<LiveFileMetaData> metadata;
+  db_->GetLiveFilesMetaData(&metadata);
+  ASSERT_EQ(metadata.size(), 1U);
+  auto file_on_L2 = metadata[0].name;
+  listener->SetExpectedFileName(dbname_ + file_on_L2);
+
+  ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr,
+                                        true /* disallow trivial move */));
+  ASSERT_EQ("0,0,0,0,1", FilesPerLevel(0));
+
+  // finish the flush!
+  blocking_thread.WakeUp();
+  blocking_thread.WaitUntilDone();
+  dbfull()->TEST_WaitForFlushMemTable();
+  ASSERT_EQ("1,0,0,0,1", FilesPerLevel(0));
+
+  metadata.clear();
+  db_->GetLiveFilesMetaData(&metadata);
+  ASSERT_EQ(metadata.size(), 2U);
+
+  // This file should have been deleted during last compaction
+  ASSERT_EQ(Status::NotFound(), env_->FileExists(dbname_ + file_on_L2));
+  listener->VerifyMatchedCount(1);
+}
+
+#endif  // ROCKSDB_LITE
+
+TEST_F(DBSSTTest, DBWithSstFileManager) {
+  std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
+  auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
+
+  int files_added = 0;
+  int files_deleted = 0;
+  int files_moved = 0;
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "SstFileManagerImpl::OnAddFile", [&](void* arg) { files_added++; });
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "SstFileManagerImpl::OnDeleteFile", [&](void* arg) { files_deleted++; });
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "SstFileManagerImpl::OnMoveFile", [&](void* arg) { files_moved++; });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  Options options = CurrentOptions();
+  options.sst_file_manager = sst_file_manager;
+  DestroyAndReopen(options);
+
+  Random rnd(301);
+  for (int i = 0; i < 25; i++) {
+    GenerateNewRandomFile(&rnd);
+    ASSERT_OK(Flush());
+    dbfull()->TEST_WaitForFlushMemTable();
+    dbfull()->TEST_WaitForCompact();
+    // Verify that we are tracking all sst files in dbname_
+    ASSERT_EQ(sfm->GetTrackedFiles(), GetAllSSTFiles());
+  }
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+  auto files_in_db = GetAllSSTFiles();
+  // Verify that we are tracking all sst files in dbname_
+  ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
+  // Verify the total files size
+  uint64_t total_files_size = 0;
+  for (auto& file_to_size : files_in_db) {
+    total_files_size += file_to_size.second;
+  }
+  ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
+  // We flushed at least 25 files
+  ASSERT_GE(files_added, 25);
+  // Compaction must have deleted some files
+  ASSERT_GT(files_deleted, 0);
+  // No files were moved
+  ASSERT_EQ(files_moved, 0);
+
+  Close();
+  Reopen(options);
+  ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
+  ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
+
+  // Verify that we track all the files again after the DB is closed and opened
+  Close();
+  sst_file_manager.reset(NewSstFileManager(env_));
+  options.sst_file_manager = sst_file_manager;
+  sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
+
+  Reopen(options);
+  ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
+  ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
+
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBSSTTest, RateLimitedDelete) {
+  rocksdb::SyncPoint::GetInstance()->LoadDependency({
+      {"DBSSTTest::RateLimitedDelete:1",
+       "DeleteScheduler::BackgroundEmptyTrash"},
+  });
+
+  std::vector<uint64_t> penalties;
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DeleteScheduler::BackgroundEmptyTrash:Wait",
+      [&](void* arg) { penalties.push_back(*(static_cast<int*>(arg))); });
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.env = env_;
+
+  std::string trash_dir = test::TmpDir(env_) + "/trash";
+  int64_t rate_bytes_per_sec = 1024 * 10;  // 10 Kbs / Sec
+  Status s;
+  options.sst_file_manager.reset(NewSstFileManager(
+      env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s));
+  ASSERT_OK(s);
+  auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
+
+  Destroy(last_options_);
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  ASSERT_OK(TryReopen(options));
+  // Create 4 files in L0
+  for (char v = 'a'; v <= 'd'; v++) {
+    ASSERT_OK(Put("Key2", DummyString(1024, v)));
+    ASSERT_OK(Put("Key3", DummyString(1024, v)));
+    ASSERT_OK(Put("Key4", DummyString(1024, v)));
+    ASSERT_OK(Put("Key1", DummyString(1024, v)));
+    ASSERT_OK(Put("Key4", DummyString(1024, v)));
+    ASSERT_OK(Flush());
+  }
+  // We created 4 sst files in L0
+  ASSERT_EQ("4", FilesPerLevel(0));
+
+  std::vector<LiveFileMetaData> metadata;
+  db_->GetLiveFilesMetaData(&metadata);
+
+  // Compaction will move the 4 files in L0 to trash and create 1 L1 file
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+  ASSERT_EQ("0,1", FilesPerLevel(0));
+
+  uint64_t delete_start_time = env_->NowMicros();
+  // Hold BackgroundEmptyTrash
+  TEST_SYNC_POINT("DBSSTTest::RateLimitedDelete:1");
+  sfm->WaitForEmptyTrash();
+  uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time;
+
+  uint64_t total_files_size = 0;
+  uint64_t expected_penlty = 0;
+  ASSERT_EQ(penalties.size(), metadata.size());
+  for (size_t i = 0; i < metadata.size(); i++) {
+    total_files_size += metadata[i].size;
+    expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec);
+    ASSERT_EQ(expected_penlty, penalties[i]);
+  }
+  ASSERT_GT(time_spent_deleting, expected_penlty * 0.9);
+
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+}
+
+// Create a DB with 2 db_paths, and generate multiple files in the 2
+// db_paths using CompactRangeOptions, make sure that files that were
+// deleted from first db_path were deleted using DeleteScheduler and
+// files in the second path were not.
+TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
+  int bg_delete_file = 0;
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DeleteScheduler::DeleteTrashFile:DeleteFile",
+      [&](void* arg) { bg_delete_file++; });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.db_paths.emplace_back(dbname_, 1024 * 100);
+  options.db_paths.emplace_back(dbname_ + "_2", 1024 * 100);
+  options.env = env_;
+
+  std::string trash_dir = test::TmpDir(env_) + "/trash";
+  int64_t rate_bytes_per_sec = 1024 * 1024;  // 1 Mb / Sec
+  Status s;
+  options.sst_file_manager.reset(NewSstFileManager(
+      env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s));
+  ASSERT_OK(s);
+  auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
+
+  DestroyAndReopen(options);
+
+  // Create 4 files in L0
+  for (int i = 0; i < 4; i++) {
+    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A')));
+    ASSERT_OK(Flush());
+  }
+  // We created 4 sst files in L0
+  ASSERT_EQ("4", FilesPerLevel(0));
+  // Compaction will delete files from L0 in first db path and generate a new
+  // file in L1 in second db path
+  CompactRangeOptions compact_options;
+  compact_options.target_path_id = 1;
+  Slice begin("Key0");
+  Slice end("Key3");
+  ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
+  ASSERT_EQ("0,1", FilesPerLevel(0));
+
+  // Create 4 files in L0
+  for (int i = 4; i < 8; i++) {
+    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B')));
+    ASSERT_OK(Flush());
+  }
+  ASSERT_EQ("4,1", FilesPerLevel(0));
+
+  // Compaction will delete files from L0 in first db path and generate a new
+  // file in L1 in second db path
+  begin = "Key4";
+  end = "Key7";
+  ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
+  ASSERT_EQ("0,2", FilesPerLevel(0));
+
+  sfm->WaitForEmptyTrash();
+  ASSERT_EQ(bg_delete_file, 8);
+
+  compact_options.bottommost_level_compaction =
+      BottommostLevelCompaction::kForce;
+  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
+  ASSERT_EQ("0,1", FilesPerLevel(0));
+
+  sfm->WaitForEmptyTrash();
+  ASSERT_EQ(bg_delete_file, 8);
+
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+}
+
+TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) {
+  int bg_delete_file = 0;
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DeleteScheduler::DeleteTrashFile:DeleteFile",
+      [&](void* arg) { bg_delete_file++; });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.env = env_;
+  DestroyAndReopen(options);
+
+  // Create 4 files in L0
+  for (int i = 0; i < 4; i++) {
+    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A')));
+    ASSERT_OK(Flush());
+  }
+  // We created 4 sst files in L0
+  ASSERT_EQ("4", FilesPerLevel(0));
+
+  // Close DB and destroy it using DeleteScheduler
+  Close();
+  std::string trash_dir = test::TmpDir(env_) + "/trash";
+  int64_t rate_bytes_per_sec = 1024 * 1024;  // 1 Mb / Sec
+  Status s;
+  options.sst_file_manager.reset(NewSstFileManager(
+      env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s));
+  ASSERT_OK(s);
+  ASSERT_OK(DestroyDB(dbname_, options));
+
+  auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
+  sfm->WaitForEmptyTrash();
+  // We have deleted the 4 sst files in the delete_scheduler
+  ASSERT_EQ(bg_delete_file, 4);
+}
+#endif  // ROCKSDB_LITE
+
+TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) {
+  std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
+  auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
+
+  Options options = CurrentOptions();
+  options.sst_file_manager = sst_file_manager;
+  options.disable_auto_compactions = true;
+  DestroyAndReopen(options);
+
+  Random rnd(301);
+
+  // Generate a file containing 100 keys.
+  for (int i = 0; i < 100; i++) {
+    ASSERT_OK(Put(Key(i), RandomString(&rnd, 50)));
+  }
+  ASSERT_OK(Flush());
+
+  uint64_t first_file_size = 0;
+  auto files_in_db = GetAllSSTFiles(&first_file_size);
+  ASSERT_EQ(sfm->GetTotalSize(), first_file_size);
+
+  // Set the maximum allowed space usage to the current total size
+  sfm->SetMaxAllowedSpaceUsage(first_file_size + 1);
+
+  ASSERT_OK(Put("key1", "val1"));
+  // This flush will cause bg_error_ and will fail
+  ASSERT_NOK(Flush());
+}
+
+TEST_F(DBSSTTest, DBWithMaxSpaceAllowedRandomized) {
+  // This test will set a maximum allowed space for the DB, then it will
+  // keep filling the DB until the limit is reached and bg_error_ is set.
+  // When bg_error_ is set we will verify that the DB size is greater
+  // than the limit.
+
+  std::vector<int> max_space_limits_mbs = {1, 2, 4, 8, 10};
+
+  bool bg_error_set = false;
+  uint64_t total_sst_files_size = 0;
+
+  int reached_max_space_on_flush = 0;
+  int reached_max_space_on_compaction = 0;
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached",
+      [&](void* arg) {
+        bg_error_set = true;
+        GetAllSSTFiles(&total_sst_files_size);
+        reached_max_space_on_flush++;
+      });
+
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached",
+      [&](void* arg) {
+        bg_error_set = true;
+        GetAllSSTFiles(&total_sst_files_size);
+        reached_max_space_on_compaction++;
+      });
+
+  for (auto limit_mb : max_space_limits_mbs) {
+    bg_error_set = false;
+    total_sst_files_size = 0;
+    rocksdb::SyncPoint::GetInstance()->ClearTrace();
+    rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+    std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
+    auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
+
+    Options options = CurrentOptions();
+    options.sst_file_manager = sst_file_manager;
+    options.write_buffer_size = 1024 * 512;  // 512 Kb
+    DestroyAndReopen(options);
+    Random rnd(301);
+
+    sfm->SetMaxAllowedSpaceUsage(limit_mb * 1024 * 1024);
+
+    int keys_written = 0;
+    uint64_t estimated_db_size = 0;
+    while (true) {
+      auto s = Put(RandomString(&rnd, 10), RandomString(&rnd, 50));
+      if (!s.ok()) {
+        break;
+      }
+      keys_written++;
+      // Check the estimated db size vs the db limit just to make sure we
+      // dont run into an infinite loop
+      estimated_db_size = keys_written * 60;  // ~60 bytes per key
+      ASSERT_LT(estimated_db_size, limit_mb * 1024 * 1024 * 2);
+    }
+    ASSERT_TRUE(bg_error_set);
+    ASSERT_GE(total_sst_files_size, limit_mb * 1024 * 1024);
+    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  }
+
+  ASSERT_GT(reached_max_space_on_flush, 0);
+  ASSERT_GT(reached_max_space_on_compaction, 0);
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBSSTTest, OpenDBWithInfiniteMaxOpenFiles) {
+  // Open DB with infinite max open files
+  //  - First iteration use 1 thread to open files
+  //  - Second iteration use 5 threads to open files
+  for (int iter = 0; iter < 2; iter++) {
+    Options options;
+    options.create_if_missing = true;
+    options.write_buffer_size = 100000;
+    options.disable_auto_compactions = true;
+    options.max_open_files = -1;
+    if (iter == 0) {
+      options.max_file_opening_threads = 1;
+    } else {
+      options.max_file_opening_threads = 5;
+    }
+    options = CurrentOptions(options);
+    DestroyAndReopen(options);
+
+    // Create 12 Files in L0 (then move then to L2)
+    for (int i = 0; i < 12; i++) {
+      std::string k = "L2_" + Key(i);
+      ASSERT_OK(Put(k, k + std::string(1000, 'a')));
+      ASSERT_OK(Flush());
+    }
+    CompactRangeOptions compact_options;
+    compact_options.change_level = true;
+    compact_options.target_level = 2;
+    db_->CompactRange(compact_options, nullptr, nullptr);
+
+    // Create 12 Files in L0
+    for (int i = 0; i < 12; i++) {
+      std::string k = "L0_" + Key(i);
+      ASSERT_OK(Put(k, k + std::string(1000, 'a')));
+      ASSERT_OK(Flush());
+    }
+    Close();
+
+    // Reopening the DB will load all exisitng files
+    Reopen(options);
+    ASSERT_EQ("12,0,12", FilesPerLevel(0));
+    std::vector<std::vector<FileMetaData>> files;
+    dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
+
+    for (const auto& level : files) {
+      for (const auto& file : level) {
+        ASSERT_TRUE(file.table_reader_handle != nullptr);
+      }
+    }
+
+    for (int i = 0; i < 12; i++) {
+      ASSERT_EQ(Get("L0_" + Key(i)), "L0_" + Key(i) + std::string(1000, 'a'));
+      ASSERT_EQ(Get("L2_" + Key(i)), "L2_" + Key(i) + std::string(1000, 'a'));
+    }
+  }
+}
+
+TEST_F(DBSSTTest, GetTotalSstFilesSize) {
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.compression = kNoCompression;
+  DestroyAndReopen(options);
+  // Generate 5 files in L0
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 10; j++) {
+      std::string val = "val_file_" + ToString(i);
+      ASSERT_OK(Put(Key(j), val));
+    }
+    Flush();
+  }
+  ASSERT_EQ("5", FilesPerLevel(0));
+
+  std::vector<LiveFileMetaData> live_files_meta;
+  dbfull()->GetLiveFilesMetaData(&live_files_meta);
+  ASSERT_EQ(live_files_meta.size(), 5);
+  uint64_t single_file_size = live_files_meta[0].size;
+
+  uint64_t live_sst_files_size = 0;
+  uint64_t total_sst_files_size = 0;
+  for (const auto& file_meta : live_files_meta) {
+    live_sst_files_size += file_meta.size;
+  }
+
+  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
+                                       &total_sst_files_size));
+  // Live SST files = 5
+  // Total SST files = 5
+  ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
+  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
+
+  // hold current version
+  std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));
+
+  // Compact 5 files into 1 file in L0
+  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+  ASSERT_EQ("0,1", FilesPerLevel(0));
+
+  live_files_meta.clear();
+  dbfull()->GetLiveFilesMetaData(&live_files_meta);
+  ASSERT_EQ(live_files_meta.size(), 1);
+
+  live_sst_files_size = 0;
+  total_sst_files_size = 0;
+  for (const auto& file_meta : live_files_meta) {
+    live_sst_files_size += file_meta.size;
+  }
+  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
+                                       &total_sst_files_size));
+  // Live SST files = 1 (compacted file)
+  // Total SST files = 6 (5 original files + compacted file)
+  ASSERT_EQ(live_sst_files_size, 1 * single_file_size);
+  ASSERT_EQ(total_sst_files_size, 6 * single_file_size);
+
+  // hold current version
+  std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));
+
+  // Delete all keys and compact, this will delete all live files
+  for (int i = 0; i < 10; i++) {
+    ASSERT_OK(Delete(Key(i)));
+  }
+  Flush();
+  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+  ASSERT_EQ("", FilesPerLevel(0));
+
+  live_files_meta.clear();
+  dbfull()->GetLiveFilesMetaData(&live_files_meta);
+  ASSERT_EQ(live_files_meta.size(), 0);
+
+  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
+                                       &total_sst_files_size));
+  // Live SST files = 0
+  // Total SST files = 6 (5 original files + compacted file)
+  ASSERT_EQ(total_sst_files_size, 6 * single_file_size);
+
+  iter1.reset();
+  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
+                                       &total_sst_files_size));
+  // Live SST files = 0
+  // Total SST files = 1 (compacted file)
+  ASSERT_EQ(total_sst_files_size, 1 * single_file_size);
+
+  iter2.reset();
+  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
+                                       &total_sst_files_size));
+  // Live SST files = 0
+  // Total SST files = 0
+  ASSERT_EQ(total_sst_files_size, 0);
+}
+
+TEST_F(DBSSTTest, GetTotalSstFilesSizeVersionsFilesShared) {
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.compression = kNoCompression;
+  DestroyAndReopen(options);
+  // Generate 5 files in L0
+  for (int i = 0; i < 5; i++) {
+    ASSERT_OK(Put(Key(i), "val"));
+    Flush();
+  }
+  ASSERT_EQ("5", FilesPerLevel(0));
+
+  std::vector<LiveFileMetaData> live_files_meta;
+  dbfull()->GetLiveFilesMetaData(&live_files_meta);
+  ASSERT_EQ(live_files_meta.size(), 5);
+  uint64_t single_file_size = live_files_meta[0].size;
+
+  uint64_t live_sst_files_size = 0;
+  uint64_t total_sst_files_size = 0;
+  for (const auto& file_meta : live_files_meta) {
+    live_sst_files_size += file_meta.size;
+  }
+
+  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
+                                       &total_sst_files_size));
+
+  // Live SST files = 5
+  // Total SST files = 5
+  ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
+  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
+
+  // hold current version
+  std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));
+
+  // Compaction will do trivial move from L0 to L1
+  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+  ASSERT_EQ("0,5", FilesPerLevel(0));
+
+  live_files_meta.clear();
+  dbfull()->GetLiveFilesMetaData(&live_files_meta);
+  ASSERT_EQ(live_files_meta.size(), 5);
+
+  live_sst_files_size = 0;
+  total_sst_files_size = 0;
+  for (const auto& file_meta : live_files_meta) {
+    live_sst_files_size += file_meta.size;
+  }
+  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
+                                       &total_sst_files_size));
+  // Live SST files = 5
+  // Total SST files = 5 (used in 2 version)
+  ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
+  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
+
+  // hold current version
+  std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));
+
+  // Delete all keys and compact, this will delete all live files
+  for (int i = 0; i < 5; i++) {
+    ASSERT_OK(Delete(Key(i)));
+  }
+  Flush();
+  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+  ASSERT_EQ("", FilesPerLevel(0));
+
+  live_files_meta.clear();
+  dbfull()->GetLiveFilesMetaData(&live_files_meta);
+  ASSERT_EQ(live_files_meta.size(), 0);
+
+  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
+                                       &total_sst_files_size));
+  // Live SST files = 0
+  // Total SST files = 5 (used in 2 version)
+  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
+
+  iter1.reset();
+  iter2.reset();
+
+  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
+                                       &total_sst_files_size));
+  // Live SST files = 0
+  // Total SST files = 0
+  ASSERT_EQ(total_sst_files_size, 0);
+}
+
+TEST_F(DBSSTTest, AddExternalSstFile) {
+  do {
+    std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
+    env_->CreateDir(sst_files_folder);
+    Options options = CurrentOptions();
+    options.env = env_;
+    const ImmutableCFOptions ioptions(options);
+
+    SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
+
+    // file1.sst (0 => 99)
+    std::string file1 = sst_files_folder + "file1.sst";
+    ASSERT_OK(sst_file_writer.Open(file1));
+    for (int k = 0; k < 100; k++) {
+      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
+    }
+    ExternalSstFileInfo file1_info;
+    Status s = sst_file_writer.Finish(&file1_info);
+    ASSERT_TRUE(s.ok()) << s.ToString();
+    ASSERT_EQ(file1_info.file_path, file1);
+    ASSERT_EQ(file1_info.num_entries, 100);
+    ASSERT_EQ(file1_info.smallest_key, Key(0));
+    ASSERT_EQ(file1_info.largest_key, Key(99));
+    // sst_file_writer already finished, cannot add this value
+    s = sst_file_writer.Add(Key(100), "bad_val");
+    ASSERT_FALSE(s.ok()) << s.ToString();
+
+    // file2.sst (100 => 199)
+    std::string file2 = sst_files_folder + "file2.sst";
+    ASSERT_OK(sst_file_writer.Open(file2));
+    for (int k = 100; k < 200; k++) {
+      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
+    }
+    // Cannot add this key because it's not after last added key
+    s = sst_file_writer.Add(Key(99), "bad_val");
+    ASSERT_FALSE(s.ok()) << s.ToString();
+    ExternalSstFileInfo file2_info;
+    s = sst_file_writer.Finish(&file2_info);
+    ASSERT_TRUE(s.ok()) << s.ToString();
+    ASSERT_EQ(file2_info.file_path, file2);
+    ASSERT_EQ(file2_info.num_entries, 100);
+    ASSERT_EQ(file2_info.smallest_key, Key(100));
+    ASSERT_EQ(file2_info.largest_key, Key(199));
+
+    // file3.sst (195 => 299)
+    // This file values overlap with file2 values
+    std::string file3 = sst_files_folder + "file3.sst";
+    ASSERT_OK(sst_file_writer.Open(file3));
+    for (int k = 195; k < 300; k++) {
+      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
+    }
+    ExternalSstFileInfo file3_info;
+    s = sst_file_writer.Finish(&file3_info);
+    ASSERT_TRUE(s.ok()) << s.ToString();
+    ASSERT_EQ(file3_info.file_path, file3);
+    ASSERT_EQ(file3_info.num_entries, 105);
+    ASSERT_EQ(file3_info.smallest_key, Key(195));
+    ASSERT_EQ(file3_info.largest_key, Key(299));
+
+    // file4.sst (30 => 39)
+    // This file values overlap with file1 values
+    std::string file4 = sst_files_folder + "file4.sst";
+    ASSERT_OK(sst_file_writer.Open(file4));
+    for (int k = 30; k < 40; k++) {
+      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
+    }
+    ExternalSstFileInfo file4_info;
+    s = sst_file_writer.Finish(&file4_info);
+    ASSERT_TRUE(s.ok()) << s.ToString();
+    ASSERT_EQ(file4_info.file_path, file4);
+    ASSERT_EQ(file4_info.num_entries, 10);
+    ASSERT_EQ(file4_info.smallest_key, Key(30));
+    ASSERT_EQ(file4_info.largest_key, Key(39));
+
+    // file5.sst (400 => 499)
+    std::string file5 = sst_files_folder + "file5.sst";
+    ASSERT_OK(sst_file_writer.Open(file5));
+    for (int k = 400; k < 500; k++) {
+      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
+    }
+    ExternalSstFileInfo file5_info;
+    s = sst_file_writer.Finish(&file5_info);
+    ASSERT_TRUE(s.ok()) << s.ToString();
+    ASSERT_EQ(file5_info.file_path, file5);
+    ASSERT_EQ(file5_info.num_entries, 100);
+    ASSERT_EQ(file5_info.smallest_key, Key(400));
+    ASSERT_EQ(file5_info.largest_key, Key(499));
+
+    // Cannot create an empty sst file
+    std::string file_empty = sst_files_folder + "file_empty.sst";
+    ExternalSstFileInfo file_empty_info;
+    s = sst_file_writer.Finish(&file_empty_info);
+    ASSERT_NOK(s);
+
+    DestroyAndReopen(options);
+    // Add file using file path
+    s = db_->AddFile(file1);
+    ASSERT_TRUE(s.ok()) << s.ToString();
+    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
+    for (int k = 0; k < 100; k++) {
+      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
+    }
+
+    // Add file while holding a snapshot will fail
+    const Snapshot* s1 = db_->GetSnapshot();
+    if (s1 != nullptr) {
+      ASSERT_NOK(db_->AddFile(&file2_info));
+      db_->ReleaseSnapshot(s1);
+    }
+    // We can add the file after releaseing the snapshot
+    ASSERT_OK(db_->AddFile(&file2_info));
+
+    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
+    for (int k = 0; k < 200; k++) {
+      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
+    }
+
+    // This file have overlapping values with the exisitng data
+    s = db_->AddFile(file3);
+    ASSERT_FALSE(s.ok()) << s.ToString();
+
+    // This file have overlapping values with the exisitng data
+    s = db_->AddFile(&file4_info);
+    ASSERT_FALSE(s.ok()) << s.ToString();
+
+    // Overwrite values of keys divisible by 5
+    for (int k = 0; k < 200; k += 5) {
+      ASSERT_OK(Put(Key(k), Key(k) + "_val_new"));
+    }
+    ASSERT_NE(db_->GetLatestSequenceNumber(), 0U);
+
+    // Key range of file5 (400 => 499) dont overlap with any keys in DB
+    ASSERT_OK(db_->AddFile(file5));
+
+    // Make sure values are correct before and after flush/compaction
+    for (int i = 0; i < 2; i++) {
+      for (int k = 0; k < 200; k++) {
+        std::string value = Key(k) + "_val";
+        if (k % 5 == 0) {
+          value += "_new";
+        }
+        ASSERT_EQ(Get(Key(k)), value);
+      }
+      for (int k = 400; k < 500; k++) {
+        std::string value = Key(k) + "_val";
+        ASSERT_EQ(Get(Key(k)), value);
+      }
+      ASSERT_OK(Flush());
+      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+    }
+
+    Close();
+    options.disable_auto_compactions = true;
+    Reopen(options);
+
+    // Delete keys in range (400 => 499)
+    for (int k = 400; k < 500; k++) {
+      ASSERT_OK(Delete(Key(k)));
+    }
+    // We deleted range (400 => 499) but cannot add file5 because
+    // of the range tombstones
+    ASSERT_NOK(db_->AddFile(file5));
+
+    // Compacting the DB will remove the tombstones
+    ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+    // Now we can add the file
+    ASSERT_OK(db_->AddFile(file5));
+
+    // Verify values of file5 in DB
+    for (int k = 400; k < 500; k++) {
+      std::string value = Key(k) + "_val";
+      ASSERT_EQ(Get(Key(k)), value);
+    }
+  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
+                         kSkipFIFOCompaction));
+}
+
+// This test reporduce a bug that can happen in some cases if the DB started
+// purging obsolete files when we are adding an external sst file.
+// This situation may result in deleting the file while it's being added.
+TEST_F(DBSSTTest, AddExternalSstFilePurgeObsoleteFilesBug) {
+  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
+  env_->CreateDir(sst_files_folder);
+  Options options = CurrentOptions();
+  options.env = env_;
+  const ImmutableCFOptions ioptions(options);
+
+  SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
+
+  // file1.sst (0 => 500)
+  std::string sst_file_path = sst_files_folder + "file1.sst";
+  Status s = sst_file_writer.Open(sst_file_path);
+  ASSERT_OK(s);
+  for (int i = 0; i < 500; i++) {
+    std::string k = Key(i);
+    s = sst_file_writer.Add(k, k + "_val");
+    ASSERT_OK(s);
+  }
+
+  ExternalSstFileInfo sst_file_info;
+  s = sst_file_writer.Finish(&sst_file_info);
+  ASSERT_OK(s);
+
+  options.delete_obsolete_files_period_micros = 0;
+  options.disable_auto_compactions = true;
+  DestroyAndReopen(options);
+
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::AddFile:FileCopied", [&](void* arg) {
+        ASSERT_OK(Put("aaa", "bbb"));
+        ASSERT_OK(Flush());
+        ASSERT_OK(Put("aaa", "xxx"));
+        ASSERT_OK(Flush());
+        db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+      });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  s = db_->AddFile(sst_file_path);
+  ASSERT_OK(s);
+
+  for (int i = 0; i < 500; i++) {
+    std::string k = Key(i);
+    std::string v = k + "_val";
+    ASSERT_EQ(Get(k), v);
+  }
+
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+}
+
+TEST_F(DBSSTTest, AddExternalSstFileNoCopy) {
+  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
+  env_->CreateDir(sst_files_folder);
+  Options options = CurrentOptions();
+  options.env = env_;
+  const ImmutableCFOptions ioptions(options);
+
+  SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
+
+  // file1.sst (0 => 99)
+  std::string file1 = sst_files_folder + "file1.sst";
+  ASSERT_OK(sst_file_writer.Open(file1));
+  for (int k = 0; k < 100; k++) {
+    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
+  }
+  ExternalSstFileInfo file1_info;
+  Status s = sst_file_writer.Finish(&file1_info);
+  ASSERT_TRUE(s.ok()) << s.ToString();
+  ASSERT_EQ(file1_info.file_path, file1);
+  ASSERT_EQ(file1_info.num_entries, 100);
+  ASSERT_EQ(file1_info.smallest_key, Key(0));
+  ASSERT_EQ(file1_info.largest_key, Key(99));
+
+  // file2.sst (100 => 299)
+  std::string file2 = sst_files_folder + "file2.sst";
+  ASSERT_OK(sst_file_writer.Open(file2));
+  for (int k = 100; k < 300; k++) {
+    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
+  }
+  ExternalSstFileInfo file2_info;
+  s = sst_file_writer.Finish(&file2_info);
+  ASSERT_TRUE(s.ok()) << s.ToString();
+  ASSERT_EQ(file2_info.file_path, file2);
+  ASSERT_EQ(file2_info.num_entries, 200);
+  ASSERT_EQ(file2_info.smallest_key, Key(100));
+  ASSERT_EQ(file2_info.largest_key, Key(299));
+
+  // file3.sst (110 => 124) .. overlap with file2.sst
+  std::string file3 = sst_files_folder + "file3.sst";
+  ASSERT_OK(sst_file_writer.Open(file3));
+  for (int k = 110; k < 125; k++) {
+    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
+  }
+  ExternalSstFileInfo file3_info;
+  s = sst_file_writer.Finish(&file3_info);
+  ASSERT_TRUE(s.ok()) << s.ToString();
+  ASSERT_EQ(file3_info.file_path, file3);
+  ASSERT_EQ(file3_info.num_entries, 15);
+  ASSERT_EQ(file3_info.smallest_key, Key(110));
+  ASSERT_EQ(file3_info.largest_key, Key(124));
+
+  s = db_->AddFile(&file1_info, true /* move file */);
+  ASSERT_TRUE(s.ok()) << s.ToString();
+  ASSERT_EQ(Status::NotFound(), env_->FileExists(file1));
+
+  s = db_->AddFile(&file2_info, false /* copy file */);
+  ASSERT_TRUE(s.ok()) << s.ToString();
+  ASSERT_OK(env_->FileExists(file2));
+
+  // This file have overlapping values with the exisitng data
+  s = db_->AddFile(&file3_info, true /* move file */);
+  ASSERT_FALSE(s.ok()) << s.ToString();
+  ASSERT_OK(env_->FileExists(file3));
+
+  for (int k = 0; k < 300; k++) {
+    ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
+  }
+}
+
+TEST_F(DBSSTTest, AddExternalSstFileMultiThreaded) {
+  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
+  // Bulk load 10 files every file contain 1000 keys
+  int num_files = 10;
+  int keys_per_file = 1000;
+
+  // Generate file names
+  std::vector<std::string> file_names;
+  for (int i = 0; i < num_files; i++) {
+    std::string file_name = "file_" + ToString(i) + ".sst";
+    file_names.push_back(sst_files_folder + file_name);
+  }
+
+  do {
+    env_->CreateDir(sst_files_folder);
+    Options options = CurrentOptions();
+    const ImmutableCFOptions ioptions(options);
+
+    std::atomic<int> thread_num(0);
+    std::function<void()> write_file_func = [&]() {
+      int file_idx = thread_num.fetch_add(1);
+      int range_start = file_idx * keys_per_file;
+      int range_end = range_start + keys_per_file;
+
+      SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
+
+      ASSERT_OK(sst_file_writer.Open(file_names[file_idx]));
+
+      for (int k = range_start; k < range_end; k++) {
+        ASSERT_OK(sst_file_writer.Add(Key(k), Key(k)));
+      }
+
+      Status s = sst_file_writer.Finish();
+      ASSERT_TRUE(s.ok()) << s.ToString();
+    };
+    // Write num_files files in parallel
+    std::vector<std::thread> sst_writer_threads;
+    for (int i = 0; i < num_files; ++i) {
+      sst_writer_threads.emplace_back(write_file_func);
+    }
+
+    for (auto& t : sst_writer_threads) {
+      t.join();
+    }
+
+    fprintf(stderr, "Wrote %d files (%d keys)\n", num_files,
+            num_files * keys_per_file);
+
+    thread_num.store(0);
+    std::atomic<int> files_added(0);
+    std::function<void()> load_file_func = [&]() {
+      // We intentionally add every file twice, and assert that it was added
+      // only once and the other add failed
+      int thread_id = thread_num.fetch_add(1);
+      int file_idx = thread_id / 2;
+      // sometimes we use copy, sometimes link .. the result should be the same
+      bool move_file = (thread_id % 3 == 0);
+
+      Status s = db_->AddFile(file_names[file_idx], move_file);
+      if (s.ok()) {
+        files_added++;
+      }
+    };
+    // Bulk load num_files files in parallel
+    std::vector<std::thread> add_file_threads;
+    DestroyAndReopen(options);
+    for (int i = 0; i < num_files * 2; ++i) {
+      add_file_threads.emplace_back(load_file_func);
+    }
+
+    for (auto& t : add_file_threads) {
+      t.join();
+    }
+    ASSERT_EQ(files_added.load(), num_files);
+    fprintf(stderr, "Loaded %d files (%d keys)\n", num_files,
+            num_files * keys_per_file);
+
+    // Overwrite values of keys divisible by 100
+    for (int k = 0; k < num_files * keys_per_file; k += 100) {
+      std::string key = Key(k);
+      Status s = Put(key, key + "_new");
+      ASSERT_TRUE(s.ok());
+    }
+
+    for (int i = 0; i < 2; i++) {
+      // Make sure the values are correct before and after flush/compaction
+      for (int k = 0; k < num_files * keys_per_file; ++k) {
+        std::string key = Key(k);
+        std::string value = (k % 100 == 0) ? (key + "_new") : key;
+        ASSERT_EQ(Get(key), value);
+      }
+      ASSERT_OK(Flush());
+      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+    }
+
+    fprintf(stderr, "Verified %d values\n", num_files * keys_per_file);
+  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
+                         kSkipFIFOCompaction));
+}
+
+TEST_F(DBSSTTest, AddExternalSstFileOverlappingRanges) {
+  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
+  Random rnd(301);
+  do {
+    env_->CreateDir(sst_files_folder);
+    Options options = CurrentOptions();
+    DestroyAndReopen(options);
+    const ImmutableCFOptions ioptions(options);
+    SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
+
+    printf("Option config = %d\n", option_config_);
+    std::vector<std::pair<int, int>> key_ranges;
+    for (int i = 0; i < 500; i++) {
+      int range_start = rnd.Uniform(20000);
+      int keys_per_range = 10 + rnd.Uniform(41);
+
+      key_ranges.emplace_back(range_start, range_start + keys_per_range);
+    }
+
+    int memtable_add = 0;
+    int success_add_file = 0;
+    int failed_add_file = 0;
+    std::map<std::string, std::string> true_data;
+    for (size_t i = 0; i < key_ranges.size(); i++) {
+      int range_start = key_ranges[i].first;
+      int range_end = key_ranges[i].second;
+
+      Status s;
+      std::string range_val = "range_" + ToString(i);
+
+      // For 20% of ranges we use DB::Put, for 80% we use DB::AddFile
+      if (i && i % 5 == 0) {
+        // Use DB::Put to insert range (insert into memtable)
+        range_val += "_put";
+        for (int k = range_start; k <= range_end; k++) {
+          s = Put(Key(k), range_val);
+          ASSERT_OK(s);
+        }
+        memtable_add++;
+      } else {
+        // Use DB::AddFile to insert range
+        range_val += "_add_file";
+
+        // Generate the file containing the range
+        std::string file_name = sst_files_folder + env_->GenerateUniqueId();
+        ASSERT_OK(sst_file_writer.Open(file_name));
+        for (int k = range_start; k <= range_end; k++) {
+          s = sst_file_writer.Add(Key(k), range_val);
+          ASSERT_OK(s);
+        }
+        ExternalSstFileInfo file_info;
+        s = sst_file_writer.Finish(&file_info);
+        ASSERT_OK(s);
+
+        // Insert the generated file
+        s = db_->AddFile(&file_info);
+
+        auto it = true_data.lower_bound(Key(range_start));
+        if (it != true_data.end() && it->first <= Key(range_end)) {
+          // This range overlap with data already exist in DB
+          ASSERT_NOK(s);
+          failed_add_file++;
+        } else {
+          ASSERT_OK(s);
+          success_add_file++;
+        }
+      }
+
+      if (s.ok()) {
+        // Update true_data map to include the new inserted data
+        for (int k = range_start; k <= range_end; k++) {
+          true_data[Key(k)] = range_val;
+        }
+      }
+
+      // Flush / Compact the DB
+      if (i && i % 50 == 0) {
+        Flush();
+      }
+      if (i && i % 75 == 0) {
+        db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+      }
+    }
+
+    printf(
+        "Total: %zu ranges\n"
+        "AddFile()|Success: %d ranges\n"
+        "AddFile()|RangeConflict: %d ranges\n"
+        "Put(): %d ranges\n",
+        key_ranges.size(), success_add_file, failed_add_file, memtable_add);
+
+    // Verify the correctness of the data
+    for (const auto& kv : true_data) {
+      ASSERT_EQ(Get(kv.first), kv.second);
+    }
+    printf("keys/values verified\n");
+  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
+                         kSkipFIFOCompaction));
+}
+
+#endif  // ROCKSDB_LITE
+
+// 1 Create some SST files by inserting K-V pairs into DB
+// 2 Close DB and change suffix from ".sst" to ".ldb" for every other SST file
+// 3 Open DB and check if all key can be read
+TEST_F(DBSSTTest, SSTsWithLdbSuffixHandling) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 110 << 10;  // 110KB
+  options.num_levels = 4;
+  DestroyAndReopen(options);
+
+  Random rnd(301);
+  int key_id = 0;
+  for (int i = 0; i < 10; ++i) {
+    GenerateNewFile(&rnd, &key_id, false);
+  }
+  Flush();
+  Close();
+  int const num_files = GetSstFileCount(dbname_);
+  ASSERT_GT(num_files, 0);
+
+  std::vector<std::string> filenames;
+  GetSstFiles(dbname_, &filenames);
+  int num_ldb_files = 0;
+  for (size_t i = 0; i < filenames.size(); ++i) {
+    if (i & 1) {
+      continue;
+    }
+    std::string const rdb_name = dbname_ + "/" + filenames[i];
+    std::string const ldb_name = Rocks2LevelTableFileName(rdb_name);
+    ASSERT_TRUE(env_->RenameFile(rdb_name, ldb_name).ok());
+    ++num_ldb_files;
+  }
+  ASSERT_GT(num_ldb_files, 0);
+  ASSERT_EQ(num_files, GetSstFileCount(dbname_));
+
+  Reopen(options);
+  for (int k = 0; k < key_id; ++k) {
+    ASSERT_NE("NOT_FOUND", Get(Key(k)));
+  }
+  Destroy(options);
+}
+
+}  // namespace rocksdb
+
+int main(int argc, char** argv) {
+  rocksdb::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/db/db_test.cc b/db/db_test.cc
index 70c0646f0..caa230b6a 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -39,18 +39,15 @@
 #include "rocksdb/db.h"
 #include "rocksdb/env.h"
 #include "rocksdb/experimental.h"
-#include "rocksdb/sst_file_manager.h"
 #include "rocksdb/filter_policy.h"
 #include "rocksdb/options.h"
 #include "rocksdb/perf_context.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/slice_transform.h"
 #include "rocksdb/snapshot.h"
-#include "rocksdb/sst_file_writer.h"
 #include "rocksdb/table.h"
 #include "rocksdb/table_properties.h"
 #include "rocksdb/thread_status.h"
-#include "rocksdb/wal_filter.h"
 #include "rocksdb/utilities/write_batch_with_index.h"
 #include "rocksdb/utilities/checkpoint.h"
 #include "rocksdb/utilities/optimistic_transaction_db.h"
@@ -65,8 +62,6 @@
 #include "util/compression.h"
 #include "util/mutexlock.h"
 #include "util/rate_limiter.h"
-#include "util/sst_file_manager_impl.h"
-#include "util/statistics.h"
 #include "util/sync_point.h"
 #include "util/testharness.h"
 #include "util/testutil.h"
@@ -77,30 +72,6 @@
 
 namespace rocksdb {
 
-static uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) {
-  return options.statistics->getTickerCount(ticker_type);
-}
-
-#ifndef ROCKSDB_LITE
-// A helper function that ensures the table properties returned in
-// `GetPropertiesOfAllTablesTest` is correct.
-// This test assumes entries size is different for each of the tables.
-namespace {
-
-uint64_t GetNumberOfSstFilesForColumnFamily(DB* db,
-                                            std::string column_family_name) {
-  std::vector<LiveFileMetaData> metadata;
-  db->GetLiveFilesMetaData(&metadata);
-  uint64_t result = 0;
-  for (auto& fileMetadata : metadata) {
-    result += (fileMetadata.column_family_name == column_family_name);
-  }
-  return result;
-}
-
-}  // namespace
-#endif  // ROCKSDB_LITE
-
 class DBTest : public DBTestBase {
  public:
   DBTest() : DBTestBase("/db_test") {}
@@ -369,103 +340,6 @@ TEST_F(DBTest, CompactedDB) {
   ASSERT_TRUE(status_list[5].IsNotFound());
 }
 
-// Make sure that when options.block_cache is set, after a new table is
-// created its index/filter blocks are added to block cache.
-TEST_F(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  options.statistics = rocksdb::CreateDBStatistics();
-  BlockBasedTableOptions table_options;
-  table_options.cache_index_and_filter_blocks = true;
-  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
-  options.table_factory.reset(new BlockBasedTableFactory(table_options));
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  ASSERT_OK(Put(1, "key", "val"));
-  // Create a new table.
-  ASSERT_OK(Flush(1));
-
-  // index/filter blocks added to block cache right after table creation.
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(2, /* only index/filter were added */
-            TestGetTickerCount(options, BLOCK_CACHE_ADD));
-  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
-  uint64_t int_num;
-  ASSERT_TRUE(
-      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
-  ASSERT_EQ(int_num, 0U);
-
-  // Make sure filter block is in cache.
-  std::string value;
-  ReadOptions ropt;
-  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
-
-  // Miss count should remain the same.
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-
-  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-
-  // Make sure index block is in cache.
-  auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
-  value = Get(1, "key");
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(index_block_hit + 1,
-            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-
-  value = Get(1, "key");
-  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(index_block_hit + 2,
-            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-}
-
-TEST_F(DBTest, ParanoidFileChecks) {
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  options.statistics = rocksdb::CreateDBStatistics();
-  options.level0_file_num_compaction_trigger = 2;
-  options.paranoid_file_checks = true;
-  BlockBasedTableOptions table_options;
-  table_options.cache_index_and_filter_blocks = false;
-  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
-  options.table_factory.reset(new BlockBasedTableFactory(table_options));
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  ASSERT_OK(Put(1, "1_key", "val"));
-  ASSERT_OK(Put(1, "9_key", "val"));
-  // Create a new table.
-  ASSERT_OK(Flush(1));
-  ASSERT_EQ(1, /* read and cache data block */
-            TestGetTickerCount(options, BLOCK_CACHE_ADD));
-
-  ASSERT_OK(Put(1, "1_key2", "val2"));
-  ASSERT_OK(Put(1, "9_key2", "val2"));
-  // Create a new SST file. This will further trigger a compaction
-  // and generate another file.
-  ASSERT_OK(Flush(1));
-  dbfull()->TEST_WaitForCompact();
-  ASSERT_EQ(3, /* Totally 3 files created up to now */
-            TestGetTickerCount(options, BLOCK_CACHE_ADD));
-
-  // After disabling options.paranoid_file_checks. NO further block
-  // is added after generating a new file.
-  ASSERT_OK(
-      dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}}));
-
-  ASSERT_OK(Put(1, "1_key3", "val3"));
-  ASSERT_OK(Put(1, "9_key3", "val3"));
-  ASSERT_OK(Flush(1));
-  ASSERT_OK(Put(1, "1_key4", "val4"));
-  ASSERT_OK(Put(1, "9_key4", "val4"));
-  ASSERT_OK(Flush(1));
-  dbfull()->TEST_WaitForCompact();
-  ASSERT_EQ(3, /* Totally 3 files created up to now */
-            TestGetTickerCount(options, BLOCK_CACHE_ADD));
-}
-
 TEST_F(DBTest, LevelLimitReopen) {
   Options options = CurrentOptions();
   CreateAndReopenWithCF({"pikachu"}, options);
@@ -630,43 +504,6 @@ TEST_F(DBTest, ReadFromPersistedTier) {
   } while (ChangeOptions(kSkipHashCuckoo));
 }
 
-TEST_F(DBTest, IteratorProperty) {
-  // The test needs to be changed if kPersistedTier is supported in iterator.
-  Options options = CurrentOptions();
-  CreateAndReopenWithCF({"pikachu"}, options);
-  Put(1, "1", "2");
-  ReadOptions ropt;
-  ropt.pin_data = false;
-  {
-    unique_ptr<Iterator> iter(db_->NewIterator(ropt, handles_[1]));
-    iter->SeekToFirst();
-    std::string prop_value;
-    ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
-    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-    ASSERT_EQ("0", prop_value);
-    iter->Next();
-    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-    ASSERT_EQ("Iterator is not valid.", prop_value);
-  }
-  Close();
-}
-
-TEST_F(DBTest, PersistedTierOnIterator) {
-  // The test needs to be changed if kPersistedTier is supported in iterator.
-  Options options = CurrentOptions();
-  CreateAndReopenWithCF({"pikachu"}, options);
-  ReadOptions ropt;
-  ropt.read_tier = kPersistedTier;
-
-  auto* iter = db_->NewIterator(ropt, handles_[1]);
-  ASSERT_TRUE(iter->status().IsNotSupported());
-  delete iter;
-
-  std::vector<Iterator*> iters;
-  ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
-  Close();
-}
-
 TEST_F(DBTest, SingleDeleteFlush) {
   // Test to check whether flushing preserves a single delete hidden
   // behind a put.
@@ -964,9148 +801,5092 @@ TEST_F(DBTest, GetEncountersEmptyLevel) {
 }
 #endif  // ROCKSDB_LITE
 
-// KeyMayExist can lead to a few false positives, but not false negatives.
-// To make test deterministic, use a much larger number of bits per key-20 than
-// bits in the key, so that false positives are eliminated
-TEST_F(DBTest, KeyMayExist) {
-  do {
-    ReadOptions ropts;
-    std::string value;
-    anon::OptionsOverride options_override;
-    options_override.filter_policy.reset(NewBloomFilterPolicy(20));
-    Options options = CurrentOptions(options_override);
-    options.statistics = rocksdb::CreateDBStatistics();
-    CreateAndReopenWithCF({"pikachu"}, options);
-
-    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
-
-    ASSERT_OK(Put(1, "a", "b"));
-    bool value_found = false;
-    ASSERT_TRUE(
-        db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
-    ASSERT_TRUE(value_found);
-    ASSERT_EQ("b", value);
-
-    ASSERT_OK(Flush(1));
-    value.clear();
-
-    uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
-    uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
-    ASSERT_TRUE(
-        db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
-    ASSERT_TRUE(!value_found);
-    // assert that no new files were opened and no new blocks were
-    // read into block cache.
-    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
-    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
-
-    ASSERT_OK(Delete(1, "a"));
-
-    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
-    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
-    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
-    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
-    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
-
-    ASSERT_OK(Flush(1));
-    dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
-                                true /* disallow trivial move */);
-
-    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
-    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
-    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
-    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
-    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
-
-    ASSERT_OK(Delete(1, "c"));
-
-    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
-    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
-    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value));
-    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
-    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
-
-    // KeyMayExist function only checks data in block caches, which is not used
-    // by plain table format.
-  } while (
-      ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction));
-}
-
-TEST_F(DBTest, NonBlockingIteration) {
+TEST_F(DBTest, CheckLock) {
   do {
-    ReadOptions non_blocking_opts, regular_opts;
+    DB* localdb;
     Options options = CurrentOptions();
-    options.statistics = rocksdb::CreateDBStatistics();
-    non_blocking_opts.read_tier = kBlockCacheTier;
-    CreateAndReopenWithCF({"pikachu"}, options);
-    // write one kv to the database.
-    ASSERT_OK(Put(1, "a", "b"));
-
-    // scan using non-blocking iterator. We should find it because
-    // it is in memtable.
-    Iterator* iter = db_->NewIterator(non_blocking_opts, handles_[1]);
-    int count = 0;
-    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-      ASSERT_OK(iter->status());
-      count++;
-    }
-    ASSERT_EQ(count, 1);
-    delete iter;
-
-    // flush memtable to storage. Now, the key should not be in the
-    // memtable neither in the block cache.
-    ASSERT_OK(Flush(1));
-
-    // verify that a non-blocking iterator does not find any
-    // kvs. Neither does it do any IOs to storage.
-    uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
-    uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
-    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
-    count = 0;
-    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-      count++;
-    }
-    ASSERT_EQ(count, 0);
-    ASSERT_TRUE(iter->status().IsIncomplete());
-    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
-    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
-    delete iter;
-
-    // read in the specified block via a regular get
-    ASSERT_EQ(Get(1, "a"), "b");
-
-    // verify that we can find it via a non-blocking scan
-    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
-    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
-    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
-    count = 0;
-    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-      ASSERT_OK(iter->status());
-      count++;
-    }
-    ASSERT_EQ(count, 1);
-    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
-    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
-    delete iter;
+    ASSERT_OK(TryReopen(options));
 
-    // This test verifies block cache behaviors, which is not used by plain
-    // table format.
-    // Exclude kHashCuckoo as it does not support iteration currently
-  } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo |
-                         kSkipMmapReads));
+    // second open should fail
+    ASSERT_TRUE(!(DB::Open(options, dbname_, &localdb)).ok());
+  } while (ChangeCompactOptions());
 }
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, ManagedNonBlockingIteration) {
+TEST_F(DBTest, FlushMultipleMemtable) {
   do {
-    ReadOptions non_blocking_opts, regular_opts;
     Options options = CurrentOptions();
-    options.statistics = rocksdb::CreateDBStatistics();
-    non_blocking_opts.read_tier = kBlockCacheTier;
-    non_blocking_opts.managed = true;
+    WriteOptions writeOpt = WriteOptions();
+    writeOpt.disableWAL = true;
+    options.max_write_buffer_number = 4;
+    options.min_write_buffer_number_to_merge = 3;
+    options.max_write_buffer_number_to_maintain = -1;
     CreateAndReopenWithCF({"pikachu"}, options);
-    // write one kv to the database.
-    ASSERT_OK(Put(1, "a", "b"));
-
-    // scan using non-blocking iterator. We should find it because
-    // it is in memtable.
-    Iterator* iter = db_->NewIterator(non_blocking_opts, handles_[1]);
-    int count = 0;
-    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-      ASSERT_OK(iter->status());
-      count++;
-    }
-    ASSERT_EQ(count, 1);
-    delete iter;
-
-    // flush memtable to storage. Now, the key should not be in the
-    // memtable neither in the block cache.
+    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
     ASSERT_OK(Flush(1));
+    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
 
-    // verify that a non-blocking iterator does not find any
-    // kvs. Neither does it do any IOs to storage.
-    int64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
-    int64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
-    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
-    count = 0;
-    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-      count++;
-    }
-    ASSERT_EQ(count, 0);
-    ASSERT_TRUE(iter->status().IsIncomplete());
-    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
-    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
-    delete iter;
-
-    // read in the specified block via a regular get
-    ASSERT_EQ(Get(1, "a"), "b");
-
-    // verify that we can find it via a non-blocking scan
-    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
-    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
-    iter = db_->NewIterator(non_blocking_opts, handles_[1]);
-    count = 0;
-    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-      ASSERT_OK(iter->status());
-      count++;
-    }
-    ASSERT_EQ(count, 1);
-    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
-    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
-    delete iter;
-
-    // This test verifies block cache behaviors, which is not used by plain
-    // table format.
-    // Exclude kHashCuckoo as it does not support iteration currently
-  } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo |
-                         kSkipMmapReads));
-}
-#endif  // ROCKSDB_LITE
-
-// A delete is skipped for key if KeyMayExist(key) returns False
-// Tests Writebatch consistency and proper delete behaviour
-TEST_F(DBTest, FilterDeletes) {
-  do {
-    anon::OptionsOverride options_override;
-    options_override.filter_policy.reset(NewBloomFilterPolicy(20));
-    Options options = CurrentOptions(options_override);
-    options.filter_deletes = true;
-    CreateAndReopenWithCF({"pikachu"}, options);
-    WriteBatch batch;
-
-    batch.Delete(handles_[1], "a");
-    dbfull()->Write(WriteOptions(), &batch);
-    ASSERT_EQ(AllEntriesFor("a", 1), "[ ]");  // Delete skipped
-    batch.Clear();
-
-    batch.Put(handles_[1], "a", "b");
-    batch.Delete(handles_[1], "a");
-    dbfull()->Write(WriteOptions(), &batch);
-    ASSERT_EQ(Get(1, "a"), "NOT_FOUND");
-    ASSERT_EQ(AllEntriesFor("a", 1), "[ DEL, b ]");  // Delete issued
-    batch.Clear();
-
-    batch.Delete(handles_[1], "c");
-    batch.Put(handles_[1], "c", "d");
-    dbfull()->Write(WriteOptions(), &batch);
-    ASSERT_EQ(Get(1, "c"), "d");
-    ASSERT_EQ(AllEntriesFor("c", 1), "[ d ]");  // Delete skipped
-    batch.Clear();
-
-    ASSERT_OK(Flush(1));  // A stray Flush
-
-    batch.Delete(handles_[1], "c");
-    dbfull()->Write(WriteOptions(), &batch);
-    ASSERT_EQ(AllEntriesFor("c", 1), "[ DEL, d ]");  // Delete issued
-    batch.Clear();
+    ASSERT_EQ("v1", Get(1, "foo"));
+    ASSERT_EQ("v1", Get(1, "bar"));
+    ASSERT_OK(Flush(1));
   } while (ChangeCompactOptions());
 }
 
-TEST_F(DBTest, GetFilterByPrefixBloom) {
-  Options options = last_options_;
-  options.prefix_extractor.reset(NewFixedPrefixTransform(8));
-  options.statistics = rocksdb::CreateDBStatistics();
-  BlockBasedTableOptions bbto;
-  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
-  bbto.whole_key_filtering = false;
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  DestroyAndReopen(options);
-
-  WriteOptions wo;
-  ReadOptions ro;
-  FlushOptions fo;
-  fo.wait = true;
-  std::string value;
-
-  ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo"));
-  ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
-  ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));
-
-  dbfull()->Flush(fo);
+TEST_F(DBTest, FlushEmptyColumnFamily) {
+  // Block flush thread and disable compaction thread
+  env_->SetBackgroundThreads(1, Env::HIGH);
+  env_->SetBackgroundThreads(1, Env::LOW);
+  test::SleepingBackgroundTask sleeping_task_low;
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
+                 Env::Priority::LOW);
+  test::SleepingBackgroundTask sleeping_task_high;
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
+                 &sleeping_task_high, Env::Priority::HIGH);
 
-  ASSERT_EQ("foo", Get("barbarbar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
-  ASSERT_EQ("foo2", Get("barbarbar2"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
-  ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
+  Options options = CurrentOptions();
+  // disable compaction
+  options.disable_auto_compactions = true;
+  WriteOptions writeOpt = WriteOptions();
+  writeOpt.disableWAL = true;
+  options.max_write_buffer_number = 2;
+  options.min_write_buffer_number_to_merge = 1;
+  options.max_write_buffer_number_to_maintain = 1;
+  CreateAndReopenWithCF({"pikachu"}, options);
 
-  ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
+  // Compaction can still go through even if no thread can flush the
+  // mem table.
+  ASSERT_OK(Flush(0));
+  ASSERT_OK(Flush(1));
 
-  ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
-}
+  // Insert can go through
+  ASSERT_OK(dbfull()->Put(writeOpt, handles_[0], "foo", "v1"));
+  ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
 
-TEST_F(DBTest, WholeKeyFilterProp) {
-  Options options = last_options_;
-  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
-  options.statistics = rocksdb::CreateDBStatistics();
+  ASSERT_EQ("v1", Get(0, "foo"));
+  ASSERT_EQ("v1", Get(1, "bar"));
 
-  BlockBasedTableOptions bbto;
-  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
-  bbto.whole_key_filtering = false;
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  DestroyAndReopen(options);
+  sleeping_task_high.WakeUp();
+  sleeping_task_high.WaitUntilDone();
 
-  WriteOptions wo;
-  ReadOptions ro;
-  FlushOptions fo;
-  fo.wait = true;
-  std::string value;
+  // Flush can still go through.
+  ASSERT_OK(Flush(0));
+  ASSERT_OK(Flush(1));
 
-  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
-  // Needs insert some keys to make sure files are not filtered out by key
-  // ranges.
-  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
-  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
-  dbfull()->Flush(fo);
+  sleeping_task_low.WakeUp();
+  sleeping_task_low.WaitUntilDone();
+}
 
-  Reopen(options);
-  ASSERT_EQ("NOT_FOUND", Get("foo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
-  ASSERT_EQ("NOT_FOUND", Get("bar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-  ASSERT_EQ("foo", Get("foobar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-
-  // Reopen with whole key filtering enabled and prefix extractor
-  // NULL. Bloom filter should be off for both of whole key and
-  // prefix bloom.
-  bbto.whole_key_filtering = true;
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  options.prefix_extractor.reset();
-  Reopen(options);
+TEST_F(DBTest, FLUSH) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    WriteOptions writeOpt = WriteOptions();
+    writeOpt.disableWAL = true;
+    SetPerfLevel(kEnableTime);
+    ;
+    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
+    // this will now also flush the last 2 writes
+    ASSERT_OK(Flush(1));
+    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
 
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-  ASSERT_EQ("NOT_FOUND", Get("foo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-  ASSERT_EQ("NOT_FOUND", Get("bar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-  ASSERT_EQ("foo", Get("foobar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-  // Write DB with only full key filtering.
-  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
-  // Needs insert some keys to make sure files are not filtered out by key
-  // ranges.
-  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
-  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
-  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+    perf_context.Reset();
+    Get(1, "foo");
+    ASSERT_TRUE((int)perf_context.get_from_output_files_time > 0);
 
-  // Reopen with both of whole key off and prefix extractor enabled.
-  // Still no bloom filter should be used.
-  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
-  bbto.whole_key_filtering = false;
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  Reopen(options);
+    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
+    ASSERT_EQ("v1", Get(1, "foo"));
+    ASSERT_EQ("v1", Get(1, "bar"));
 
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-  ASSERT_EQ("NOT_FOUND", Get("foo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-  ASSERT_EQ("NOT_FOUND", Get("bar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-  ASSERT_EQ("foo", Get("foobar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-
-  // Try to create a DB with mixed files:
-  ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
-  // Needs insert some keys to make sure files are not filtered out by key
-  // ranges.
-  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
-  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
-  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+    writeOpt.disableWAL = true;
+    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v2"));
+    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v2"));
+    ASSERT_OK(Flush(1));
 
-  options.prefix_extractor.reset();
-  bbto.whole_key_filtering = true;
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  Reopen(options);
+    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
+    ASSERT_EQ("v2", Get(1, "bar"));
+    perf_context.Reset();
+    ASSERT_EQ("v2", Get(1, "foo"));
+    ASSERT_TRUE((int)perf_context.get_from_output_files_time > 0);
 
-  // Try to create a DB with mixed files.
-  ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar"));
-  // In this case needs insert some keys to make sure files are
-  // not filtered out by key ranges.
-  ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
-  ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
-  Flush();
+    writeOpt.disableWAL = false;
+    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3"));
+    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v3"));
+    ASSERT_OK(Flush(1));
 
-  // Now we have two files:
-  // File 1: An older file with prefix bloom.
-  // File 2: A newer file with whole bloom filter.
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
-  ASSERT_EQ("NOT_FOUND", Get("foo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
-  ASSERT_EQ("NOT_FOUND", Get("bar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
-  ASSERT_EQ("foo", Get("foobar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
-  ASSERT_EQ("bar", Get("barfoo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
-
-  // Reopen with the same setting: only whole key is used
-  Reopen(options);
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
-  ASSERT_EQ("NOT_FOUND", Get("foo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5);
-  ASSERT_EQ("NOT_FOUND", Get("bar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6);
-  ASSERT_EQ("foo", Get("foobar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
-  ASSERT_EQ("bar", Get("barfoo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
-
-  // Restart with both filters are allowed
-  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
-  bbto.whole_key_filtering = true;
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  Reopen(options);
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
-  // File 1 will has it filtered out.
-  // File 2 will not, as prefix `foo` exists in the file.
-  ASSERT_EQ("NOT_FOUND", Get("foo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8);
-  ASSERT_EQ("NOT_FOUND", Get("bar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10);
-  ASSERT_EQ("foo", Get("foobar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
-  ASSERT_EQ("bar", Get("barfoo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
-
-  // Restart with only prefix bloom is allowed.
-  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
-  bbto.whole_key_filtering = false;
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  Reopen(options);
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
-  ASSERT_EQ("NOT_FOUND", Get("foo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
-  ASSERT_EQ("NOT_FOUND", Get("bar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
-  ASSERT_EQ("foo", Get("foobar"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
-  ASSERT_EQ("bar", Get("barfoo"));
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
-}
-
-TEST_F(DBTest, IterSeekBeforePrev) {
-  ASSERT_OK(Put("a", "b"));
-  ASSERT_OK(Put("c", "d"));
-  dbfull()->Flush(FlushOptions());
-  ASSERT_OK(Put("0", "f"));
-  ASSERT_OK(Put("1", "h"));
-  dbfull()->Flush(FlushOptions());
-  ASSERT_OK(Put("2", "j"));
-  auto iter = db_->NewIterator(ReadOptions());
-  iter->Seek(Slice("c"));
-  iter->Prev();
-  iter->Seek(Slice("a"));
-  iter->Prev();
-  delete iter;
-}
+    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
+    // 'foo' should be there because its put
+    // has WAL enabled.
+    ASSERT_EQ("v3", Get(1, "foo"));
+    ASSERT_EQ("v3", Get(1, "bar"));
 
-namespace {
-std::string MakeLongKey(size_t length, char c) {
-  return std::string(length, c);
+    SetPerfLevel(kDisable);
+  } while (ChangeCompactOptions());
 }
-}  // namespace
-
-TEST_F(DBTest, IterLongKeys) {
-  ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
-  ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
-  ASSERT_OK(Put("a", "b"));
-  dbfull()->Flush(FlushOptions());
-  ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
-  ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
-  ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
-  auto iter = db_->NewIterator(ReadOptions());
-
-  // Create a key that needs to be skipped for Seq too new
-  iter->Seek(MakeLongKey(20, 0));
-  ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0");
-  iter->Next();
-  ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
-  iter->Next();
-  ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
-  iter->Next();
-  ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
-  iter->Next();
-  ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4");
-  delete iter;
 
-  iter = db_->NewIterator(ReadOptions());
-  iter->Seek(MakeLongKey(50, 1));
-  ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
-  iter->Next();
-  ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
-  iter->Next();
-  ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
-  delete iter;
-}
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, FlushSchedule) {
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.level0_stop_writes_trigger = 1 << 10;
+  options.level0_slowdown_writes_trigger = 1 << 10;
+  options.min_write_buffer_number_to_merge = 1;
+  options.max_write_buffer_number_to_maintain = 1;
+  options.max_write_buffer_number = 2;
+  options.write_buffer_size = 120 * 1024;
+  CreateAndReopenWithCF({"pikachu"}, options);
+  std::vector<std::thread> threads;
 
-TEST_F(DBTest, IterNextWithNewerSeq) {
-  ASSERT_OK(Put("0", "0"));
-  dbfull()->Flush(FlushOptions());
-  ASSERT_OK(Put("a", "b"));
-  ASSERT_OK(Put("c", "d"));
-  ASSERT_OK(Put("d", "e"));
-  auto iter = db_->NewIterator(ReadOptions());
+  std::atomic<int> thread_num(0);
+  // each column family will have 5 thread, each thread generating 2 memtables.
+  // each column family should end up with 10 table files
+  std::function<void()> fill_memtable_func = [&]() {
+    int a = thread_num.fetch_add(1);
+    Random rnd(a);
+    WriteOptions wo;
+    // this should fill up 2 memtables
+    for (int k = 0; k < 5000; ++k) {
+      ASSERT_OK(db_->Put(wo, handles_[a & 1], RandomString(&rnd, 13), ""));
+    }
+  };
 
-  // Create a key that needs to be skipped for Seq too new
-  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
-       i++) {
-    ASSERT_OK(Put("b", "f"));
+  for (int i = 0; i < 10; ++i) {
+    threads.emplace_back(fill_memtable_func);
   }
 
-  iter->Seek(Slice("a"));
-  ASSERT_EQ(IterStatus(iter), "a->b");
-  iter->Next();
-  ASSERT_EQ(IterStatus(iter), "c->d");
-  delete iter;
-}
+  for (auto& t : threads) {
+    t.join();
+  }
 
-TEST_F(DBTest, IterPrevWithNewerSeq) {
-  ASSERT_OK(Put("0", "0"));
-  dbfull()->Flush(FlushOptions());
-  ASSERT_OK(Put("a", "b"));
-  ASSERT_OK(Put("c", "d"));
-  ASSERT_OK(Put("d", "e"));
-  auto iter = db_->NewIterator(ReadOptions());
-
-  // Create a key that needs to be skipped for Seq too new
-  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
-       i++) {
-    ASSERT_OK(Put("b", "f"));
-  }
-
-  iter->Seek(Slice("d"));
-  ASSERT_EQ(IterStatus(iter), "d->e");
-  iter->Prev();
-  ASSERT_EQ(IterStatus(iter), "c->d");
-  iter->Prev();
-  ASSERT_EQ(IterStatus(iter), "a->b");
-
-  iter->Prev();
-  delete iter;
-}
-
-TEST_F(DBTest, IterPrevWithNewerSeq2) {
-  ASSERT_OK(Put("0", "0"));
-  dbfull()->Flush(FlushOptions());
-  ASSERT_OK(Put("a", "b"));
-  ASSERT_OK(Put("c", "d"));
-  ASSERT_OK(Put("d", "e"));
-  auto iter = db_->NewIterator(ReadOptions());
-  iter->Seek(Slice("c"));
-  ASSERT_EQ(IterStatus(iter), "c->d");
-
-  // Create a key that needs to be skipped for Seq too new
-  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
-      i++) {
-    ASSERT_OK(Put("b", "f"));
-  }
-
-  iter->Prev();
-  ASSERT_EQ(IterStatus(iter), "a->b");
-
-  iter->Prev();
-  delete iter;
+  auto default_tables = GetNumberOfSstFilesForColumnFamily(db_, "default");
+  auto pikachu_tables = GetNumberOfSstFilesForColumnFamily(db_, "pikachu");
+  ASSERT_LE(default_tables, static_cast<uint64_t>(10));
+  ASSERT_GT(default_tables, static_cast<uint64_t>(0));
+  ASSERT_LE(pikachu_tables, static_cast<uint64_t>(10));
+  ASSERT_GT(pikachu_tables, static_cast<uint64_t>(0));
 }
+#endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, IterEmpty) {
+TEST_F(DBTest, ManifestRollOver) {
   do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
-
-    iter->SeekToFirst();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    iter->SeekToLast();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    iter->Seek("foo");
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    delete iter;
+    Options options;
+    options.max_manifest_file_size = 10;  // 10 bytes
+    options = CurrentOptions(options);
+    CreateAndReopenWithCF({"pikachu"}, options);
+    {
+      ASSERT_OK(Put(1, "manifest_key1", std::string(1000, '1')));
+      ASSERT_OK(Put(1, "manifest_key2", std::string(1000, '2')));
+      ASSERT_OK(Put(1, "manifest_key3", std::string(1000, '3')));
+      uint64_t manifest_before_flush = dbfull()->TEST_Current_Manifest_FileNo();
+      ASSERT_OK(Flush(1));  // This should trigger LogAndApply.
+      uint64_t manifest_after_flush = dbfull()->TEST_Current_Manifest_FileNo();
+      ASSERT_GT(manifest_after_flush, manifest_before_flush);
+      ReopenWithColumnFamilies({"default", "pikachu"}, options);
+      ASSERT_GT(dbfull()->TEST_Current_Manifest_FileNo(), manifest_after_flush);
+      // check if a new manifest file got inserted or not.
+      ASSERT_EQ(std::string(1000, '1'), Get(1, "manifest_key1"));
+      ASSERT_EQ(std::string(1000, '2'), Get(1, "manifest_key2"));
+      ASSERT_EQ(std::string(1000, '3'), Get(1, "manifest_key3"));
+    }
   } while (ChangeCompactOptions());
 }
 
-TEST_F(DBTest, IterSingle) {
+TEST_F(DBTest, IdentityAcrossRestarts) {
   do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    ASSERT_OK(Put(1, "a", "va"));
-    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
-
-    iter->SeekToFirst();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-    iter->SeekToFirst();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    iter->SeekToLast();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-    iter->SeekToLast();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    iter->Seek("");
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    iter->Seek("a");
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
+    std::string id1;
+    ASSERT_OK(db_->GetDbIdentity(id1));
 
-    iter->Seek("b");
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
+    Options options = CurrentOptions();
+    Reopen(options);
+    std::string id2;
+    ASSERT_OK(db_->GetDbIdentity(id2));
+    // id1 should match id2 because identity was not regenerated
+    ASSERT_EQ(id1.compare(id2), 0);
 
-    delete iter;
+    std::string idfilename = IdentityFileName(dbname_);
+    ASSERT_OK(env_->DeleteFile(idfilename));
+    Reopen(options);
+    std::string id3;
+    ASSERT_OK(db_->GetDbIdentity(id3));
+    // id1 should NOT match id3 because identity was regenerated
+    ASSERT_NE(id1.compare(id3), 0);
   } while (ChangeCompactOptions());
 }
 
-TEST_F(DBTest, IterMulti) {
-  do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    ASSERT_OK(Put(1, "a", "va"));
-    ASSERT_OK(Put(1, "b", "vb"));
-    ASSERT_OK(Put(1, "c", "vc"));
-    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+namespace {
+class KeepFilter : public CompactionFilter {
+ public:
+  virtual bool Filter(int level, const Slice& key, const Slice& value,
+                      std::string* new_value,
+                      bool* value_changed) const override {
+    return false;
+  }
 
-    iter->SeekToFirst();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "b->vb");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "c->vc");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-    iter->SeekToFirst();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    iter->SeekToLast();
-    ASSERT_EQ(IterStatus(iter), "c->vc");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "b->vb");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-    iter->SeekToLast();
-    ASSERT_EQ(IterStatus(iter), "c->vc");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    iter->Seek("");
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Seek("a");
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Seek("ax");
-    ASSERT_EQ(IterStatus(iter), "b->vb");
-
-    iter->Seek("b");
-    ASSERT_EQ(IterStatus(iter), "b->vb");
-    iter->Seek("z");
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    // Switch from reverse to forward
-    iter->SeekToLast();
-    iter->Prev();
-    iter->Prev();
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "b->vb");
+  virtual const char* Name() const override { return "KeepFilter"; }
+};
 
-    // Switch from forward to reverse
-    iter->SeekToFirst();
-    iter->Next();
-    iter->Next();
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "b->vb");
-
-    // Make sure iter stays at snapshot
-    ASSERT_OK(Put(1, "a", "va2"));
-    ASSERT_OK(Put(1, "a2", "va3"));
-    ASSERT_OK(Put(1, "b", "vb2"));
-    ASSERT_OK(Put(1, "c", "vc2"));
-    ASSERT_OK(Delete(1, "b"));
-    iter->SeekToFirst();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "b->vb");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "c->vc");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-    iter->SeekToLast();
-    ASSERT_EQ(IterStatus(iter), "c->vc");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "b->vb");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    delete iter;
-  } while (ChangeCompactOptions());
-}
+class KeepFilterFactory : public CompactionFilterFactory {
+ public:
+  explicit KeepFilterFactory(bool check_context = false)
+      : check_context_(check_context) {}
 
-// Check that we can skip over a run of user keys
-// by using reseek rather than sequential scan
-TEST_F(DBTest, IterReseek) {
-  anon::OptionsOverride options_override;
-  options_override.skip_policy = kSkipNoSnapshot;
-  Options options = CurrentOptions(options_override);
-  options.max_sequential_skip_in_iterations = 3;
-  options.create_if_missing = true;
-  options.statistics = rocksdb::CreateDBStatistics();
-  DestroyAndReopen(options);
-  CreateAndReopenWithCF({"pikachu"}, options);
+  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& context) override {
+    if (check_context_) {
+      EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
+      EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
+    }
+    return std::unique_ptr<CompactionFilter>(new KeepFilter());
+  }
 
-  // insert three keys with same userkey and verify that
-  // reseek is not invoked. For each of these test cases,
-  // verify that we can find the next key "b".
-  ASSERT_OK(Put(1, "a", "zero"));
-  ASSERT_OK(Put(1, "a", "one"));
-  ASSERT_OK(Put(1, "a", "two"));
-  ASSERT_OK(Put(1, "b", "bone"));
-  Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
-  iter->SeekToFirst();
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
-  ASSERT_EQ(IterStatus(iter), "a->two");
-  iter->Next();
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
-  ASSERT_EQ(IterStatus(iter), "b->bone");
-  delete iter;
+  virtual const char* Name() const override { return "KeepFilterFactory"; }
+  bool check_context_;
+  std::atomic_bool expect_full_compaction_;
+  std::atomic_bool expect_manual_compaction_;
+};
 
-  // insert a total of three keys with same userkey and verify
-  // that reseek is still not invoked.
-  ASSERT_OK(Put(1, "a", "three"));
-  iter = db_->NewIterator(ReadOptions(), handles_[1]);
-  iter->SeekToFirst();
-  ASSERT_EQ(IterStatus(iter), "a->three");
-  iter->Next();
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
-  ASSERT_EQ(IterStatus(iter), "b->bone");
-  delete iter;
+class DelayFilter : public CompactionFilter {
+ public:
+  explicit DelayFilter(DBTestBase* d) : db_test(d) {}
+  virtual bool Filter(int level, const Slice& key, const Slice& value,
+                      std::string* new_value,
+                      bool* value_changed) const override {
+    db_test->env_->addon_time_.fetch_add(1000);
+    return true;
+  }
 
-  // insert a total of four keys with same userkey and verify
-  // that reseek is invoked.
-  ASSERT_OK(Put(1, "a", "four"));
-  iter = db_->NewIterator(ReadOptions(), handles_[1]);
-  iter->SeekToFirst();
-  ASSERT_EQ(IterStatus(iter), "a->four");
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
-  iter->Next();
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
-  ASSERT_EQ(IterStatus(iter), "b->bone");
-  delete iter;
+  virtual const char* Name() const override { return "DelayFilter"; }
 
-  // Testing reverse iterator
-  // At this point, we have three versions of "a" and one version of "b".
-  // The reseek statistics is already at 1.
-  int num_reseeks =
-      (int)TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION);
-
-  // Insert another version of b and assert that reseek is not invoked
-  ASSERT_OK(Put(1, "b", "btwo"));
-  iter = db_->NewIterator(ReadOptions(), handles_[1]);
-  iter->SeekToLast();
-  ASSERT_EQ(IterStatus(iter), "b->btwo");
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
-            num_reseeks);
-  iter->Prev();
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
-            num_reseeks + 1);
-  ASSERT_EQ(IterStatus(iter), "a->four");
-  delete iter;
+ private:
+  DBTestBase* db_test;
+};
 
-  // insert two more versions of b. This makes a total of 4 versions
-  // of b and 4 versions of a.
-  ASSERT_OK(Put(1, "b", "bthree"));
-  ASSERT_OK(Put(1, "b", "bfour"));
-  iter = db_->NewIterator(ReadOptions(), handles_[1]);
-  iter->SeekToLast();
-  ASSERT_EQ(IterStatus(iter), "b->bfour");
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
-            num_reseeks + 2);
-  iter->Prev();
-
-  // the previous Prev call should have invoked reseek
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
-            num_reseeks + 3);
-  ASSERT_EQ(IterStatus(iter), "a->four");
-  delete iter;
-}
+class DelayFilterFactory : public CompactionFilterFactory {
+ public:
+  explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {}
+  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& context) override {
+    return std::unique_ptr<CompactionFilter>(new DelayFilter(db_test));
+  }
 
-TEST_F(DBTest, IterSmallAndLargeMix) {
-  do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    ASSERT_OK(Put(1, "a", "va"));
-    ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
-    ASSERT_OK(Put(1, "c", "vc"));
-    ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
-    ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
+  virtual const char* Name() const override { return "DelayFilterFactory"; }
 
-    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+ private:
+  DBTestBase* db_test;
+};
+}  // namespace
 
-    iter->SeekToFirst();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "c->vc");
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
-    iter->Next();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    iter->SeekToLast();
-    ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "c->vc");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "a->va");
-    iter->Prev();
-    ASSERT_EQ(IterStatus(iter), "(invalid)");
-
-    delete iter;
-  } while (ChangeCompactOptions());
+#ifndef ROCKSDB_LITE
+
+static std::string CompressibleString(Random* rnd, int len) {
+  std::string r;
+  test::CompressibleString(rnd, 0.8, len, &r);
+  return r;
 }
+#endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, IterMultiWithDelete) {
-  do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    ASSERT_OK(Put(1, "ka", "va"));
-    ASSERT_OK(Put(1, "kb", "vb"));
-    ASSERT_OK(Put(1, "kc", "vc"));
-    ASSERT_OK(Delete(1, "kb"));
-    ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
-
-    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
-    iter->Seek("kc");
-    ASSERT_EQ(IterStatus(iter), "kc->vc");
-    if (!CurrentOptions().merge_operator) {
-      // TODO: merge operator does not support backward iteration yet
-      if (kPlainTableAllBytesPrefix != option_config_&&
-          kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
-          kHashLinkList != option_config_) {
-        iter->Prev();
-        ASSERT_EQ(IterStatus(iter), "ka->va");
-      }
-    }
-    delete iter;
-  } while (ChangeOptions());
+TEST_F(DBTest, FailMoreDbPaths) {
+  Options options = CurrentOptions();
+  options.db_paths.emplace_back(dbname_, 10000000);
+  options.db_paths.emplace_back(dbname_ + "_2", 1000000);
+  options.db_paths.emplace_back(dbname_ + "_3", 1000000);
+  options.db_paths.emplace_back(dbname_ + "_4", 1000000);
+  options.db_paths.emplace_back(dbname_ + "_5", 1000000);
+  ASSERT_TRUE(TryReopen(options).IsNotSupported());
 }
 
-TEST_F(DBTest, IterPrevMaxSkip) {
-  do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    for (int i = 0; i < 2; i++) {
-      ASSERT_OK(Put(1, "key1", "v1"));
-      ASSERT_OK(Put(1, "key2", "v2"));
-      ASSERT_OK(Put(1, "key3", "v3"));
-      ASSERT_OK(Put(1, "key4", "v4"));
-      ASSERT_OK(Put(1, "key5", "v5"));
+void CheckColumnFamilyMeta(const ColumnFamilyMetaData& cf_meta) {
+  uint64_t cf_size = 0;
+  uint64_t cf_csize = 0;
+  size_t file_count = 0;
+  for (auto level_meta : cf_meta.levels) {
+    uint64_t level_size = 0;
+    uint64_t level_csize = 0;
+    file_count += level_meta.files.size();
+    for (auto file_meta : level_meta.files) {
+      level_size += file_meta.size;
     }
+    ASSERT_EQ(level_meta.size, level_size);
+    cf_size += level_size;
+    cf_csize += level_csize;
+  }
+  ASSERT_EQ(cf_meta.file_count, file_count);
+  ASSERT_EQ(cf_meta.size, cf_size);
+}
 
-    VerifyIterLast("key5->v5", 1);
-
-    ASSERT_OK(Delete(1, "key5"));
-    VerifyIterLast("key4->v4", 1);
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, ColumnFamilyMetaDataTest) {
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  DestroyAndReopen(options);
 
-    ASSERT_OK(Delete(1, "key4"));
-    VerifyIterLast("key3->v3", 1);
+  Random rnd(301);
+  int key_index = 0;
+  ColumnFamilyMetaData cf_meta;
+  for (int i = 0; i < 100; ++i) {
+    GenerateNewFile(&rnd, &key_index);
+    db_->GetColumnFamilyMetaData(&cf_meta);
+    CheckColumnFamilyMeta(cf_meta);
+  }
+}
 
-    ASSERT_OK(Delete(1, "key3"));
-    VerifyIterLast("key2->v2", 1);
+namespace {
+void MinLevelHelper(DBTest* self, Options& options) {
+  Random rnd(301);
 
-    ASSERT_OK(Delete(1, "key2"));
-    VerifyIterLast("key1->v1", 1);
+  for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
+       num++) {
+    std::vector<std::string> values;
+    // Write 120KB (12 values, each 10K)
+    for (int i = 0; i < 12; i++) {
+      values.push_back(DBTestBase::RandomString(&rnd, 10000));
+      ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
+    }
+    self->dbfull()->TEST_WaitForFlushMemTable();
+    ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1);
+  }
 
-    ASSERT_OK(Delete(1, "key1"));
-    VerifyIterLast("(invalid)", 1);
-  } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
-}
+  // generate one more file in level-0, and should trigger level-0 compaction
+  std::vector<std::string> values;
+  for (int i = 0; i < 12; i++) {
+    values.push_back(DBTestBase::RandomString(&rnd, 10000));
+    ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
+  }
+  self->dbfull()->TEST_WaitForCompact();
 
-TEST_F(DBTest, IterWithSnapshot) {
-  anon::OptionsOverride options_override;
-  options_override.skip_policy = kSkipNoSnapshot;
-  do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
-    ASSERT_OK(Put(1, "key1", "val1"));
-    ASSERT_OK(Put(1, "key2", "val2"));
-    ASSERT_OK(Put(1, "key3", "val3"));
-    ASSERT_OK(Put(1, "key4", "val4"));
-    ASSERT_OK(Put(1, "key5", "val5"));
-
-    const Snapshot *snapshot = db_->GetSnapshot();
-    ReadOptions options;
-    options.snapshot = snapshot;
-    Iterator* iter = db_->NewIterator(options, handles_[1]);
-
-    // Put more values after the snapshot
-    ASSERT_OK(Put(1, "key100", "val100"));
-    ASSERT_OK(Put(1, "key101", "val101"));
-
-    iter->Seek("key5");
-    ASSERT_EQ(IterStatus(iter), "key5->val5");
-    if (!CurrentOptions().merge_operator) {
-      // TODO: merge operator does not support backward iteration yet
-      if (kPlainTableAllBytesPrefix != option_config_&&
-        kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
-        kHashLinkList != option_config_) {
-        iter->Prev();
-        ASSERT_EQ(IterStatus(iter), "key4->val4");
-        iter->Prev();
-        ASSERT_EQ(IterStatus(iter), "key3->val3");
-
-        iter->Next();
-        ASSERT_EQ(IterStatus(iter), "key4->val4");
-        iter->Next();
-        ASSERT_EQ(IterStatus(iter), "key5->val5");
-      }
-      iter->Next();
-      ASSERT_TRUE(!iter->Valid());
-    }
-    db_->ReleaseSnapshot(snapshot);
-    delete iter;
-    // skip as HashCuckooRep does not support snapshot
-  } while (ChangeOptions(kSkipHashCuckoo));
+  ASSERT_EQ(self->NumTableFilesAtLevel(0), 0);
+  ASSERT_EQ(self->NumTableFilesAtLevel(1), 1);
 }
 
-TEST_F(DBTest, Recover) {
-  do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    ASSERT_OK(Put(1, "foo", "v1"));
-    ASSERT_OK(Put(1, "baz", "v5"));
+// returns false if the calling-Test should be skipped
+bool MinLevelToCompress(CompressionType& type, Options& options, int wbits,
+                        int lev, int strategy) {
+  fprintf(stderr,
+          "Test with compression options : window_bits = %d, level =  %d, "
+          "strategy = %d}\n",
+          wbits, lev, strategy);
+  options.write_buffer_size = 100 << 10;  // 100KB
+  options.arena_block_size = 4096;
+  options.num_levels = 3;
+  options.level0_file_num_compaction_trigger = 3;
+  options.create_if_missing = true;
 
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
-    ASSERT_EQ("v1", Get(1, "foo"));
-
-    ASSERT_EQ("v1", Get(1, "foo"));
-    ASSERT_EQ("v5", Get(1, "baz"));
-    ASSERT_OK(Put(1, "bar", "v2"));
-    ASSERT_OK(Put(1, "foo", "v3"));
+  if (Snappy_Supported()) {
+    type = kSnappyCompression;
+    fprintf(stderr, "using snappy\n");
+  } else if (Zlib_Supported()) {
+    type = kZlibCompression;
+    fprintf(stderr, "using zlib\n");
+  } else if (BZip2_Supported()) {
+    type = kBZip2Compression;
+    fprintf(stderr, "using bzip2\n");
+  } else if (LZ4_Supported()) {
+    type = kLZ4Compression;
+    fprintf(stderr, "using lz4\n");
+  } else {
+    fprintf(stderr, "skipping test, compression disabled\n");
+    return false;
+  }
+  options.compression_per_level.resize(options.num_levels);
 
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
-    ASSERT_EQ("v3", Get(1, "foo"));
-    ASSERT_OK(Put(1, "foo", "v4"));
-    ASSERT_EQ("v4", Get(1, "foo"));
-    ASSERT_EQ("v2", Get(1, "bar"));
-    ASSERT_EQ("v5", Get(1, "baz"));
-  } while (ChangeOptions());
+  // do not compress L0
+  for (int i = 0; i < 1; i++) {
+    options.compression_per_level[i] = kNoCompression;
+  }
+  for (int i = 1; i < options.num_levels; i++) {
+    options.compression_per_level[i] = type;
+  }
+  return true;
 }
+}  // namespace
 
-TEST_F(DBTest, RecoverWithTableHandle) {
-  do {
-    Options options = CurrentOptions();
-    options.create_if_missing = true;
-    options.disable_auto_compactions = true;
-    DestroyAndReopen(options);
-    CreateAndReopenWithCF({"pikachu"}, options);
-
-    ASSERT_OK(Put(1, "foo", "v1"));
-    ASSERT_OK(Put(1, "bar", "v2"));
-    ASSERT_OK(Flush(1));
-    ASSERT_OK(Put(1, "foo", "v3"));
-    ASSERT_OK(Put(1, "bar", "v4"));
-    ASSERT_OK(Flush(1));
-    ASSERT_OK(Put(1, "big", std::string(100, 'a')));
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
+TEST_F(DBTest, MinLevelToCompress1) {
+  Options options = CurrentOptions();
+  CompressionType type = kSnappyCompression;
+  if (!MinLevelToCompress(type, options, -14, -1, 0)) {
+    return;
+  }
+  Reopen(options);
+  MinLevelHelper(this, options);
 
-    std::vector<std::vector<FileMetaData>> files;
-    dbfull()->TEST_GetFilesMetaData(handles_[1], &files);
-    size_t total_files = 0;
-    for (const auto& level : files) {
-      total_files += level.size();
-    }
-    ASSERT_EQ(total_files, 3);
-    for (const auto& level : files) {
-      for (const auto& file : level) {
-        if (kInfiniteMaxOpenFiles == option_config_) {
-          ASSERT_TRUE(file.table_reader_handle != nullptr);
-        } else {
-          ASSERT_TRUE(file.table_reader_handle == nullptr);
-        }
-      }
-    }
-  } while (ChangeOptions());
+  // do not compress L0 and L1
+  for (int i = 0; i < 2; i++) {
+    options.compression_per_level[i] = kNoCompression;
+  }
+  for (int i = 2; i < options.num_levels; i++) {
+    options.compression_per_level[i] = type;
+  }
+  DestroyAndReopen(options);
+  MinLevelHelper(this, options);
 }
 
-TEST_F(DBTest, IgnoreRecoveredLog) {
-  std::string backup_logs = dbname_ + "/backup_logs";
+TEST_F(DBTest, MinLevelToCompress2) {
+  Options options = CurrentOptions();
+  CompressionType type = kSnappyCompression;
+  if (!MinLevelToCompress(type, options, 15, -1, 0)) {
+    return;
+  }
+  Reopen(options);
+  MinLevelHelper(this, options);
 
-  // delete old files in backup_logs directory
-  env_->CreateDirIfMissing(backup_logs);
-  std::vector<std::string> old_files;
-  env_->GetChildren(backup_logs, &old_files);
-  for (auto& file : old_files) {
-    if (file != "." && file != "..") {
-      env_->DeleteFile(backup_logs + "/" + file);
-    }
+  // do not compress L0 and L1
+  for (int i = 0; i < 2; i++) {
+    options.compression_per_level[i] = kNoCompression;
+  }
+  for (int i = 2; i < options.num_levels; i++) {
+    options.compression_per_level[i] = type;
   }
+  DestroyAndReopen(options);
+  MinLevelHelper(this, options);
+}
 
+TEST_F(DBTest, RepeatedWritesToSameKey) {
   do {
     Options options = CurrentOptions();
-    options.create_if_missing = true;
-    options.merge_operator = MergeOperators::CreateUInt64AddOperator();
-    options.wal_dir = dbname_ + "/logs";
-    DestroyAndReopen(options);
-
-    // fill up the DB
-    std::string one, two;
-    PutFixed64(&one, 1);
-    PutFixed64(&two, 2);
-    ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one)));
-    ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one)));
-    ASSERT_OK(db_->Merge(WriteOptions(), Slice("bar"), Slice(one)));
-
-    // copy the logs to backup
-    std::vector<std::string> logs;
-    env_->GetChildren(options.wal_dir, &logs);
-    for (auto& log : logs) {
-      if (log != ".." && log != ".") {
-        CopyFile(options.wal_dir + "/" + log, backup_logs + "/" + log);
-      }
-    }
-
-    // recover the DB
-    Reopen(options);
-    ASSERT_EQ(two, Get("foo"));
-    ASSERT_EQ(one, Get("bar"));
-    Close();
-
-    // copy the logs from backup back to wal dir
-    for (auto& log : logs) {
-      if (log != ".." && log != ".") {
-        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
-      }
-    }
-    // this should ignore the log files, recovery should not happen again
-    // if the recovery happens, the same merge operator would be called twice,
-    // leading to incorrect results
-    Reopen(options);
-    ASSERT_EQ(two, Get("foo"));
-    ASSERT_EQ(one, Get("bar"));
-    Close();
-    Destroy(options);
-    Reopen(options);
-    Close();
+    options.env = env_;
+    options.write_buffer_size = 100000;  // Small write buffer
+    CreateAndReopenWithCF({"pikachu"}, options);
 
-    // copy the logs from backup back to wal dir
-    env_->CreateDirIfMissing(options.wal_dir);
-    for (auto& log : logs) {
-      if (log != ".." && log != ".") {
-        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
-      }
-    }
-    // assert that we successfully recovered only from logs, even though we
-    // destroyed the DB
-    Reopen(options);
-    ASSERT_EQ(two, Get("foo"));
-    ASSERT_EQ(one, Get("bar"));
+    // We must have at most one file per level except for level-0,
+    // which may have up to kL0_StopWritesTrigger files.
+    const int kMaxFiles =
+        options.num_levels + options.level0_stop_writes_trigger;
 
-    // Recovery will fail if DB directory doesn't exist.
-    Destroy(options);
-    // copy the logs from backup back to wal dir
-    env_->CreateDirIfMissing(options.wal_dir);
-    for (auto& log : logs) {
-      if (log != ".." && log != ".") {
-        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
-        // we won't be needing this file no more
-        env_->DeleteFile(backup_logs + "/" + log);
-      }
+    Random rnd(301);
+    std::string value =
+        RandomString(&rnd, static_cast<int>(2 * options.write_buffer_size));
+    for (int i = 0; i < 5 * kMaxFiles; i++) {
+      ASSERT_OK(Put(1, "key", value));
+      ASSERT_LE(TotalTableFiles(1), kMaxFiles);
     }
-    Status s = TryReopen(options);
-    ASSERT_TRUE(!s.ok());
-  } while (ChangeOptions(kSkipHashCuckoo));
-}
-
-TEST_F(DBTest, CheckLock) {
-  do {
-    DB* localdb;
-    Options options = CurrentOptions();
-    ASSERT_OK(TryReopen(options));
-
-    // second open should fail
-    ASSERT_TRUE(!(DB::Open(options, dbname_, &localdb)).ok());
   } while (ChangeCompactOptions());
 }
+#endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, FlushMultipleMemtable) {
+TEST_F(DBTest, SparseMerge) {
   do {
     Options options = CurrentOptions();
-    WriteOptions writeOpt = WriteOptions();
-    writeOpt.disableWAL = true;
-    options.max_write_buffer_number = 4;
-    options.min_write_buffer_number_to_merge = 3;
-    options.max_write_buffer_number_to_maintain = -1;
+    options.compression = kNoCompression;
     CreateAndReopenWithCF({"pikachu"}, options);
-    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
+
+    FillLevels("A", "Z", 1);
+
+    // Suppose there is:
+    //    small amount of data with prefix A
+    //    large amount of data with prefix B
+    //    small amount of data with prefix C
+    // and that recent updates have made small changes to all three prefixes.
+    // Check that we do not do a compaction that merges all of B in one shot.
+    const std::string value(1000, 'x');
+    Put(1, "A", "va");
+    // Write approximately 100MB of "B" values
+    for (int i = 0; i < 100000; i++) {
+      char key[100];
+      snprintf(key, sizeof(key), "B%010d", i);
+      Put(1, key, value);
+    }
+    Put(1, "C", "vc");
     ASSERT_OK(Flush(1));
-    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
+    dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
 
-    ASSERT_EQ("v1", Get(1, "foo"));
-    ASSERT_EQ("v1", Get(1, "bar"));
+    // Make sparse update
+    Put(1, "A", "va2");
+    Put(1, "B100", "bvalue2");
+    Put(1, "C", "vc2");
     ASSERT_OK(Flush(1));
+
+    // Compactions should not cause us to create a situation where
+    // a file overlaps too much data at the next level.
+    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
+              20 * 1048576);
+    dbfull()->TEST_CompactRange(0, nullptr, nullptr);
+    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
+              20 * 1048576);
+    dbfull()->TEST_CompactRange(1, nullptr, nullptr);
+    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
+              20 * 1048576);
   } while (ChangeCompactOptions());
 }
 
-TEST_F(DBTest, FlushEmptyColumnFamily) {
-  // Block flush thread and disable compaction thread
-  env_->SetBackgroundThreads(1, Env::HIGH);
-  env_->SetBackgroundThreads(1, Env::LOW);
-  test::SleepingBackgroundTask sleeping_task_low;
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
-                 Env::Priority::LOW);
-  test::SleepingBackgroundTask sleeping_task_high;
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
-                 &sleeping_task_high, Env::Priority::HIGH);
+#ifndef ROCKSDB_LITE
+static bool Between(uint64_t val, uint64_t low, uint64_t high) {
+  bool result = (val >= low) && (val <= high);
+  if (!result) {
+    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
+            (unsigned long long)(val), (unsigned long long)(low),
+            (unsigned long long)(high));
+  }
+  return result;
+}
 
+TEST_F(DBTest, ApproximateSizesMemTable) {
   Options options = CurrentOptions();
-  // disable compaction
-  options.disable_auto_compactions = true;
-  WriteOptions writeOpt = WriteOptions();
-  writeOpt.disableWAL = true;
-  options.max_write_buffer_number = 2;
-  options.min_write_buffer_number_to_merge = 1;
-  options.max_write_buffer_number_to_maintain = 1;
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  // Compaction can still go through even if no thread can flush the
-  // mem table.
-  ASSERT_OK(Flush(0));
-  ASSERT_OK(Flush(1));
+  options.write_buffer_size = 100000000;  // Large write buffer
+  options.compression = kNoCompression;
+  options.create_if_missing = true;
+  DestroyAndReopen(options);
 
-  // Insert can go through
-  ASSERT_OK(dbfull()->Put(writeOpt, handles_[0], "foo", "v1"));
-  ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
+  const int N = 128;
+  Random rnd(301);
+  for (int i = 0; i < N; i++) {
+    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
+  }
 
-  ASSERT_EQ("v1", Get(0, "foo"));
-  ASSERT_EQ("v1", Get(1, "bar"));
+  uint64_t size;
+  std::string start = Key(50);
+  std::string end = Key(60);
+  Range r(start, end);
+  db_->GetApproximateSizes(&r, 1, &size, true);
+  ASSERT_GT(size, 6000);
+  ASSERT_LT(size, 204800);
+  // Zero if not including mem table
+  db_->GetApproximateSizes(&r, 1, &size, false);
+  ASSERT_EQ(size, 0);
 
-  sleeping_task_high.WakeUp();
-  sleeping_task_high.WaitUntilDone();
+  start = Key(500);
+  end = Key(600);
+  r = Range(start, end);
+  db_->GetApproximateSizes(&r, 1, &size, true);
+  ASSERT_EQ(size, 0);
 
-  // Flush can still go through.
-  ASSERT_OK(Flush(0));
-  ASSERT_OK(Flush(1));
+  for (int i = 0; i < N; i++) {
+    ASSERT_OK(Put(Key(1000 + i), RandomString(&rnd, 1024)));
+  }
 
-  sleeping_task_low.WakeUp();
-  sleeping_task_low.WaitUntilDone();
-}
+  start = Key(500);
+  end = Key(600);
+  r = Range(start, end);
+  db_->GetApproximateSizes(&r, 1, &size, true);
+  ASSERT_EQ(size, 0);
 
-TEST_F(DBTest, FLUSH) {
-  do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    WriteOptions writeOpt = WriteOptions();
-    writeOpt.disableWAL = true;
-    SetPerfLevel(kEnableTime);;
-    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
-    // this will now also flush the last 2 writes
-    ASSERT_OK(Flush(1));
-    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
+  start = Key(100);
+  end = Key(1020);
+  r = Range(start, end);
+  db_->GetApproximateSizes(&r, 1, &size, true);
+  ASSERT_GT(size, 6000);
 
-    perf_context.Reset();
-    Get(1, "foo");
-    ASSERT_TRUE((int) perf_context.get_from_output_files_time > 0);
+  options.max_write_buffer_number = 8;
+  options.min_write_buffer_number_to_merge = 5;
+  options.write_buffer_size = 1024 * N;  // Not very large
+  DestroyAndReopen(options);
 
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
-    ASSERT_EQ("v1", Get(1, "foo"));
-    ASSERT_EQ("v1", Get(1, "bar"));
+  int keys[N * 3];
+  for (int i = 0; i < N; i++) {
+    keys[i * 3] = i * 5;
+    keys[i * 3 + 1] = i * 5 + 1;
+    keys[i * 3 + 2] = i * 5 + 2;
+  }
+  std::random_shuffle(std::begin(keys), std::end(keys));
 
-    writeOpt.disableWAL = true;
-    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v2"));
-    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v2"));
-    ASSERT_OK(Flush(1));
+  for (int i = 0; i < N * 3; i++) {
+    ASSERT_OK(Put(Key(keys[i] + 1000), RandomString(&rnd, 1024)));
+  }
 
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
-    ASSERT_EQ("v2", Get(1, "bar"));
-    perf_context.Reset();
-    ASSERT_EQ("v2", Get(1, "foo"));
-    ASSERT_TRUE((int) perf_context.get_from_output_files_time > 0);
+  start = Key(100);
+  end = Key(300);
+  r = Range(start, end);
+  db_->GetApproximateSizes(&r, 1, &size, true);
+  ASSERT_EQ(size, 0);
 
-    writeOpt.disableWAL = false;
-    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3"));
-    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v3"));
-    ASSERT_OK(Flush(1));
+  start = Key(1050);
+  end = Key(1080);
+  r = Range(start, end);
+  db_->GetApproximateSizes(&r, 1, &size, true);
+  ASSERT_GT(size, 6000);
 
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
-    // 'foo' should be there because its put
-    // has WAL enabled.
-    ASSERT_EQ("v3", Get(1, "foo"));
-    ASSERT_EQ("v3", Get(1, "bar"));
+  start = Key(2100);
+  end = Key(2300);
+  r = Range(start, end);
+  db_->GetApproximateSizes(&r, 1, &size, true);
+  ASSERT_EQ(size, 0);
 
-    SetPerfLevel(kDisable);
-  } while (ChangeCompactOptions());
+  start = Key(1050);
+  end = Key(1080);
+  r = Range(start, end);
+  uint64_t size_with_mt, size_without_mt;
+  db_->GetApproximateSizes(&r, 1, &size_with_mt, true);
+  ASSERT_GT(size_with_mt, 6000);
+  db_->GetApproximateSizes(&r, 1, &size_without_mt, false);
+  ASSERT_EQ(size_without_mt, 0);
+
+  Flush();
+
+  for (int i = 0; i < N; i++) {
+    ASSERT_OK(Put(Key(i + 1000), RandomString(&rnd, 1024)));
+  }
+
+  start = Key(1050);
+  end = Key(1080);
+  r = Range(start, end);
+  db_->GetApproximateSizes(&r, 1, &size_with_mt, true);
+  db_->GetApproximateSizes(&r, 1, &size_without_mt, false);
+  ASSERT_GT(size_with_mt, size_without_mt);
+  ASSERT_GT(size_without_mt, 6000);
 }
 
-TEST_F(DBTest, RecoveryWithEmptyLog) {
+TEST_F(DBTest, ApproximateSizes) {
   do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    ASSERT_OK(Put(1, "foo", "v1"));
-    ASSERT_OK(Put(1, "foo", "v2"));
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
-    ASSERT_OK(Put(1, "foo", "v3"));
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
-    ASSERT_EQ("v3", Get(1, "foo"));
-  } while (ChangeOptions());
-}
+    Options options = CurrentOptions();
+    options.write_buffer_size = 100000000;  // Large write buffer
+    options.compression = kNoCompression;
+    options.create_if_missing = true;
+    DestroyAndReopen(options);
+    CreateAndReopenWithCF({"pikachu"}, options);
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, FlushSchedule) {
-  Options options = CurrentOptions();
-  options.disable_auto_compactions = true;
-  options.level0_stop_writes_trigger = 1 << 10;
-  options.level0_slowdown_writes_trigger = 1 << 10;
-  options.min_write_buffer_number_to_merge = 1;
-  options.max_write_buffer_number_to_maintain = 1;
-  options.max_write_buffer_number = 2;
-  options.write_buffer_size = 120 * 1024;
-  CreateAndReopenWithCF({"pikachu"}, options);
-  std::vector<std::thread> threads;
+    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
+    ReopenWithColumnFamilies({"default", "pikachu"}, options);
+    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
 
-  std::atomic<int> thread_num(0);
-  // each column family will have 5 thread, each thread generating 2 memtables.
-  // each column family should end up with 10 table files
-  std::function<void()> fill_memtable_func = [&]() {
-    int a = thread_num.fetch_add(1);
-    Random rnd(a);
-    WriteOptions wo;
-    // this should fill up 2 memtables
-    for (int k = 0; k < 5000; ++k) {
-      ASSERT_OK(db_->Put(wo, handles_[a & 1], RandomString(&rnd, 13), ""));
+    // Write 8MB (80 values, each 100K)
+    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
+    const int N = 80;
+    static const int S1 = 100000;
+    static const int S2 = 105000;  // Allow some expansion from metadata
+    Random rnd(301);
+    for (int i = 0; i < N; i++) {
+      ASSERT_OK(Put(1, Key(i), RandomString(&rnd, S1)));
     }
-  };
 
-  for (int i = 0; i < 10; ++i) {
-    threads.emplace_back(fill_memtable_func);
-  }
+    // 0 because GetApproximateSizes() does not account for memtable space
+    ASSERT_TRUE(Between(Size("", Key(50), 1), 0, 0));
 
-  for (auto& t : threads) {
-    t.join();
-  }
+    // Check sizes across recovery by reopening a few times
+    for (int run = 0; run < 3; run++) {
+      ReopenWithColumnFamilies({"default", "pikachu"}, options);
 
-  auto default_tables = GetNumberOfSstFilesForColumnFamily(db_, "default");
-  auto pikachu_tables = GetNumberOfSstFilesForColumnFamily(db_, "pikachu");
-  ASSERT_LE(default_tables, static_cast<uint64_t>(10));
-  ASSERT_GT(default_tables, static_cast<uint64_t>(0));
-  ASSERT_LE(pikachu_tables, static_cast<uint64_t>(10));
-  ASSERT_GT(pikachu_tables, static_cast<uint64_t>(0));
+      for (int compact_start = 0; compact_start < N; compact_start += 10) {
+        for (int i = 0; i < N; i += 10) {
+          ASSERT_TRUE(Between(Size("", Key(i), 1), S1 * i, S2 * i));
+          ASSERT_TRUE(Between(Size("", Key(i) + ".suffix", 1), S1 * (i + 1),
+                              S2 * (i + 1)));
+          ASSERT_TRUE(Between(Size(Key(i), Key(i + 10), 1), S1 * 10, S2 * 10));
+        }
+        ASSERT_TRUE(Between(Size("", Key(50), 1), S1 * 50, S2 * 50));
+        ASSERT_TRUE(
+            Between(Size("", Key(50) + ".suffix", 1), S1 * 50, S2 * 50));
+
+        std::string cstart_str = Key(compact_start);
+        std::string cend_str = Key(compact_start + 9);
+        Slice cstart = cstart_str;
+        Slice cend = cend_str;
+        dbfull()->TEST_CompactRange(0, &cstart, &cend, handles_[1]);
+      }
+
+      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
+      ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
+    }
+    // ApproximateOffsetOf() is not yet implemented in plain table format.
+  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
+                         kSkipPlainTable | kSkipHashIndex));
 }
-#endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, ManifestRollOver) {
+TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
   do {
-    Options options;
-    options.max_manifest_file_size = 10 ;  // 10 bytes
-    options = CurrentOptions(options);
+    Options options = CurrentOptions();
+    options.compression = kNoCompression;
     CreateAndReopenWithCF({"pikachu"}, options);
-    {
-      ASSERT_OK(Put(1, "manifest_key1", std::string(1000, '1')));
-      ASSERT_OK(Put(1, "manifest_key2", std::string(1000, '2')));
-      ASSERT_OK(Put(1, "manifest_key3", std::string(1000, '3')));
-      uint64_t manifest_before_flush = dbfull()->TEST_Current_Manifest_FileNo();
-      ASSERT_OK(Flush(1));  // This should trigger LogAndApply.
-      uint64_t manifest_after_flush = dbfull()->TEST_Current_Manifest_FileNo();
-      ASSERT_GT(manifest_after_flush, manifest_before_flush);
+
+    Random rnd(301);
+    std::string big1 = RandomString(&rnd, 100000);
+    ASSERT_OK(Put(1, Key(0), RandomString(&rnd, 10000)));
+    ASSERT_OK(Put(1, Key(1), RandomString(&rnd, 10000)));
+    ASSERT_OK(Put(1, Key(2), big1));
+    ASSERT_OK(Put(1, Key(3), RandomString(&rnd, 10000)));
+    ASSERT_OK(Put(1, Key(4), big1));
+    ASSERT_OK(Put(1, Key(5), RandomString(&rnd, 10000)));
+    ASSERT_OK(Put(1, Key(6), RandomString(&rnd, 300000)));
+    ASSERT_OK(Put(1, Key(7), RandomString(&rnd, 10000)));
+
+    // Check sizes across recovery by reopening a few times
+    for (int run = 0; run < 3; run++) {
       ReopenWithColumnFamilies({"default", "pikachu"}, options);
-      ASSERT_GT(dbfull()->TEST_Current_Manifest_FileNo(), manifest_after_flush);
-      // check if a new manifest file got inserted or not.
-      ASSERT_EQ(std::string(1000, '1'), Get(1, "manifest_key1"));
-      ASSERT_EQ(std::string(1000, '2'), Get(1, "manifest_key2"));
-      ASSERT_EQ(std::string(1000, '3'), Get(1, "manifest_key3"));
-    }
-  } while (ChangeCompactOptions());
-}
 
-TEST_F(DBTest, IdentityAcrossRestarts) {
-  do {
-    std::string id1;
-    ASSERT_OK(db_->GetDbIdentity(id1));
+      ASSERT_TRUE(Between(Size("", Key(0), 1), 0, 0));
+      ASSERT_TRUE(Between(Size("", Key(1), 1), 10000, 11000));
+      ASSERT_TRUE(Between(Size("", Key(2), 1), 20000, 21000));
+      ASSERT_TRUE(Between(Size("", Key(3), 1), 120000, 121000));
+      ASSERT_TRUE(Between(Size("", Key(4), 1), 130000, 131000));
+      ASSERT_TRUE(Between(Size("", Key(5), 1), 230000, 231000));
+      ASSERT_TRUE(Between(Size("", Key(6), 1), 240000, 241000));
+      ASSERT_TRUE(Between(Size("", Key(7), 1), 540000, 541000));
+      ASSERT_TRUE(Between(Size("", Key(8), 1), 550000, 560000));
 
-    Options options = CurrentOptions();
-    Reopen(options);
-    std::string id2;
-    ASSERT_OK(db_->GetDbIdentity(id2));
-    // id1 should match id2 because identity was not regenerated
-    ASSERT_EQ(id1.compare(id2), 0);
+      ASSERT_TRUE(Between(Size(Key(3), Key(5), 1), 110000, 111000));
 
-    std::string idfilename = IdentityFileName(dbname_);
-    ASSERT_OK(env_->DeleteFile(idfilename));
-    Reopen(options);
-    std::string id3;
-    ASSERT_OK(db_->GetDbIdentity(id3));
-    // id1 should NOT match id3 because identity was regenerated
-    ASSERT_NE(id1.compare(id3), 0);
-  } while (ChangeCompactOptions());
+      dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
+    }
+    // ApproximateOffsetOf() is not yet implemented in plain table format.
+  } while (ChangeOptions(kSkipPlainTable));
 }
+#endif  // ROCKSDB_LITE
 
 #ifndef ROCKSDB_LITE
-TEST_F(DBTest, RecoverWithLargeLog) {
+TEST_F(DBTest, Snapshot) {
+  anon::OptionsOverride options_override;
+  options_override.skip_policy = kSkipNoSnapshot;
   do {
-    {
-      Options options = CurrentOptions();
-      CreateAndReopenWithCF({"pikachu"}, options);
-      ASSERT_OK(Put(1, "big1", std::string(200000, '1')));
-      ASSERT_OK(Put(1, "big2", std::string(200000, '2')));
-      ASSERT_OK(Put(1, "small3", std::string(10, '3')));
-      ASSERT_OK(Put(1, "small4", std::string(10, '4')));
-      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
-    }
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
+    Put(0, "foo", "0v1");
+    Put(1, "foo", "1v1");
 
-    // Make sure that if we re-open with a small write buffer size that
-    // we flush table files in the middle of a large log file.
-    Options options;
-    options.write_buffer_size = 100000;
-    options = CurrentOptions(options);
-    ReopenWithColumnFamilies({"default", "pikachu"}, options);
-    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 3);
-    ASSERT_EQ(std::string(200000, '1'), Get(1, "big1"));
-    ASSERT_EQ(std::string(200000, '2'), Get(1, "big2"));
-    ASSERT_EQ(std::string(10, '3'), Get(1, "small3"));
-    ASSERT_EQ(std::string(10, '4'), Get(1, "small4"));
-    ASSERT_GT(NumTableFilesAtLevel(0, 1), 1);
-  } while (ChangeCompactOptions());
-}
-#endif  // ROCKSDB_LITE
+    const Snapshot* s1 = db_->GetSnapshot();
+    ASSERT_EQ(1U, GetNumSnapshots());
+    uint64_t time_snap1 = GetTimeOldestSnapshots();
+    ASSERT_GT(time_snap1, 0U);
+    Put(0, "foo", "0v2");
+    Put(1, "foo", "1v2");
 
-namespace {
-class KeepFilter : public CompactionFilter {
- public:
-  virtual bool Filter(int level, const Slice& key, const Slice& value,
-                      std::string* new_value, bool* value_changed) const
-      override {
-    return false;
-  }
+    env_->addon_time_.fetch_add(1);
 
-  virtual const char* Name() const override { return "KeepFilter"; }
-};
+    const Snapshot* s2 = db_->GetSnapshot();
+    ASSERT_EQ(2U, GetNumSnapshots());
+    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
+    Put(0, "foo", "0v3");
+    Put(1, "foo", "1v3");
 
-class KeepFilterFactory : public CompactionFilterFactory {
- public:
-  explicit KeepFilterFactory(bool check_context = false)
-      : check_context_(check_context) {}
+    {
+      ManagedSnapshot s3(db_);
+      ASSERT_EQ(3U, GetNumSnapshots());
+      ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
 
-  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
-      const CompactionFilter::Context& context) override {
-    if (check_context_) {
-      EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
-      EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
+      Put(0, "foo", "0v4");
+      Put(1, "foo", "1v4");
+      ASSERT_EQ("0v1", Get(0, "foo", s1));
+      ASSERT_EQ("1v1", Get(1, "foo", s1));
+      ASSERT_EQ("0v2", Get(0, "foo", s2));
+      ASSERT_EQ("1v2", Get(1, "foo", s2));
+      ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot()));
+      ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot()));
+      ASSERT_EQ("0v4", Get(0, "foo"));
+      ASSERT_EQ("1v4", Get(1, "foo"));
     }
-    return std::unique_ptr<CompactionFilter>(new KeepFilter());
-  }
-
-  virtual const char* Name() const override { return "KeepFilterFactory"; }
-  bool check_context_;
-  std::atomic_bool expect_full_compaction_;
-  std::atomic_bool expect_manual_compaction_;
-};
 
-class DelayFilter : public CompactionFilter {
- public:
-  explicit DelayFilter(DBTestBase* d) : db_test(d) {}
-  virtual bool Filter(int level, const Slice& key, const Slice& value,
-                      std::string* new_value,
-                      bool* value_changed) const override {
-    db_test->env_->addon_time_.fetch_add(1000);
-    return true;
-  }
+    ASSERT_EQ(2U, GetNumSnapshots());
+    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
+    ASSERT_EQ("0v1", Get(0, "foo", s1));
+    ASSERT_EQ("1v1", Get(1, "foo", s1));
+    ASSERT_EQ("0v2", Get(0, "foo", s2));
+    ASSERT_EQ("1v2", Get(1, "foo", s2));
+    ASSERT_EQ("0v4", Get(0, "foo"));
+    ASSERT_EQ("1v4", Get(1, "foo"));
 
-  virtual const char* Name() const override { return "DelayFilter"; }
+    db_->ReleaseSnapshot(s1);
+    ASSERT_EQ("0v2", Get(0, "foo", s2));
+    ASSERT_EQ("1v2", Get(1, "foo", s2));
+    ASSERT_EQ("0v4", Get(0, "foo"));
+    ASSERT_EQ("1v4", Get(1, "foo"));
+    ASSERT_EQ(1U, GetNumSnapshots());
+    ASSERT_LT(time_snap1, GetTimeOldestSnapshots());
 
- private:
-  DBTestBase* db_test;
-};
+    db_->ReleaseSnapshot(s2);
+    ASSERT_EQ(0U, GetNumSnapshots());
+    ASSERT_EQ("0v4", Get(0, "foo"));
+    ASSERT_EQ("1v4", Get(1, "foo"));
+  } while (ChangeOptions(kSkipHashCuckoo));
+}
 
-class DelayFilterFactory : public CompactionFilterFactory {
- public:
-  explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {}
-  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
-      const CompactionFilter::Context& context) override {
-    return std::unique_ptr<CompactionFilter>(new DelayFilter(db_test));
-  }
+TEST_F(DBTest, HiddenValuesAreRemoved) {
+  anon::OptionsOverride options_override;
+  options_override.skip_policy = kSkipNoSnapshot;
+  do {
+    Options options = CurrentOptions(options_override);
+    CreateAndReopenWithCF({"pikachu"}, options);
+    Random rnd(301);
+    FillLevels("a", "z", 1);
 
-  virtual const char* Name() const override { return "DelayFilterFactory"; }
+    std::string big = RandomString(&rnd, 50000);
+    Put(1, "foo", big);
+    Put(1, "pastfoo", "v");
+    const Snapshot* snapshot = db_->GetSnapshot();
+    Put(1, "foo", "tiny");
+    Put(1, "pastfoo2", "v2");  // Advance sequence number one more
 
- private:
-  DBTestBase* db_test;
-};
-}  // namespace
+    ASSERT_OK(Flush(1));
+    ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, CompressedCache) {
-  if (!Snappy_Supported()) {
-    return;
-  }
-  int num_iter = 80;
+    ASSERT_EQ(big, Get(1, "foo", snapshot));
+    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 50000, 60000));
+    db_->ReleaseSnapshot(snapshot);
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big + " ]");
+    Slice x("x");
+    dbfull()->TEST_CompactRange(0, nullptr, &x, handles_[1]);
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
+    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
+    ASSERT_GE(NumTableFilesAtLevel(1, 1), 1);
+    dbfull()->TEST_CompactRange(1, nullptr, &x, handles_[1]);
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
 
-  // Run this test three iterations.
-  // Iteration 1: only a uncompressed block cache
-  // Iteration 2: only a compressed block cache
-  // Iteration 3: both block cache and compressed cache
-  // Iteration 4: both block cache and compressed cache, but DB is not
-  // compressed
-  for (int iter = 0; iter < 4; iter++) {
-    Options options = CurrentOptions();
-    options.write_buffer_size = 64*1024;        // small write buffer
-    options.statistics = rocksdb::CreateDBStatistics();
+    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 0, 1000));
+    // ApproximateOffsetOf() is not yet implemented in plain table format,
+    // which is used by Size().
+    // skip HashCuckooRep as it does not support snapshot
+  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
+                         kSkipPlainTable | kSkipHashCuckoo));
+}
+#endif  // ROCKSDB_LITE
 
-    BlockBasedTableOptions table_options;
-    switch (iter) {
-      case 0:
-        // only uncompressed block cache
-        table_options.block_cache = NewLRUCache(8*1024);
-        table_options.block_cache_compressed = nullptr;
-        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-        break;
-      case 1:
-        // no block cache, only compressed cache
-        table_options.no_block_cache = true;
-        table_options.block_cache = nullptr;
-        table_options.block_cache_compressed = NewLRUCache(8*1024);
-        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-        break;
-      case 2:
-        // both compressed and uncompressed block cache
-        table_options.block_cache = NewLRUCache(1024);
-        table_options.block_cache_compressed = NewLRUCache(8*1024);
-        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-        break;
-      case 3:
-        // both block cache and compressed cache, but DB is not compressed
-        // also, make block cache sizes bigger, to trigger block cache hits
-        table_options.block_cache = NewLRUCache(1024 * 1024);
-        table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024);
-        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-        options.compression = kNoCompression;
-        break;
-      default:
-        ASSERT_TRUE(false);
-    }
+TEST_F(DBTest, CompactBetweenSnapshots) {
+  anon::OptionsOverride options_override;
+  options_override.skip_policy = kSkipNoSnapshot;
+  do {
+    Options options = CurrentOptions(options_override);
+    options.disable_auto_compactions = true;
     CreateAndReopenWithCF({"pikachu"}, options);
-    // default column family doesn't have block cache
-    Options no_block_cache_opts;
-    no_block_cache_opts.statistics = options.statistics;
-    no_block_cache_opts = CurrentOptions(no_block_cache_opts);
-    BlockBasedTableOptions table_options_no_bc;
-    table_options_no_bc.no_block_cache = true;
-    no_block_cache_opts.table_factory.reset(
-        NewBlockBasedTableFactory(table_options_no_bc));
-    ReopenWithColumnFamilies({"default", "pikachu"},
-        std::vector<Options>({no_block_cache_opts, options}));
-
     Random rnd(301);
+    FillLevels("a", "z", 1);
 
-    // Write 8MB (80 values, each 100K)
-    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
-    std::vector<std::string> values;
-    std::string str;
-    for (int i = 0; i < num_iter; i++) {
-      if (i % 4 == 0) {        // high compression ratio
-        str = RandomString(&rnd, 1000);
-      }
-      values.push_back(str);
-      ASSERT_OK(Put(1, Key(i), values[i]));
-    }
+    Put(1, "foo", "first");
+    const Snapshot* snapshot1 = db_->GetSnapshot();
+    Put(1, "foo", "second");
+    Put(1, "foo", "third");
+    Put(1, "foo", "fourth");
+    const Snapshot* snapshot2 = db_->GetSnapshot();
+    Put(1, "foo", "fifth");
+    Put(1, "foo", "sixth");
+
+    // All entries (including duplicates) exist
+    // before any compaction or flush is triggered.
+    ASSERT_EQ(AllEntriesFor("foo", 1),
+              "[ sixth, fifth, fourth, third, second, first ]");
+    ASSERT_EQ("sixth", Get(1, "foo"));
+    ASSERT_EQ("fourth", Get(1, "foo", snapshot2));
+    ASSERT_EQ("first", Get(1, "foo", snapshot1));
 
-    // flush all data from memtable so that reads are from block cache
+    // After a flush, "second", "third" and "fifth" should
+    // be removed
     ASSERT_OK(Flush(1));
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth, first ]");
 
-    for (int i = 0; i < num_iter; i++) {
-      ASSERT_EQ(Get(1, Key(i)), values[i]);
-    }
+    // after we release the snapshot1, only two values left
+    db_->ReleaseSnapshot(snapshot1);
+    FillLevels("a", "z", 1);
+    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+                           nullptr);
 
-    // check that we triggered the appropriate code paths in the cache
-    switch (iter) {
-      case 0:
-        // only uncompressed block cache
-        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
-        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
-        break;
-      case 1:
-        // no block cache, only compressed cache
-        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
-        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
-        break;
-      case 2:
-        // both compressed and uncompressed block cache
-        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
-        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
-        break;
-      case 3:
-        // both compressed and uncompressed block cache
-        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
-        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_HIT), 0);
-        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
-        // compressed doesn't have any hits since blocks are not compressed on
-        // storage
-        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT), 0);
-        break;
-      default:
-        ASSERT_TRUE(false);
-    }
+    // We have only one valid snapshot snapshot2. Since snapshot1 is
+    // not valid anymore, "first" should be removed by a compaction.
+    ASSERT_EQ("sixth", Get(1, "foo"));
+    ASSERT_EQ("fourth", Get(1, "foo", snapshot2));
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth ]");
 
-    options.create_if_missing = true;
-    DestroyAndReopen(options);
-  }
+    // after we release the snapshot2, only one value should be left
+    db_->ReleaseSnapshot(snapshot2);
+    FillLevels("a", "z", 1);
+    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+                           nullptr);
+    ASSERT_EQ("sixth", Get(1, "foo"));
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth ]");
+    // skip HashCuckooRep as it does not support snapshot
+  } while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction));
 }
 
-static std::string CompressibleString(Random* rnd, int len) {
-  std::string r;
-  test::CompressibleString(rnd, 0.8, len, &r);
-  return r;
-}
-#endif  // ROCKSDB_LITE
+TEST_F(DBTest, UnremovableSingleDelete) {
+  // If we compact:
+  //
+  // Put(A, v1) Snapshot SingleDelete(A) Put(A, v2)
+  //
+  // We do not want to end up with:
+  //
+  // Put(A, v1) Snapshot Put(A, v2)
+  //
+  // Because a subsequent SingleDelete(A) would delete the Put(A, v2)
+  // but not Put(A, v1), so Get(A) would return v1.
+  anon::OptionsOverride options_override;
+  options_override.skip_policy = kSkipNoSnapshot;
+  do {
+    Options options = CurrentOptions(options_override);
+    options.disable_auto_compactions = true;
+    CreateAndReopenWithCF({"pikachu"}, options);
 
-TEST_F(DBTest, FailMoreDbPaths) {
-  Options options = CurrentOptions();
-  options.db_paths.emplace_back(dbname_, 10000000);
-  options.db_paths.emplace_back(dbname_ + "_2", 1000000);
-  options.db_paths.emplace_back(dbname_ + "_3", 1000000);
-  options.db_paths.emplace_back(dbname_ + "_4", 1000000);
-  options.db_paths.emplace_back(dbname_ + "_5", 1000000);
-  ASSERT_TRUE(TryReopen(options).IsNotSupported());
-}
+    Put(1, "foo", "first");
+    const Snapshot* snapshot = db_->GetSnapshot();
+    SingleDelete(1, "foo");
+    Put(1, "foo", "second");
+    ASSERT_OK(Flush(1));
 
-void CheckColumnFamilyMeta(const ColumnFamilyMetaData& cf_meta) {
-  uint64_t cf_size = 0;
-  uint64_t cf_csize = 0;
-  size_t file_count = 0;
-  for (auto level_meta : cf_meta.levels) {
-    uint64_t level_size = 0;
-    uint64_t level_csize = 0;
-    file_count += level_meta.files.size();
-    for (auto file_meta : level_meta.files) {
-      level_size += file_meta.size;
-    }
-    ASSERT_EQ(level_meta.size, level_size);
-    cf_size += level_size;
-    cf_csize += level_csize;
-  }
-  ASSERT_EQ(cf_meta.file_count, file_count);
-  ASSERT_EQ(cf_meta.size, cf_size);
-}
-
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, ColumnFamilyMetaDataTest) {
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  DestroyAndReopen(options);
+    ASSERT_EQ("first", Get(1, "foo", snapshot));
+    ASSERT_EQ("second", Get(1, "foo"));
 
-  Random rnd(301);
-  int key_index = 0;
-  ColumnFamilyMetaData cf_meta;
-  for (int i = 0; i < 100; ++i) {
-    GenerateNewFile(&rnd, &key_index);
-    db_->GetColumnFamilyMetaData(&cf_meta);
-    CheckColumnFamilyMeta(cf_meta);
-  }
-}
+    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+                           nullptr);
+    ASSERT_EQ("[ second, SDEL, first ]", AllEntriesFor("foo", 1));
 
-namespace {
-void MinLevelHelper(DBTest* self, Options& options) {
-  Random rnd(301);
+    SingleDelete(1, "foo");
 
-  for (int num = 0;
-    num < options.level0_file_num_compaction_trigger - 1;
-    num++)
-  {
-    std::vector<std::string> values;
-    // Write 120KB (12 values, each 10K)
-    for (int i = 0; i < 12; i++) {
-      values.push_back(DBTestBase::RandomString(&rnd, 10000));
-      ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
-    }
-    self->dbfull()->TEST_WaitForFlushMemTable();
-    ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1);
-  }
+    ASSERT_EQ("first", Get(1, "foo", snapshot));
+    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
 
-  //generate one more file in level-0, and should trigger level-0 compaction
-  std::vector<std::string> values;
-  for (int i = 0; i < 12; i++) {
-    values.push_back(DBTestBase::RandomString(&rnd, 10000));
-    ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
-  }
-  self->dbfull()->TEST_WaitForCompact();
+    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+                           nullptr);
 
-  ASSERT_EQ(self->NumTableFilesAtLevel(0), 0);
-  ASSERT_EQ(self->NumTableFilesAtLevel(1), 1);
+    ASSERT_EQ("first", Get(1, "foo", snapshot));
+    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
+    db_->ReleaseSnapshot(snapshot);
+    // Skip HashCuckooRep as it does not support single delete.  FIFO and
+    // universal compaction do not apply to the test case.  Skip MergePut
+    // because single delete does not get removed when it encounters a merge.
+  } while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
+                         kSkipUniversalCompaction | kSkipMergePut));
 }
 
-// returns false if the calling-Test should be skipped
-bool MinLevelToCompress(CompressionType& type, Options& options, int wbits,
-                        int lev, int strategy) {
-  fprintf(stderr, "Test with compression options : window_bits = %d, level =  %d, strategy = %d}\n", wbits, lev, strategy);
-  options.write_buffer_size = 100<<10; //100KB
-  options.arena_block_size = 4096;
-  options.num_levels = 3;
-  options.level0_file_num_compaction_trigger = 3;
-  options.create_if_missing = true;
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, DeletionMarkers1) {
+  Options options = CurrentOptions();
+  options.max_background_flushes = 0;
+  CreateAndReopenWithCF({"pikachu"}, options);
+  Put(1, "foo", "v1");
+  ASSERT_OK(Flush(1));
+  const int last = 2;
+  MoveFilesToLevel(last, 1);
+  // foo => v1 is now in last level
+  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
 
-  if (Snappy_Supported()) {
-    type = kSnappyCompression;
-    fprintf(stderr, "using snappy\n");
-  } else if (Zlib_Supported()) {
-    type = kZlibCompression;
-    fprintf(stderr, "using zlib\n");
-  } else if (BZip2_Supported()) {
-    type = kBZip2Compression;
-    fprintf(stderr, "using bzip2\n");
-  } else if (LZ4_Supported()) {
-    type = kLZ4Compression;
-    fprintf(stderr, "using lz4\n");
-  } else {
-    fprintf(stderr, "skipping test, compression disabled\n");
-    return false;
-  }
-  options.compression_per_level.resize(options.num_levels);
+  // Place a table at level last-1 to prevent merging with preceding mutation
+  Put(1, "a", "begin");
+  Put(1, "z", "end");
+  Flush(1);
+  MoveFilesToLevel(last - 1, 1);
+  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
+  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);
 
-  // do not compress L0
-  for (int i = 0; i < 1; i++) {
-    options.compression_per_level[i] = kNoCompression;
-  }
-  for (int i = 1; i < options.num_levels; i++) {
-    options.compression_per_level[i] = type;
-  }
-  return true;
+  Delete(1, "foo");
+  Put(1, "foo", "v2");
+  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
+  ASSERT_OK(Flush(1));  // Moves to level last-2
+  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
+  Slice z("z");
+  dbfull()->TEST_CompactRange(last - 2, nullptr, &z, handles_[1]);
+  // DEL eliminated, but v1 remains because we aren't compacting that level
+  // (DEL can be eliminated because v2 hides v1).
+  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
+  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
+  // Merging last-1 w/ last, so we are the base level for "foo", so
+  // DEL is removed.  (as is v1).
+  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
 }
-}  // namespace
 
-TEST_F(DBTest, MinLevelToCompress1) {
+TEST_F(DBTest, DeletionMarkers2) {
   Options options = CurrentOptions();
-  CompressionType type = kSnappyCompression;
-  if (!MinLevelToCompress(type, options, -14, -1, 0)) {
-    return;
-  }
-  Reopen(options);
-  MinLevelHelper(this, options);
-
-  // do not compress L0 and L1
-  for (int i = 0; i < 2; i++) {
-    options.compression_per_level[i] = kNoCompression;
-  }
-  for (int i = 2; i < options.num_levels; i++) {
-    options.compression_per_level[i] = type;
-  }
-  DestroyAndReopen(options);
-  MinLevelHelper(this, options);
-}
+  CreateAndReopenWithCF({"pikachu"}, options);
+  Put(1, "foo", "v1");
+  ASSERT_OK(Flush(1));
+  const int last = 2;
+  MoveFilesToLevel(last, 1);
+  // foo => v1 is now in last level
+  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
 
-TEST_F(DBTest, MinLevelToCompress2) {
-  Options options = CurrentOptions();
-  CompressionType type = kSnappyCompression;
-  if (!MinLevelToCompress(type, options, 15, -1, 0)) {
-    return;
-  }
-  Reopen(options);
-  MinLevelHelper(this, options);
+  // Place a table at level last-1 to prevent merging with preceding mutation
+  Put(1, "a", "begin");
+  Put(1, "z", "end");
+  Flush(1);
+  MoveFilesToLevel(last - 1, 1);
+  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
+  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);
 
-  // do not compress L0 and L1
-  for (int i = 0; i < 2; i++) {
-    options.compression_per_level[i] = kNoCompression;
-  }
-  for (int i = 2; i < options.num_levels; i++) {
-    options.compression_per_level[i] = type;
-  }
-  DestroyAndReopen(options);
-  MinLevelHelper(this, options);
+  Delete(1, "foo");
+  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
+  ASSERT_OK(Flush(1));  // Moves to level last-2
+  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
+  dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr, handles_[1]);
+  // DEL kept: "last" file overlaps
+  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
+  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
+  // Merging last-1 w/ last, so we are the base level for "foo", so
+  // DEL is removed.  (as is v1).
+  ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
 }
 
-TEST_F(DBTest, RepeatedWritesToSameKey) {
+TEST_F(DBTest, OverlapInLevel0) {
   do {
     Options options = CurrentOptions();
-    options.env = env_;
-    options.write_buffer_size = 100000;  // Small write buffer
     CreateAndReopenWithCF({"pikachu"}, options);
 
-    // We must have at most one file per level except for level-0,
-    // which may have up to kL0_StopWritesTrigger files.
-    const int kMaxFiles =
-        options.num_levels + options.level0_stop_writes_trigger;
+    // Fill levels 1 and 2 to disable the pushing of new memtables to levels >
+    // 0.
+    ASSERT_OK(Put(1, "100", "v100"));
+    ASSERT_OK(Put(1, "999", "v999"));
+    Flush(1);
+    MoveFilesToLevel(2, 1);
+    ASSERT_OK(Delete(1, "100"));
+    ASSERT_OK(Delete(1, "999"));
+    Flush(1);
+    MoveFilesToLevel(1, 1);
+    ASSERT_EQ("0,1,1", FilesPerLevel(1));
 
-    Random rnd(301);
-    std::string value =
-        RandomString(&rnd, static_cast<int>(2 * options.write_buffer_size));
-    for (int i = 0; i < 5 * kMaxFiles; i++) {
-      ASSERT_OK(Put(1, "key", value));
-      ASSERT_LE(TotalTableFiles(1), kMaxFiles);
-    }
-  } while (ChangeCompactOptions());
-}
-#endif  // ROCKSDB_LITE
+    // Make files spanning the following ranges in level-0:
+    //  files[0]  200 .. 900
+    //  files[1]  300 .. 500
+    // Note that files are sorted by smallest key.
+    ASSERT_OK(Put(1, "300", "v300"));
+    ASSERT_OK(Put(1, "500", "v500"));
+    Flush(1);
+    ASSERT_OK(Put(1, "200", "v200"));
+    ASSERT_OK(Put(1, "600", "v600"));
+    ASSERT_OK(Put(1, "900", "v900"));
+    Flush(1);
+    ASSERT_EQ("2,1,1", FilesPerLevel(1));
 
-TEST_F(DBTest, SparseMerge) {
-  do {
-    Options options = CurrentOptions();
-    options.compression = kNoCompression;
-    CreateAndReopenWithCF({"pikachu"}, options);
+    // Compact away the placeholder files we created initially
+    dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
+    dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_[1]);
+    ASSERT_EQ("2", FilesPerLevel(1));
 
-    FillLevels("A", "Z", 1);
+    // Do a memtable compaction.  Before bug-fix, the compaction would
+    // not detect the overlap with level-0 files and would incorrectly place
+    // the deletion in a deeper level.
+    ASSERT_OK(Delete(1, "600"));
+    Flush(1);
+    ASSERT_EQ("3", FilesPerLevel(1));
+    ASSERT_EQ("NOT_FOUND", Get(1, "600"));
+  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
+}
+#endif  // ROCKSDB_LITE
 
-    // Suppose there is:
-    //    small amount of data with prefix A
-    //    large amount of data with prefix B
-    //    small amount of data with prefix C
-    // and that recent updates have made small changes to all three prefixes.
-    // Check that we do not do a compaction that merges all of B in one shot.
-    const std::string value(1000, 'x');
-    Put(1, "A", "va");
-    // Write approximately 100MB of "B" values
-    for (int i = 0; i < 100000; i++) {
-      char key[100];
-      snprintf(key, sizeof(key), "B%010d", i);
-      Put(1, key, value);
+TEST_F(DBTest, ComparatorCheck) {
+  class NewComparator : public Comparator {
+   public:
+    virtual const char* Name() const override {
+      return "rocksdb.NewComparator";
     }
-    Put(1, "C", "vc");
-    ASSERT_OK(Flush(1));
-    dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
-
-    // Make sparse update
-    Put(1, "A", "va2");
-    Put(1, "B100", "bvalue2");
-    Put(1, "C", "vc2");
-    ASSERT_OK(Flush(1));
-
-    // Compactions should not cause us to create a situation where
-    // a file overlaps too much data at the next level.
-    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
-              20 * 1048576);
-    dbfull()->TEST_CompactRange(0, nullptr, nullptr);
-    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
-              20 * 1048576);
-    dbfull()->TEST_CompactRange(1, nullptr, nullptr);
-    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
-              20 * 1048576);
+    virtual int Compare(const Slice& a, const Slice& b) const override {
+      return BytewiseComparator()->Compare(a, b);
+    }
+    virtual void FindShortestSeparator(std::string* s,
+                                       const Slice& l) const override {
+      BytewiseComparator()->FindShortestSeparator(s, l);
+    }
+    virtual void FindShortSuccessor(std::string* key) const override {
+      BytewiseComparator()->FindShortSuccessor(key);
+    }
+  };
+  Options new_options, options;
+  NewComparator cmp;
+  do {
+    options = CurrentOptions();
+    CreateAndReopenWithCF({"pikachu"}, options);
+    new_options = CurrentOptions();
+    new_options.comparator = &cmp;
+    // only the non-default column family has non-matching comparator
+    Status s = TryReopenWithColumnFamilies(
+        {"default", "pikachu"}, std::vector<Options>({options, new_options}));
+    ASSERT_TRUE(!s.ok());
+    ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
+        << s.ToString();
   } while (ChangeCompactOptions());
 }
 
-#ifndef ROCKSDB_LITE
-static bool Between(uint64_t val, uint64_t low, uint64_t high) {
-  bool result = (val >= low) && (val <= high);
-  if (!result) {
-    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
-            (unsigned long long)(val),
-            (unsigned long long)(low),
-            (unsigned long long)(high));
-  }
-  return result;
+TEST_F(DBTest, CustomComparator) {
+  class NumberComparator : public Comparator {
+   public:
+    virtual const char* Name() const override {
+      return "test.NumberComparator";
+    }
+    virtual int Compare(const Slice& a, const Slice& b) const override {
+      return ToNumber(a) - ToNumber(b);
+    }
+    virtual void FindShortestSeparator(std::string* s,
+                                       const Slice& l) const override {
+      ToNumber(*s);  // Check format
+      ToNumber(l);   // Check format
+    }
+    virtual void FindShortSuccessor(std::string* key) const override {
+      ToNumber(*key);  // Check format
+    }
+
+   private:
+    static int ToNumber(const Slice& x) {
+      // Check that there are no extra characters.
+      EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
+          << EscapeString(x);
+      int val;
+      char ignored;
+      EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
+          << EscapeString(x);
+      return val;
+    }
+  };
+  Options new_options;
+  NumberComparator cmp;
+  do {
+    new_options = CurrentOptions();
+    new_options.create_if_missing = true;
+    new_options.comparator = &cmp;
+    new_options.write_buffer_size = 4096;  // Compact more often
+    new_options.arena_block_size = 4096;
+    new_options = CurrentOptions(new_options);
+    DestroyAndReopen(new_options);
+    CreateAndReopenWithCF({"pikachu"}, new_options);
+    ASSERT_OK(Put(1, "[10]", "ten"));
+    ASSERT_OK(Put(1, "[0x14]", "twenty"));
+    for (int i = 0; i < 2; i++) {
+      ASSERT_EQ("ten", Get(1, "[10]"));
+      ASSERT_EQ("ten", Get(1, "[0xa]"));
+      ASSERT_EQ("twenty", Get(1, "[20]"));
+      ASSERT_EQ("twenty", Get(1, "[0x14]"));
+      ASSERT_EQ("NOT_FOUND", Get(1, "[15]"));
+      ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]"));
+      Compact(1, "[0]", "[9999]");
+    }
+
+    for (int run = 0; run < 2; run++) {
+      for (int i = 0; i < 1000; i++) {
+        char buf[100];
+        snprintf(buf, sizeof(buf), "[%d]", i * 10);
+        ASSERT_OK(Put(1, buf, buf));
+      }
+      Compact(1, "[0]", "[1000000]");
+    }
+  } while (ChangeCompactOptions());
 }
 
-TEST_F(DBTest, ApproximateSizesMemTable) {
+TEST_F(DBTest, DBOpen_Options) {
   Options options = CurrentOptions();
-  options.write_buffer_size = 100000000;  // Large write buffer
-  options.compression = kNoCompression;
-  options.create_if_missing = true;
-  DestroyAndReopen(options);
+  std::string dbname = test::TmpDir(env_) + "/db_options_test";
+  ASSERT_OK(DestroyDB(dbname, options));
 
-  const int N = 128;
-  Random rnd(301);
-  for (int i = 0; i < N; i++) {
-    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
-  }
+  // Does not exist, and create_if_missing == false: error
+  DB* db = nullptr;
+  options.create_if_missing = false;
+  Status s = DB::Open(options, dbname, &db);
+  ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
+  ASSERT_TRUE(db == nullptr);
 
-  uint64_t size;
-  std::string start = Key(50);
-  std::string end = Key(60);
-  Range r(start, end);
-  db_->GetApproximateSizes(&r, 1, &size, true);
-  ASSERT_GT(size, 6000);
-  ASSERT_LT(size, 204800);
-  // Zero if not including mem table
-  db_->GetApproximateSizes(&r, 1, &size, false);
-  ASSERT_EQ(size, 0);
+  // Does not exist, and create_if_missing == true: OK
+  options.create_if_missing = true;
+  s = DB::Open(options, dbname, &db);
+  ASSERT_OK(s);
+  ASSERT_TRUE(db != nullptr);
 
-  start = Key(500);
-  end = Key(600);
-  r = Range(start, end);
-  db_->GetApproximateSizes(&r, 1, &size, true);
-  ASSERT_EQ(size, 0);
+  delete db;
+  db = nullptr;
 
-  for (int i = 0; i < N; i++) {
-    ASSERT_OK(Put(Key(1000 + i), RandomString(&rnd, 1024)));
-  }
+  // Does exist, and error_if_exists == true: error
+  options.create_if_missing = false;
+  options.error_if_exists = true;
+  s = DB::Open(options, dbname, &db);
+  ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
+  ASSERT_TRUE(db == nullptr);
 
-  start = Key(500);
-  end = Key(600);
-  r = Range(start, end);
-  db_->GetApproximateSizes(&r, 1, &size, true);
-  ASSERT_EQ(size, 0);
+  // Does exist, and error_if_exists == false: OK
+  options.create_if_missing = true;
+  options.error_if_exists = false;
+  s = DB::Open(options, dbname, &db);
+  ASSERT_OK(s);
+  ASSERT_TRUE(db != nullptr);
 
-  start = Key(100);
-  end = Key(1020);
-  r = Range(start, end);
-  db_->GetApproximateSizes(&r, 1, &size, true);
-  ASSERT_GT(size, 6000);
+  delete db;
+  db = nullptr;
+}
 
-  options.max_write_buffer_number = 8;
-  options.min_write_buffer_number_to_merge = 5;
-  options.write_buffer_size = 1024 * N;  // Not very large
+TEST_F(DBTest, DBOpen_Change_NumLevels) {
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
   DestroyAndReopen(options);
+  ASSERT_TRUE(db_ != nullptr);
+  CreateAndReopenWithCF({"pikachu"}, options);
 
-  int keys[N * 3];
-  for (int i = 0; i < N; i++) {
-    keys[i * 3] = i * 5;
-    keys[i * 3 + 1] = i * 5 + 1;
-    keys[i * 3 + 2] = i * 5 + 2;
-  }
-  std::random_shuffle(std::begin(keys), std::end(keys));
-
-  for (int i = 0; i < N * 3; i++) {
-    ASSERT_OK(Put(Key(keys[i] + 1000), RandomString(&rnd, 1024)));
-  }
-
-  start = Key(100);
-  end = Key(300);
-  r = Range(start, end);
-  db_->GetApproximateSizes(&r, 1, &size, true);
-  ASSERT_EQ(size, 0);
+  ASSERT_OK(Put(1, "a", "123"));
+  ASSERT_OK(Put(1, "b", "234"));
+  Flush(1);
+  MoveFilesToLevel(3, 1);
+  Close();
 
-  start = Key(1050);
-  end = Key(1080);
-  r = Range(start, end);
-  db_->GetApproximateSizes(&r, 1, &size, true);
-  ASSERT_GT(size, 6000);
+  options.create_if_missing = false;
+  options.num_levels = 2;
+  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
+  ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr);
+  ASSERT_TRUE(db_ == nullptr);
+}
 
-  start = Key(2100);
-  end = Key(2300);
-  r = Range(start, end);
-  db_->GetApproximateSizes(&r, 1, &size, true);
-  ASSERT_EQ(size, 0);
+TEST_F(DBTest, DestroyDBMetaDatabase) {
+  std::string dbname = test::TmpDir(env_) + "/db_meta";
+  ASSERT_OK(env_->CreateDirIfMissing(dbname));
+  std::string metadbname = MetaDatabaseName(dbname, 0);
+  ASSERT_OK(env_->CreateDirIfMissing(metadbname));
+  std::string metametadbname = MetaDatabaseName(metadbname, 0);
+  ASSERT_OK(env_->CreateDirIfMissing(metametadbname));
 
-  start = Key(1050);
-  end = Key(1080);
-  r = Range(start, end);
-  uint64_t size_with_mt, size_without_mt;
-  db_->GetApproximateSizes(&r, 1, &size_with_mt, true);
-  ASSERT_GT(size_with_mt, 6000);
-  db_->GetApproximateSizes(&r, 1, &size_without_mt, false);
-  ASSERT_EQ(size_without_mt, 0);
+  // Destroy previous versions if they exist. Using the long way.
+  Options options = CurrentOptions();
+  ASSERT_OK(DestroyDB(metametadbname, options));
+  ASSERT_OK(DestroyDB(metadbname, options));
+  ASSERT_OK(DestroyDB(dbname, options));
 
-  Flush();
+  // Setup databases
+  DB* db = nullptr;
+  ASSERT_OK(DB::Open(options, dbname, &db));
+  delete db;
+  db = nullptr;
+  ASSERT_OK(DB::Open(options, metadbname, &db));
+  delete db;
+  db = nullptr;
+  ASSERT_OK(DB::Open(options, metametadbname, &db));
+  delete db;
+  db = nullptr;
 
-  for (int i = 0; i < N; i++) {
-    ASSERT_OK(Put(Key(i + 1000), RandomString(&rnd, 1024)));
-  }
+  // Delete databases
+  ASSERT_OK(DestroyDB(dbname, options));
 
-  start = Key(1050);
-  end = Key(1080);
-  r = Range(start, end);
-  db_->GetApproximateSizes(&r, 1, &size_with_mt, true);
-  db_->GetApproximateSizes(&r, 1, &size_without_mt, false);
-  ASSERT_GT(size_with_mt, size_without_mt);
-  ASSERT_GT(size_without_mt, 6000);
+  // Check if deletion worked.
+  options.create_if_missing = false;
+  ASSERT_TRUE(!(DB::Open(options, dbname, &db)).ok());
+  ASSERT_TRUE(!(DB::Open(options, metadbname, &db)).ok());
+  ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok());
 }
 
-TEST_F(DBTest, ApproximateSizes) {
+#ifndef ROCKSDB_LITE
+// Check that number of files does not grow when writes are dropped
+TEST_F(DBTest, DropWrites) {
   do {
     Options options = CurrentOptions();
-    options.write_buffer_size = 100000000;        // Large write buffer
-    options.compression = kNoCompression;
-    options.create_if_missing = true;
-    DestroyAndReopen(options);
-    CreateAndReopenWithCF({"pikachu"}, options);
-
-    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
-    ReopenWithColumnFamilies({"default", "pikachu"}, options);
-    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
+    options.env = env_;
+    options.paranoid_checks = false;
+    Reopen(options);
 
-    // Write 8MB (80 values, each 100K)
-    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
-    const int N = 80;
-    static const int S1 = 100000;
-    static const int S2 = 105000;  // Allow some expansion from metadata
-    Random rnd(301);
-    for (int i = 0; i < N; i++) {
-      ASSERT_OK(Put(1, Key(i), RandomString(&rnd, S1)));
+    ASSERT_OK(Put("foo", "v1"));
+    ASSERT_EQ("v1", Get("foo"));
+    Compact("a", "z");
+    const size_t num_files = CountFiles();
+    // Force out-of-space errors
+    env_->drop_writes_.store(true, std::memory_order_release);
+    env_->sleep_counter_.Reset();
+    env_->no_sleep_ = true;
+    for (int i = 0; i < 5; i++) {
+      if (option_config_ != kUniversalCompactionMultiLevel &&
+          option_config_ != kUniversalSubcompactions) {
+        for (int level = 0; level < dbfull()->NumberLevels(); level++) {
+          if (level > 0 && level == dbfull()->NumberLevels() - 1) {
+            break;
+          }
+          dbfull()->TEST_CompactRange(level, nullptr, nullptr, nullptr,
+                                      true /* disallow trivial move */);
+        }
+      } else {
+        dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+      }
     }
 
-    // 0 because GetApproximateSizes() does not account for memtable space
-    ASSERT_TRUE(Between(Size("", Key(50), 1), 0, 0));
-
-    // Check sizes across recovery by reopening a few times
-    for (int run = 0; run < 3; run++) {
-      ReopenWithColumnFamilies({"default", "pikachu"}, options);
-
-      for (int compact_start = 0; compact_start < N; compact_start += 10) {
-        for (int i = 0; i < N; i += 10) {
-          ASSERT_TRUE(Between(Size("", Key(i), 1), S1 * i, S2 * i));
-          ASSERT_TRUE(Between(Size("", Key(i) + ".suffix", 1), S1 * (i + 1),
-                              S2 * (i + 1)));
-          ASSERT_TRUE(Between(Size(Key(i), Key(i + 10), 1), S1 * 10, S2 * 10));
-        }
-        ASSERT_TRUE(Between(Size("", Key(50), 1), S1 * 50, S2 * 50));
-        ASSERT_TRUE(
-            Between(Size("", Key(50) + ".suffix", 1), S1 * 50, S2 * 50));
+    std::string property_value;
+    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
+    ASSERT_EQ("5", property_value);
 
-        std::string cstart_str = Key(compact_start);
-        std::string cend_str = Key(compact_start + 9);
-        Slice cstart = cstart_str;
-        Slice cend = cend_str;
-        dbfull()->TEST_CompactRange(0, &cstart, &cend, handles_[1]);
-      }
+    env_->drop_writes_.store(false, std::memory_order_release);
+    ASSERT_LT(CountFiles(), num_files + 3);
 
-      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
-      ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
-    }
-    // ApproximateOffsetOf() is not yet implemented in plain table format.
-  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
-                         kSkipPlainTable | kSkipHashIndex));
+    // Check that compaction attempts slept after errors
+    // TODO @krad: Figure out why ASSERT_EQ 5 keeps failing in certain compiler
+    // versions
+    ASSERT_GE(env_->sleep_counter_.Read(), 4);
+  } while (ChangeCompactOptions());
 }
 
-TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
+// Check background error counter bumped on flush failures.
+TEST_F(DBTest, DropWritesFlush) {
   do {
     Options options = CurrentOptions();
-    options.compression = kNoCompression;
-    CreateAndReopenWithCF({"pikachu"}, options);
+    options.env = env_;
+    options.max_background_flushes = 1;
+    Reopen(options);
 
-    Random rnd(301);
-    std::string big1 = RandomString(&rnd, 100000);
-    ASSERT_OK(Put(1, Key(0), RandomString(&rnd, 10000)));
-    ASSERT_OK(Put(1, Key(1), RandomString(&rnd, 10000)));
-    ASSERT_OK(Put(1, Key(2), big1));
-    ASSERT_OK(Put(1, Key(3), RandomString(&rnd, 10000)));
-    ASSERT_OK(Put(1, Key(4), big1));
-    ASSERT_OK(Put(1, Key(5), RandomString(&rnd, 10000)));
-    ASSERT_OK(Put(1, Key(6), RandomString(&rnd, 300000)));
-    ASSERT_OK(Put(1, Key(7), RandomString(&rnd, 10000)));
+    ASSERT_OK(Put("foo", "v1"));
+    // Force out-of-space errors
+    env_->drop_writes_.store(true, std::memory_order_release);
 
-    // Check sizes across recovery by reopening a few times
-    for (int run = 0; run < 3; run++) {
-      ReopenWithColumnFamilies({"default", "pikachu"}, options);
+    std::string property_value;
+    // Background error count is 0 now.
+    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
+    ASSERT_EQ("0", property_value);
 
-      ASSERT_TRUE(Between(Size("", Key(0), 1), 0, 0));
-      ASSERT_TRUE(Between(Size("", Key(1), 1), 10000, 11000));
-      ASSERT_TRUE(Between(Size("", Key(2), 1), 20000, 21000));
-      ASSERT_TRUE(Between(Size("", Key(3), 1), 120000, 121000));
-      ASSERT_TRUE(Between(Size("", Key(4), 1), 130000, 131000));
-      ASSERT_TRUE(Between(Size("", Key(5), 1), 230000, 231000));
-      ASSERT_TRUE(Between(Size("", Key(6), 1), 240000, 241000));
-      ASSERT_TRUE(Between(Size("", Key(7), 1), 540000, 541000));
-      ASSERT_TRUE(Between(Size("", Key(8), 1), 550000, 560000));
+    dbfull()->TEST_FlushMemTable(true);
 
-      ASSERT_TRUE(Between(Size(Key(3), Key(5), 1), 110000, 111000));
+    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
+    ASSERT_EQ("1", property_value);
 
-      dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
-    }
-    // ApproximateOffsetOf() is not yet implemented in plain table format.
-  } while (ChangeOptions(kSkipPlainTable));
+    env_->drop_writes_.store(false, std::memory_order_release);
+  } while (ChangeCompactOptions());
 }
 #endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, IteratorPinsRef) {
+// Check that CompactRange() returns failure if there is not enough space left
+// on device
+TEST_F(DBTest, NoSpaceCompactRange) {
   do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    Put(1, "foo", "hello");
-
-    // Get iterator that will yield the current contents of the DB.
-    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
+    Options options = CurrentOptions();
+    options.env = env_;
+    options.disable_auto_compactions = true;
+    Reopen(options);
 
-    // Write to force compactions
-    Put(1, "foo", "newvalue1");
-    for (int i = 0; i < 100; i++) {
-      // 100K values
-      ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
+    // generate 5 tables
+    for (int i = 0; i < 5; ++i) {
+      ASSERT_OK(Put(Key(i), Key(i) + "v"));
+      ASSERT_OK(Flush());
     }
-    Put(1, "foo", "newvalue2");
 
-    iter->SeekToFirst();
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ("foo", iter->key().ToString());
-    ASSERT_EQ("hello", iter->value().ToString());
-    iter->Next();
-    ASSERT_TRUE(!iter->Valid());
-    delete iter;
+    // Force out-of-space errors
+    env_->no_space_.store(true, std::memory_order_release);
+
+    Status s = dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
+                                           true /* disallow trivial move */);
+    ASSERT_TRUE(s.IsIOError());
+
+    env_->no_space_.store(false, std::memory_order_release);
   } while (ChangeCompactOptions());
 }
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, Snapshot) {
-  anon::OptionsOverride options_override;
-  options_override.skip_policy = kSkipNoSnapshot;
+TEST_F(DBTest, NonWritableFileSystem) {
   do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
-    Put(0, "foo", "0v1");
-    Put(1, "foo", "1v1");
+    Options options = CurrentOptions();
+    options.write_buffer_size = 4096;
+    options.arena_block_size = 4096;
+    options.env = env_;
+    Reopen(options);
+    ASSERT_OK(Put("foo", "v1"));
+    env_->non_writeable_rate_.store(100);
+    std::string big(100000, 'x');
+    int errors = 0;
+    for (int i = 0; i < 20; i++) {
+      if (!Put("foo", big).ok()) {
+        errors++;
+        env_->SleepForMicroseconds(100000);
+      }
+    }
+    ASSERT_GT(errors, 0);
+    env_->non_writeable_rate_.store(0);
+  } while (ChangeCompactOptions());
+}
 
-    const Snapshot* s1 = db_->GetSnapshot();
-    ASSERT_EQ(1U, GetNumSnapshots());
-    uint64_t time_snap1 = GetTimeOldestSnapshots();
-    ASSERT_GT(time_snap1, 0U);
-    Put(0, "foo", "0v2");
-    Put(1, "foo", "1v2");
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, ManifestWriteError) {
+  // Test for the following problem:
+  // (a) Compaction produces file F
+  // (b) Log record containing F is written to MANIFEST file, but Sync() fails
+  // (c) GC deletes F
+  // (d) After reopening DB, reads fail since deleted F is named in log record
 
-    env_->addon_time_.fetch_add(1);
+  // We iterate twice.  In the second iteration, everything is the
+  // same except the log record never makes it to the MANIFEST file.
+  for (int iter = 0; iter < 2; iter++) {
+    std::atomic<bool>* error_type = (iter == 0) ? &env_->manifest_sync_error_
+                                                : &env_->manifest_write_error_;
 
-    const Snapshot* s2 = db_->GetSnapshot();
-    ASSERT_EQ(2U, GetNumSnapshots());
-    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
-    Put(0, "foo", "0v3");
-    Put(1, "foo", "1v3");
+    // Insert foo=>bar mapping
+    Options options = CurrentOptions();
+    options.env = env_;
+    options.create_if_missing = true;
+    options.error_if_exists = false;
+    options.paranoid_checks = true;
+    DestroyAndReopen(options);
+    ASSERT_OK(Put("foo", "bar"));
+    ASSERT_EQ("bar", Get("foo"));
 
-    {
-      ManagedSnapshot s3(db_);
-      ASSERT_EQ(3U, GetNumSnapshots());
-      ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
+    // Memtable compaction (will succeed)
+    Flush();
+    ASSERT_EQ("bar", Get("foo"));
+    const int last = 2;
+    MoveFilesToLevel(2);
+    ASSERT_EQ(NumTableFilesAtLevel(last), 1);  // foo=>bar is now in last level
 
-      Put(0, "foo", "0v4");
-      Put(1, "foo", "1v4");
-      ASSERT_EQ("0v1", Get(0, "foo", s1));
-      ASSERT_EQ("1v1", Get(1, "foo", s1));
-      ASSERT_EQ("0v2", Get(0, "foo", s2));
-      ASSERT_EQ("1v2", Get(1, "foo", s2));
-      ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot()));
-      ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot()));
-      ASSERT_EQ("0v4", Get(0, "foo"));
-      ASSERT_EQ("1v4", Get(1, "foo"));
-    }
-
-    ASSERT_EQ(2U, GetNumSnapshots());
-    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
-    ASSERT_EQ("0v1", Get(0, "foo", s1));
-    ASSERT_EQ("1v1", Get(1, "foo", s1));
-    ASSERT_EQ("0v2", Get(0, "foo", s2));
-    ASSERT_EQ("1v2", Get(1, "foo", s2));
-    ASSERT_EQ("0v4", Get(0, "foo"));
-    ASSERT_EQ("1v4", Get(1, "foo"));
+    // Merging compaction (will fail)
+    error_type->store(true, std::memory_order_release);
+    dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
+    ASSERT_EQ("bar", Get("foo"));
 
-    db_->ReleaseSnapshot(s1);
-    ASSERT_EQ("0v2", Get(0, "foo", s2));
-    ASSERT_EQ("1v2", Get(1, "foo", s2));
-    ASSERT_EQ("0v4", Get(0, "foo"));
-    ASSERT_EQ("1v4", Get(1, "foo"));
-    ASSERT_EQ(1U, GetNumSnapshots());
-    ASSERT_LT(time_snap1, GetTimeOldestSnapshots());
+    error_type->store(false, std::memory_order_release);
 
-    db_->ReleaseSnapshot(s2);
-    ASSERT_EQ(0U, GetNumSnapshots());
-    ASSERT_EQ("0v4", Get(0, "foo"));
-    ASSERT_EQ("1v4", Get(1, "foo"));
-  } while (ChangeOptions(kSkipHashCuckoo));
-}
+    // Since paranoid_checks=true, writes should fail
+    ASSERT_NOK(Put("foo2", "bar2"));
 
-TEST_F(DBTest, HiddenValuesAreRemoved) {
-  anon::OptionsOverride options_override;
-  options_override.skip_policy = kSkipNoSnapshot;
-  do {
-    Options options = CurrentOptions(options_override);
-    CreateAndReopenWithCF({"pikachu"}, options);
-    Random rnd(301);
-    FillLevels("a", "z", 1);
+    // Recovery: should not lose data
+    ASSERT_EQ("bar", Get("foo"));
 
-    std::string big = RandomString(&rnd, 50000);
-    Put(1, "foo", big);
-    Put(1, "pastfoo", "v");
-    const Snapshot* snapshot = db_->GetSnapshot();
-    Put(1, "foo", "tiny");
-    Put(1, "pastfoo2", "v2");  // Advance sequence number one more
+    // Try again with paranoid_checks=false
+    Close();
+    options.paranoid_checks = false;
+    Reopen(options);
 
-    ASSERT_OK(Flush(1));
-    ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
+    // Merging compaction (will fail)
+    error_type->store(true, std::memory_order_release);
+    dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
+    ASSERT_EQ("bar", Get("foo"));
 
-    ASSERT_EQ(big, Get(1, "foo", snapshot));
-    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 50000, 60000));
-    db_->ReleaseSnapshot(snapshot);
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big + " ]");
-    Slice x("x");
-    dbfull()->TEST_CompactRange(0, nullptr, &x, handles_[1]);
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
-    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
-    ASSERT_GE(NumTableFilesAtLevel(1, 1), 1);
-    dbfull()->TEST_CompactRange(1, nullptr, &x, handles_[1]);
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
+    // Recovery: should not lose data
+    error_type->store(false, std::memory_order_release);
+    Reopen(options);
+    ASSERT_EQ("bar", Get("foo"));
 
-    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 0, 1000));
-    // ApproximateOffsetOf() is not yet implemented in plain table format,
-    // which is used by Size().
-    // skip HashCuckooRep as it does not support snapshot
-  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
-                         kSkipPlainTable | kSkipHashCuckoo));
+    // Since paranoid_checks=false, writes should succeed
+    ASSERT_OK(Put("foo2", "bar2"));
+    ASSERT_EQ("bar", Get("foo"));
+    ASSERT_EQ("bar2", Get("foo2"));
+  }
 }
 #endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, CompactBetweenSnapshots) {
-  anon::OptionsOverride options_override;
-  options_override.skip_policy = kSkipNoSnapshot;
-  do {
-    Options options = CurrentOptions(options_override);
-    options.disable_auto_compactions = true;
-    CreateAndReopenWithCF({"pikachu"}, options);
-    Random rnd(301);
-    FillLevels("a", "z", 1);
-
-    Put(1, "foo", "first");
-    const Snapshot* snapshot1 = db_->GetSnapshot();
-    Put(1, "foo", "second");
-    Put(1, "foo", "third");
-    Put(1, "foo", "fourth");
-    const Snapshot* snapshot2 = db_->GetSnapshot();
-    Put(1, "foo", "fifth");
-    Put(1, "foo", "sixth");
-
-    // All entries (including duplicates) exist
-    // before any compaction or flush is triggered.
-    ASSERT_EQ(AllEntriesFor("foo", 1),
-              "[ sixth, fifth, fourth, third, second, first ]");
-    ASSERT_EQ("sixth", Get(1, "foo"));
-    ASSERT_EQ("fourth", Get(1, "foo", snapshot2));
-    ASSERT_EQ("first", Get(1, "foo", snapshot1));
+TEST_F(DBTest, PutFailsParanoid) {
+  // Test the following:
+  // (a) A random put fails in paranoid mode (simulate by sync fail)
+  // (b) All other puts have to fail, even if writes would succeed
+  // (c) All of that should happen ONLY if paranoid_checks = true
 
-    // After a flush, "second", "third" and "fifth" should
-    // be removed
-    ASSERT_OK(Flush(1));
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth, first ]");
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.create_if_missing = true;
+  options.error_if_exists = false;
+  options.paranoid_checks = true;
+  DestroyAndReopen(options);
+  CreateAndReopenWithCF({"pikachu"}, options);
+  Status s;
 
-    // after we release the snapshot1, only two values left
-    db_->ReleaseSnapshot(snapshot1);
-    FillLevels("a", "z", 1);
-    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
-                           nullptr);
+  ASSERT_OK(Put(1, "foo", "bar"));
+  ASSERT_OK(Put(1, "foo1", "bar1"));
+  // simulate error
+  env_->log_write_error_.store(true, std::memory_order_release);
+  s = Put(1, "foo2", "bar2");
+  ASSERT_TRUE(!s.ok());
+  env_->log_write_error_.store(false, std::memory_order_release);
+  s = Put(1, "foo3", "bar3");
+  // the next put should fail, too
+  ASSERT_TRUE(!s.ok());
+  // but we're still able to read
+  ASSERT_EQ("bar", Get(1, "foo"));
 
-    // We have only one valid snapshot snapshot2. Since snapshot1 is
-    // not valid anymore, "first" should be removed by a compaction.
-    ASSERT_EQ("sixth", Get(1, "foo"));
-    ASSERT_EQ("fourth", Get(1, "foo", snapshot2));
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth ]");
+  // do the same thing with paranoid checks off
+  options.paranoid_checks = false;
+  DestroyAndReopen(options);
+  CreateAndReopenWithCF({"pikachu"}, options);
 
-    // after we release the snapshot2, only one value should be left
-    db_->ReleaseSnapshot(snapshot2);
-    FillLevels("a", "z", 1);
-    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
-                           nullptr);
-    ASSERT_EQ("sixth", Get(1, "foo"));
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth ]");
-    // skip HashCuckooRep as it does not support snapshot
-  } while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction));
+  ASSERT_OK(Put(1, "foo", "bar"));
+  ASSERT_OK(Put(1, "foo1", "bar1"));
+  // simulate error
+  env_->log_write_error_.store(true, std::memory_order_release);
+  s = Put(1, "foo2", "bar2");
+  ASSERT_TRUE(!s.ok());
+  env_->log_write_error_.store(false, std::memory_order_release);
+  s = Put(1, "foo3", "bar3");
+  // the next put should NOT fail
+  ASSERT_TRUE(s.ok());
 }
 
-TEST_F(DBTest, UnremovableSingleDelete) {
-  // If we compact:
-  //
-  // Put(A, v1) Snapshot SingleDelete(A) Put(A, v2)
-  //
-  // We do not want to end up with:
-  //
-  // Put(A, v1) Snapshot Put(A, v2)
-  //
-  // Because a subsequent SingleDelete(A) would delete the Put(A, v2)
-  // but not Put(A, v1), so Get(A) would return v1.
-  anon::OptionsOverride options_override;
-  options_override.skip_policy = kSkipNoSnapshot;
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, SnapshotFiles) {
   do {
-    Options options = CurrentOptions(options_override);
-    options.disable_auto_compactions = true;
+    Options options = CurrentOptions();
+    options.write_buffer_size = 100000000;  // Large write buffer
     CreateAndReopenWithCF({"pikachu"}, options);
 
-    Put(1, "foo", "first");
-    const Snapshot* snapshot = db_->GetSnapshot();
-    SingleDelete(1, "foo");
-    Put(1, "foo", "second");
-    ASSERT_OK(Flush(1));
+    Random rnd(301);
 
-    ASSERT_EQ("first", Get(1, "foo", snapshot));
-    ASSERT_EQ("second", Get(1, "foo"));
+    // Write 8MB (80 values, each 100K)
+    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
+    std::vector<std::string> values;
+    for (int i = 0; i < 80; i++) {
+      values.push_back(RandomString(&rnd, 100000));
+      ASSERT_OK(Put((i < 40), Key(i), values[i]));
+    }
 
-    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
-                           nullptr);
-    ASSERT_EQ("[ second, SDEL, first ]", AllEntriesFor("foo", 1));
+    // assert that nothing makes it to disk yet.
+    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
 
-    SingleDelete(1, "foo");
+    // get a file snapshot
+    uint64_t manifest_number = 0;
+    uint64_t manifest_size = 0;
+    std::vector<std::string> files;
+    dbfull()->DisableFileDeletions();
+    dbfull()->GetLiveFiles(files, &manifest_size);
 
-    ASSERT_EQ("first", Get(1, "foo", snapshot));
-    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
+    // CURRENT, MANIFEST, *.sst files (one for each CF)
+    ASSERT_EQ(files.size(), 4U);
 
-    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
-                           nullptr);
+    uint64_t number = 0;
+    FileType type;
 
-    ASSERT_EQ("first", Get(1, "foo", snapshot));
-    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
-    db_->ReleaseSnapshot(snapshot);
-    // Skip HashCuckooRep as it does not support single delete.  FIFO and
-    // universal compaction do not apply to the test case.  Skip MergePut
-    // because single delete does not get removed when it encounters a merge.
-  } while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
-                         kSkipUniversalCompaction | kSkipMergePut));
-}
+    // copy these files to a new snapshot directory
+    std::string snapdir = dbname_ + ".snapdir/";
+    ASSERT_OK(env_->CreateDirIfMissing(snapdir));
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, DeletionMarkers1) {
-  Options options = CurrentOptions();
-  options.max_background_flushes = 0;
-  CreateAndReopenWithCF({"pikachu"}, options);
-  Put(1, "foo", "v1");
-  ASSERT_OK(Flush(1));
-  const int last = 2;
-  MoveFilesToLevel(last, 1);
-  // foo => v1 is now in last level
-  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
+    for (size_t i = 0; i < files.size(); i++) {
+      // our clients require that GetLiveFiles returns
+      // files with "/" as first character!
+      ASSERT_EQ(files[i][0], '/');
+      std::string src = dbname_ + files[i];
+      std::string dest = snapdir + files[i];
 
-  // Place a table at level last-1 to prevent merging with preceding mutation
-  Put(1, "a", "begin");
-  Put(1, "z", "end");
-  Flush(1);
-  MoveFilesToLevel(last - 1, 1);
-  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
-  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);
-
-  Delete(1, "foo");
-  Put(1, "foo", "v2");
-  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
-  ASSERT_OK(Flush(1));  // Moves to level last-2
-  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
-  Slice z("z");
-  dbfull()->TEST_CompactRange(last - 2, nullptr, &z, handles_[1]);
-  // DEL eliminated, but v1 remains because we aren't compacting that level
-  // (DEL can be eliminated because v2 hides v1).
-  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
-  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
-  // Merging last-1 w/ last, so we are the base level for "foo", so
-  // DEL is removed.  (as is v1).
-  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
-}
-
-TEST_F(DBTest, DeletionMarkers2) {
-  Options options = CurrentOptions();
-  CreateAndReopenWithCF({"pikachu"}, options);
-  Put(1, "foo", "v1");
-  ASSERT_OK(Flush(1));
-  const int last = 2;
-  MoveFilesToLevel(last, 1);
-  // foo => v1 is now in last level
-  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
-
-  // Place a table at level last-1 to prevent merging with preceding mutation
-  Put(1, "a", "begin");
-  Put(1, "z", "end");
-  Flush(1);
-  MoveFilesToLevel(last - 1, 1);
-  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
-  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);
-
-  Delete(1, "foo");
-  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
-  ASSERT_OK(Flush(1));  // Moves to level last-2
-  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
-  dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr, handles_[1]);
-  // DEL kept: "last" file overlaps
-  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
-  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
-  // Merging last-1 w/ last, so we are the base level for "foo", so
-  // DEL is removed.  (as is v1).
-  ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
-}
-
-TEST_F(DBTest, OverlapInLevel0) {
-  do {
-    Options options = CurrentOptions();
-    CreateAndReopenWithCF({"pikachu"}, options);
-
-    //Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
-    ASSERT_OK(Put(1, "100", "v100"));
-    ASSERT_OK(Put(1, "999", "v999"));
-    Flush(1);
-    MoveFilesToLevel(2, 1);
-    ASSERT_OK(Delete(1, "100"));
-    ASSERT_OK(Delete(1, "999"));
-    Flush(1);
-    MoveFilesToLevel(1, 1);
-    ASSERT_EQ("0,1,1", FilesPerLevel(1));
-
-    // Make files spanning the following ranges in level-0:
-    //  files[0]  200 .. 900
-    //  files[1]  300 .. 500
-    // Note that files are sorted by smallest key.
-    ASSERT_OK(Put(1, "300", "v300"));
-    ASSERT_OK(Put(1, "500", "v500"));
-    Flush(1);
-    ASSERT_OK(Put(1, "200", "v200"));
-    ASSERT_OK(Put(1, "600", "v600"));
-    ASSERT_OK(Put(1, "900", "v900"));
-    Flush(1);
-    ASSERT_EQ("2,1,1", FilesPerLevel(1));
-
-    // Compact away the placeholder files we created initially
-    dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
-    dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_[1]);
-    ASSERT_EQ("2", FilesPerLevel(1));
-
-    // Do a memtable compaction.  Before bug-fix, the compaction would
-    // not detect the overlap with level-0 files and would incorrectly place
-    // the deletion in a deeper level.
-    ASSERT_OK(Delete(1, "600"));
-    Flush(1);
-    ASSERT_EQ("3", FilesPerLevel(1));
-    ASSERT_EQ("NOT_FOUND", Get(1, "600"));
-  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
-}
-#endif  // ROCKSDB_LITE
-
-TEST_F(DBTest, ComparatorCheck) {
-  class NewComparator : public Comparator {
-   public:
-    virtual const char* Name() const override {
-      return "rocksdb.NewComparator";
-    }
-    virtual int Compare(const Slice& a, const Slice& b) const override {
-      return BytewiseComparator()->Compare(a, b);
-    }
-    virtual void FindShortestSeparator(std::string* s,
-                                       const Slice& l) const override {
-      BytewiseComparator()->FindShortestSeparator(s, l);
-    }
-    virtual void FindShortSuccessor(std::string* key) const override {
-      BytewiseComparator()->FindShortSuccessor(key);
-    }
-  };
-  Options new_options, options;
-  NewComparator cmp;
-  do {
-    options = CurrentOptions();
-    CreateAndReopenWithCF({"pikachu"}, options);
-    new_options = CurrentOptions();
-    new_options.comparator = &cmp;
-    // only the non-default column family has non-matching comparator
-    Status s = TryReopenWithColumnFamilies({"default", "pikachu"},
-        std::vector<Options>({options, new_options}));
-    ASSERT_TRUE(!s.ok());
-    ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
-        << s.ToString();
-  } while (ChangeCompactOptions());
-}
-
-TEST_F(DBTest, CustomComparator) {
-  class NumberComparator : public Comparator {
-   public:
-    virtual const char* Name() const override {
-      return "test.NumberComparator";
-    }
-    virtual int Compare(const Slice& a, const Slice& b) const override {
-      return ToNumber(a) - ToNumber(b);
-    }
-    virtual void FindShortestSeparator(std::string* s,
-                                       const Slice& l) const override {
-      ToNumber(*s);     // Check format
-      ToNumber(l);      // Check format
-    }
-    virtual void FindShortSuccessor(std::string* key) const override {
-      ToNumber(*key);   // Check format
-    }
-   private:
-    static int ToNumber(const Slice& x) {
-      // Check that there are no extra characters.
-      EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
-          << EscapeString(x);
-      int val;
-      char ignored;
-      EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
-          << EscapeString(x);
-      return val;
-    }
-  };
-  Options new_options;
-  NumberComparator cmp;
-  do {
-    new_options = CurrentOptions();
-    new_options.create_if_missing = true;
-    new_options.comparator = &cmp;
-    new_options.write_buffer_size = 4096;  // Compact more often
-    new_options.arena_block_size = 4096;
-    new_options = CurrentOptions(new_options);
-    DestroyAndReopen(new_options);
-    CreateAndReopenWithCF({"pikachu"}, new_options);
-    ASSERT_OK(Put(1, "[10]", "ten"));
-    ASSERT_OK(Put(1, "[0x14]", "twenty"));
-    for (int i = 0; i < 2; i++) {
-      ASSERT_EQ("ten", Get(1, "[10]"));
-      ASSERT_EQ("ten", Get(1, "[0xa]"));
-      ASSERT_EQ("twenty", Get(1, "[20]"));
-      ASSERT_EQ("twenty", Get(1, "[0x14]"));
-      ASSERT_EQ("NOT_FOUND", Get(1, "[15]"));
-      ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]"));
-      Compact(1, "[0]", "[9999]");
-    }
-
-    for (int run = 0; run < 2; run++) {
-      for (int i = 0; i < 1000; i++) {
-        char buf[100];
-        snprintf(buf, sizeof(buf), "[%d]", i*10);
-        ASSERT_OK(Put(1, buf, buf));
-      }
-      Compact(1, "[0]", "[1000000]");
-    }
-  } while (ChangeCompactOptions());
-}
-
-TEST_F(DBTest, DBOpen_Options) {
-  Options options = CurrentOptions();
-  std::string dbname = test::TmpDir(env_) + "/db_options_test";
-  ASSERT_OK(DestroyDB(dbname, options));
-
-  // Does not exist, and create_if_missing == false: error
-  DB* db = nullptr;
-  options.create_if_missing = false;
-  Status s = DB::Open(options, dbname, &db);
-  ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
-  ASSERT_TRUE(db == nullptr);
-
-  // Does not exist, and create_if_missing == true: OK
-  options.create_if_missing = true;
-  s = DB::Open(options, dbname, &db);
-  ASSERT_OK(s);
-  ASSERT_TRUE(db != nullptr);
-
-  delete db;
-  db = nullptr;
-
-  // Does exist, and error_if_exists == true: error
-  options.create_if_missing = false;
-  options.error_if_exists = true;
-  s = DB::Open(options, dbname, &db);
-  ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
-  ASSERT_TRUE(db == nullptr);
-
-  // Does exist, and error_if_exists == false: OK
-  options.create_if_missing = true;
-  options.error_if_exists = false;
-  s = DB::Open(options, dbname, &db);
-  ASSERT_OK(s);
-  ASSERT_TRUE(db != nullptr);
-
-  delete db;
-  db = nullptr;
-}
-
-TEST_F(DBTest, DBOpen_Change_NumLevels) {
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  DestroyAndReopen(options);
-  ASSERT_TRUE(db_ != nullptr);
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  ASSERT_OK(Put(1, "a", "123"));
-  ASSERT_OK(Put(1, "b", "234"));
-  Flush(1);
-  MoveFilesToLevel(3, 1);
-  Close();
-
-  options.create_if_missing = false;
-  options.num_levels = 2;
-  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
-  ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr);
-  ASSERT_TRUE(db_ == nullptr);
-}
-
-TEST_F(DBTest, DestroyDBMetaDatabase) {
-  std::string dbname = test::TmpDir(env_) + "/db_meta";
-  ASSERT_OK(env_->CreateDirIfMissing(dbname));
-  std::string metadbname = MetaDatabaseName(dbname, 0);
-  ASSERT_OK(env_->CreateDirIfMissing(metadbname));
-  std::string metametadbname = MetaDatabaseName(metadbname, 0);
-  ASSERT_OK(env_->CreateDirIfMissing(metametadbname));
-
-  // Destroy previous versions if they exist. Using the long way.
-  Options options = CurrentOptions();
-  ASSERT_OK(DestroyDB(metametadbname, options));
-  ASSERT_OK(DestroyDB(metadbname, options));
-  ASSERT_OK(DestroyDB(dbname, options));
-
-  // Setup databases
-  DB* db = nullptr;
-  ASSERT_OK(DB::Open(options, dbname, &db));
-  delete db;
-  db = nullptr;
-  ASSERT_OK(DB::Open(options, metadbname, &db));
-  delete db;
-  db = nullptr;
-  ASSERT_OK(DB::Open(options, metametadbname, &db));
-  delete db;
-  db = nullptr;
-
-  // Delete databases
-  ASSERT_OK(DestroyDB(dbname, options));
-
-  // Check if deletion worked.
-  options.create_if_missing = false;
-  ASSERT_TRUE(!(DB::Open(options, dbname, &db)).ok());
-  ASSERT_TRUE(!(DB::Open(options, metadbname, &db)).ok());
-  ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok());
-}
-
-#ifndef ROCKSDB_LITE
-// Check that number of files does not grow when writes are dropped
-TEST_F(DBTest, DropWrites) {
-  do {
-    Options options = CurrentOptions();
-    options.env = env_;
-    options.paranoid_checks = false;
-    Reopen(options);
-
-    ASSERT_OK(Put("foo", "v1"));
-    ASSERT_EQ("v1", Get("foo"));
-    Compact("a", "z");
-    const size_t num_files = CountFiles();
-    // Force out-of-space errors
-    env_->drop_writes_.store(true, std::memory_order_release);
-    env_->sleep_counter_.Reset();
-    env_->no_sleep_ = true;
-    for (int i = 0; i < 5; i++) {
-      if (option_config_ != kUniversalCompactionMultiLevel &&
-          option_config_ != kUniversalSubcompactions) {
-        for (int level = 0; level < dbfull()->NumberLevels(); level++) {
-          if (level > 0 && level == dbfull()->NumberLevels() - 1) {
-            break;
-          }
-          dbfull()->TEST_CompactRange(level, nullptr, nullptr, nullptr,
-                                      true /* disallow trivial move */);
-        }
-      } else {
-        dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-      }
-    }
-
-    std::string property_value;
-    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
-    ASSERT_EQ("5", property_value);
-
-    env_->drop_writes_.store(false, std::memory_order_release);
-    ASSERT_LT(CountFiles(), num_files + 3);
-
-    // Check that compaction attempts slept after errors
-    // TODO @krad: Figure out why ASSERT_EQ 5 keeps failing in certain compiler
-    // versions
-    ASSERT_GE(env_->sleep_counter_.Read(), 4);
-  } while (ChangeCompactOptions());
-}
-
-// Check background error counter bumped on flush failures.
-TEST_F(DBTest, DropWritesFlush) {
-  do {
-    Options options = CurrentOptions();
-    options.env = env_;
-    options.max_background_flushes = 1;
-    Reopen(options);
-
-    ASSERT_OK(Put("foo", "v1"));
-    // Force out-of-space errors
-    env_->drop_writes_.store(true, std::memory_order_release);
-
-    std::string property_value;
-    // Background error count is 0 now.
-    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
-    ASSERT_EQ("0", property_value);
-
-    dbfull()->TEST_FlushMemTable(true);
-
-    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
-    ASSERT_EQ("1", property_value);
-
-    env_->drop_writes_.store(false, std::memory_order_release);
-  } while (ChangeCompactOptions());
-}
-#endif  // ROCKSDB_LITE
-
-// Check that CompactRange() returns failure if there is not enough space left
-// on device
-TEST_F(DBTest, NoSpaceCompactRange) {
-  do {
-    Options options = CurrentOptions();
-    options.env = env_;
-    options.disable_auto_compactions = true;
-    Reopen(options);
-
-    // generate 5 tables
-    for (int i = 0; i < 5; ++i) {
-      ASSERT_OK(Put(Key(i), Key(i) + "v"));
-      ASSERT_OK(Flush());
-    }
-
-    // Force out-of-space errors
-    env_->no_space_.store(true, std::memory_order_release);
-
-    Status s = dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
-                                           true /* disallow trivial move */);
-    ASSERT_TRUE(s.IsIOError());
-
-    env_->no_space_.store(false, std::memory_order_release);
-  } while (ChangeCompactOptions());
-}
-
-TEST_F(DBTest, NonWritableFileSystem) {
-  do {
-    Options options = CurrentOptions();
-    options.write_buffer_size = 4096;
-    options.arena_block_size = 4096;
-    options.env = env_;
-    Reopen(options);
-    ASSERT_OK(Put("foo", "v1"));
-    env_->non_writeable_rate_.store(100);
-    std::string big(100000, 'x');
-    int errors = 0;
-    for (int i = 0; i < 20; i++) {
-      if (!Put("foo", big).ok()) {
-        errors++;
-        env_->SleepForMicroseconds(100000);
-      }
-    }
-    ASSERT_GT(errors, 0);
-    env_->non_writeable_rate_.store(0);
-  } while (ChangeCompactOptions());
-}
-
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, ManifestWriteError) {
-  // Test for the following problem:
-  // (a) Compaction produces file F
-  // (b) Log record containing F is written to MANIFEST file, but Sync() fails
-  // (c) GC deletes F
-  // (d) After reopening DB, reads fail since deleted F is named in log record
-
-  // We iterate twice.  In the second iteration, everything is the
-  // same except the log record never makes it to the MANIFEST file.
-  for (int iter = 0; iter < 2; iter++) {
-    std::atomic<bool>* error_type = (iter == 0)
-        ? &env_->manifest_sync_error_
-        : &env_->manifest_write_error_;
-
-    // Insert foo=>bar mapping
-    Options options = CurrentOptions();
-    options.env = env_;
-    options.create_if_missing = true;
-    options.error_if_exists = false;
-    options.paranoid_checks = true;
-    DestroyAndReopen(options);
-    ASSERT_OK(Put("foo", "bar"));
-    ASSERT_EQ("bar", Get("foo"));
-
-    // Memtable compaction (will succeed)
-    Flush();
-    ASSERT_EQ("bar", Get("foo"));
-    const int last = 2;
-    MoveFilesToLevel(2);
-    ASSERT_EQ(NumTableFilesAtLevel(last), 1);   // foo=>bar is now in last level
-
-    // Merging compaction (will fail)
-    error_type->store(true, std::memory_order_release);
-    dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
-    ASSERT_EQ("bar", Get("foo"));
-
-    error_type->store(false, std::memory_order_release);
-
-    // Since paranoid_checks=true, writes should fail
-    ASSERT_NOK(Put("foo2", "bar2"));
-
-    // Recovery: should not lose data
-    ASSERT_EQ("bar", Get("foo"));
-
-    // Try again with paranoid_checks=false
-    Close();
-    options.paranoid_checks = false;
-    Reopen(options);
-
-    // Merging compaction (will fail)
-    error_type->store(true, std::memory_order_release);
-    dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
-    ASSERT_EQ("bar", Get("foo"));
-
-    // Recovery: should not lose data
-    error_type->store(false, std::memory_order_release);
-    Reopen(options);
-    ASSERT_EQ("bar", Get("foo"));
-
-    // Since paranoid_checks=false, writes should succeed
-    ASSERT_OK(Put("foo2", "bar2"));
-    ASSERT_EQ("bar", Get("foo"));
-    ASSERT_EQ("bar2", Get("foo2"));
-  }
-}
-#endif  // ROCKSDB_LITE
-
-TEST_F(DBTest, PutFailsParanoid) {
-  // Test the following:
-  // (a) A random put fails in paranoid mode (simulate by sync fail)
-  // (b) All other puts have to fail, even if writes would succeed
-  // (c) All of that should happen ONLY if paranoid_checks = true
-
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.create_if_missing = true;
-  options.error_if_exists = false;
-  options.paranoid_checks = true;
-  DestroyAndReopen(options);
-  CreateAndReopenWithCF({"pikachu"}, options);
-  Status s;
-
-  ASSERT_OK(Put(1, "foo", "bar"));
-  ASSERT_OK(Put(1, "foo1", "bar1"));
-  // simulate error
-  env_->log_write_error_.store(true, std::memory_order_release);
-  s = Put(1, "foo2", "bar2");
-  ASSERT_TRUE(!s.ok());
-  env_->log_write_error_.store(false, std::memory_order_release);
-  s = Put(1, "foo3", "bar3");
-  // the next put should fail, too
-  ASSERT_TRUE(!s.ok());
-  // but we're still able to read
-  ASSERT_EQ("bar", Get(1, "foo"));
-
-  // do the same thing with paranoid checks off
-  options.paranoid_checks = false;
-  DestroyAndReopen(options);
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  ASSERT_OK(Put(1, "foo", "bar"));
-  ASSERT_OK(Put(1, "foo1", "bar1"));
-  // simulate error
-  env_->log_write_error_.store(true, std::memory_order_release);
-  s = Put(1, "foo2", "bar2");
-  ASSERT_TRUE(!s.ok());
-  env_->log_write_error_.store(false, std::memory_order_release);
-  s = Put(1, "foo3", "bar3");
-  // the next put should NOT fail
-  ASSERT_TRUE(s.ok());
-}
-
-TEST_F(DBTest, BloomFilter) {
-  do {
-    Options options = CurrentOptions();
-    env_->count_random_reads_ = true;
-    options.env = env_;
-    // ChangeCompactOptions() only changes compaction style, which does not
-    // trigger reset of table_factory
-    BlockBasedTableOptions table_options;
-    table_options.no_block_cache = true;
-    table_options.filter_policy.reset(NewBloomFilterPolicy(10));
-    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-
-    CreateAndReopenWithCF({"pikachu"}, options);
-
-    // Populate multiple layers
-    const int N = 10000;
-    for (int i = 0; i < N; i++) {
-      ASSERT_OK(Put(1, Key(i), Key(i)));
-    }
-    Compact(1, "a", "z");
-    for (int i = 0; i < N; i += 100) {
-      ASSERT_OK(Put(1, Key(i), Key(i)));
-    }
-    Flush(1);
-
-    // Prevent auto compactions triggered by seeks
-    env_->delay_sstable_sync_.store(true, std::memory_order_release);
-
-    // Lookup present keys.  Should rarely read from small sstable.
-    env_->random_read_counter_.Reset();
-    for (int i = 0; i < N; i++) {
-      ASSERT_EQ(Key(i), Get(1, Key(i)));
-    }
-    int reads = env_->random_read_counter_.Read();
-    fprintf(stderr, "%d present => %d reads\n", N, reads);
-    ASSERT_GE(reads, N);
-    ASSERT_LE(reads, N + 2*N/100);
-
-    // Lookup present keys.  Should rarely read from either sstable.
-    env_->random_read_counter_.Reset();
-    for (int i = 0; i < N; i++) {
-      ASSERT_EQ("NOT_FOUND", Get(1, Key(i) + ".missing"));
-    }
-    reads = env_->random_read_counter_.Read();
-    fprintf(stderr, "%d missing => %d reads\n", N, reads);
-    ASSERT_LE(reads, 3*N/100);
-
-    env_->delay_sstable_sync_.store(false, std::memory_order_release);
-    Close();
-  } while (ChangeCompactOptions());
-}
-
-TEST_F(DBTest, BloomFilterRate) {
-  while (ChangeFilterOptions()) {
-    Options options = CurrentOptions();
-    options.statistics = rocksdb::CreateDBStatistics();
-    CreateAndReopenWithCF({"pikachu"}, options);
-
-    const int maxKey = 10000;
-    for (int i = 0; i < maxKey; i++) {
-      ASSERT_OK(Put(1, Key(i), Key(i)));
-    }
-    // Add a large key to make the file contain wide range
-    ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
-    Flush(1);
-
-    // Check if they can be found
-    for (int i = 0; i < maxKey; i++) {
-      ASSERT_EQ(Key(i), Get(1, Key(i)));
-    }
-    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
-
-    // Check if filter is useful
-    for (int i = 0; i < maxKey; i++) {
-      ASSERT_EQ("NOT_FOUND", Get(1, Key(i+33333)));
-    }
-    ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey*0.98);
-  }
-}
-
-TEST_F(DBTest, BloomFilterCompatibility) {
-  Options options = CurrentOptions();
-  options.statistics = rocksdb::CreateDBStatistics();
-  BlockBasedTableOptions table_options;
-  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-
-  // Create with block based filter
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  const int maxKey = 10000;
-  for (int i = 0; i < maxKey; i++) {
-    ASSERT_OK(Put(1, Key(i), Key(i)));
-  }
-  ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
-  Flush(1);
-
-  // Check db with full filter
-  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  ReopenWithColumnFamilies({"default", "pikachu"}, options);
-
-  // Check if they can be found
-  for (int i = 0; i < maxKey; i++) {
-    ASSERT_EQ(Key(i), Get(1, Key(i)));
-  }
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
-}
-
-TEST_F(DBTest, BloomFilterReverseCompatibility) {
-  Options options = CurrentOptions();
-  options.statistics = rocksdb::CreateDBStatistics();
-  BlockBasedTableOptions table_options;
-  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-
-  // Create with full filter
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  const int maxKey = 10000;
-  for (int i = 0; i < maxKey; i++) {
-    ASSERT_OK(Put(1, Key(i), Key(i)));
-  }
-  ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
-  Flush(1);
-
-  // Check db with block_based filter
-  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  ReopenWithColumnFamilies({"default", "pikachu"}, options);
-
-  // Check if they can be found
-  for (int i = 0; i < maxKey; i++) {
-    ASSERT_EQ(Key(i), Get(1, Key(i)));
-  }
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
-}
-
-namespace {
-// A wrapped bloom over default FilterPolicy
-class WrappedBloom : public FilterPolicy {
- public:
-  explicit WrappedBloom(int bits_per_key) :
-        filter_(NewBloomFilterPolicy(bits_per_key)),
-        counter_(0) {}
-
-  ~WrappedBloom() { delete filter_; }
-
-  const char* Name() const override { return "WrappedRocksDbFilterPolicy"; }
-
-  void CreateFilter(const rocksdb::Slice* keys, int n, std::string* dst)
-      const override {
-    std::unique_ptr<rocksdb::Slice[]> user_keys(new rocksdb::Slice[n]);
-    for (int i = 0; i < n; ++i) {
-      user_keys[i] = convertKey(keys[i]);
-    }
-    return filter_->CreateFilter(user_keys.get(), n, dst);
-  }
-
-  bool KeyMayMatch(const rocksdb::Slice& key, const rocksdb::Slice& filter)
-      const override {
-    counter_++;
-    return filter_->KeyMayMatch(convertKey(key), filter);
-  }
-
-  uint32_t GetCounter() { return counter_; }
-
- private:
-  const FilterPolicy* filter_;
-  mutable uint32_t counter_;
-
-  rocksdb::Slice convertKey(const rocksdb::Slice& key) const {
-    return key;
-  }
-};
-}  // namespace
-
-TEST_F(DBTest, BloomFilterWrapper) {
-  Options options = CurrentOptions();
-  options.statistics = rocksdb::CreateDBStatistics();
-
-  BlockBasedTableOptions table_options;
-  WrappedBloom* policy = new WrappedBloom(10);
-  table_options.filter_policy.reset(policy);
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  const int maxKey = 10000;
-  for (int i = 0; i < maxKey; i++) {
-    ASSERT_OK(Put(1, Key(i), Key(i)));
-  }
-  // Add a large key to make the file contain wide range
-  ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
-  ASSERT_EQ(0U, policy->GetCounter());
-  Flush(1);
-
-  // Check if they can be found
-  for (int i = 0; i < maxKey; i++) {
-    ASSERT_EQ(Key(i), Get(1, Key(i)));
-  }
-  ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
-  ASSERT_EQ(1U * maxKey, policy->GetCounter());
-
-  // Check if filter is useful
-  for (int i = 0; i < maxKey; i++) {
-    ASSERT_EQ("NOT_FOUND", Get(1, Key(i+33333)));
-  }
-  ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey*0.98);
-  ASSERT_EQ(2U * maxKey, policy->GetCounter());
-}
-
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, SnapshotFiles) {
-  do {
-    Options options = CurrentOptions();
-    options.write_buffer_size = 100000000;        // Large write buffer
-    CreateAndReopenWithCF({"pikachu"}, options);
-
-    Random rnd(301);
-
-    // Write 8MB (80 values, each 100K)
-    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
-    std::vector<std::string> values;
-    for (int i = 0; i < 80; i++) {
-      values.push_back(RandomString(&rnd, 100000));
-      ASSERT_OK(Put((i < 40), Key(i), values[i]));
-    }
-
-    // assert that nothing makes it to disk yet.
-    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
-
-    // get a file snapshot
-    uint64_t manifest_number = 0;
-    uint64_t manifest_size = 0;
-    std::vector<std::string> files;
-    dbfull()->DisableFileDeletions();
-    dbfull()->GetLiveFiles(files, &manifest_size);
-
-    // CURRENT, MANIFEST, *.sst files (one for each CF)
-    ASSERT_EQ(files.size(), 4U);
-
-    uint64_t number = 0;
-    FileType type;
-
-    // copy these files to a new snapshot directory
-    std::string snapdir = dbname_ + ".snapdir/";
-    ASSERT_OK(env_->CreateDirIfMissing(snapdir));
-
-    for (size_t i = 0; i < files.size(); i++) {
-      // our clients require that GetLiveFiles returns
-      // files with "/" as first character!
-      ASSERT_EQ(files[i][0], '/');
-      std::string src = dbname_ + files[i];
-      std::string dest = snapdir + files[i];
-
-      uint64_t size;
-      ASSERT_OK(env_->GetFileSize(src, &size));
-
-      // record the number and the size of the
-      // latest manifest file
-      if (ParseFileName(files[i].substr(1), &number, &type)) {
-        if (type == kDescriptorFile) {
-          if (number > manifest_number) {
-            manifest_number = number;
-            ASSERT_GE(size, manifest_size);
-            size = manifest_size; // copy only valid MANIFEST data
-          }
-        }
-      }
-      CopyFile(src, dest, size);
-    }
-
-    // release file snapshot
-    dbfull()->DisableFileDeletions();
-    // overwrite one key, this key should not appear in the snapshot
-    std::vector<std::string> extras;
-    for (unsigned int i = 0; i < 1; i++) {
-      extras.push_back(RandomString(&rnd, 100000));
-      ASSERT_OK(Put(0, Key(i), extras[i]));
-    }
-
-    // verify that data in the snapshot are correct
-    std::vector<ColumnFamilyDescriptor> column_families;
-    column_families.emplace_back("default", ColumnFamilyOptions());
-    column_families.emplace_back("pikachu", ColumnFamilyOptions());
-    std::vector<ColumnFamilyHandle*> cf_handles;
-    DB* snapdb;
-    DBOptions opts;
-    opts.env = env_;
-    opts.create_if_missing = false;
-    Status stat =
-        DB::Open(opts, snapdir, column_families, &cf_handles, &snapdb);
-    ASSERT_OK(stat);
-
-    ReadOptions roptions;
-    std::string val;
-    for (unsigned int i = 0; i < 80; i++) {
-      stat = snapdb->Get(roptions, cf_handles[i < 40], Key(i), &val);
-      ASSERT_EQ(values[i].compare(val), 0);
-    }
-    for (auto cfh : cf_handles) {
-      delete cfh;
-    }
-    delete snapdb;
-
-    // look at the new live files after we added an 'extra' key
-    // and after we took the first snapshot.
-    uint64_t new_manifest_number = 0;
-    uint64_t new_manifest_size = 0;
-    std::vector<std::string> newfiles;
-    dbfull()->DisableFileDeletions();
-    dbfull()->GetLiveFiles(newfiles, &new_manifest_size);
-
-    // find the new manifest file. assert that this manifest file is
-    // the same one as in the previous snapshot. But its size should be
-    // larger because we added an extra key after taking the
-    // previous shapshot.
-    for (size_t i = 0; i < newfiles.size(); i++) {
-      std::string src = dbname_ + "/" + newfiles[i];
-      // record the lognumber and the size of the
-      // latest manifest file
-      if (ParseFileName(newfiles[i].substr(1), &number, &type)) {
-        if (type == kDescriptorFile) {
-          if (number > new_manifest_number) {
-            uint64_t size;
-            new_manifest_number = number;
-            ASSERT_OK(env_->GetFileSize(src, &size));
-            ASSERT_GE(size, new_manifest_size);
-          }
-        }
-      }
-    }
-    ASSERT_EQ(manifest_number, new_manifest_number);
-    ASSERT_GT(new_manifest_size, manifest_size);
-
-    // release file snapshot
-    dbfull()->DisableFileDeletions();
-  } while (ChangeCompactOptions());
-}
-#endif
-
-TEST_F(DBTest, CompactOnFlush) {
-  anon::OptionsOverride options_override;
-  options_override.skip_policy = kSkipNoSnapshot;
-  do {
-    Options options = CurrentOptions(options_override);
-    options.disable_auto_compactions = true;
-    CreateAndReopenWithCF({"pikachu"}, options);
-
-    Put(1, "foo", "v1");
-    ASSERT_OK(Flush(1));
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v1 ]");
-
-    // Write two new keys
-    Put(1, "a", "begin");
-    Put(1, "z", "end");
-    Flush(1);
-
-    // Case1: Delete followed by a put
-    Delete(1, "foo");
-    Put(1, "foo", "v2");
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
-
-    // After the current memtable is flushed, the DEL should
-    // have been removed
-    ASSERT_OK(Flush(1));
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
-
-    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
-                           nullptr);
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
-
-    // Case 2: Delete followed by another delete
-    Delete(1, "foo");
-    Delete(1, "foo");
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, DEL, v2 ]");
-    ASSERT_OK(Flush(1));
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v2 ]");
-    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
-                           nullptr);
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
-
-    // Case 3: Put followed by a delete
-    Put(1, "foo", "v3");
-    Delete(1, "foo");
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v3 ]");
-    ASSERT_OK(Flush(1));
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL ]");
-    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
-                           nullptr);
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
-
-    // Case 4: Put followed by another Put
-    Put(1, "foo", "v4");
-    Put(1, "foo", "v5");
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5, v4 ]");
-    ASSERT_OK(Flush(1));
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]");
-    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
-                           nullptr);
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]");
-
-    // clear database
-    Delete(1, "foo");
-    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
-                           nullptr);
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
-
-    // Case 5: Put followed by snapshot followed by another Put
-    // Both puts should remain.
-    Put(1, "foo", "v6");
-    const Snapshot* snapshot = db_->GetSnapshot();
-    Put(1, "foo", "v7");
-    ASSERT_OK(Flush(1));
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v7, v6 ]");
-    db_->ReleaseSnapshot(snapshot);
-
-    // clear database
-    Delete(1, "foo");
-    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
-                           nullptr);
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
-
-    // Case 5: snapshot followed by a put followed by another Put
-    // Only the last put should remain.
-    const Snapshot* snapshot1 = db_->GetSnapshot();
-    Put(1, "foo", "v8");
-    Put(1, "foo", "v9");
-    ASSERT_OK(Flush(1));
-    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v9 ]");
-    db_->ReleaseSnapshot(snapshot1);
-  } while (ChangeCompactOptions());
-}
-
-namespace {
-std::vector<std::uint64_t> ListSpecificFiles(
-    Env* env, const std::string& path, const FileType expected_file_type) {
-  std::vector<std::string> files;
-  std::vector<uint64_t> file_numbers;
-  env->GetChildren(path, &files);
-  uint64_t number;
-  FileType type;
-  for (size_t i = 0; i < files.size(); ++i) {
-    if (ParseFileName(files[i], &number, &type)) {
-      if (type == expected_file_type) {
-        file_numbers.push_back(number);
-      }
-    }
-  }
-  return file_numbers;
-}
-
-std::vector<std::uint64_t> ListTableFiles(Env* env, const std::string& path) {
-  return ListSpecificFiles(env, path, kTableFile);
-}
-}  // namespace
-
-TEST_F(DBTest, FlushOneColumnFamily) {
-  Options options = CurrentOptions();
-  CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich",
-                         "alyosha", "popovich"},
-                        options);
-
-  ASSERT_OK(Put(0, "Default", "Default"));
-  ASSERT_OK(Put(1, "pikachu", "pikachu"));
-  ASSERT_OK(Put(2, "ilya", "ilya"));
-  ASSERT_OK(Put(3, "muromec", "muromec"));
-  ASSERT_OK(Put(4, "dobrynia", "dobrynia"));
-  ASSERT_OK(Put(5, "nikitich", "nikitich"));
-  ASSERT_OK(Put(6, "alyosha", "alyosha"));
-  ASSERT_OK(Put(7, "popovich", "popovich"));
-
-  for (int i = 0; i < 8; ++i) {
-    Flush(i);
-    auto tables = ListTableFiles(env_, dbname_);
-    ASSERT_EQ(tables.size(), i + 1U);
-  }
-}
-
-#ifndef ROCKSDB_LITE
-// In https://reviews.facebook.net/D20661 we change
-// recovery behavior: previously for each log file each column family
-// memtable was flushed, even it was empty. Now it's changed:
-// we try to create the smallest number of table files by merging
-// updates from multiple logs
-TEST_F(DBTest, RecoverCheckFileAmountWithSmallWriteBuffer) {
-  Options options = CurrentOptions();
-  options.write_buffer_size = 5000000;
-  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
-
-  // Since we will reopen DB with smaller write_buffer_size,
-  // each key will go to new SST file
-  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
-  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
-  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
-  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
-
-  ASSERT_OK(Put(3, Key(10), DummyString(1)));
-  // Make 'dobrynia' to be flushed and new WAL file to be created
-  ASSERT_OK(Put(2, Key(10), DummyString(7500000)));
-  ASSERT_OK(Put(2, Key(1), DummyString(1)));
-  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
-  {
-    auto tables = ListTableFiles(env_, dbname_);
-    ASSERT_EQ(tables.size(), static_cast<size_t>(1));
-    // Make sure 'dobrynia' was flushed: check sst files amount
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
-              static_cast<uint64_t>(1));
-  }
-  // New WAL file
-  ASSERT_OK(Put(1, Key(1), DummyString(1)));
-  ASSERT_OK(Put(1, Key(1), DummyString(1)));
-  ASSERT_OK(Put(3, Key(10), DummyString(1)));
-  ASSERT_OK(Put(3, Key(10), DummyString(1)));
-  ASSERT_OK(Put(3, Key(10), DummyString(1)));
-
-  options.write_buffer_size = 4096;
-  options.arena_block_size = 4096;
-  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
-                           options);
-  {
-    // No inserts => default is empty
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
-              static_cast<uint64_t>(0));
-    // First 4 keys goes to separate SSTs + 1 more SST for 2 smaller keys
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
-              static_cast<uint64_t>(5));
-    // 1 SST for big key + 1 SST for small one
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
-              static_cast<uint64_t>(2));
-    // 1 SST for all keys
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
-              static_cast<uint64_t>(1));
-  }
-}
-
-// In https://reviews.facebook.net/D20661 we change
-// recovery behavior: previously for each log file each column family
-// memtable was flushed, even it wasn't empty. Now it's changed:
-// we try to create the smallest number of table files by merging
-// updates from multiple logs
-TEST_F(DBTest, RecoverCheckFileAmount) {
-  Options options = CurrentOptions();
-  options.write_buffer_size = 100000;
-  options.arena_block_size = 4 * 1024;
-  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
-
-  ASSERT_OK(Put(0, Key(1), DummyString(1)));
-  ASSERT_OK(Put(1, Key(1), DummyString(1)));
-  ASSERT_OK(Put(2, Key(1), DummyString(1)));
-
-  // Make 'nikitich' memtable to be flushed
-  ASSERT_OK(Put(3, Key(10), DummyString(1002400)));
-  ASSERT_OK(Put(3, Key(1), DummyString(1)));
-  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
-  // 4 memtable are not flushed, 1 sst file
-  {
-    auto tables = ListTableFiles(env_, dbname_);
-    ASSERT_EQ(tables.size(), static_cast<size_t>(1));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
-              static_cast<uint64_t>(1));
-  }
-  // Memtable for 'nikitich' has flushed, new WAL file has opened
-  // 4 memtable still not flushed
-
-  // Write to new WAL file
-  ASSERT_OK(Put(0, Key(1), DummyString(1)));
-  ASSERT_OK(Put(1, Key(1), DummyString(1)));
-  ASSERT_OK(Put(2, Key(1), DummyString(1)));
-
-  // Fill up 'nikitich' one more time
-  ASSERT_OK(Put(3, Key(10), DummyString(1002400)));
-  // make it flush
-  ASSERT_OK(Put(3, Key(1), DummyString(1)));
-  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
-  // There are still 4 memtable not flushed, and 2 sst tables
-  ASSERT_OK(Put(0, Key(1), DummyString(1)));
-  ASSERT_OK(Put(1, Key(1), DummyString(1)));
-  ASSERT_OK(Put(2, Key(1), DummyString(1)));
-
-  {
-    auto tables = ListTableFiles(env_, dbname_);
-    ASSERT_EQ(tables.size(), static_cast<size_t>(2));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
-              static_cast<uint64_t>(2));
-  }
-
-  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
-                           options);
-  {
-    std::vector<uint64_t> table_files = ListTableFiles(env_, dbname_);
-    // Check, that records for 'default', 'dobrynia' and 'pikachu' from
-    // first, second and third WALs  went to the same SST.
-    // So, there is 6 SSTs: three  for 'nikitich', one for 'default', one for
-    // 'dobrynia', one for 'pikachu'
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
-              static_cast<uint64_t>(1));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
-              static_cast<uint64_t>(3));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
-              static_cast<uint64_t>(1));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
-              static_cast<uint64_t>(1));
-  }
-}
-
-TEST_F(DBTest, SharedWriteBuffer) {
-  Options options = CurrentOptions();
-  options.db_write_buffer_size = 100000;  // this is the real limit
-  options.write_buffer_size    = 500000;  // this is never hit
-  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
-
-  // Trigger a flush on CF "nikitich"
-  ASSERT_OK(Put(0, Key(1), DummyString(1)));
-  ASSERT_OK(Put(1, Key(1), DummyString(1)));
-  ASSERT_OK(Put(3, Key(1), DummyString(90000)));
-  ASSERT_OK(Put(2, Key(2), DummyString(20000)));
-  ASSERT_OK(Put(2, Key(1), DummyString(1)));
-  dbfull()->TEST_WaitForFlushMemTable(handles_[0]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
-  {
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
-              static_cast<uint64_t>(0));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
-              static_cast<uint64_t>(0));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
-              static_cast<uint64_t>(0));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
-              static_cast<uint64_t>(1));
-  }
-
-  // "dobrynia": 20KB
-  // Flush 'dobrynia'
-  ASSERT_OK(Put(3, Key(2), DummyString(40000)));
-  ASSERT_OK(Put(2, Key(2), DummyString(70000)));
-  ASSERT_OK(Put(0, Key(1), DummyString(1)));
-  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
-  {
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
-              static_cast<uint64_t>(0));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
-              static_cast<uint64_t>(0));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
-              static_cast<uint64_t>(1));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
-              static_cast<uint64_t>(1));
-  }
-
-  // "nikitich" still has has data of 80KB
-  // Inserting Data in "dobrynia" triggers "nikitich" flushing.
-  ASSERT_OK(Put(3, Key(2), DummyString(40000)));
-  ASSERT_OK(Put(2, Key(2), DummyString(40000)));
-  ASSERT_OK(Put(0, Key(1), DummyString(1)));
-  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
-  {
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
-              static_cast<uint64_t>(0));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
-              static_cast<uint64_t>(0));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
-              static_cast<uint64_t>(1));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
-              static_cast<uint64_t>(2));
-  }
-
-  // "dobrynia" still has 40KB
-  ASSERT_OK(Put(1, Key(2), DummyString(20000)));
-  ASSERT_OK(Put(0, Key(1), DummyString(10000)));
-  ASSERT_OK(Put(0, Key(1), DummyString(1)));
-  dbfull()->TEST_WaitForFlushMemTable(handles_[0]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
-  // This should triggers no flush
-  {
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
-              static_cast<uint64_t>(0));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
-              static_cast<uint64_t>(0));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
-              static_cast<uint64_t>(1));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
-              static_cast<uint64_t>(2));
-  }
-
-  // "default": 10KB, "pikachu": 20KB, "dobrynia": 40KB
-  ASSERT_OK(Put(1, Key(2), DummyString(40000)));
-  ASSERT_OK(Put(0, Key(1), DummyString(1)));
-  dbfull()->TEST_WaitForFlushMemTable(handles_[0]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
-  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
-  // This should triggers flush of "pikachu"
-  {
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
-              static_cast<uint64_t>(0));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
-              static_cast<uint64_t>(1));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
-              static_cast<uint64_t>(1));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
-              static_cast<uint64_t>(2));
-  }
-
-  // "default": 10KB, "dobrynia": 40KB
-  // Some remaining writes so 'default', 'dobrynia' and 'nikitich' flush on
-  // closure.
-  ASSERT_OK(Put(3, Key(1), DummyString(1)));
-  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
-                           options);
-  {
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
-              static_cast<uint64_t>(1));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
-              static_cast<uint64_t>(1));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
-              static_cast<uint64_t>(2));
-    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
-              static_cast<uint64_t>(3));
-  }
-}
-#endif  // ROCKSDB_LITE
-
-TEST_F(DBTest, PurgeInfoLogs) {
-  Options options = CurrentOptions();
-  options.keep_log_file_num = 5;
-  options.create_if_missing = true;
-  for (int mode = 0; mode <= 1; mode++) {
-    if (mode == 1) {
-      options.db_log_dir = dbname_ + "_logs";
-      env_->CreateDirIfMissing(options.db_log_dir);
-    } else {
-      options.db_log_dir = "";
-    }
-    for (int i = 0; i < 8; i++) {
-      Reopen(options);
-    }
-
-    std::vector<std::string> files;
-    env_->GetChildren(options.db_log_dir.empty() ? dbname_ : options.db_log_dir,
-                      &files);
-    int info_log_count = 0;
-    for (std::string file : files) {
-      if (file.find("LOG") != std::string::npos) {
-        info_log_count++;
-      }
-    }
-    ASSERT_EQ(5, info_log_count);
-
-    Destroy(options);
-    // For mode (1), test DestroyDB() to delete all the logs under DB dir.
-    // For mode (2), no info log file should have been put under DB dir.
-    std::vector<std::string> db_files;
-    env_->GetChildren(dbname_, &db_files);
-    for (std::string file : db_files) {
-      ASSERT_TRUE(file.find("LOG") == std::string::npos);
-    }
-
-    if (mode == 1) {
-      // Cleaning up
-      env_->GetChildren(options.db_log_dir, &files);
-      for (std::string file : files) {
-        env_->DeleteFile(options.db_log_dir + "/" + file);
-      }
-      env_->DeleteDir(options.db_log_dir);
-    }
-  }
-}
-
-TEST_F(DBTest, SyncMultipleLogs) {
-  const uint64_t kNumBatches = 2;
-  const int kBatchSize = 1000;
-
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  options.write_buffer_size = 4096;
-  Reopen(options);
-
-  WriteBatch batch;
-  WriteOptions wo;
-  wo.sync = true;
-
-  for (uint64_t b = 0; b < kNumBatches; b++) {
-    batch.Clear();
-    for (int i = 0; i < kBatchSize; i++) {
-      batch.Put(Key(i), DummyString(128));
-    }
-
-    dbfull()->Write(wo, &batch);
-  }
-
-  ASSERT_OK(dbfull()->SyncWAL());
-}
-
-#ifndef ROCKSDB_LITE
-//
-// Test WAL recovery for the various modes available
-//
-class RecoveryTestHelper {
- public:
-  // Number of WAL files to generate
-  static const int kWALFilesCount = 10;
-  // Starting number for the WAL file name like 00010.log
-  static const int kWALFileOffset = 10;
-  // Keys to be written per WAL file
-  static const int kKeysPerWALFile = 1024;
-  // Size of the value
-  static const int kValueSize = 10;
-
-  // Create WAL files with values filled in
-  static void FillData(DBTest* test, Options& options, const size_t wal_count,
-                       size_t& count) {
-    DBOptions& db_options = options;
-
-    count = 0;
-
-    shared_ptr<Cache> table_cache = NewLRUCache(50000, 16);
-    EnvOptions env_options;
-    WriteBuffer write_buffer(db_options.db_write_buffer_size);
-
-    unique_ptr<VersionSet> versions;
-    unique_ptr<WalManager> wal_manager;
-    WriteController write_controller;
-
-    versions.reset(new VersionSet(test->dbname_, &db_options, env_options,
-                                  table_cache.get(), &write_buffer,
-                                  &write_controller));
-
-    wal_manager.reset(new WalManager(db_options, env_options));
-
-    std::unique_ptr<log::Writer> current_log_writer;
-
-    for (size_t j = kWALFileOffset; j < wal_count + kWALFileOffset; j++) {
-      uint64_t current_log_number = j;
-      std::string fname = LogFileName(test->dbname_, current_log_number);
-      unique_ptr<WritableFile> file;
-      ASSERT_OK(db_options.env->NewWritableFile(fname, &file, env_options));
-      unique_ptr<WritableFileWriter> file_writer(
-          new WritableFileWriter(std::move(file), env_options));
-      current_log_writer.reset(
-          new log::Writer(std::move(file_writer), current_log_number,
-                          db_options.recycle_log_file_num > 0));
-
-      for (int i = 0; i < kKeysPerWALFile; i++) {
-        std::string key = "key" + ToString(count++);
-        std::string value = test->DummyString(kValueSize);
-        assert(current_log_writer.get() != nullptr);
-        uint64_t seq = versions->LastSequence() + 1;
-        WriteBatch batch;
-        batch.Put(key, value);
-        WriteBatchInternal::SetSequence(&batch, seq);
-        current_log_writer->AddRecord(WriteBatchInternal::Contents(&batch));
-        versions->SetLastSequence(seq);
-      }
-    }
-  }
-
-  // Recreate and fill the store with some data
-  static size_t FillData(DBTest* test, Options& options) {
-    options.create_if_missing = true;
-    test->DestroyAndReopen(options);
-    test->Close();
-
-    size_t count = 0;
-    FillData(test, options, kWALFilesCount, count);
-    return count;
-  }
-
-  // Read back all the keys we wrote and return the number of keys found
-  static size_t GetData(DBTest* test) {
-    size_t count = 0;
-    for (size_t i = 0; i < kWALFilesCount * kKeysPerWALFile; i++) {
-      if (test->Get("key" + ToString(i)) != "NOT_FOUND") {
-        ++count;
-      }
-    }
-    return count;
-  }
-
-  // Manuall corrupt the specified WAL
-  static void CorruptWAL(DBTest* test, Options& options, const double off,
-                         const double len, const int wal_file_id,
-                         const bool trunc = false) {
-    Env* env = options.env;
-    std::string fname = LogFileName(test->dbname_, wal_file_id);
-    uint64_t size;
-    ASSERT_OK(env->GetFileSize(fname, &size));
-    ASSERT_GT(size, 0);
-#ifdef OS_WIN
-    // Windows disk cache behaves differently. When we truncate
-    // the original content is still in the cache due to the original
-    // handle is still open. Generally, in Windows, one prohibits
-    // shared access to files and it is not needed for WAL but we allow
-    // it to induce corruption at various tests.
-    test->Close();
-#endif
-    if (trunc) {
-      ASSERT_EQ(0, truncate(fname.c_str(),
-        static_cast<int64_t>(size * off)));
-    } else {
-      InduceCorruption(fname, static_cast<size_t>(size * off),
-        static_cast<size_t>(size * len));
-    }
-  }
-
-  // Overwrite data with 'a' from offset for length len
-  static void InduceCorruption(const std::string& filename, size_t offset,
-                               size_t len) {
-    ASSERT_GT(len, 0U);
-
-    int fd = open(filename.c_str(), O_RDWR);
-
-    ASSERT_GT(fd, 0);
-    ASSERT_EQ(offset, lseek(fd, static_cast<long>(offset), SEEK_SET));
-
-    void* buf = alloca(len);
-    memset(buf, 'a', len);
-    ASSERT_EQ(len, write(fd, buf, static_cast<unsigned int>(len)));
-
-    close(fd);
-  }
-};
-
-// Test scope:
-// - We expect to open the data store when there is incomplete trailing writes
-// at the end of any of the logs
-// - We do not expect to open the data store for corruption
-TEST_F(DBTest, kTolerateCorruptedTailRecords) {
-  const int jstart = RecoveryTestHelper::kWALFileOffset;
-  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
-
-  for (auto trunc : {true, false}) {        /* Corruption style */
-    for (int i = 0; i < 4; i++) {           /* Corruption offset position */
-      for (int j = jstart; j < jend; j++) { /* WAL file */
-        // Fill data for testing
-        Options options = CurrentOptions();
-        const size_t row_count = RecoveryTestHelper::FillData(this, options);
-        // test checksum failure or parsing
-        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
-                                       /*len%=*/.1, /*wal=*/j, trunc);
-
-        if (trunc) {
-          options.wal_recovery_mode =
-              WALRecoveryMode::kTolerateCorruptedTailRecords;
-          options.create_if_missing = false;
-          ASSERT_OK(TryReopen(options));
-          const size_t recovered_row_count = RecoveryTestHelper::GetData(this);
-          ASSERT_TRUE(i == 0 || recovered_row_count > 0);
-          ASSERT_LT(recovered_row_count, row_count);
-        } else {
-          options.wal_recovery_mode =
-              WALRecoveryMode::kTolerateCorruptedTailRecords;
-          ASSERT_NOK(TryReopen(options));
-        }
-      }
-    }
-  }
-}
-
-// Test scope:
-// We don't expect the data store to be opened if there is any corruption
-// (leading, middle or trailing -- incomplete writes or corruption)
-TEST_F(DBTest, kAbsoluteConsistency) {
-  const int jstart = RecoveryTestHelper::kWALFileOffset;
-  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
-
-  // Verify clean slate behavior
-  Options options = CurrentOptions();
-  const size_t row_count = RecoveryTestHelper::FillData(this, options);
-  options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency;
-  options.create_if_missing = false;
-  ASSERT_OK(TryReopen(options));
-  ASSERT_EQ(RecoveryTestHelper::GetData(this), row_count);
-
-  for (auto trunc : {true, false}) { /* Corruption style */
-    for (int i = 0; i < 4; i++) {    /* Corruption offset position */
-      if (trunc && i == 0) {
-        continue;
-      }
-
-      for (int j = jstart; j < jend; j++) { /* wal files */
-        // fill with new date
-        RecoveryTestHelper::FillData(this, options);
-        // corrupt the wal
-        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
-                                       /*len%=*/.1, j, trunc);
-        // verify
-        options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency;
-        options.create_if_missing = false;
-        ASSERT_NOK(TryReopen(options));
-      }
-    }
-  }
-}
-
-// Test scope:
-// - We expect to open data store under all circumstances
-// - We expect only data upto the point where the first error was encountered
-TEST_F(DBTest, kPointInTimeRecovery) {
-  const int jstart = RecoveryTestHelper::kWALFileOffset;
-  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
-  const int maxkeys =
-      RecoveryTestHelper::kWALFilesCount * RecoveryTestHelper::kKeysPerWALFile;
-
-  for (auto trunc : {true, false}) {        /* Corruption style */
-    for (int i = 0; i < 4; i++) {           /* Offset of corruption */
-      for (int j = jstart; j < jend; j++) { /* WAL file */
-        // Fill data for testing
-        Options options = CurrentOptions();
-        const size_t row_count = RecoveryTestHelper::FillData(this, options);
-
-        // Corrupt the wal
-        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
-                                       /*len%=*/.1, j, trunc);
-
-        // Verify
-        options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
-        options.create_if_missing = false;
-        ASSERT_OK(TryReopen(options));
-
-        // Probe data for invariants
-        size_t recovered_row_count = RecoveryTestHelper::GetData(this);
-        ASSERT_LT(recovered_row_count, row_count);
-
-        bool expect_data = true;
-        for (size_t k = 0; k < maxkeys; ++k) {
-          bool found = Get("key" + ToString(i)) != "NOT_FOUND";
-          if (expect_data && !found) {
-            expect_data = false;
-          }
-          ASSERT_EQ(found, expect_data);
-        }
-
-        const size_t min = RecoveryTestHelper::kKeysPerWALFile *
-                           (j - RecoveryTestHelper::kWALFileOffset);
-        ASSERT_GE(recovered_row_count, min);
-        if (!trunc && i != 0) {
-          const size_t max = RecoveryTestHelper::kKeysPerWALFile *
-                             (j - RecoveryTestHelper::kWALFileOffset + 1);
-          ASSERT_LE(recovered_row_count, max);
-        }
-      }
-    }
-  }
-}
-
-// Test scope:
-// - We expect to open the data store under all scenarios
-// - We expect to have recovered records past the corruption zone
-TEST_F(DBTest, kSkipAnyCorruptedRecords) {
-  const int jstart = RecoveryTestHelper::kWALFileOffset;
-  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
-
-  for (auto trunc : {true, false}) {        /* Corruption style */
-    for (int i = 0; i < 4; i++) {           /* Corruption offset */
-      for (int j = jstart; j < jend; j++) { /* wal files */
-        // Fill data for testing
-        Options options = CurrentOptions();
-        const size_t row_count = RecoveryTestHelper::FillData(this, options);
-
-        // Corrupt the WAL
-        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
-                                       /*len%=*/.1, j, trunc);
-
-        // Verify behavior
-        options.wal_recovery_mode = WALRecoveryMode::kSkipAnyCorruptedRecords;
-        options.create_if_missing = false;
-        ASSERT_OK(TryReopen(options));
-
-        // Probe data for invariants
-        size_t recovered_row_count = RecoveryTestHelper::GetData(this);
-        ASSERT_LT(recovered_row_count, row_count);
-
-        if (!trunc) {
-          ASSERT_TRUE(i != 0 || recovered_row_count > 0);
-        }
-      }
-    }
-  }
-}
-
-// Multi-threaded test:
-namespace {
-
-static const int kColumnFamilies = 10;
-static const int kNumThreads = 10;
-static const int kTestSeconds = 10;
-static const int kNumKeys = 1000;
-
-struct MTState {
-  DBTest* test;
-  std::atomic<bool> stop;
-  std::atomic<int> counter[kNumThreads];
-  std::atomic<bool> thread_done[kNumThreads];
-};
-
-struct MTThread {
-  MTState* state;
-  int id;
-};
-
-static void MTThreadBody(void* arg) {
-  MTThread* t = reinterpret_cast<MTThread*>(arg);
-  int id = t->id;
-  DB* db = t->state->test->db_;
-  int counter = 0;
-  fprintf(stderr, "... starting thread %d\n", id);
-  Random rnd(1000 + id);
-  char valbuf[1500];
-  while (t->state->stop.load(std::memory_order_acquire) == false) {
-    t->state->counter[id].store(counter, std::memory_order_release);
-
-    int key = rnd.Uniform(kNumKeys);
-    char keybuf[20];
-    snprintf(keybuf, sizeof(keybuf), "%016d", key);
-
-    if (rnd.OneIn(2)) {
-      // Write values of the form <key, my id, counter, cf, unique_id>.
-      // into each of the CFs
-      // We add some padding for force compactions.
-      int unique_id = rnd.Uniform(1000000);
-
-      // Half of the time directly use WriteBatch. Half of the time use
-      // WriteBatchWithIndex.
-      if (rnd.OneIn(2)) {
-        WriteBatch batch;
-        for (int cf = 0; cf < kColumnFamilies; ++cf) {
-          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
-                   static_cast<int>(counter), cf, unique_id);
-          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
-        }
-        ASSERT_OK(db->Write(WriteOptions(), &batch));
-      } else {
-        WriteBatchWithIndex batch(db->GetOptions().comparator);
-        for (int cf = 0; cf < kColumnFamilies; ++cf) {
-          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
-                   static_cast<int>(counter), cf, unique_id);
-          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
-        }
-        ASSERT_OK(db->Write(WriteOptions(), batch.GetWriteBatch()));
-      }
-    } else {
-      // Read a value and verify that it matches the pattern written above
-      // and that writes to all column families were atomic (unique_id is the
-      // same)
-      std::vector<Slice> keys(kColumnFamilies, Slice(keybuf));
-      std::vector<std::string> values;
-      std::vector<Status> statuses =
-          db->MultiGet(ReadOptions(), t->state->test->handles_, keys, &values);
-      Status s = statuses[0];
-      // all statuses have to be the same
-      for (size_t i = 1; i < statuses.size(); ++i) {
-        // they are either both ok or both not-found
-        ASSERT_TRUE((s.ok() && statuses[i].ok()) ||
-                    (s.IsNotFound() && statuses[i].IsNotFound()));
-      }
-      if (s.IsNotFound()) {
-        // Key has not yet been written
-      } else {
-        // Check that the writer thread counter is >= the counter in the value
-        ASSERT_OK(s);
-        int unique_id = -1;
-        for (int i = 0; i < kColumnFamilies; ++i) {
-          int k, w, c, cf, u;
-          ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w,
-                              &c, &cf, &u))
-              << values[i];
-          ASSERT_EQ(k, key);
-          ASSERT_GE(w, 0);
-          ASSERT_LT(w, kNumThreads);
-          ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
-          ASSERT_EQ(cf, i);
-          if (i == 0) {
-            unique_id = u;
-          } else {
-            // this checks that updates across column families happened
-            // atomically -- all unique ids are the same
-            ASSERT_EQ(u, unique_id);
-          }
-        }
-      }
-    }
-    counter++;
-  }
-  t->state->thread_done[id].store(true, std::memory_order_release);
-  fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
-}
-
-}  // namespace
-
-class MultiThreadedDBTest : public DBTest,
-                            public ::testing::WithParamInterface<int> {
- public:
-  virtual void SetUp() override { option_config_ = GetParam(); }
-
-  static std::vector<int> GenerateOptionConfigs() {
-    std::vector<int> optionConfigs;
-    for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) {
-      // skip as HashCuckooRep does not support snapshot
-      if (optionConfig != kHashCuckoo) {
-        optionConfigs.push_back(optionConfig);
-      }
-    }
-    return optionConfigs;
-  }
-};
-
-TEST_P(MultiThreadedDBTest, MultiThreaded) {
-  anon::OptionsOverride options_override;
-  options_override.skip_policy = kSkipNoSnapshot;
-  std::vector<std::string> cfs;
-  for (int i = 1; i < kColumnFamilies; ++i) {
-    cfs.push_back(ToString(i));
-  }
-  CreateAndReopenWithCF(cfs, CurrentOptions(options_override));
-  // Initialize state
-  MTState mt;
-  mt.test = this;
-  mt.stop.store(false, std::memory_order_release);
-  for (int id = 0; id < kNumThreads; id++) {
-    mt.counter[id].store(0, std::memory_order_release);
-    mt.thread_done[id].store(false, std::memory_order_release);
-  }
-
-  // Start threads
-  MTThread thread[kNumThreads];
-  for (int id = 0; id < kNumThreads; id++) {
-    thread[id].state = &mt;
-    thread[id].id = id;
-    env_->StartThread(MTThreadBody, &thread[id]);
-  }
-
-  // Let them run for a while
-  env_->SleepForMicroseconds(kTestSeconds * 1000000);
-
-  // Stop the threads and wait for them to finish
-  mt.stop.store(true, std::memory_order_release);
-  for (int id = 0; id < kNumThreads; id++) {
-    while (mt.thread_done[id].load(std::memory_order_acquire) == false) {
-      env_->SleepForMicroseconds(100000);
-    }
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(
-    MultiThreaded, MultiThreadedDBTest,
-    ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()));
-#endif  // ROCKSDB_LITE
-
-// Group commit test:
-namespace {
-
-static const int kGCNumThreads = 4;
-static const int kGCNumKeys = 1000;
-
-struct GCThread {
-  DB* db;
-  int id;
-  std::atomic<bool> done;
-};
-
-static void GCThreadBody(void* arg) {
-  GCThread* t = reinterpret_cast<GCThread*>(arg);
-  int id = t->id;
-  DB* db = t->db;
-  WriteOptions wo;
-
-  for (int i = 0; i < kGCNumKeys; ++i) {
-    std::string kv(ToString(i + id * kGCNumKeys));
-    ASSERT_OK(db->Put(wo, kv, kv));
-  }
-  t->done = true;
-}
-
-}  // namespace
-
-TEST_F(DBTest, GroupCommitTest) {
-  do {
-    Options options = CurrentOptions();
-    options.env = env_;
-    env_->log_write_slowdown_.store(100);
-    options.statistics = rocksdb::CreateDBStatistics();
-    Reopen(options);
-
-    // Start threads
-    GCThread thread[kGCNumThreads];
-    for (int id = 0; id < kGCNumThreads; id++) {
-      thread[id].id = id;
-      thread[id].db = db_;
-      thread[id].done = false;
-      env_->StartThread(GCThreadBody, &thread[id]);
-    }
-
-    for (int id = 0; id < kGCNumThreads; id++) {
-      while (thread[id].done == false) {
-        env_->SleepForMicroseconds(100000);
-      }
-    }
-    env_->log_write_slowdown_.store(0);
-
-    ASSERT_GT(TestGetTickerCount(options, WRITE_DONE_BY_OTHER), 0);
-
-    std::vector<std::string> expected_db;
-    for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) {
-      expected_db.push_back(ToString(i));
-    }
-    sort(expected_db.begin(), expected_db.end());
-
-    Iterator* itr = db_->NewIterator(ReadOptions());
-    itr->SeekToFirst();
-    for (auto x : expected_db) {
-      ASSERT_TRUE(itr->Valid());
-      ASSERT_EQ(itr->key().ToString(), x);
-      ASSERT_EQ(itr->value().ToString(), x);
-      itr->Next();
-    }
-    ASSERT_TRUE(!itr->Valid());
-    delete itr;
-
-    HistogramData hist_data = {0, 0, 0, 0, 0};
-    options.statistics->histogramData(DB_WRITE, &hist_data);
-    ASSERT_GT(hist_data.average, 0.0);
-  } while (ChangeOptions(kSkipNoSeekToLast));
-}
-
-namespace {
-typedef std::map<std::string, std::string> KVMap;
-}
-
-class ModelDB: public DB {
- public:
-  class ModelSnapshot : public Snapshot {
-   public:
-    KVMap map_;
-
-    virtual SequenceNumber GetSequenceNumber() const override {
-      // no need to call this
-      assert(false);
-      return 0;
-    }
-  };
-
-  explicit ModelDB(const Options& options) : options_(options) {}
-  using DB::Put;
-  virtual Status Put(const WriteOptions& o, ColumnFamilyHandle* cf,
-                     const Slice& k, const Slice& v) override {
-    WriteBatch batch;
-    batch.Put(cf, k, v);
-    return Write(o, &batch);
-  }
-  using DB::Delete;
-  virtual Status Delete(const WriteOptions& o, ColumnFamilyHandle* cf,
-                        const Slice& key) override {
-    WriteBatch batch;
-    batch.Delete(cf, key);
-    return Write(o, &batch);
-  }
-  using DB::SingleDelete;
-  virtual Status SingleDelete(const WriteOptions& o, ColumnFamilyHandle* cf,
-                              const Slice& key) override {
-    WriteBatch batch;
-    batch.SingleDelete(cf, key);
-    return Write(o, &batch);
-  }
-  using DB::Merge;
-  virtual Status Merge(const WriteOptions& o, ColumnFamilyHandle* cf,
-                       const Slice& k, const Slice& v) override {
-    WriteBatch batch;
-    batch.Merge(cf, k, v);
-    return Write(o, &batch);
-  }
-  using DB::Get;
-  virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* cf,
-                     const Slice& key, std::string* value) override {
-    return Status::NotSupported(key);
-  }
-
-  using DB::MultiGet;
-  virtual std::vector<Status> MultiGet(
-      const ReadOptions& options,
-      const std::vector<ColumnFamilyHandle*>& column_family,
-      const std::vector<Slice>& keys,
-      std::vector<std::string>* values) override {
-    std::vector<Status> s(keys.size(),
-                          Status::NotSupported("Not implemented."));
-    return s;
-  }
-
-#ifndef ROCKSDB_LITE
-  using DB::AddFile;
-  virtual Status AddFile(ColumnFamilyHandle* column_family,
-                         const ExternalSstFileInfo* file_path,
-                         bool move_file) override {
-    return Status::NotSupported("Not implemented.");
-  }
-  virtual Status AddFile(ColumnFamilyHandle* column_family,
-                         const std::string& file_path,
-                         bool move_file) override {
-    return Status::NotSupported("Not implemented.");
-  }
-
-  using DB::GetPropertiesOfAllTables;
-  virtual Status GetPropertiesOfAllTables(
-      ColumnFamilyHandle* column_family,
-      TablePropertiesCollection* props) override {
-    return Status();
-  }
-
-  virtual Status GetPropertiesOfTablesInRange(
-      ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
-      TablePropertiesCollection* props) override {
-    return Status();
-  }
-#endif  // ROCKSDB_LITE
-
-  using DB::KeyMayExist;
-  virtual bool KeyMayExist(const ReadOptions& options,
-                           ColumnFamilyHandle* column_family, const Slice& key,
-                           std::string* value,
-                           bool* value_found = nullptr) override {
-    if (value_found != nullptr) {
-      *value_found = false;
-    }
-    return true; // Not Supported directly
-  }
-  using DB::NewIterator;
-  virtual Iterator* NewIterator(const ReadOptions& options,
-                                ColumnFamilyHandle* column_family) override {
-    if (options.snapshot == nullptr) {
-      KVMap* saved = new KVMap;
-      *saved = map_;
-      return new ModelIter(saved, true);
-    } else {
-      const KVMap* snapshot_state =
-          &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
-      return new ModelIter(snapshot_state, false);
-    }
-  }
-  virtual Status NewIterators(
-      const ReadOptions& options,
-      const std::vector<ColumnFamilyHandle*>& column_family,
-      std::vector<Iterator*>* iterators) override {
-    return Status::NotSupported("Not supported yet");
-  }
-  virtual const Snapshot* GetSnapshot() override {
-    ModelSnapshot* snapshot = new ModelSnapshot;
-    snapshot->map_ = map_;
-    return snapshot;
-  }
-
-  virtual void ReleaseSnapshot(const Snapshot* snapshot) override {
-    delete reinterpret_cast<const ModelSnapshot*>(snapshot);
-  }
-
-  virtual Status Write(const WriteOptions& options,
-                       WriteBatch* batch) override {
-    class Handler : public WriteBatch::Handler {
-     public:
-      KVMap* map_;
-      virtual void Put(const Slice& key, const Slice& value) override {
-        (*map_)[key.ToString()] = value.ToString();
-      }
-      virtual void Merge(const Slice& key, const Slice& value) override {
-        // ignore merge for now
-        //(*map_)[key.ToString()] = value.ToString();
-      }
-      virtual void Delete(const Slice& key) override {
-        map_->erase(key.ToString());
-      }
-    };
-    Handler handler;
-    handler.map_ = &map_;
-    return batch->Iterate(&handler);
-  }
-
-  using DB::GetProperty;
-  virtual bool GetProperty(ColumnFamilyHandle* column_family,
-                           const Slice& property, std::string* value) override {
-    return false;
-  }
-  using DB::GetIntProperty;
-  virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
-                              const Slice& property, uint64_t* value) override {
-    return false;
-  }
-  using DB::GetAggregatedIntProperty;
-  virtual bool GetAggregatedIntProperty(const Slice& property,
-                                        uint64_t* value) override {
-    return false;
-  }
-  using DB::GetApproximateSizes;
-  virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
-                                   const Range* range, int n, uint64_t* sizes,
-                                   bool include_memtable) override {
-    for (int i = 0; i < n; i++) {
-      sizes[i] = 0;
-    }
-  }
-  using DB::CompactRange;
-  virtual Status CompactRange(const CompactRangeOptions& options,
-                              ColumnFamilyHandle* column_family,
-                              const Slice* start, const Slice* end) override {
-    return Status::NotSupported("Not supported operation.");
-  }
-
-  using DB::CompactFiles;
-  virtual Status CompactFiles(
-      const CompactionOptions& compact_options,
-      ColumnFamilyHandle* column_family,
-      const std::vector<std::string>& input_file_names,
-      const int output_level, const int output_path_id = -1) override {
-    return Status::NotSupported("Not supported operation.");
-  }
-
-  Status PauseBackgroundWork() override {
-    return Status::NotSupported("Not supported operation.");
-  }
-
-  Status ContinueBackgroundWork() override {
-    return Status::NotSupported("Not supported operation.");
-  }
-
-  Status EnableAutoCompaction(
-      const std::vector<ColumnFamilyHandle*>& column_family_handles) override {
-    return Status::NotSupported("Not supported operation.");
-  }
-
-  using DB::NumberLevels;
-  virtual int NumberLevels(ColumnFamilyHandle* column_family) override {
-    return 1;
-  }
-
-  using DB::MaxMemCompactionLevel;
-  virtual int MaxMemCompactionLevel(
-      ColumnFamilyHandle* column_family) override {
-    return 1;
-  }
-
-  using DB::Level0StopWriteTrigger;
-  virtual int Level0StopWriteTrigger(
-      ColumnFamilyHandle* column_family) override {
-    return -1;
-  }
-
-  virtual const std::string& GetName() const override { return name_; }
-
-  virtual Env* GetEnv() const override { return nullptr; }
-
-  using DB::GetOptions;
-  virtual const Options& GetOptions(
-      ColumnFamilyHandle* column_family) const override {
-    return options_;
-  }
-
-  using DB::GetDBOptions;
-  virtual const DBOptions& GetDBOptions() const override { return options_; }
-
-  using DB::Flush;
-  virtual Status Flush(const rocksdb::FlushOptions& options,
-                       ColumnFamilyHandle* column_family) override {
-    Status ret;
-    return ret;
-  }
-
-  virtual Status SyncWAL() override {
-    return Status::OK();
-  }
-
-#ifndef ROCKSDB_LITE
-  virtual Status DisableFileDeletions() override { return Status::OK(); }
-
-  virtual Status EnableFileDeletions(bool force) override {
-    return Status::OK();
-  }
-  virtual Status GetLiveFiles(std::vector<std::string>&, uint64_t* size,
-                              bool flush_memtable = true) override {
-    return Status::OK();
-  }
-
-  virtual Status GetSortedWalFiles(VectorLogPtr& files) override {
-    return Status::OK();
-  }
-
-  virtual Status DeleteFile(std::string name) override { return Status::OK(); }
-
-  virtual Status GetUpdatesSince(
-      rocksdb::SequenceNumber, unique_ptr<rocksdb::TransactionLogIterator>*,
-      const TransactionLogIterator::ReadOptions&
-          read_options = TransactionLogIterator::ReadOptions()) override {
-    return Status::NotSupported("Not supported in Model DB");
-  }
-
-  virtual void GetColumnFamilyMetaData(
-      ColumnFamilyHandle* column_family,
-      ColumnFamilyMetaData* metadata) override {}
-#endif  // ROCKSDB_LITE
-
-  virtual Status GetDbIdentity(std::string& identity) const override {
-    return Status::OK();
-  }
-
-  virtual SequenceNumber GetLatestSequenceNumber() const override { return 0; }
-
-  virtual ColumnFamilyHandle* DefaultColumnFamily() const override {
-    return nullptr;
-  }
-
- private:
-  class ModelIter: public Iterator {
-   public:
-    ModelIter(const KVMap* map, bool owned)
-        : map_(map), owned_(owned), iter_(map_->end()) {
-    }
-    ~ModelIter() {
-      if (owned_) delete map_;
-    }
-    virtual bool Valid() const override { return iter_ != map_->end(); }
-    virtual void SeekToFirst() override { iter_ = map_->begin(); }
-    virtual void SeekToLast() override {
-      if (map_->empty()) {
-        iter_ = map_->end();
-      } else {
-        iter_ = map_->find(map_->rbegin()->first);
-      }
-    }
-    virtual void Seek(const Slice& k) override {
-      iter_ = map_->lower_bound(k.ToString());
-    }
-    virtual void Next() override { ++iter_; }
-    virtual void Prev() override {
-      if (iter_ == map_->begin()) {
-        iter_ = map_->end();
-        return;
-      }
-      --iter_;
-    }
-
-    virtual Slice key() const override { return iter_->first; }
-    virtual Slice value() const override { return iter_->second; }
-    virtual Status status() const override { return Status::OK(); }
-
-   private:
-    const KVMap* const map_;
-    const bool owned_;  // Do we own map_
-    KVMap::const_iterator iter_;
-  };
-  const Options options_;
-  KVMap map_;
-  std::string name_ = "";
-};
-
-static std::string RandomKey(Random* rnd, int minimum = 0) {
-  int len;
-  do {
-    len = (rnd->OneIn(3)
-           ? 1                // Short sometimes to encourage collisions
-           : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
-  } while (len < minimum);
-  return test::RandomKey(rnd, len);
-}
-
-static bool CompareIterators(int step,
-                             DB* model,
-                             DB* db,
-                             const Snapshot* model_snap,
-                             const Snapshot* db_snap) {
-  ReadOptions options;
-  options.snapshot = model_snap;
-  Iterator* miter = model->NewIterator(options);
-  options.snapshot = db_snap;
-  Iterator* dbiter = db->NewIterator(options);
-  bool ok = true;
-  int count = 0;
-  for (miter->SeekToFirst(), dbiter->SeekToFirst();
-       ok && miter->Valid() && dbiter->Valid();
-       miter->Next(), dbiter->Next()) {
-    count++;
-    if (miter->key().compare(dbiter->key()) != 0) {
-      fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n",
-              step,
-              EscapeString(miter->key()).c_str(),
-              EscapeString(dbiter->key()).c_str());
-      ok = false;
-      break;
-    }
-
-    if (miter->value().compare(dbiter->value()) != 0) {
-      fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
-              step,
-              EscapeString(miter->key()).c_str(),
-              EscapeString(miter->value()).c_str(),
-              EscapeString(miter->value()).c_str());
-      ok = false;
-    }
-  }
-
-  if (ok) {
-    if (miter->Valid() != dbiter->Valid()) {
-      fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
-              step, miter->Valid(), dbiter->Valid());
-      ok = false;
-    }
-  }
-  delete miter;
-  delete dbiter;
-  return ok;
-}
-
-class DBTestRandomized : public DBTest,
-                         public ::testing::WithParamInterface<int> {
- public:
-  virtual void SetUp() override { option_config_ = GetParam(); }
-
-  static std::vector<int> GenerateOptionConfigs() {
-    std::vector<int> option_configs;
-    // skip cuckoo hash as it does not support snapshot.
-    for (int option_config = kDefault; option_config < kEnd; ++option_config) {
-      if (!ShouldSkipOptions(option_config, kSkipDeletesFilterFirst |
-                                                kSkipNoSeekToLast |
-                                                kSkipHashCuckoo)) {
-        option_configs.push_back(option_config);
-      }
-    }
-    option_configs.push_back(kBlockBasedTableWithIndexRestartInterval);
-    return option_configs;
-  }
-};
-
-INSTANTIATE_TEST_CASE_P(
-    DBTestRandomized, DBTestRandomized,
-    ::testing::ValuesIn(DBTestRandomized::GenerateOptionConfigs()));
-
-TEST_P(DBTestRandomized, Randomized) {
-  anon::OptionsOverride options_override;
-  options_override.skip_policy = kSkipNoSnapshot;
-  Options options = CurrentOptions(options_override);
-  DestroyAndReopen(options);
-
-  Random rnd(test::RandomSeed() + GetParam());
-  ModelDB model(options);
-    const int N = 10000;
-    const Snapshot* model_snap = nullptr;
-    const Snapshot* db_snap = nullptr;
-    std::string k, v;
-    for (int step = 0; step < N; step++) {
-      // TODO(sanjay): Test Get() works
-      int p = rnd.Uniform(100);
-      int minimum = 0;
-      if (option_config_ == kHashSkipList ||
-          option_config_ == kHashLinkList ||
-          option_config_ == kHashCuckoo ||
-          option_config_ == kPlainTableFirstBytePrefix ||
-          option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
-          option_config_ == kBlockBasedTableWithPrefixHashIndex) {
-        minimum = 1;
-      }
-      if (p < 45) {                               // Put
-        k = RandomKey(&rnd, minimum);
-        v = RandomString(&rnd,
-                         rnd.OneIn(20)
-                         ? 100 + rnd.Uniform(100)
-                         : rnd.Uniform(8));
-        ASSERT_OK(model.Put(WriteOptions(), k, v));
-        ASSERT_OK(db_->Put(WriteOptions(), k, v));
-      } else if (p < 90) {                        // Delete
-        k = RandomKey(&rnd, minimum);
-        ASSERT_OK(model.Delete(WriteOptions(), k));
-        ASSERT_OK(db_->Delete(WriteOptions(), k));
-      } else {                                    // Multi-element batch
-        WriteBatch b;
-        const int num = rnd.Uniform(8);
-        for (int i = 0; i < num; i++) {
-          if (i == 0 || !rnd.OneIn(10)) {
-            k = RandomKey(&rnd, minimum);
-          } else {
-            // Periodically re-use the same key from the previous iter, so
-            // we have multiple entries in the write batch for the same key
-          }
-          if (rnd.OneIn(2)) {
-            v = RandomString(&rnd, rnd.Uniform(10));
-            b.Put(k, v);
-          } else {
-            b.Delete(k);
-          }
-        }
-        ASSERT_OK(model.Write(WriteOptions(), &b));
-        ASSERT_OK(db_->Write(WriteOptions(), &b));
-      }
-
-      if ((step % 100) == 0) {
-        // For DB instances that use the hash index + block-based table, the
-        // iterator will be invalid right when seeking a non-existent key, right
-        // than return a key that is close to it.
-        if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
-            option_config_ != kBlockBasedTableWithPrefixHashIndex) {
-          ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
-          ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
-        }
-
-        // Save a snapshot from each DB this time that we'll use next
-        // time we compare things, to make sure the current state is
-        // preserved with the snapshot
-        if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
-        if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
-
-        Reopen(options);
-        ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
-
-        model_snap = model.GetSnapshot();
-        db_snap = db_->GetSnapshot();
-      }
-    }
-    if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
-    if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
-}
-
-TEST_F(DBTest, MultiGetSimple) {
-  do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    ASSERT_OK(Put(1, "k1", "v1"));
-    ASSERT_OK(Put(1, "k2", "v2"));
-    ASSERT_OK(Put(1, "k3", "v3"));
-    ASSERT_OK(Put(1, "k4", "v4"));
-    ASSERT_OK(Delete(1, "k4"));
-    ASSERT_OK(Put(1, "k5", "v5"));
-    ASSERT_OK(Delete(1, "no_key"));
-
-    std::vector<Slice> keys({"k1", "k2", "k3", "k4", "k5", "no_key"});
-
-    std::vector<std::string> values(20, "Temporary data to be overwritten");
-    std::vector<ColumnFamilyHandle*> cfs(keys.size(), handles_[1]);
-
-    std::vector<Status> s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
-    ASSERT_EQ(values.size(), keys.size());
-    ASSERT_EQ(values[0], "v1");
-    ASSERT_EQ(values[1], "v2");
-    ASSERT_EQ(values[2], "v3");
-    ASSERT_EQ(values[4], "v5");
-
-    ASSERT_OK(s[0]);
-    ASSERT_OK(s[1]);
-    ASSERT_OK(s[2]);
-    ASSERT_TRUE(s[3].IsNotFound());
-    ASSERT_OK(s[4]);
-    ASSERT_TRUE(s[5].IsNotFound());
-  } while (ChangeCompactOptions());
-}
-
-TEST_F(DBTest, MultiGetEmpty) {
-  do {
-    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-    // Empty Key Set
-    std::vector<Slice> keys;
-    std::vector<std::string> values;
-    std::vector<ColumnFamilyHandle*> cfs;
-    std::vector<Status> s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
-    ASSERT_EQ(s.size(), 0U);
-
-    // Empty Database, Empty Key Set
-    Options options = CurrentOptions();
-    options.create_if_missing = true;
-    DestroyAndReopen(options);
-    CreateAndReopenWithCF({"pikachu"}, options);
-    s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
-    ASSERT_EQ(s.size(), 0U);
-
-    // Empty Database, Search for Keys
-    keys.resize(2);
-    keys[0] = "a";
-    keys[1] = "b";
-    cfs.push_back(handles_[0]);
-    cfs.push_back(handles_[1]);
-    s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
-    ASSERT_EQ((int)s.size(), 2);
-    ASSERT_TRUE(s[0].IsNotFound() && s[1].IsNotFound());
-  } while (ChangeCompactOptions());
-}
-
-#ifndef ROCKSDB_LITE
-namespace {
-void PrefixScanInit(DBTest *dbtest) {
-  char buf[100];
-  std::string keystr;
-  const int small_range_sstfiles = 5;
-  const int big_range_sstfiles = 5;
-
-  // Generate 11 sst files with the following prefix ranges.
-  // GROUP 0: [0,10]                              (level 1)
-  // GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6]  (level 0)
-  // GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10]  (level 0)
-  //
-  // A seek with the previous API would do 11 random I/Os (to all the
-  // files).  With the new API and a prefix filter enabled, we should
-  // only do 2 random I/O, to the 2 files containing the key.
-
-  // GROUP 0
-  snprintf(buf, sizeof(buf), "%02d______:start", 0);
-  keystr = std::string(buf);
-  ASSERT_OK(dbtest->Put(keystr, keystr));
-  snprintf(buf, sizeof(buf), "%02d______:end", 10);
-  keystr = std::string(buf);
-  ASSERT_OK(dbtest->Put(keystr, keystr));
-  dbtest->Flush();
-  dbtest->dbfull()->CompactRange(CompactRangeOptions(), nullptr,
-                                 nullptr);  // move to level 1
-
-  // GROUP 1
-  for (int i = 1; i <= small_range_sstfiles; i++) {
-    snprintf(buf, sizeof(buf), "%02d______:start", i);
-    keystr = std::string(buf);
-    ASSERT_OK(dbtest->Put(keystr, keystr));
-    snprintf(buf, sizeof(buf), "%02d______:end", i+1);
-    keystr = std::string(buf);
-    ASSERT_OK(dbtest->Put(keystr, keystr));
-    dbtest->Flush();
-  }
-
-  // GROUP 2
-  for (int i = 1; i <= big_range_sstfiles; i++) {
-    snprintf(buf, sizeof(buf), "%02d______:start", 0);
-    keystr = std::string(buf);
-    ASSERT_OK(dbtest->Put(keystr, keystr));
-    snprintf(buf, sizeof(buf), "%02d______:end",
-             small_range_sstfiles+i+1);
-    keystr = std::string(buf);
-    ASSERT_OK(dbtest->Put(keystr, keystr));
-    dbtest->Flush();
-  }
-}
-}  // namespace
-
-TEST_F(DBTest, PrefixScan) {
-  XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip,
-             kSkipNoPrefix);
-  while (ChangeFilterOptions()) {
-    int count;
-    Slice prefix;
-    Slice key;
-    char buf[100];
-    Iterator* iter;
-    snprintf(buf, sizeof(buf), "03______:");
-    prefix = Slice(buf, 8);
-    key = Slice(buf, 9);
-    ASSERT_EQ(key.difference_offset(prefix), 8);
-    ASSERT_EQ(prefix.difference_offset(key), 8);
-    // db configs
-    env_->count_random_reads_ = true;
-    Options options = CurrentOptions();
-    options.env = env_;
-    options.prefix_extractor.reset(NewFixedPrefixTransform(8));
-    options.disable_auto_compactions = true;
-    options.max_background_compactions = 2;
-    options.create_if_missing = true;
-    options.memtable_factory.reset(NewHashSkipListRepFactory(16));
-
-    BlockBasedTableOptions table_options;
-    table_options.no_block_cache = true;
-    table_options.filter_policy.reset(NewBloomFilterPolicy(10));
-    table_options.whole_key_filtering = false;
-    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-
-    // 11 RAND I/Os
-    DestroyAndReopen(options);
-    PrefixScanInit(this);
-    count = 0;
-    env_->random_read_counter_.Reset();
-    iter = db_->NewIterator(ReadOptions());
-    for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
-      if (! iter->key().starts_with(prefix)) {
-        break;
-      }
-      count++;
-    }
-    ASSERT_OK(iter->status());
-    delete iter;
-    ASSERT_EQ(count, 2);
-    ASSERT_EQ(env_->random_read_counter_.Read(), 2);
-    Close();
-  }  // end of while
-  XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, 0);
-}
-#endif  // ROCKSDB_LITE
-
-TEST_F(DBTest, BlockBasedTablePrefixIndexTest) {
-  // create a DB with block prefix index
-  BlockBasedTableOptions table_options;
-  Options options = CurrentOptions();
-  table_options.index_type = BlockBasedTableOptions::kHashSearch;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
-
-
-  Reopen(options);
-  ASSERT_OK(Put("k1", "v1"));
-  Flush();
-  ASSERT_OK(Put("k2", "v2"));
-
-  // Reopen it without prefix extractor, make sure everything still works.
-  // RocksDB should just fall back to the binary index.
-  table_options.index_type = BlockBasedTableOptions::kBinarySearch;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  options.prefix_extractor.reset();
-
-  Reopen(options);
-  ASSERT_EQ("v1", Get("k1"));
-  ASSERT_EQ("v2", Get("k2"));
-}
-
-TEST_F(DBTest, ChecksumTest) {
-  BlockBasedTableOptions table_options;
-  Options options = CurrentOptions();
-
-  table_options.checksum = kCRC32c;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  Reopen(options);
-  ASSERT_OK(Put("a", "b"));
-  ASSERT_OK(Put("c", "d"));
-  ASSERT_OK(Flush());  // table with crc checksum
-
-  table_options.checksum = kxxHash;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  Reopen(options);
-  ASSERT_OK(Put("e", "f"));
-  ASSERT_OK(Put("g", "h"));
-  ASSERT_OK(Flush());  // table with xxhash checksum
-
-  table_options.checksum = kCRC32c;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  Reopen(options);
-  ASSERT_EQ("b", Get("a"));
-  ASSERT_EQ("d", Get("c"));
-  ASSERT_EQ("f", Get("e"));
-  ASSERT_EQ("h", Get("g"));
-
-  table_options.checksum = kCRC32c;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  Reopen(options);
-  ASSERT_EQ("b", Get("a"));
-  ASSERT_EQ("d", Get("c"));
-  ASSERT_EQ("f", Get("e"));
-  ASSERT_EQ("h", Get("g"));
-}
-
-#ifndef ROCKSDB_LITE
-TEST_P(DBTestWithParam, FIFOCompactionTest) {
-  for (int iter = 0; iter < 2; ++iter) {
-    // first iteration -- auto compaction
-    // second iteration -- manual compaction
-    Options options;
-    options.compaction_style = kCompactionStyleFIFO;
-    options.write_buffer_size = 100 << 10;                             // 100KB
-    options.arena_block_size = 4096;
-    options.compaction_options_fifo.max_table_files_size = 500 << 10;  // 500KB
-    options.compression = kNoCompression;
-    options.create_if_missing = true;
-    options.max_subcompactions = max_subcompactions_;
-    if (iter == 1) {
-      options.disable_auto_compactions = true;
-    }
-    options = CurrentOptions(options);
-    DestroyAndReopen(options);
-
-    Random rnd(301);
-    for (int i = 0; i < 6; ++i) {
-      for (int j = 0; j < 110; ++j) {
-        ASSERT_OK(Put(ToString(i * 100 + j), RandomString(&rnd, 980)));
-      }
-      // flush should happen here
-      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
-    }
-    if (iter == 0) {
-      ASSERT_OK(dbfull()->TEST_WaitForCompact());
-    } else {
-      CompactRangeOptions cro;
-      cro.exclusive_manual_compaction = exclusive_manual_compaction_;
-      ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
-    }
-    // only 5 files should survive
-    ASSERT_EQ(NumTableFilesAtLevel(0), 5);
-    for (int i = 0; i < 50; ++i) {
-      // these keys should be deleted in previous compaction
-      ASSERT_EQ("NOT_FOUND", Get(ToString(i)));
-    }
-  }
-}
-#endif  // ROCKSDB_LITE
-
-// verify that we correctly deprecated timeout_hint_us
-TEST_F(DBTest, SimpleWriteTimeoutTest) {
-  WriteOptions write_opt;
-  write_opt.timeout_hint_us = 0;
-  ASSERT_OK(Put(Key(1), Key(1) + std::string(100, 'v'), write_opt));
-  write_opt.timeout_hint_us = 10;
-  ASSERT_NOK(Put(Key(1), Key(1) + std::string(100, 'v'), write_opt));
-}
-
-#ifndef ROCKSDB_LITE
-/*
- * This test is not reliable enough as it heavily depends on disk behavior.
- */
-TEST_F(DBTest, RateLimitingTest) {
-  Options options = CurrentOptions();
-  options.write_buffer_size = 1 << 20;         // 1MB
-  options.level0_file_num_compaction_trigger = 2;
-  options.target_file_size_base = 1 << 20;     // 1MB
-  options.max_bytes_for_level_base = 4 << 20;  // 4MB
-  options.max_bytes_for_level_multiplier = 4;
-  options.compression = kNoCompression;
-  options.create_if_missing = true;
-  options.env = env_;
-  options.IncreaseParallelism(4);
-  DestroyAndReopen(options);
-
-  WriteOptions wo;
-  wo.disableWAL = true;
-
-  // # no rate limiting
-  Random rnd(301);
-  uint64_t start = env_->NowMicros();
-  // Write ~96M data
-  for (int64_t i = 0; i < (96 << 10); ++i) {
-    ASSERT_OK(Put(RandomString(&rnd, 32),
-                  RandomString(&rnd, (1 << 10) + 1), wo));
-  }
-  uint64_t elapsed = env_->NowMicros() - start;
-  double raw_rate = env_->bytes_written_ * 1000000.0 / elapsed;
-  Close();
-
-  // # rate limiting with 0.7 x threshold
-  options.rate_limiter.reset(
-    NewGenericRateLimiter(static_cast<int64_t>(0.7 * raw_rate)));
-  env_->bytes_written_ = 0;
-  DestroyAndReopen(options);
-
-  start = env_->NowMicros();
-  // Write ~96M data
-  for (int64_t i = 0; i < (96 << 10); ++i) {
-    ASSERT_OK(Put(RandomString(&rnd, 32),
-                  RandomString(&rnd, (1 << 10) + 1), wo));
-  }
-  elapsed = env_->NowMicros() - start;
-  Close();
-  ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
-  double ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
-  fprintf(stderr, "write rate ratio = %.2lf, expected 0.7\n", ratio);
-  ASSERT_TRUE(ratio < 0.8);
-
-  // # rate limiting with half of the raw_rate
-  options.rate_limiter.reset(
-    NewGenericRateLimiter(static_cast<int64_t>(raw_rate / 2)));
-  env_->bytes_written_ = 0;
-  DestroyAndReopen(options);
-
-  start = env_->NowMicros();
-  // Write ~96M data
-  for (int64_t i = 0; i < (96 << 10); ++i) {
-    ASSERT_OK(Put(RandomString(&rnd, 32),
-                  RandomString(&rnd, (1 << 10) + 1), wo));
-  }
-  elapsed = env_->NowMicros() - start;
-  Close();
-  ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
-  ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
-  fprintf(stderr, "write rate ratio = %.2lf, expected 0.5\n", ratio);
-  ASSERT_LT(ratio, 0.6);
-}
-
-TEST_F(DBTest, TableOptionsSanitizeTest) {
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  DestroyAndReopen(options);
-  ASSERT_EQ(db_->GetOptions().allow_mmap_reads, false);
-
-  options.table_factory.reset(new PlainTableFactory());
-  options.prefix_extractor.reset(NewNoopTransform());
-  Destroy(options);
-  ASSERT_TRUE(!TryReopen(options).IsNotSupported());
-
-  // Test for check of prefix_extractor when hash index is used for
-  // block-based table
-  BlockBasedTableOptions to;
-  to.index_type = BlockBasedTableOptions::kHashSearch;
-  options = CurrentOptions();
-  options.create_if_missing = true;
-  options.table_factory.reset(NewBlockBasedTableFactory(to));
-  ASSERT_TRUE(TryReopen(options).IsInvalidArgument());
-  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
-  ASSERT_OK(TryReopen(options));
-}
-
-TEST_F(DBTest, MmapAndBufferOptions) {
-  Options options = CurrentOptions();
-
-  // If allow_mmap_reads is on allow_os_buffer must also be on
-  // On Windows you can have either memory mapped file or a file
-  // with unbuffered access.
-#ifndef OS_WIN
-  options.allow_os_buffer = false;
-#endif
-  options.allow_mmap_reads = true;
-  ASSERT_NOK(TryReopen(options));
-
-  // All other combinations are acceptable
-  options.allow_os_buffer = true;
-  ASSERT_OK(TryReopen(options));
-
-  options.allow_os_buffer = false;
-  options.allow_mmap_reads = false;
-  ASSERT_OK(TryReopen(options));
-
-  options.allow_os_buffer = true;
-  ASSERT_OK(TryReopen(options));
-}
-
-TEST_F(DBTest, ConcurrentMemtableNotSupported) {
-  Options options = CurrentOptions();
-  options.allow_concurrent_memtable_write = true;
-  options.soft_pending_compaction_bytes_limit = 0;
-  options.hard_pending_compaction_bytes_limit = 100;
-  options.create_if_missing = true;
-
-  DestroyDB(dbname_, options);
-  options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4));
-  ASSERT_NOK(TryReopen(options));
-
-  options.memtable_factory.reset(new SkipListFactory);
-  ASSERT_OK(TryReopen(options));
-
-  ColumnFamilyOptions cf_options(options);
-  cf_options.memtable_factory.reset(
-      NewHashLinkListRepFactory(4, 0, 3, true, 4));
-  ColumnFamilyHandle* handle;
-  ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle));
-}
-
-#endif  // ROCKSDB_LITE
-
-TEST_F(DBTest, SanitizeNumThreads) {
-  for (int attempt = 0; attempt < 2; attempt++) {
-    const size_t kTotalTasks = 8;
-    test::SleepingBackgroundTask sleeping_tasks[kTotalTasks];
-
-    Options options = CurrentOptions();
-    if (attempt == 0) {
-      options.max_background_compactions = 3;
-      options.max_background_flushes = 2;
-    }
-    options.create_if_missing = true;
-    DestroyAndReopen(options);
-
-    for (size_t i = 0; i < kTotalTasks; i++) {
-      // Insert 5 tasks to low priority queue and 5 tasks to high priority queue
-      env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
-                     &sleeping_tasks[i],
-                     (i < 4) ? Env::Priority::LOW : Env::Priority::HIGH);
-    }
-
-    // Wait 100 milliseconds for they are scheduled.
-    env_->SleepForMicroseconds(100000);
-
-    // pool size 3, total task 4. Queue size should be 1.
-    ASSERT_EQ(1U, options.env->GetThreadPoolQueueLen(Env::Priority::LOW));
-    // pool size 2, total task 4. Queue size should be 2.
-    ASSERT_EQ(2U, options.env->GetThreadPoolQueueLen(Env::Priority::HIGH));
-
-    for (size_t i = 0; i < kTotalTasks; i++) {
-      sleeping_tasks[i].WakeUp();
-      sleeping_tasks[i].WaitUntilDone();
-    }
-
-    ASSERT_OK(Put("abc", "def"));
-    ASSERT_EQ("def", Get("abc"));
-    Flush();
-    ASSERT_EQ("def", Get("abc"));
-  }
-}
-
-TEST_F(DBTest, DBIteratorBoundTest) {
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.create_if_missing = true;
-
-  options.prefix_extractor = nullptr;
-  DestroyAndReopen(options);
-  ASSERT_OK(Put("a", "0"));
-  ASSERT_OK(Put("foo", "bar"));
-  ASSERT_OK(Put("foo1", "bar1"));
-  ASSERT_OK(Put("g1", "0"));
-
-  // testing basic case with no iterate_upper_bound and no prefix_extractor
-  {
-    ReadOptions ro;
-    ro.iterate_upper_bound = nullptr;
-
-    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
-
-    iter->Seek("foo");
-
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
-
-    iter->Next();
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
-
-    iter->Next();
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
-  }
-
-  // testing iterate_upper_bound and forward iterator
-  // to make sure it stops at bound
-  {
-    ReadOptions ro;
-    // iterate_upper_bound points beyond the last expected entry
-    Slice prefix("foo2");
-    ro.iterate_upper_bound = &prefix;
-
-    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
-
-    iter->Seek("foo");
-
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
-
-    iter->Next();
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(("foo1")), 0);
-
-    iter->Next();
-    // should stop here...
-    ASSERT_TRUE(!iter->Valid());
-  }
-  // Testing SeekToLast with iterate_upper_bound set
-  {
-    ReadOptions ro;
-
-    Slice prefix("foo");
-    ro.iterate_upper_bound = &prefix;
-
-    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
-
-    iter->SeekToLast();
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(Slice("a")), 0);
-  }
-
-  // prefix is the first letter of the key
-  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
-
-  DestroyAndReopen(options);
-  ASSERT_OK(Put("a", "0"));
-  ASSERT_OK(Put("foo", "bar"));
-  ASSERT_OK(Put("foo1", "bar1"));
-  ASSERT_OK(Put("g1", "0"));
-
-  // testing with iterate_upper_bound and prefix_extractor
-  // Seek target and iterate_upper_bound are not is same prefix
-  // This should be an error
-  {
-    ReadOptions ro;
-    Slice upper_bound("g");
-    ro.iterate_upper_bound = &upper_bound;
-
-    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
-
-    iter->Seek("foo");
-
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ("foo", iter->key().ToString());
-
-    iter->Next();
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ("foo1", iter->key().ToString());
-
-    iter->Next();
-    ASSERT_TRUE(!iter->Valid());
-  }
-
-  // testing that iterate_upper_bound prevents iterating over deleted items
-  // if the bound has already reached
-  {
-    options.prefix_extractor = nullptr;
-    DestroyAndReopen(options);
-    ASSERT_OK(Put("a", "0"));
-    ASSERT_OK(Put("b", "0"));
-    ASSERT_OK(Put("b1", "0"));
-    ASSERT_OK(Put("c", "0"));
-    ASSERT_OK(Put("d", "0"));
-    ASSERT_OK(Put("e", "0"));
-    ASSERT_OK(Delete("c"));
-    ASSERT_OK(Delete("d"));
-
-    // base case with no bound
-    ReadOptions ro;
-    ro.iterate_upper_bound = nullptr;
-
-    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
-
-    iter->Seek("b");
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(Slice("b")), 0);
-
-    iter->Next();
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(("b1")), 0);
-
-    perf_context.Reset();
-    iter->Next();
-
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 2);
-
-    // now testing with iterate_bound
-    Slice prefix("c");
-    ro.iterate_upper_bound = &prefix;
-
-    iter.reset(db_->NewIterator(ro));
-
-    perf_context.Reset();
-
-    iter->Seek("b");
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(Slice("b")), 0);
-
-    iter->Next();
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(("b1")), 0);
-
-    iter->Next();
-    // the iteration should stop as soon as the the bound key is reached
-    // even though the key is deleted
-    // hence internal_delete_skipped_count should be 0
-    ASSERT_TRUE(!iter->Valid());
-    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 0);
-  }
-}
-
-TEST_F(DBTest, WriteSingleThreadEntry) {
-  std::vector<std::thread> threads;
-  dbfull()->TEST_LockMutex();
-  auto w = dbfull()->TEST_BeginWrite();
-  threads.emplace_back([&] { Put("a", "b"); });
-  env_->SleepForMicroseconds(10000);
-  threads.emplace_back([&] { Flush(); });
-  env_->SleepForMicroseconds(10000);
-  dbfull()->TEST_UnlockMutex();
-  dbfull()->TEST_LockMutex();
-  dbfull()->TEST_EndWrite(w);
-  dbfull()->TEST_UnlockMutex();
-
-  for (auto& t : threads) {
-    t.join();
-  }
-}
-
-TEST_F(DBTest, DisableDataSyncTest) {
-  env_->sync_counter_.store(0);
-  // iter 0 -- no sync
-  // iter 1 -- sync
-  for (int iter = 0; iter < 2; ++iter) {
-    Options options = CurrentOptions();
-    options.disableDataSync = iter == 0;
-    options.create_if_missing = true;
-    options.num_levels = 10;
-    options.env = env_;
-    Reopen(options);
-    CreateAndReopenWithCF({"pikachu"}, options);
-
-    MakeTables(10, "a", "z");
-    Compact("a", "z");
+      uint64_t size;
+      ASSERT_OK(env_->GetFileSize(src, &size));
 
-    if (iter == 0) {
-      ASSERT_EQ(env_->sync_counter_.load(), 0);
-    } else {
-      ASSERT_GT(env_->sync_counter_.load(), 0);
+      // record the number and the size of the
+      // latest manifest file
+      if (ParseFileName(files[i].substr(1), &number, &type)) {
+        if (type == kDescriptorFile) {
+          if (number > manifest_number) {
+            manifest_number = number;
+            ASSERT_GE(size, manifest_size);
+            size = manifest_size;  // copy only valid MANIFEST data
+          }
+        }
+      }
+      CopyFile(src, dest, size);
     }
-    Destroy(options);
-  }
-}
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, DynamicMemtableOptions) {
-  const uint64_t k64KB = 1 << 16;
-  const uint64_t k128KB = 1 << 17;
-  const uint64_t k5KB = 5 * 1024;
-  const int kNumPutsBeforeWaitForFlush = 64;
-  Options options;
-  options.env = env_;
-  options.create_if_missing = true;
-  options.compression = kNoCompression;
-  options.max_background_compactions = 1;
-  options.write_buffer_size = k64KB;
-  options.arena_block_size = 16 * 1024;
-  options.max_write_buffer_number = 2;
-  // Don't trigger compact/slowdown/stop
-  options.level0_file_num_compaction_trigger = 1024;
-  options.level0_slowdown_writes_trigger = 1024;
-  options.level0_stop_writes_trigger = 1024;
-  DestroyAndReopen(options);
+    // release file snapshot
+    dbfull()->DisableFileDeletions();
+    // overwrite one key, this key should not appear in the snapshot
+    std::vector<std::string> extras;
+    for (unsigned int i = 0; i < 1; i++) {
+      extras.push_back(RandomString(&rnd, 100000));
+      ASSERT_OK(Put(0, Key(i), extras[i]));
+    }
 
-  auto gen_l0_kb = [this, kNumPutsBeforeWaitForFlush](int size) {
-    Random rnd(301);
-    for (int i = 0; i < size; i++) {
-      ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
+    // verify that data in the snapshot are correct
+    std::vector<ColumnFamilyDescriptor> column_families;
+    column_families.emplace_back("default", ColumnFamilyOptions());
+    column_families.emplace_back("pikachu", ColumnFamilyOptions());
+    std::vector<ColumnFamilyHandle*> cf_handles;
+    DB* snapdb;
+    DBOptions opts;
+    opts.env = env_;
+    opts.create_if_missing = false;
+    Status stat =
+        DB::Open(opts, snapdir, column_families, &cf_handles, &snapdb);
+    ASSERT_OK(stat);
 
-      // The following condition prevents a race condition between flush jobs
-      // acquiring work and this thread filling up multiple memtables. Without
-      // this, the flush might produce less files than expected because
-      // multiple memtables are flushed into a single L0 file. This race
-      // condition affects assertion (A).
-      if (i % kNumPutsBeforeWaitForFlush == kNumPutsBeforeWaitForFlush - 1) {
-        dbfull()->TEST_WaitForFlushMemTable();
-      }
+    ReadOptions roptions;
+    std::string val;
+    for (unsigned int i = 0; i < 80; i++) {
+      stat = snapdb->Get(roptions, cf_handles[i < 40], Key(i), &val);
+      ASSERT_EQ(values[i].compare(val), 0);
     }
-    dbfull()->TEST_WaitForFlushMemTable();
-  };
+    for (auto cfh : cf_handles) {
+      delete cfh;
+    }
+    delete snapdb;
 
-  // Test write_buffer_size
-  gen_l0_kb(64);
-  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
-  ASSERT_LT(SizeAtLevel(0), k64KB + k5KB);
-  ASSERT_GT(SizeAtLevel(0), k64KB - k5KB * 2);
+    // look at the new live files after we added an 'extra' key
+    // and after we took the first snapshot.
+    uint64_t new_manifest_number = 0;
+    uint64_t new_manifest_size = 0;
+    std::vector<std::string> newfiles;
+    dbfull()->DisableFileDeletions();
+    dbfull()->GetLiveFiles(newfiles, &new_manifest_size);
 
-  // Clean up L0
-  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
+    // find the new manifest file. assert that this manifest file is
+    // the same one as in the previous snapshot. But its size should be
+    // larger because we added an extra key after taking the
+    // previous shapshot.
+    for (size_t i = 0; i < newfiles.size(); i++) {
+      std::string src = dbname_ + "/" + newfiles[i];
+      // record the lognumber and the size of the
+      // latest manifest file
+      if (ParseFileName(newfiles[i].substr(1), &number, &type)) {
+        if (type == kDescriptorFile) {
+          if (number > new_manifest_number) {
+            uint64_t size;
+            new_manifest_number = number;
+            ASSERT_OK(env_->GetFileSize(src, &size));
+            ASSERT_GE(size, new_manifest_size);
+          }
+        }
+      }
+    }
+    ASSERT_EQ(manifest_number, new_manifest_number);
+    ASSERT_GT(new_manifest_size, manifest_size);
 
-  // Increase buffer size
-  ASSERT_OK(dbfull()->SetOptions({
-    {"write_buffer_size", "131072"},
-  }));
+    // release file snapshot
+    dbfull()->DisableFileDeletions();
+  } while (ChangeCompactOptions());
+}
+#endif
 
-  // The existing memtable is still 64KB in size, after it becomes immutable,
-  // the next memtable will be 128KB in size. Write 256KB total, we should
-  // have a 64KB L0 file, a 128KB L0 file, and a memtable with 64KB data
-  gen_l0_kb(256);
-  ASSERT_EQ(NumTableFilesAtLevel(0), 2);  // (A)
-  ASSERT_LT(SizeAtLevel(0), k128KB + k64KB + 2 * k5KB);
-  ASSERT_GT(SizeAtLevel(0), k128KB + k64KB - 4 * k5KB);
+TEST_F(DBTest, CompactOnFlush) {
+  anon::OptionsOverride options_override;
+  options_override.skip_policy = kSkipNoSnapshot;
+  do {
+    Options options = CurrentOptions(options_override);
+    options.disable_auto_compactions = true;
+    CreateAndReopenWithCF({"pikachu"}, options);
 
-  // Test max_write_buffer_number
-  // Block compaction thread, which will also block the flushes because
-  // max_background_flushes == 0, so flushes are getting executed by the
-  // compaction thread
-  env_->SetBackgroundThreads(1, Env::LOW);
-  test::SleepingBackgroundTask sleeping_task_low;
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
-                 Env::Priority::LOW);
-  // Start from scratch and disable compaction/flush. Flush can only happen
-  // during compaction but trigger is pretty high
-  options.max_background_flushes = 0;
-  options.disable_auto_compactions = true;
-  DestroyAndReopen(options);
+    Put(1, "foo", "v1");
+    ASSERT_OK(Flush(1));
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v1 ]");
 
-  // Put until writes are stopped, bounded by 256 puts. We should see stop at
-  // ~128KB
-  int count = 0;
-  Random rnd(301);
+    // Write two new keys
+    Put(1, "a", "begin");
+    Put(1, "z", "end");
+    Flush(1);
 
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "DBImpl::DelayWrite:Wait",
-      [&](void* arg) { sleeping_task_low.WakeUp(); });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+    // Case1: Delete followed by a put
+    Delete(1, "foo");
+    Put(1, "foo", "v2");
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
 
-  while (!sleeping_task_low.WokenUp() && count < 256) {
-    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
-    count++;
-  }
-  ASSERT_GT(static_cast<double>(count), 128 * 0.8);
-  ASSERT_LT(static_cast<double>(count), 128 * 1.2);
+    // After the current memtable is flushed, the DEL should
+    // have been removed
+    ASSERT_OK(Flush(1));
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
 
-  sleeping_task_low.WaitUntilDone();
+    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+                           nullptr);
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
 
-  // Increase
-  ASSERT_OK(dbfull()->SetOptions({
-    {"max_write_buffer_number", "8"},
-  }));
-  // Clean up memtable and L0
-  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+    // Case 2: Delete followed by another delete
+    Delete(1, "foo");
+    Delete(1, "foo");
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, DEL, v2 ]");
+    ASSERT_OK(Flush(1));
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v2 ]");
+    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+                           nullptr);
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
 
-  sleeping_task_low.Reset();
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
-                 Env::Priority::LOW);
-  count = 0;
-  while (!sleeping_task_low.WokenUp() && count < 1024) {
-    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
-    count++;
-  }
-  // Windows fails this test. Will tune in the future and figure out
-  // approp number
-#ifndef OS_WIN
-  ASSERT_GT(static_cast<double>(count), 512 * 0.8);
-  ASSERT_LT(static_cast<double>(count), 512 * 1.2);
-#endif
-  sleeping_task_low.WaitUntilDone();
+    // Case 3: Put followed by a delete
+    Put(1, "foo", "v3");
+    Delete(1, "foo");
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v3 ]");
+    ASSERT_OK(Flush(1));
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL ]");
+    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+                           nullptr);
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
 
-  // Decrease
-  ASSERT_OK(dbfull()->SetOptions({
-    {"max_write_buffer_number", "4"},
-  }));
-  // Clean up memtable and L0
-  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+    // Case 4: Put followed by another Put
+    Put(1, "foo", "v4");
+    Put(1, "foo", "v5");
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5, v4 ]");
+    ASSERT_OK(Flush(1));
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]");
+    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+                           nullptr);
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]");
 
-  sleeping_task_low.Reset();
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
-                 Env::Priority::LOW);
+    // clear database
+    Delete(1, "foo");
+    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+                           nullptr);
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
 
-  count = 0;
-  while (!sleeping_task_low.WokenUp() && count < 1024) {
-    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
-    count++;
-  }
-  // Windows fails this test. Will tune in the future and figure out
-  // approp number
-#ifndef OS_WIN
-  ASSERT_GT(static_cast<double>(count), 256 * 0.8);
-  ASSERT_LT(static_cast<double>(count), 266 * 1.2);
-#endif
-  sleeping_task_low.WaitUntilDone();
+    // Case 5: Put followed by snapshot followed by another Put
+    // Both puts should remain.
+    Put(1, "foo", "v6");
+    const Snapshot* snapshot = db_->GetSnapshot();
+    Put(1, "foo", "v7");
+    ASSERT_OK(Flush(1));
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v7, v6 ]");
+    db_->ReleaseSnapshot(snapshot);
 
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
-}
-#endif  // ROCKSDB_LITE
+    // clear database
+    Delete(1, "foo");
+    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
+                           nullptr);
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
 
-#if ROCKSDB_USING_THREAD_STATUS
-namespace {
-void VerifyOperationCount(Env* env, ThreadStatus::OperationType op_type,
-                          int expected_count) {
-  int op_count = 0;
-  std::vector<ThreadStatus> thread_list;
-  ASSERT_OK(env->GetThreadList(&thread_list));
-  for (auto thread : thread_list) {
-    if (thread.operation_type == op_type) {
-      op_count++;
-    }
-  }
-  ASSERT_EQ(op_count, expected_count);
+    // Case 5: snapshot followed by a put followed by another Put
+    // Only the last put should remain.
+    const Snapshot* snapshot1 = db_->GetSnapshot();
+    Put(1, "foo", "v8");
+    Put(1, "foo", "v9");
+    ASSERT_OK(Flush(1));
+    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v9 ]");
+    db_->ReleaseSnapshot(snapshot1);
+  } while (ChangeCompactOptions());
 }
-}  // namespace
 
-TEST_F(DBTest, GetThreadStatus) {
-  Options options;
-  options.env = env_;
-  options.enable_thread_tracking = true;
-  TryReopen(options);
+TEST_F(DBTest, FlushOneColumnFamily) {
+  Options options = CurrentOptions();
+  CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich",
+                         "alyosha", "popovich"},
+                        options);
 
-  std::vector<ThreadStatus> thread_list;
-  Status s = env_->GetThreadList(&thread_list);
+  ASSERT_OK(Put(0, "Default", "Default"));
+  ASSERT_OK(Put(1, "pikachu", "pikachu"));
+  ASSERT_OK(Put(2, "ilya", "ilya"));
+  ASSERT_OK(Put(3, "muromec", "muromec"));
+  ASSERT_OK(Put(4, "dobrynia", "dobrynia"));
+  ASSERT_OK(Put(5, "nikitich", "nikitich"));
+  ASSERT_OK(Put(6, "alyosha", "alyosha"));
+  ASSERT_OK(Put(7, "popovich", "popovich"));
 
-  for (int i = 0; i < 2; ++i) {
-    // repeat the test with differet number of high / low priority threads
-    const int kTestCount = 3;
-    const unsigned int kHighPriCounts[kTestCount] = {3, 2, 5};
-    const unsigned int kLowPriCounts[kTestCount] = {10, 15, 3};
-    for (int test = 0; test < kTestCount; ++test) {
-      // Change the number of threads in high / low priority pool.
-      env_->SetBackgroundThreads(kHighPriCounts[test], Env::HIGH);
-      env_->SetBackgroundThreads(kLowPriCounts[test], Env::LOW);
-      // Wait to ensure the all threads has been registered
-      env_->SleepForMicroseconds(100000);
-      s = env_->GetThreadList(&thread_list);
-      ASSERT_OK(s);
-      unsigned int thread_type_counts[ThreadStatus::NUM_THREAD_TYPES];
-      memset(thread_type_counts, 0, sizeof(thread_type_counts));
-      for (auto thread : thread_list) {
-        ASSERT_LT(thread.thread_type, ThreadStatus::NUM_THREAD_TYPES);
-        thread_type_counts[thread.thread_type]++;
-      }
-      // Verify the total number of threades
-      ASSERT_EQ(
-          thread_type_counts[ThreadStatus::HIGH_PRIORITY] +
-              thread_type_counts[ThreadStatus::LOW_PRIORITY],
-          kHighPriCounts[test] + kLowPriCounts[test]);
-      // Verify the number of high-priority threads
-      ASSERT_EQ(
-          thread_type_counts[ThreadStatus::HIGH_PRIORITY],
-          kHighPriCounts[test]);
-      // Verify the number of low-priority threads
-      ASSERT_EQ(
-          thread_type_counts[ThreadStatus::LOW_PRIORITY],
-          kLowPriCounts[test]);
-    }
-    if (i == 0) {
-      // repeat the test with multiple column families
-      CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
-      env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(
-          handles_, true);
-    }
+  for (int i = 0; i < 8; ++i) {
+    Flush(i);
+    auto tables = ListTableFiles(env_, dbname_);
+    ASSERT_EQ(tables.size(), i + 1U);
   }
-  db_->DropColumnFamily(handles_[2]);
-  delete handles_[2];
-  handles_.erase(handles_.begin() + 2);
-  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(
-      handles_, true);
-  Close();
-  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(
-      handles_, true);
-}
-
-TEST_F(DBTest, DisableThreadStatus) {
-  Options options;
-  options.env = env_;
-  options.enable_thread_tracking = false;
-  TryReopen(options);
-  CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
-  // Verify non of the column family info exists
-  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(
-      handles_, false);
 }
 
-TEST_F(DBTest, ThreadStatusFlush) {
-  Options options;
-  options.env = env_;
-  options.write_buffer_size = 100000;  // Small write buffer
-  options.enable_thread_tracking = true;
-  options = CurrentOptions(options);
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, SharedWriteBuffer) {
+  Options options = CurrentOptions();
+  options.db_write_buffer_size = 100000;  // this is the real limit
+  options.write_buffer_size = 500000;     // this is never hit
+  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
 
-  rocksdb::SyncPoint::GetInstance()->LoadDependency({
-      {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"},
-      {"DBTest::ThreadStatusFlush:2",
-       "FlushJob::LogAndNotifyTableFileCreation()"},
-  });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  // Trigger a flush on CF "nikitich"
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(3, Key(1), DummyString(90000)));
+  ASSERT_OK(Put(2, Key(2), DummyString(20000)));
+  ASSERT_OK(Put(2, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[0]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
+  {
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
+              static_cast<uint64_t>(0));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
+              static_cast<uint64_t>(0));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(0));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(1));
+  }
 
-  CreateAndReopenWithCF({"pikachu"}, options);
-  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
+  // "dobrynia": 20KB
+  // Flush 'dobrynia'
+  ASSERT_OK(Put(3, Key(2), DummyString(40000)));
+  ASSERT_OK(Put(2, Key(2), DummyString(70000)));
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
+  {
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
+              static_cast<uint64_t>(0));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
+              static_cast<uint64_t>(0));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(1));
+  }
 
-  ASSERT_OK(Put(1, "foo", "v1"));
-  ASSERT_EQ("v1", Get(1, "foo"));
-  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
+  // "nikitich" still has has data of 80KB
+  // Inserting Data in "dobrynia" triggers "nikitich" flushing.
+  ASSERT_OK(Put(3, Key(2), DummyString(40000)));
+  ASSERT_OK(Put(2, Key(2), DummyString(40000)));
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
+  {
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
+              static_cast<uint64_t>(0));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
+              static_cast<uint64_t>(0));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(2));
+  }
 
-  uint64_t num_running_flushes = 0;
-  db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes);
-  ASSERT_EQ(num_running_flushes, 0);
+  // "dobrynia" still has 40KB
+  ASSERT_OK(Put(1, Key(2), DummyString(20000)));
+  ASSERT_OK(Put(0, Key(1), DummyString(10000)));
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[0]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
+  // This should triggers no flush
+  {
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
+              static_cast<uint64_t>(0));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
+              static_cast<uint64_t>(0));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(2));
+  }
 
-  Put(1, "k1", std::string(100000, 'x'));  // Fill memtable
-  Put(1, "k2", std::string(100000, 'y'));  // Trigger flush
+  // "default": 10KB, "pikachu": 20KB, "dobrynia": 40KB
+  ASSERT_OK(Put(1, Key(2), DummyString(40000)));
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[0]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
+  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
+  // This should triggers flush of "pikachu"
+  {
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
+              static_cast<uint64_t>(0));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(2));
+  }
 
-  // The first sync point is to make sure there's one flush job
-  // running when we perform VerifyOperationCount().
-  TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1");
-  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1);
-  db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes);
-  ASSERT_EQ(num_running_flushes, 1);
-  // This second sync point is to ensure the flush job will not
-  // be completed until we already perform VerifyOperationCount().
-  TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2");
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  // "default": 10KB, "dobrynia": 40KB
+  // Some remaining writes so 'default', 'dobrynia' and 'nikitich' flush on
+  // closure.
+  ASSERT_OK(Put(3, Key(1), DummyString(1)));
+  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
+                           options);
+  {
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(2));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(3));
+  }
 }
+#endif  // ROCKSDB_LITE
 
-TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) {
-  const int kTestKeySize = 16;
-  const int kTestValueSize = 984;
-  const int kEntrySize = kTestKeySize + kTestValueSize;
-  const int kEntriesPerBuffer = 100;
-  Options options;
+TEST_F(DBTest, PurgeInfoLogs) {
+  Options options = CurrentOptions();
+  options.keep_log_file_num = 5;
   options.create_if_missing = true;
-  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
-  options.compaction_style = kCompactionStyleLevel;
-  options.target_file_size_base = options.write_buffer_size;
-  options.max_bytes_for_level_base = options.target_file_size_base * 2;
-  options.max_bytes_for_level_multiplier = 2;
-  options.compression = kNoCompression;
-  options = CurrentOptions(options);
-  options.env = env_;
-  options.enable_thread_tracking = true;
-  const int kNumL0Files = 4;
-  options.level0_file_num_compaction_trigger = kNumL0Files;
-  options.max_subcompactions = max_subcompactions_;
-
-  rocksdb::SyncPoint::GetInstance()->LoadDependency({
-      {"DBTest::ThreadStatusSingleCompaction:0", "DBImpl::BGWorkCompaction"},
-      {"CompactionJob::Run():Start", "DBTest::ThreadStatusSingleCompaction:1"},
-      {"DBTest::ThreadStatusSingleCompaction:2", "CompactionJob::Run():End"},
-  });
-  for (int tests = 0; tests < 2; ++tests) {
-    DestroyAndReopen(options);
-    rocksdb::SyncPoint::GetInstance()->ClearTrace();
-    rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  for (int mode = 0; mode <= 1; mode++) {
+    if (mode == 1) {
+      options.db_log_dir = dbname_ + "_logs";
+      env_->CreateDirIfMissing(options.db_log_dir);
+    } else {
+      options.db_log_dir = "";
+    }
+    for (int i = 0; i < 8; i++) {
+      Reopen(options);
+    }
 
-    Random rnd(301);
-    // The Put Phase.
-    for (int file = 0; file < kNumL0Files; ++file) {
-      for (int key = 0; key < kEntriesPerBuffer; ++key) {
-        ASSERT_OK(Put(ToString(key + file * kEntriesPerBuffer),
-                      RandomString(&rnd, kTestValueSize)));
+    std::vector<std::string> files;
+    env_->GetChildren(options.db_log_dir.empty() ? dbname_ : options.db_log_dir,
+                      &files);
+    int info_log_count = 0;
+    for (std::string file : files) {
+      if (file.find("LOG") != std::string::npos) {
+        info_log_count++;
       }
-      Flush();
     }
-    // This makes sure a compaction won't be scheduled until
-    // we have done with the above Put Phase.
-    uint64_t num_running_compactions = 0;
-    db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
-                        &num_running_compactions);
-    ASSERT_EQ(num_running_compactions, 0);
-    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0");
-    ASSERT_GE(NumTableFilesAtLevel(0),
-              options.level0_file_num_compaction_trigger);
-
-    // This makes sure at least one compaction is running.
-    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:1");
+    ASSERT_EQ(5, info_log_count);
 
-    if (options.enable_thread_tracking) {
-      // expecting one single L0 to L1 compaction
-      VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 1);
-    } else {
-      // If thread tracking is not enabled, compaction count should be 0.
-      VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0);
+    Destroy(options);
+    // For mode (1), test DestroyDB() to delete all the logs under DB dir.
+    // For mode (2), no info log file should have been put under DB dir.
+    std::vector<std::string> db_files;
+    env_->GetChildren(dbname_, &db_files);
+    for (std::string file : db_files) {
+      ASSERT_TRUE(file.find("LOG") == std::string::npos);
     }
-    db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
-                        &num_running_compactions);
-    ASSERT_EQ(num_running_compactions, 1);
-    // TODO(yhchiang): adding assert to verify each compaction stage.
-    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:2");
 
-    // repeat the test with disabling thread tracking.
-    options.enable_thread_tracking = false;
-    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+    if (mode == 1) {
+      // Cleaning up
+      env_->GetChildren(options.db_log_dir, &files);
+      for (std::string file : files) {
+        env_->DeleteFile(options.db_log_dir + "/" + file);
+      }
+      env_->DeleteDir(options.db_log_dir);
+    }
   }
 }
 
-TEST_P(DBTestWithParam, PreShutdownManualCompaction) {
-  Options options = CurrentOptions();
-  options.max_background_flushes = 0;
-  options.max_subcompactions = max_subcompactions_;
-  CreateAndReopenWithCF({"pikachu"}, options);
-
-  // iter - 0 with 7 levels
-  // iter - 1 with 3 levels
-  for (int iter = 0; iter < 2; ++iter) {
-    MakeTables(3, "p", "q", 1);
-    ASSERT_EQ("1,1,1", FilesPerLevel(1));
-
-    // Compaction range falls before files
-    Compact(1, "", "c");
-    ASSERT_EQ("1,1,1", FilesPerLevel(1));
-
-    // Compaction range falls after files
-    Compact(1, "r", "z");
-    ASSERT_EQ("1,1,1", FilesPerLevel(1));
-
-    // Compaction range overlaps files
-    Compact(1, "p1", "p9");
-    ASSERT_EQ("0,0,1", FilesPerLevel(1));
-
-    // Populate a different range
-    MakeTables(3, "c", "e", 1);
-    ASSERT_EQ("1,1,2", FilesPerLevel(1));
-
-    // Compact just the new range
-    Compact(1, "b", "f");
-    ASSERT_EQ("0,0,2", FilesPerLevel(1));
-
-    // Compact all
-    MakeTables(1, "a", "z", 1);
-    ASSERT_EQ("1,0,2", FilesPerLevel(1));
-    CancelAllBackgroundWork(db_);
-    db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr);
-    ASSERT_EQ("1,0,2", FilesPerLevel(1));
+#ifndef ROCKSDB_LITE
+// Multi-threaded test:
+namespace {
 
-    if (iter == 0) {
-      options = CurrentOptions();
-      options.max_background_flushes = 0;
-      options.num_levels = 3;
-      options.create_if_missing = true;
-      DestroyAndReopen(options);
-      CreateAndReopenWithCF({"pikachu"}, options);
-    }
-  }
-}
+static const int kColumnFamilies = 10;
+static const int kNumThreads = 10;
+static const int kTestSeconds = 10;
+static const int kNumKeys = 1000;
 
-TEST_F(DBTest, PreShutdownFlush) {
-  Options options = CurrentOptions();
-  options.max_background_flushes = 0;
-  CreateAndReopenWithCF({"pikachu"}, options);
-  ASSERT_OK(Put(1, "key", "value"));
-  CancelAllBackgroundWork(db_);
-  Status s =
-      db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr);
-  ASSERT_TRUE(s.IsShutdownInProgress());
-}
+struct MTState {
+  DBTest* test;
+  std::atomic<bool> stop;
+  std::atomic<int> counter[kNumThreads];
+  std::atomic<bool> thread_done[kNumThreads];
+};
 
-TEST_P(DBTestWithParam, PreShutdownMultipleCompaction) {
-  const int kTestKeySize = 16;
-  const int kTestValueSize = 984;
-  const int kEntrySize = kTestKeySize + kTestValueSize;
-  const int kEntriesPerBuffer = 40;
-  const int kNumL0Files = 4;
+struct MTThread {
+  MTState* state;
+  int id;
+};
 
-  const int kHighPriCount = 3;
-  const int kLowPriCount = 5;
-  env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
-  env_->SetBackgroundThreads(kLowPriCount, Env::LOW);
+static void MTThreadBody(void* arg) {
+  MTThread* t = reinterpret_cast<MTThread*>(arg);
+  int id = t->id;
+  DB* db = t->state->test->db_;
+  int counter = 0;
+  fprintf(stderr, "... starting thread %d\n", id);
+  Random rnd(1000 + id);
+  char valbuf[1500];
+  while (t->state->stop.load(std::memory_order_acquire) == false) {
+    t->state->counter[id].store(counter, std::memory_order_release);
 
-  Options options;
-  options.create_if_missing = true;
-  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
-  options.compaction_style = kCompactionStyleLevel;
-  options.target_file_size_base = options.write_buffer_size;
-  options.max_bytes_for_level_base =
-      options.target_file_size_base * kNumL0Files;
-  options.compression = kNoCompression;
-  options = CurrentOptions(options);
-  options.env = env_;
-  options.enable_thread_tracking = true;
-  options.level0_file_num_compaction_trigger = kNumL0Files;
-  options.max_bytes_for_level_multiplier = 2;
-  options.max_background_compactions = kLowPriCount;
-  options.level0_stop_writes_trigger = 1 << 10;
-  options.level0_slowdown_writes_trigger = 1 << 10;
-  options.max_subcompactions = max_subcompactions_;
+    int key = rnd.Uniform(kNumKeys);
+    char keybuf[20];
+    snprintf(keybuf, sizeof(keybuf), "%016d", key);
 
-  TryReopen(options);
-  Random rnd(301);
+    if (rnd.OneIn(2)) {
+      // Write values of the form <key, my id, counter, cf, unique_id>.
+      // into each of the CFs
+      // We add some padding for force compactions.
+      int unique_id = rnd.Uniform(1000000);
 
-  std::vector<ThreadStatus> thread_list;
-  // Delay both flush and compaction
-  rocksdb::SyncPoint::GetInstance()->LoadDependency(
-      {{"FlushJob::FlushJob()", "CompactionJob::Run():Start"},
-       {"CompactionJob::Run():Start",
-        "DBTest::PreShutdownMultipleCompaction:Preshutdown"},
-        {"CompactionJob::Run():Start",
-        "DBTest::PreShutdownMultipleCompaction:VerifyCompaction"},
-       {"DBTest::PreShutdownMultipleCompaction:Preshutdown",
-        "CompactionJob::Run():End"},
-       {"CompactionJob::Run():End",
-        "DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"}});
+      // Half of the time directly use WriteBatch. Half of the time use
+      // WriteBatchWithIndex.
+      if (rnd.OneIn(2)) {
+        WriteBatch batch;
+        for (int cf = 0; cf < kColumnFamilies; ++cf) {
+          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
+                   static_cast<int>(counter), cf, unique_id);
+          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
+        }
+        ASSERT_OK(db->Write(WriteOptions(), &batch));
+      } else {
+        WriteBatchWithIndex batch(db->GetOptions().comparator);
+        for (int cf = 0; cf < kColumnFamilies; ++cf) {
+          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
+                   static_cast<int>(counter), cf, unique_id);
+          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
+        }
+        ASSERT_OK(db->Write(WriteOptions(), batch.GetWriteBatch()));
+      }
+    } else {
+      // Read a value and verify that it matches the pattern written above
+      // and that writes to all column families were atomic (unique_id is the
+      // same)
+      std::vector<Slice> keys(kColumnFamilies, Slice(keybuf));
+      std::vector<std::string> values;
+      std::vector<Status> statuses =
+          db->MultiGet(ReadOptions(), t->state->test->handles_, keys, &values);
+      Status s = statuses[0];
+      // all statuses have to be the same
+      for (size_t i = 1; i < statuses.size(); ++i) {
+        // they are either both ok or both not-found
+        ASSERT_TRUE((s.ok() && statuses[i].ok()) ||
+                    (s.IsNotFound() && statuses[i].IsNotFound()));
+      }
+      if (s.IsNotFound()) {
+        // Key has not yet been written
+      } else {
+        // Check that the writer thread counter is >= the counter in the value
+        ASSERT_OK(s);
+        int unique_id = -1;
+        for (int i = 0; i < kColumnFamilies; ++i) {
+          int k, w, c, cf, u;
+          ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w, &c,
+                              &cf, &u))
+              << values[i];
+          ASSERT_EQ(k, key);
+          ASSERT_GE(w, 0);
+          ASSERT_LT(w, kNumThreads);
+          ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
+          ASSERT_EQ(cf, i);
+          if (i == 0) {
+            unique_id = u;
+          } else {
+            // this checks that updates across column families happened
+            // atomically -- all unique ids are the same
+            ASSERT_EQ(u, unique_id);
+          }
+        }
+      }
+    }
+    counter++;
+  }
+  t->state->thread_done[id].store(true, std::memory_order_release);
+  fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
+}
 
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+}  // namespace
 
-  // Make rocksdb busy
-  int key = 0;
-  // check how many threads are doing compaction using GetThreadList
-  int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
-  for (int file = 0; file < 16 * kNumL0Files; ++file) {
-    for (int k = 0; k < kEntriesPerBuffer; ++k) {
-      ASSERT_OK(Put(ToString(key++), RandomString(&rnd, kTestValueSize)));
-    }
+class MultiThreadedDBTest : public DBTest,
+                            public ::testing::WithParamInterface<int> {
+ public:
+  virtual void SetUp() override { option_config_ = GetParam(); }
 
-    Status s = env_->GetThreadList(&thread_list);
-    for (auto thread : thread_list) {
-      operation_count[thread.operation_type]++;
+  static std::vector<int> GenerateOptionConfigs() {
+    std::vector<int> optionConfigs;
+    for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) {
+      // skip as HashCuckooRep does not support snapshot
+      if (optionConfig != kHashCuckoo) {
+        optionConfigs.push_back(optionConfig);
+      }
     }
+    return optionConfigs;
+  }
+};
 
-    // Speed up the test
-    if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
-        operation_count[ThreadStatus::OP_COMPACTION] >
-            0.6 * options.max_background_compactions) {
-      break;
-    }
-    if (file == 15 * kNumL0Files) {
-      TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
-    }
+TEST_P(MultiThreadedDBTest, MultiThreaded) {
+  anon::OptionsOverride options_override;
+  options_override.skip_policy = kSkipNoSnapshot;
+  std::vector<std::string> cfs;
+  for (int i = 1; i < kColumnFamilies; ++i) {
+    cfs.push_back(ToString(i));
+  }
+  CreateAndReopenWithCF(cfs, CurrentOptions(options_override));
+  // Initialize state
+  MTState mt;
+  mt.test = this;
+  mt.stop.store(false, std::memory_order_release);
+  for (int id = 0; id < kNumThreads; id++) {
+    mt.counter[id].store(0, std::memory_order_release);
+    mt.thread_done[id].store(false, std::memory_order_release);
   }
 
-  TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
-  ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
-  CancelAllBackgroundWork(db_);
-  TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown");
-  dbfull()->TEST_WaitForCompact();
-  // Record the number of compactions at a time.
-  for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
-    operation_count[i] = 0;
+  // Start threads
+  MTThread thread[kNumThreads];
+  for (int id = 0; id < kNumThreads; id++) {
+    thread[id].state = &mt;
+    thread[id].id = id;
+    env_->StartThread(MTThreadBody, &thread[id]);
   }
-  Status s = env_->GetThreadList(&thread_list);
-  for (auto thread : thread_list) {
-    operation_count[thread.operation_type]++;
+
+  // Let them run for a while
+  env_->SleepForMicroseconds(kTestSeconds * 1000000);
+
+  // Stop the threads and wait for them to finish
+  mt.stop.store(true, std::memory_order_release);
+  for (int id = 0; id < kNumThreads; id++) {
+    while (mt.thread_done[id].load(std::memory_order_acquire) == false) {
+      env_->SleepForMicroseconds(100000);
+    }
   }
-  ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
 }
 
-TEST_P(DBTestWithParam, PreShutdownCompactionMiddle) {
-  const int kTestKeySize = 16;
-  const int kTestValueSize = 984;
-  const int kEntrySize = kTestKeySize + kTestValueSize;
-  const int kEntriesPerBuffer = 40;
-  const int kNumL0Files = 4;
+INSTANTIATE_TEST_CASE_P(
+    MultiThreaded, MultiThreadedDBTest,
+    ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()));
+#endif  // ROCKSDB_LITE
 
-  const int kHighPriCount = 3;
-  const int kLowPriCount = 5;
-  env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
-  env_->SetBackgroundThreads(kLowPriCount, Env::LOW);
+// Group commit test:
+namespace {
 
-  Options options;
-  options.create_if_missing = true;
-  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
-  options.compaction_style = kCompactionStyleLevel;
-  options.target_file_size_base = options.write_buffer_size;
-  options.max_bytes_for_level_base =
-      options.target_file_size_base * kNumL0Files;
-  options.compression = kNoCompression;
-  options = CurrentOptions(options);
-  options.env = env_;
-  options.enable_thread_tracking = true;
-  options.level0_file_num_compaction_trigger = kNumL0Files;
-  options.max_bytes_for_level_multiplier = 2;
-  options.max_background_compactions = kLowPriCount;
-  options.level0_stop_writes_trigger = 1 << 10;
-  options.level0_slowdown_writes_trigger = 1 << 10;
-  options.max_subcompactions = max_subcompactions_;
+static const int kGCNumThreads = 4;
+static const int kGCNumKeys = 1000;
 
-  TryReopen(options);
-  Random rnd(301);
+struct GCThread {
+  DB* db;
+  int id;
+  std::atomic<bool> done;
+};
 
-  std::vector<ThreadStatus> thread_list;
-  // Delay both flush and compaction
-  rocksdb::SyncPoint::GetInstance()->LoadDependency(
-      {{"DBTest::PreShutdownCompactionMiddle:Preshutdown",
-        "CompactionJob::Run():Inprogress"},
-        {"CompactionJob::Run():Start",
-        "DBTest::PreShutdownCompactionMiddle:VerifyCompaction"},
-       {"CompactionJob::Run():Inprogress", "CompactionJob::Run():End"},
-       {"CompactionJob::Run():End",
-        "DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"}});
+static void GCThreadBody(void* arg) {
+  GCThread* t = reinterpret_cast<GCThread*>(arg);
+  int id = t->id;
+  DB* db = t->db;
+  WriteOptions wo;
 
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  for (int i = 0; i < kGCNumKeys; ++i) {
+    std::string kv(ToString(i + id * kGCNumKeys));
+    ASSERT_OK(db->Put(wo, kv, kv));
+  }
+  t->done = true;
+}
 
-  // Make rocksdb busy
-  int key = 0;
-  // check how many threads are doing compaction using GetThreadList
-  int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
-  for (int file = 0; file < 16 * kNumL0Files; ++file) {
-    for (int k = 0; k < kEntriesPerBuffer; ++k) {
-      ASSERT_OK(Put(ToString(key++), RandomString(&rnd, kTestValueSize)));
+}  // namespace
+
+TEST_F(DBTest, GroupCommitTest) {
+  do {
+    Options options = CurrentOptions();
+    options.env = env_;
+    env_->log_write_slowdown_.store(100);
+    options.statistics = rocksdb::CreateDBStatistics();
+    Reopen(options);
+
+    // Start threads
+    GCThread thread[kGCNumThreads];
+    for (int id = 0; id < kGCNumThreads; id++) {
+      thread[id].id = id;
+      thread[id].db = db_;
+      thread[id].done = false;
+      env_->StartThread(GCThreadBody, &thread[id]);
     }
 
-    Status s = env_->GetThreadList(&thread_list);
-    for (auto thread : thread_list) {
-      operation_count[thread.operation_type]++;
+    for (int id = 0; id < kGCNumThreads; id++) {
+      while (thread[id].done == false) {
+        env_->SleepForMicroseconds(100000);
+      }
     }
+    env_->log_write_slowdown_.store(0);
 
-    // Speed up the test
-    if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
-        operation_count[ThreadStatus::OP_COMPACTION] >
-            0.6 * options.max_background_compactions) {
-      break;
+    ASSERT_GT(TestGetTickerCount(options, WRITE_DONE_BY_OTHER), 0);
+
+    std::vector<std::string> expected_db;
+    for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) {
+      expected_db.push_back(ToString(i));
     }
-    if (file == 15 * kNumL0Files) {
-      TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyCompaction");
+    sort(expected_db.begin(), expected_db.end());
+
+    Iterator* itr = db_->NewIterator(ReadOptions());
+    itr->SeekToFirst();
+    for (auto x : expected_db) {
+      ASSERT_TRUE(itr->Valid());
+      ASSERT_EQ(itr->key().ToString(), x);
+      ASSERT_EQ(itr->value().ToString(), x);
+      itr->Next();
     }
-  }
+    ASSERT_TRUE(!itr->Valid());
+    delete itr;
 
-  ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
-  CancelAllBackgroundWork(db_);
-  TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:Preshutdown");
-  TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown");
-  dbfull()->TEST_WaitForCompact();
-  // Record the number of compactions at a time.
-  for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
-    operation_count[i] = 0;
-  }
-  Status s = env_->GetThreadList(&thread_list);
-  for (auto thread : thread_list) {
-    operation_count[thread.operation_type]++;
-  }
-  ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
+    HistogramData hist_data = {0, 0, 0, 0, 0};
+    options.statistics->histogramData(DB_WRITE, &hist_data);
+    ASSERT_GT(hist_data.average, 0.0);
+  } while (ChangeOptions(kSkipNoSeekToLast));
 }
 
-#endif  // ROCKSDB_USING_THREAD_STATUS
-
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, FlushOnDestroy) {
-  WriteOptions wo;
-  wo.disableWAL = true;
-  ASSERT_OK(Put("foo", "v1", wo));
-  CancelAllBackgroundWork(db_);
+namespace {
+typedef std::map<std::string, std::string> KVMap;
 }
 
-namespace {
-class OnFileDeletionListener : public EventListener {
+class ModelDB : public DB {
  public:
-  OnFileDeletionListener() :
-      matched_count_(0),
-      expected_file_name_("") {}
+  class ModelSnapshot : public Snapshot {
+   public:
+    KVMap map_;
 
-  void SetExpectedFileName(
-      const std::string file_name) {
-    expected_file_name_ = file_name;
-  }
+    virtual SequenceNumber GetSequenceNumber() const override {
+      // no need to call this
+      assert(false);
+      return 0;
+    }
+  };
 
-  void VerifyMatchedCount(size_t expected_value) {
-    ASSERT_EQ(matched_count_, expected_value);
+  explicit ModelDB(const Options& options) : options_(options) {}
+  using DB::Put;
+  virtual Status Put(const WriteOptions& o, ColumnFamilyHandle* cf,
+                     const Slice& k, const Slice& v) override {
+    WriteBatch batch;
+    batch.Put(cf, k, v);
+    return Write(o, &batch);
   }
-
-  void OnTableFileDeleted(
-      const TableFileDeletionInfo& info) override {
-    if (expected_file_name_ != "") {
-      ASSERT_EQ(expected_file_name_, info.file_path);
-      expected_file_name_ = "";
-      matched_count_++;
-    }
+  using DB::Delete;
+  virtual Status Delete(const WriteOptions& o, ColumnFamilyHandle* cf,
+                        const Slice& key) override {
+    WriteBatch batch;
+    batch.Delete(cf, key);
+    return Write(o, &batch);
   }
-
- private:
-  size_t matched_count_;
-  std::string expected_file_name_;
-};
-
-}  // namespace
-
-TEST_F(DBTest, DynamicLevelCompressionPerLevel) {
-  if (!Snappy_Supported()) {
-    return;
+  using DB::SingleDelete;
+  virtual Status SingleDelete(const WriteOptions& o, ColumnFamilyHandle* cf,
+                              const Slice& key) override {
+    WriteBatch batch;
+    batch.SingleDelete(cf, key);
+    return Write(o, &batch);
   }
-  const int kNKeys = 120;
-  int keys[kNKeys];
-  for (int i = 0; i < kNKeys; i++) {
-    keys[i] = i;
+  using DB::Merge;
+  virtual Status Merge(const WriteOptions& o, ColumnFamilyHandle* cf,
+                       const Slice& k, const Slice& v) override {
+    WriteBatch batch;
+    batch.Merge(cf, k, v);
+    return Write(o, &batch);
+  }
+  using DB::Get;
+  virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* cf,
+                     const Slice& key, std::string* value) override {
+    return Status::NotSupported(key);
   }
-  std::random_shuffle(std::begin(keys), std::end(keys));
-
-  Random rnd(301);
-  Options options;
-  options.create_if_missing = true;
-  options.db_write_buffer_size = 20480;
-  options.write_buffer_size = 20480;
-  options.max_write_buffer_number = 2;
-  options.level0_file_num_compaction_trigger = 2;
-  options.level0_slowdown_writes_trigger = 2;
-  options.level0_stop_writes_trigger = 2;
-  options.target_file_size_base = 2048;
-  options.level_compaction_dynamic_level_bytes = true;
-  options.max_bytes_for_level_base = 102400;
-  options.max_bytes_for_level_multiplier = 4;
-  options.max_background_compactions = 1;
-  options.num_levels = 5;
-
-  options.compression_per_level.resize(3);
-  options.compression_per_level[0] = kNoCompression;
-  options.compression_per_level[1] = kNoCompression;
-  options.compression_per_level[2] = kSnappyCompression;
 
-  OnFileDeletionListener* listener = new OnFileDeletionListener();
-  options.listeners.emplace_back(listener);
+  using DB::MultiGet;
+  virtual std::vector<Status> MultiGet(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_family,
+      const std::vector<Slice>& keys,
+      std::vector<std::string>* values) override {
+    std::vector<Status> s(keys.size(),
+                          Status::NotSupported("Not implemented."));
+    return s;
+  }
 
-  DestroyAndReopen(options);
+#ifndef ROCKSDB_LITE
+  using DB::AddFile;
+  virtual Status AddFile(ColumnFamilyHandle* column_family,
+                         const ExternalSstFileInfo* file_path,
+                         bool move_file) override {
+    return Status::NotSupported("Not implemented.");
+  }
+  virtual Status AddFile(ColumnFamilyHandle* column_family,
+                         const std::string& file_path,
+                         bool move_file) override {
+    return Status::NotSupported("Not implemented.");
+  }
 
-  // Insert more than 80K. L4 should be base level. Neither L0 nor L4 should
-  // be compressed, so total data size should be more than 80K.
-  for (int i = 0; i < 20; i++) {
-    ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
+  using DB::GetPropertiesOfAllTables;
+  virtual Status GetPropertiesOfAllTables(
+      ColumnFamilyHandle* column_family,
+      TablePropertiesCollection* props) override {
+    return Status();
   }
-  Flush();
-  dbfull()->TEST_WaitForCompact();
 
-  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
-  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
-  ASSERT_EQ(NumTableFilesAtLevel(3), 0);
-  ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(4), 20U * 4000U);
+  virtual Status GetPropertiesOfTablesInRange(
+      ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
+      TablePropertiesCollection* props) override {
+    return Status();
+  }
+#endif  // ROCKSDB_LITE
 
-  // Insert 400KB. Some data will be compressed
-  for (int i = 21; i < 120; i++) {
-    ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
+  using DB::KeyMayExist;
+  virtual bool KeyMayExist(const ReadOptions& options,
+                           ColumnFamilyHandle* column_family, const Slice& key,
+                           std::string* value,
+                           bool* value_found = nullptr) override {
+    if (value_found != nullptr) {
+      *value_found = false;
+    }
+    return true;  // Not Supported directly
   }
-  Flush();
-  dbfull()->TEST_WaitForCompact();
-  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
-  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
-  ASSERT_LT(SizeAtLevel(0) + SizeAtLevel(3) + SizeAtLevel(4), 120U * 4000U);
-  // Make sure data in files in L3 is not compacted by removing all files
-  // in L4 and calculate number of rows
-  ASSERT_OK(dbfull()->SetOptions({
-      {"disable_auto_compactions", "true"},
-  }));
-  ColumnFamilyMetaData cf_meta;
-  db_->GetColumnFamilyMetaData(&cf_meta);
-  for (auto file : cf_meta.levels[4].files) {
-    listener->SetExpectedFileName(dbname_ + file.name);
-    ASSERT_OK(dbfull()->DeleteFile(file.name));
+  using DB::NewIterator;
+  virtual Iterator* NewIterator(const ReadOptions& options,
+                                ColumnFamilyHandle* column_family) override {
+    if (options.snapshot == nullptr) {
+      KVMap* saved = new KVMap;
+      *saved = map_;
+      return new ModelIter(saved, true);
+    } else {
+      const KVMap* snapshot_state =
+          &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
+      return new ModelIter(snapshot_state, false);
+    }
   }
-  listener->VerifyMatchedCount(cf_meta.levels[4].files.size());
-
-  int num_keys = 0;
-  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
-  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-    num_keys++;
+  virtual Status NewIterators(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>& column_family,
+      std::vector<Iterator*>* iterators) override {
+    return Status::NotSupported("Not supported yet");
+  }
+  virtual const Snapshot* GetSnapshot() override {
+    ModelSnapshot* snapshot = new ModelSnapshot;
+    snapshot->map_ = map_;
+    return snapshot;
   }
-  ASSERT_OK(iter->status());
-  ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(3), num_keys * 4000U);
-}
 
-TEST_F(DBTest, DynamicLevelCompressionPerLevel2) {
-  if (!Snappy_Supported() || !LZ4_Supported() || !Zlib_Supported()) {
-    return;
+  virtual void ReleaseSnapshot(const Snapshot* snapshot) override {
+    delete reinterpret_cast<const ModelSnapshot*>(snapshot);
   }
-  const int kNKeys = 500;
-  int keys[kNKeys];
-  for (int i = 0; i < kNKeys; i++) {
-    keys[i] = i;
+
+  virtual Status Write(const WriteOptions& options,
+                       WriteBatch* batch) override {
+    class Handler : public WriteBatch::Handler {
+     public:
+      KVMap* map_;
+      virtual void Put(const Slice& key, const Slice& value) override {
+        (*map_)[key.ToString()] = value.ToString();
+      }
+      virtual void Merge(const Slice& key, const Slice& value) override {
+        // ignore merge for now
+        // (*map_)[key.ToString()] = value.ToString();
+      }
+      virtual void Delete(const Slice& key) override {
+        map_->erase(key.ToString());
+      }
+    };
+    Handler handler;
+    handler.map_ = &map_;
+    return batch->Iterate(&handler);
   }
-  std::random_shuffle(std::begin(keys), std::end(keys));
 
-  Random rnd(301);
-  Options options;
-  options.create_if_missing = true;
-  options.db_write_buffer_size = 6000;
-  options.write_buffer_size = 6000;
-  options.max_write_buffer_number = 2;
-  options.level0_file_num_compaction_trigger = 2;
-  options.level0_slowdown_writes_trigger = 2;
-  options.level0_stop_writes_trigger = 2;
-  options.soft_pending_compaction_bytes_limit = 1024 * 1024;
+  using DB::GetProperty;
+  virtual bool GetProperty(ColumnFamilyHandle* column_family,
+                           const Slice& property, std::string* value) override {
+    return false;
+  }
+  using DB::GetIntProperty;
+  virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
+                              const Slice& property, uint64_t* value) override {
+    return false;
+  }
+  using DB::GetAggregatedIntProperty;
+  virtual bool GetAggregatedIntProperty(const Slice& property,
+                                        uint64_t* value) override {
+    return false;
+  }
+  using DB::GetApproximateSizes;
+  virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
+                                   const Range* range, int n, uint64_t* sizes,
+                                   bool include_memtable) override {
+    for (int i = 0; i < n; i++) {
+      sizes[i] = 0;
+    }
+  }
+  using DB::CompactRange;
+  virtual Status CompactRange(const CompactRangeOptions& options,
+                              ColumnFamilyHandle* column_family,
+                              const Slice* start, const Slice* end) override {
+    return Status::NotSupported("Not supported operation.");
+  }
 
-  // Use file size to distinguish levels
-  // L1: 10, L2: 20, L3 40, L4 80
-  // L0 is less than 30
-  options.target_file_size_base = 10;
-  options.target_file_size_multiplier = 2;
+  using DB::CompactFiles;
+  virtual Status CompactFiles(const CompactionOptions& compact_options,
+                              ColumnFamilyHandle* column_family,
+                              const std::vector<std::string>& input_file_names,
+                              const int output_level,
+                              const int output_path_id = -1) override {
+    return Status::NotSupported("Not supported operation.");
+  }
 
-  options.level_compaction_dynamic_level_bytes = true;
-  options.max_bytes_for_level_base = 200;
-  options.max_bytes_for_level_multiplier = 8;
-  options.max_background_compactions = 1;
-  options.num_levels = 5;
-  std::shared_ptr<mock::MockTableFactory> mtf(new mock::MockTableFactory);
-  options.table_factory = mtf;
+  Status PauseBackgroundWork() override {
+    return Status::NotSupported("Not supported operation.");
+  }
 
-  options.compression_per_level.resize(3);
-  options.compression_per_level[0] = kNoCompression;
-  options.compression_per_level[1] = kLZ4Compression;
-  options.compression_per_level[2] = kZlibCompression;
+  Status ContinueBackgroundWork() override {
+    return Status::NotSupported("Not supported operation.");
+  }
 
-  DestroyAndReopen(options);
-  // When base level is L4, L4 is LZ4.
-  std::atomic<int> num_zlib(0);
-  std::atomic<int> num_lz4(0);
-  std::atomic<int> num_no(0);
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
-        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
-        if (compaction->output_level() == 4) {
-          ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
-          num_lz4.fetch_add(1);
-        }
-      });
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
-        auto* compression = reinterpret_cast<CompressionType*>(arg);
-        ASSERT_TRUE(*compression == kNoCompression);
-        num_no.fetch_add(1);
-      });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  Status EnableAutoCompaction(
+      const std::vector<ColumnFamilyHandle*>& column_family_handles) override {
+    return Status::NotSupported("Not supported operation.");
+  }
 
-  for (int i = 0; i < 100; i++) {
-    ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 200)));
+  using DB::NumberLevels;
+  virtual int NumberLevels(ColumnFamilyHandle* column_family) override {
+    return 1;
+  }
 
-    if (i % 25 == 0) {
-      dbfull()->TEST_WaitForFlushMemTable();
-    }
+  using DB::MaxMemCompactionLevel;
+  virtual int MaxMemCompactionLevel(
+      ColumnFamilyHandle* column_family) override {
+    return 1;
   }
 
-  Flush();
-  dbfull()->TEST_WaitForFlushMemTable();
-  dbfull()->TEST_WaitForCompact();
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
-  rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
+  using DB::Level0StopWriteTrigger;
+  virtual int Level0StopWriteTrigger(
+      ColumnFamilyHandle* column_family) override {
+    return -1;
+  }
 
-  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
-  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
-  ASSERT_EQ(NumTableFilesAtLevel(3), 0);
-  ASSERT_GT(NumTableFilesAtLevel(4), 0);
-  ASSERT_GT(num_no.load(), 2);
-  ASSERT_GT(num_lz4.load(), 0);
-  int prev_num_files_l4 = NumTableFilesAtLevel(4);
+  virtual const std::string& GetName() const override { return name_; }
 
-  // After base level turn L4->L3, L3 becomes LZ4 and L4 becomes Zlib
-  num_lz4.store(0);
-  num_no.store(0);
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
-        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
-        if (compaction->output_level() == 4 && compaction->start_level() == 3) {
-          ASSERT_TRUE(compaction->output_compression() == kZlibCompression);
-          num_zlib.fetch_add(1);
-        } else {
-          ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
-          num_lz4.fetch_add(1);
-        }
-      });
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
-        auto* compression = reinterpret_cast<CompressionType*>(arg);
-        ASSERT_TRUE(*compression == kNoCompression);
-        num_no.fetch_add(1);
-      });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  virtual Env* GetEnv() const override { return nullptr; }
 
-  for (int i = 101; i < 500; i++) {
-    ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 200)));
-    if (i % 100 == 99) {
-      Flush();
-      dbfull()->TEST_WaitForCompact();
-    }
+  using DB::GetOptions;
+  virtual const Options& GetOptions(
+      ColumnFamilyHandle* column_family) const override {
+    return options_;
   }
 
-  rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
-  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
-  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
-  ASSERT_GT(NumTableFilesAtLevel(3), 0);
-  ASSERT_GT(NumTableFilesAtLevel(4), prev_num_files_l4);
-  ASSERT_GT(num_no.load(), 2);
-  ASSERT_GT(num_lz4.load(), 0);
-  ASSERT_GT(num_zlib.load(), 0);
-}
+  using DB::GetDBOptions;
+  virtual const DBOptions& GetDBOptions() const override { return options_; }
 
-TEST_F(DBTest, DynamicCompactionOptions) {
-  // minimum write buffer size is enforced at 64KB
-  const uint64_t k32KB = 1 << 15;
-  const uint64_t k64KB = 1 << 16;
-  const uint64_t k128KB = 1 << 17;
-  const uint64_t k1MB = 1 << 20;
-  const uint64_t k4KB = 1 << 12;
-  Options options;
-  options.env = env_;
-  options.create_if_missing = true;
-  options.compression = kNoCompression;
-  options.soft_pending_compaction_bytes_limit = 1024 * 1024;
-  options.write_buffer_size = k64KB;
-  options.arena_block_size = 4 * k4KB;
-  options.max_write_buffer_number = 2;
-  // Compaction related options
-  options.level0_file_num_compaction_trigger = 3;
-  options.level0_slowdown_writes_trigger = 4;
-  options.level0_stop_writes_trigger = 8;
-  options.max_grandparent_overlap_factor = 10;
-  options.expanded_compaction_factor = 25;
-  options.source_compaction_factor = 1;
-  options.target_file_size_base = k64KB;
-  options.target_file_size_multiplier = 1;
-  options.max_bytes_for_level_base = k128KB;
-  options.max_bytes_for_level_multiplier = 4;
+  using DB::Flush;
+  virtual Status Flush(const rocksdb::FlushOptions& options,
+                       ColumnFamilyHandle* column_family) override {
+    Status ret;
+    return ret;
+  }
 
-  // Block flush thread and disable compaction thread
-  env_->SetBackgroundThreads(1, Env::LOW);
-  env_->SetBackgroundThreads(1, Env::HIGH);
-  DestroyAndReopen(options);
+  virtual Status SyncWAL() override { return Status::OK(); }
 
-  auto gen_l0_kb = [this](int start, int size, int stride) {
-    Random rnd(301);
-    for (int i = 0; i < size; i++) {
-      ASSERT_OK(Put(Key(start + stride * i), RandomString(&rnd, 1024)));
-    }
-    dbfull()->TEST_WaitForFlushMemTable();
-  };
+#ifndef ROCKSDB_LITE
+  virtual Status DisableFileDeletions() override { return Status::OK(); }
 
-  // Write 3 files that have the same key range.
-  // Since level0_file_num_compaction_trigger is 3, compaction should be
-  // triggered. The compaction should result in one L1 file
-  gen_l0_kb(0, 64, 1);
-  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
-  gen_l0_kb(0, 64, 1);
-  ASSERT_EQ(NumTableFilesAtLevel(0), 2);
-  gen_l0_kb(0, 64, 1);
-  dbfull()->TEST_WaitForCompact();
-  ASSERT_EQ("0,1", FilesPerLevel());
-  std::vector<LiveFileMetaData> metadata;
-  db_->GetLiveFilesMetaData(&metadata);
-  ASSERT_EQ(1U, metadata.size());
-  ASSERT_LE(metadata[0].size, k64KB + k4KB);
-  ASSERT_GE(metadata[0].size, k64KB - k4KB);
+  virtual Status EnableFileDeletions(bool force) override {
+    return Status::OK();
+  }
+  virtual Status GetLiveFiles(std::vector<std::string>&, uint64_t* size,
+                              bool flush_memtable = true) override {
+    return Status::OK();
+  }
 
-  // Test compaction trigger and target_file_size_base
-  // Reduce compaction trigger to 2, and reduce L1 file size to 32KB.
-  // Writing to 64KB L0 files should trigger a compaction. Since these
-  // 2 L0 files have the same key range, compaction merge them and should
-  // result in 2 32KB L1 files.
-  ASSERT_OK(dbfull()->SetOptions({
-    {"level0_file_num_compaction_trigger", "2"},
-    {"target_file_size_base", ToString(k32KB) }
-  }));
+  virtual Status GetSortedWalFiles(VectorLogPtr& files) override {
+    return Status::OK();
+  }
 
-  gen_l0_kb(0, 64, 1);
-  ASSERT_EQ("1,1", FilesPerLevel());
-  gen_l0_kb(0, 64, 1);
-  dbfull()->TEST_WaitForCompact();
-  ASSERT_EQ("0,2", FilesPerLevel());
-  metadata.clear();
-  db_->GetLiveFilesMetaData(&metadata);
-  ASSERT_EQ(2U, metadata.size());
-  ASSERT_LE(metadata[0].size, k32KB + k4KB);
-  ASSERT_GE(metadata[0].size, k32KB - k4KB);
-  ASSERT_LE(metadata[1].size, k32KB + k4KB);
-  ASSERT_GE(metadata[1].size, k32KB - k4KB);
+  virtual Status DeleteFile(std::string name) override { return Status::OK(); }
 
-  // Test max_bytes_for_level_base
-  // Increase level base size to 256KB and write enough data that will
-  // fill L1 and L2. L1 size should be around 256KB while L2 size should be
-  // around 256KB x 4.
-  ASSERT_OK(dbfull()->SetOptions({
-    {"max_bytes_for_level_base", ToString(k1MB) }
-  }));
+  virtual Status GetUpdatesSince(
+      rocksdb::SequenceNumber, unique_ptr<rocksdb::TransactionLogIterator>*,
+      const TransactionLogIterator::ReadOptions& read_options =
+          TransactionLogIterator::ReadOptions()) override {
+    return Status::NotSupported("Not supported in Model DB");
+  }
 
-  // writing 96 x 64KB => 6 * 1024KB
-  // (L1 + L2) = (1 + 4) * 1024KB
-  for (int i = 0; i < 96; ++i) {
-    gen_l0_kb(i, 64, 96);
+  virtual void GetColumnFamilyMetaData(
+      ColumnFamilyHandle* column_family,
+      ColumnFamilyMetaData* metadata) override {}
+#endif  // ROCKSDB_LITE
+
+  virtual Status GetDbIdentity(std::string& identity) const override {
+    return Status::OK();
   }
-  dbfull()->TEST_WaitForCompact();
-  ASSERT_GT(SizeAtLevel(1), k1MB / 2);
-  ASSERT_LT(SizeAtLevel(1), k1MB + k1MB / 2);
 
-  // Within (0.5, 1.5) of 4MB.
-  ASSERT_GT(SizeAtLevel(2), 2 * k1MB);
-  ASSERT_LT(SizeAtLevel(2), 6 * k1MB);
+  virtual SequenceNumber GetLatestSequenceNumber() const override { return 0; }
+
+  virtual ColumnFamilyHandle* DefaultColumnFamily() const override {
+    return nullptr;
+  }
+
+ private:
+  class ModelIter : public Iterator {
+   public:
+    ModelIter(const KVMap* map, bool owned)
+        : map_(map), owned_(owned), iter_(map_->end()) {}
+    ~ModelIter() {
+      if (owned_) delete map_;
+    }
+    virtual bool Valid() const override { return iter_ != map_->end(); }
+    virtual void SeekToFirst() override { iter_ = map_->begin(); }
+    virtual void SeekToLast() override {
+      if (map_->empty()) {
+        iter_ = map_->end();
+      } else {
+        iter_ = map_->find(map_->rbegin()->first);
+      }
+    }
+    virtual void Seek(const Slice& k) override {
+      iter_ = map_->lower_bound(k.ToString());
+    }
+    virtual void Next() override { ++iter_; }
+    virtual void Prev() override {
+      if (iter_ == map_->begin()) {
+        iter_ = map_->end();
+        return;
+      }
+      --iter_;
+    }
+
+    virtual Slice key() const override { return iter_->first; }
+    virtual Slice value() const override { return iter_->second; }
+    virtual Status status() const override { return Status::OK(); }
 
-  // Test max_bytes_for_level_multiplier and
-  // max_bytes_for_level_base. Now, reduce both mulitplier and level base,
-  // After filling enough data that can fit in L1 - L3, we should see L1 size
-  // reduces to 128KB from 256KB which was asserted previously. Same for L2.
-  ASSERT_OK(dbfull()->SetOptions({
-    {"max_bytes_for_level_multiplier", "2"},
-    {"max_bytes_for_level_base", ToString(k128KB) }
-  }));
+   private:
+    const KVMap* const map_;
+    const bool owned_;  // Do we own map_
+    KVMap::const_iterator iter_;
+  };
+  const Options options_;
+  KVMap map_;
+  std::string name_ = "";
+};
 
-  // writing 20 x 64KB = 10 x 128KB
-  // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB
-  for (int i = 0; i < 20; ++i) {
-    gen_l0_kb(i, 64, 32);
-  }
-  dbfull()->TEST_WaitForCompact();
-  uint64_t total_size =
-    SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3);
-  ASSERT_TRUE(total_size < k128KB * 7 * 1.5);
+static std::string RandomKey(Random* rnd, int minimum = 0) {
+  int len;
+  do {
+    len = (rnd->OneIn(3)
+               ? 1  // Short sometimes to encourage collisions
+               : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
+  } while (len < minimum);
+  return test::RandomKey(rnd, len);
+}
 
-  // Test level0_stop_writes_trigger.
-  // Clean up memtable and L0. Block compaction threads. If continue to write
-  // and flush memtables. We should see put stop after 8 memtable flushes
-  // since level0_stop_writes_trigger = 8
-  dbfull()->TEST_FlushMemTable(true);
-  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-  // Block compaction
-  test::SleepingBackgroundTask sleeping_task_low;
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
-                 Env::Priority::LOW);
-  sleeping_task_low.WaitUntilSleeping();
-  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
+static bool CompareIterators(int step, DB* model, DB* db,
+                             const Snapshot* model_snap,
+                             const Snapshot* db_snap) {
+  ReadOptions options;
+  options.snapshot = model_snap;
+  Iterator* miter = model->NewIterator(options);
+  options.snapshot = db_snap;
+  Iterator* dbiter = db->NewIterator(options);
+  bool ok = true;
   int count = 0;
-  Random rnd(301);
-  WriteOptions wo;
-  while (count < 64) {
-    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
-    dbfull()->TEST_FlushMemTable(true);
+  for (miter->SeekToFirst(), dbiter->SeekToFirst();
+       ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) {
     count++;
-    if (dbfull()->TEST_write_controler().IsStopped()) {
-      sleeping_task_low.WakeUp();
+    if (miter->key().compare(dbiter->key()) != 0) {
+      fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", step,
+              EscapeString(miter->key()).c_str(),
+              EscapeString(dbiter->key()).c_str());
+      ok = false;
       break;
     }
-  }
-  // Stop trigger = 8
-  ASSERT_EQ(count, 8);
-  // Unblock
-  sleeping_task_low.WaitUntilDone();
-
-  // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0.
-  // Block compaction thread again. Perform the put and memtable flushes
-  // until we see the stop after 6 memtable flushes.
-  ASSERT_OK(dbfull()->SetOptions({
-    {"level0_stop_writes_trigger", "6"}
-  }));
-  dbfull()->TEST_FlushMemTable(true);
-  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
 
-  // Block compaction again
-  sleeping_task_low.Reset();
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
-                 Env::Priority::LOW);
-  sleeping_task_low.WaitUntilSleeping();
-  count = 0;
-  while (count < 64) {
-    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
-    dbfull()->TEST_FlushMemTable(true);
-    count++;
-    if (dbfull()->TEST_write_controler().IsStopped()) {
-      sleeping_task_low.WakeUp();
-      break;
+    if (miter->value().compare(dbiter->value()) != 0) {
+      fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
+              step, EscapeString(miter->key()).c_str(),
+              EscapeString(miter->value()).c_str(),
+              EscapeString(miter->value()).c_str());
+      ok = false;
     }
   }
-  ASSERT_EQ(count, 6);
-  // Unblock
-  sleeping_task_low.WaitUntilDone();
-
-  // Test disable_auto_compactions
-  // Compaction thread is unblocked but auto compaction is disabled. Write
-  // 4 L0 files and compaction should be triggered. If auto compaction is
-  // disabled, then TEST_WaitForCompact will be waiting for nothing. Number of
-  // L0 files do not change after the call.
-  ASSERT_OK(dbfull()->SetOptions({
-    {"disable_auto_compactions", "true"}
-  }));
-  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
 
-  for (int i = 0; i < 4; ++i) {
-    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
-    // Wait for compaction so that put won't stop
-    dbfull()->TEST_FlushMemTable(true);
+  if (ok) {
+    if (miter->Valid() != dbiter->Valid()) {
+      fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
+              step, miter->Valid(), dbiter->Valid());
+      ok = false;
+    }
   }
-  dbfull()->TEST_WaitForCompact();
-  ASSERT_EQ(NumTableFilesAtLevel(0), 4);
+  delete miter;
+  delete dbiter;
+  return ok;
+}
 
-  // Enable auto compaction and perform the same test, # of L0 files should be
-  // reduced after compaction.
-  ASSERT_OK(dbfull()->SetOptions({
-    {"disable_auto_compactions", "false"}
-  }));
-  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
+class DBTestRandomized : public DBTest,
+                         public ::testing::WithParamInterface<int> {
+ public:
+  virtual void SetUp() override { option_config_ = GetParam(); }
 
-  for (int i = 0; i < 4; ++i) {
-    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
-    // Wait for compaction so that put won't stop
-    dbfull()->TEST_FlushMemTable(true);
+  static std::vector<int> GenerateOptionConfigs() {
+    std::vector<int> option_configs;
+    // skip cuckoo hash as it does not support snapshot.
+    for (int option_config = kDefault; option_config < kEnd; ++option_config) {
+      if (!ShouldSkipOptions(option_config, kSkipDeletesFilterFirst |
+                                                kSkipNoSeekToLast |
+                                                kSkipHashCuckoo)) {
+        option_configs.push_back(option_config);
+      }
+    }
+    option_configs.push_back(kBlockBasedTableWithIndexRestartInterval);
+    return option_configs;
   }
-  dbfull()->TEST_WaitForCompact();
-  ASSERT_LT(NumTableFilesAtLevel(0), 4);
-}
-#endif  // ROCKSDB_LITE
+};
 
-TEST_F(DBTest, FileCreationRandomFailure) {
-  Options options;
-  options.env = env_;
-  options.create_if_missing = true;
-  options.write_buffer_size = 100000;  // Small write buffer
-  options.target_file_size_base = 200000;
-  options.max_bytes_for_level_base = 1000000;
-  options.max_bytes_for_level_multiplier = 2;
+INSTANTIATE_TEST_CASE_P(
+    DBTestRandomized, DBTestRandomized,
+    ::testing::ValuesIn(DBTestRandomized::GenerateOptionConfigs()));
 
+TEST_P(DBTestRandomized, Randomized) {
+  anon::OptionsOverride options_override;
+  options_override.skip_policy = kSkipNoSnapshot;
+  Options options = CurrentOptions(options_override);
   DestroyAndReopen(options);
-  Random rnd(301);
 
-  const int kCDTKeysPerBuffer = 4;
-  const int kTestSize = kCDTKeysPerBuffer * 4096;
-  const int kTotalIteration = 100;
-  // the second half of the test involves in random failure
-  // of file creation.
-  const int kRandomFailureTest = kTotalIteration / 2;
-  std::vector<std::string> values;
-  for (int i = 0; i < kTestSize; ++i) {
-    values.push_back("NOT_FOUND");
-  }
-  for (int j = 0; j < kTotalIteration; ++j) {
-    if (j == kRandomFailureTest) {
-      env_->non_writeable_rate_.store(90);
-    }
-    for (int k = 0; k < kTestSize; ++k) {
-      // here we expect some of the Put fails.
-      std::string value = RandomString(&rnd, 100);
-      Status s = Put(Key(k), Slice(value));
-      if (s.ok()) {
-        // update the latest successful put
-        values[k] = value;
+  Random rnd(test::RandomSeed() + GetParam());
+  ModelDB model(options);
+  const int N = 10000;
+  const Snapshot* model_snap = nullptr;
+  const Snapshot* db_snap = nullptr;
+  std::string k, v;
+  for (int step = 0; step < N; step++) {
+    // TODO(sanjay): Test Get() works
+    int p = rnd.Uniform(100);
+    int minimum = 0;
+    if (option_config_ == kHashSkipList || option_config_ == kHashLinkList ||
+        option_config_ == kHashCuckoo ||
+        option_config_ == kPlainTableFirstBytePrefix ||
+        option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
+        option_config_ == kBlockBasedTableWithPrefixHashIndex) {
+      minimum = 1;
+    }
+    if (p < 45) {  // Put
+      k = RandomKey(&rnd, minimum);
+      v = RandomString(&rnd,
+                       rnd.OneIn(20) ? 100 + rnd.Uniform(100) : rnd.Uniform(8));
+      ASSERT_OK(model.Put(WriteOptions(), k, v));
+      ASSERT_OK(db_->Put(WriteOptions(), k, v));
+    } else if (p < 90) {  // Delete
+      k = RandomKey(&rnd, minimum);
+      ASSERT_OK(model.Delete(WriteOptions(), k));
+      ASSERT_OK(db_->Delete(WriteOptions(), k));
+    } else {  // Multi-element batch
+      WriteBatch b;
+      const int num = rnd.Uniform(8);
+      for (int i = 0; i < num; i++) {
+        if (i == 0 || !rnd.OneIn(10)) {
+          k = RandomKey(&rnd, minimum);
+        } else {
+          // Periodically re-use the same key from the previous iter, so
+          // we have multiple entries in the write batch for the same key
+        }
+        if (rnd.OneIn(2)) {
+          v = RandomString(&rnd, rnd.Uniform(10));
+          b.Put(k, v);
+        } else {
+          b.Delete(k);
+        }
       }
-      // But everything before we simulate the failure-test should succeed.
-      if (j < kRandomFailureTest) {
-        ASSERT_OK(s);
+      ASSERT_OK(model.Write(WriteOptions(), &b));
+      ASSERT_OK(db_->Write(WriteOptions(), &b));
+    }
+
+    if ((step % 100) == 0) {
+      // For DB instances that use the hash index + block-based table, the
+      // iterator will be invalid right when seeking a non-existent key, right
+      // than return a key that is close to it.
+      if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
+          option_config_ != kBlockBasedTableWithPrefixHashIndex) {
+        ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
+        ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
       }
+
+      // Save a snapshot from each DB this time that we'll use next
+      // time we compare things, to make sure the current state is
+      // preserved with the snapshot
+      if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
+      if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
+
+      Reopen(options);
+      ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
+
+      model_snap = model.GetSnapshot();
+      db_snap = db_->GetSnapshot();
     }
   }
+  if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
+  if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
+}
+
+TEST_F(DBTest, MultiGetSimple) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    ASSERT_OK(Put(1, "k1", "v1"));
+    ASSERT_OK(Put(1, "k2", "v2"));
+    ASSERT_OK(Put(1, "k3", "v3"));
+    ASSERT_OK(Put(1, "k4", "v4"));
+    ASSERT_OK(Delete(1, "k4"));
+    ASSERT_OK(Put(1, "k5", "v5"));
+    ASSERT_OK(Delete(1, "no_key"));
+
+    std::vector<Slice> keys({"k1", "k2", "k3", "k4", "k5", "no_key"});
+
+    std::vector<std::string> values(20, "Temporary data to be overwritten");
+    std::vector<ColumnFamilyHandle*> cfs(keys.size(), handles_[1]);
+
+    std::vector<Status> s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
+    ASSERT_EQ(values.size(), keys.size());
+    ASSERT_EQ(values[0], "v1");
+    ASSERT_EQ(values[1], "v2");
+    ASSERT_EQ(values[2], "v3");
+    ASSERT_EQ(values[4], "v5");
 
-  // If rocksdb does not do the correct job, internal assert will fail here.
-  dbfull()->TEST_WaitForFlushMemTable();
-  dbfull()->TEST_WaitForCompact();
+    ASSERT_OK(s[0]);
+    ASSERT_OK(s[1]);
+    ASSERT_OK(s[2]);
+    ASSERT_TRUE(s[3].IsNotFound());
+    ASSERT_OK(s[4]);
+    ASSERT_TRUE(s[5].IsNotFound());
+  } while (ChangeCompactOptions());
+}
 
-  // verify we have the latest successful update
-  for (int k = 0; k < kTestSize; ++k) {
-    auto v = Get(Key(k));
-    ASSERT_EQ(v, values[k]);
-  }
+TEST_F(DBTest, MultiGetEmpty) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    // Empty Key Set
+    std::vector<Slice> keys;
+    std::vector<std::string> values;
+    std::vector<ColumnFamilyHandle*> cfs;
+    std::vector<Status> s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
+    ASSERT_EQ(s.size(), 0U);
 
-  // reopen and reverify we have the latest successful update
-  env_->non_writeable_rate_.store(0);
-  Reopen(options);
-  for (int k = 0; k < kTestSize; ++k) {
-    auto v = Get(Key(k));
-    ASSERT_EQ(v, values[k]);
-  }
+    // Empty Database, Empty Key Set
+    Options options = CurrentOptions();
+    options.create_if_missing = true;
+    DestroyAndReopen(options);
+    CreateAndReopenWithCF({"pikachu"}, options);
+    s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
+    ASSERT_EQ(s.size(), 0U);
+
+    // Empty Database, Search for Keys
+    keys.resize(2);
+    keys[0] = "a";
+    keys[1] = "b";
+    cfs.push_back(handles_[0]);
+    cfs.push_back(handles_[1]);
+    s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
+    ASSERT_EQ(static_cast<int>(s.size()), 2);
+    ASSERT_TRUE(s[0].IsNotFound() && s[1].IsNotFound());
+  } while (ChangeCompactOptions());
 }
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, DynamicMiscOptions) {
-  // Test max_sequential_skip_in_iterations
-  Options options;
-  options.env = env_;
-  options.create_if_missing = true;
-  options.max_sequential_skip_in_iterations = 16;
-  options.compression = kNoCompression;
-  options.statistics = rocksdb::CreateDBStatistics();
-  DestroyAndReopen(options);
+TEST_F(DBTest, BlockBasedTablePrefixIndexTest) {
+  // create a DB with block prefix index
+  BlockBasedTableOptions table_options;
+  Options options = CurrentOptions();
+  table_options.index_type = BlockBasedTableOptions::kHashSearch;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
 
-  auto assert_reseek_count = [this, &options](int key_start, int num_reseek) {
-    int key0 = key_start;
-    int key1 = key_start + 1;
-    int key2 = key_start + 2;
-    Random rnd(301);
-    ASSERT_OK(Put(Key(key0), RandomString(&rnd, 8)));
-    for (int i = 0; i < 10; ++i) {
-      ASSERT_OK(Put(Key(key1), RandomString(&rnd, 8)));
-    }
-    ASSERT_OK(Put(Key(key2), RandomString(&rnd, 8)));
-    std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
-    iter->Seek(Key(key1));
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(Key(key1)), 0);
-    iter->Next();
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(iter->key().compare(Key(key2)), 0);
-    ASSERT_EQ(num_reseek,
-              TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
-  };
-  // No reseek
-  assert_reseek_count(100, 0);
+  Reopen(options);
+  ASSERT_OK(Put("k1", "v1"));
+  Flush();
+  ASSERT_OK(Put("k2", "v2"));
 
-  ASSERT_OK(dbfull()->SetOptions({
-    {"max_sequential_skip_in_iterations", "4"}
-  }));
-  // Clear memtable and make new option effective
-  dbfull()->TEST_FlushMemTable(true);
-  // Trigger reseek
-  assert_reseek_count(200, 1);
+  // Reopen it without prefix extractor, make sure everything still works.
+  // RocksDB should just fall back to the binary index.
+  table_options.index_type = BlockBasedTableOptions::kBinarySearch;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  options.prefix_extractor.reset();
 
-  ASSERT_OK(dbfull()->SetOptions({
-    {"max_sequential_skip_in_iterations", "16"}
-  }));
-  // Clear memtable and make new option effective
-  dbfull()->TEST_FlushMemTable(true);
-  // No reseek
-  assert_reseek_count(300, 1);
+  Reopen(options);
+  ASSERT_EQ("v1", Get("k1"));
+  ASSERT_EQ("v2", Get("k2"));
 }
-#endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, DontDeletePendingOutputs) {
-  Options options;
-  options.env = env_;
-  options.create_if_missing = true;
-  DestroyAndReopen(options);
+TEST_F(DBTest, ChecksumTest) {
+  BlockBasedTableOptions table_options;
+  Options options = CurrentOptions();
 
-  // Every time we write to a table file, call FOF/POF with full DB scan. This
-  // will make sure our pending_outputs_ protection work correctly
-  std::function<void()> purge_obsolete_files_function = [&]() {
-    JobContext job_context(0);
-    dbfull()->TEST_LockMutex();
-    dbfull()->FindObsoleteFiles(&job_context, true /*force*/);
-    dbfull()->TEST_UnlockMutex();
-    dbfull()->PurgeObsoleteFiles(job_context);
-    job_context.Clean();
-  };
+  table_options.checksum = kCRC32c;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  Reopen(options);
+  ASSERT_OK(Put("a", "b"));
+  ASSERT_OK(Put("c", "d"));
+  ASSERT_OK(Flush());  // table with crc checksum
 
-  env_->table_write_callback_ = &purge_obsolete_files_function;
+  table_options.checksum = kxxHash;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  Reopen(options);
+  ASSERT_OK(Put("e", "f"));
+  ASSERT_OK(Put("g", "h"));
+  ASSERT_OK(Flush());  // table with xxhash checksum
 
-  for (int i = 0; i < 2; ++i) {
-    ASSERT_OK(Put("a", "begin"));
-    ASSERT_OK(Put("z", "end"));
-    ASSERT_OK(Flush());
-  }
+  table_options.checksum = kCRC32c;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  Reopen(options);
+  ASSERT_EQ("b", Get("a"));
+  ASSERT_EQ("d", Get("c"));
+  ASSERT_EQ("f", Get("e"));
+  ASSERT_EQ("h", Get("g"));
 
-  // If pending output guard does not work correctly, PurgeObsoleteFiles() will
-  // delete the file that Compaction is trying to create, causing this: error
-  // db/db_test.cc:975: IO error:
-  // /tmp/rocksdbtest-1552237650/db_test/000009.sst: No such file or directory
-  Compact("a", "b");
+  table_options.checksum = kCRC32c;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  Reopen(options);
+  ASSERT_EQ("b", Get("a"));
+  ASSERT_EQ("d", Get("c"));
+  ASSERT_EQ("f", Get("e"));
+  ASSERT_EQ("h", Get("g"));
 }
 
 #ifndef ROCKSDB_LITE
-TEST_F(DBTest, DontDeleteMovedFile) {
-  // This test triggers move compaction and verifies that the file is not
-  // deleted when it's part of move compaction
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.create_if_missing = true;
-  options.max_bytes_for_level_base = 1024 * 1024;  // 1 MB
-  options.level0_file_num_compaction_trigger =
-      2;  // trigger compaction when we have 2 files
-  DestroyAndReopen(options);
+TEST_P(DBTestWithParam, FIFOCompactionTest) {
+  for (int iter = 0; iter < 2; ++iter) {
+    // first iteration -- auto compaction
+    // second iteration -- manual compaction
+    Options options;
+    options.compaction_style = kCompactionStyleFIFO;
+    options.write_buffer_size = 100 << 10;  // 100KB
+    options.arena_block_size = 4096;
+    options.compaction_options_fifo.max_table_files_size = 500 << 10;  // 500KB
+    options.compression = kNoCompression;
+    options.create_if_missing = true;
+    options.max_subcompactions = max_subcompactions_;
+    if (iter == 1) {
+      options.disable_auto_compactions = true;
+    }
+    options = CurrentOptions(options);
+    DestroyAndReopen(options);
 
-  Random rnd(301);
-  // Create two 1MB sst files
-  for (int i = 0; i < 2; ++i) {
-    // Create 1MB sst file
-    for (int j = 0; j < 100; ++j) {
-      ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024)));
+    Random rnd(301);
+    for (int i = 0; i < 6; ++i) {
+      for (int j = 0; j < 110; ++j) {
+        ASSERT_OK(Put(ToString(i * 100 + j), RandomString(&rnd, 980)));
+      }
+      // flush should happen here
+      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
+    }
+    if (iter == 0) {
+      ASSERT_OK(dbfull()->TEST_WaitForCompact());
+    } else {
+      CompactRangeOptions cro;
+      cro.exclusive_manual_compaction = exclusive_manual_compaction_;
+      ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
+    }
+    // only 5 files should survive
+    ASSERT_EQ(NumTableFilesAtLevel(0), 5);
+    for (int i = 0; i < 50; ++i) {
+      // these keys should be deleted in previous compaction
+      ASSERT_EQ("NOT_FOUND", Get(ToString(i)));
     }
-    ASSERT_OK(Flush());
   }
-  // this should execute both L0->L1 and L1->(move)->L2 compactions
-  dbfull()->TEST_WaitForCompact();
-  ASSERT_EQ("0,0,1", FilesPerLevel(0));
+}
+#endif  // ROCKSDB_LITE
 
-  // If the moved file is actually deleted (the move-safeguard in
-  // ~Version::Version() is not there), we get this failure:
-  // Corruption: Can't access /000009.sst
-  Reopen(options);
+// verify that we correctly deprecated timeout_hint_us
+TEST_F(DBTest, SimpleWriteTimeoutTest) {
+  WriteOptions write_opt;
+  write_opt.timeout_hint_us = 0;
+  ASSERT_OK(Put(Key(1), Key(1) + std::string(100, 'v'), write_opt));
+  write_opt.timeout_hint_us = 10;
+  ASSERT_NOK(Put(Key(1), Key(1) + std::string(100, 'v'), write_opt));
 }
 
-TEST_F(DBTest, OptimizeFiltersForHits) {
+#ifndef ROCKSDB_LITE
+/*
+ * This test is not reliable enough as it heavily depends on disk behavior.
+ */
+TEST_F(DBTest, RateLimitingTest) {
   Options options = CurrentOptions();
-  options.write_buffer_size = 64 * 1024;
-  options.arena_block_size = 4 * 1024;
-  options.target_file_size_base = 64 * 1024;
+  options.write_buffer_size = 1 << 20;  // 1MB
   options.level0_file_num_compaction_trigger = 2;
-  options.level0_slowdown_writes_trigger = 2;
-  options.level0_stop_writes_trigger = 4;
-  options.max_bytes_for_level_base = 256 * 1024;
-  options.max_write_buffer_number = 2;
-  options.max_background_compactions = 8;
-  options.max_background_flushes = 8;
+  options.target_file_size_base = 1 << 20;     // 1MB
+  options.max_bytes_for_level_base = 4 << 20;  // 4MB
+  options.max_bytes_for_level_multiplier = 4;
   options.compression = kNoCompression;
-  options.compaction_style = kCompactionStyleLevel;
-  options.level_compaction_dynamic_level_bytes = true;
-  BlockBasedTableOptions bbto;
-  bbto.cache_index_and_filter_blocks = true;
-  bbto.filter_policy.reset(NewBloomFilterPolicy(10, true));
-  bbto.whole_key_filtering = true;
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  options.optimize_filters_for_hits = true;
-  options.statistics = rocksdb::CreateDBStatistics();
-  CreateAndReopenWithCF({"mypikachu"}, options);
+  options.create_if_missing = true;
+  options.env = env_;
+  options.IncreaseParallelism(4);
+  DestroyAndReopen(options);
 
-  int numkeys = 200000;
+  WriteOptions wo;
+  wo.disableWAL = true;
 
-  // Generate randomly shuffled keys, so the updates are almost
-  // random.
-  std::vector<int> keys;
-  keys.reserve(numkeys);
-  for (int i = 0; i < numkeys; i += 2) {
-    keys.push_back(i);
+  // # no rate limiting
+  Random rnd(301);
+  uint64_t start = env_->NowMicros();
+  // Write ~96M data
+  for (int64_t i = 0; i < (96 << 10); ++i) {
+    ASSERT_OK(
+        Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo));
   }
-  std::random_shuffle(std::begin(keys), std::end(keys));
+  uint64_t elapsed = env_->NowMicros() - start;
+  double raw_rate = env_->bytes_written_ * 1000000.0 / elapsed;
+  Close();
+
+  // # rate limiting with 0.7 x threshold
+  options.rate_limiter.reset(
+      NewGenericRateLimiter(static_cast<int64_t>(0.7 * raw_rate)));
+  env_->bytes_written_ = 0;
+  DestroyAndReopen(options);
+
+  start = env_->NowMicros();
+  // Write ~96M data
+  for (int64_t i = 0; i < (96 << 10); ++i) {
+    ASSERT_OK(
+        Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo));
+  }
+  elapsed = env_->NowMicros() - start;
+  Close();
+  ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
+  double ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
+  fprintf(stderr, "write rate ratio = %.2lf, expected 0.7\n", ratio);
+  ASSERT_TRUE(ratio < 0.8);
+
+  // # rate limiting with half of the raw_rate
+  options.rate_limiter.reset(
+      NewGenericRateLimiter(static_cast<int64_t>(raw_rate / 2)));
+  env_->bytes_written_ = 0;
+  DestroyAndReopen(options);
 
-  int num_inserted = 0;
-  for (int key : keys) {
-    ASSERT_OK(Put(1, Key(key), "val"));
-    if (++num_inserted % 1000 == 0) {
-      dbfull()->TEST_WaitForFlushMemTable();
-      dbfull()->TEST_WaitForCompact();
-    }
+  start = env_->NowMicros();
+  // Write ~96M data
+  for (int64_t i = 0; i < (96 << 10); ++i) {
+    ASSERT_OK(
+        Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo));
   }
-  ASSERT_OK(Put(1, Key(0), "val"));
-  ASSERT_OK(Put(1, Key(numkeys), "val"));
-  ASSERT_OK(Flush(1));
-  dbfull()->TEST_WaitForCompact();
+  elapsed = env_->NowMicros() - start;
+  Close();
+  ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
+  ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
+  fprintf(stderr, "write rate ratio = %.2lf, expected 0.5\n", ratio);
+  ASSERT_LT(ratio, 0.6);
+}
 
-  if (NumTableFilesAtLevel(0, 1) == 0) {
-    // No Level 0 file. Create one.
-    ASSERT_OK(Put(1, Key(0), "val"));
-    ASSERT_OK(Put(1, Key(numkeys), "val"));
-    ASSERT_OK(Flush(1));
-    dbfull()->TEST_WaitForCompact();
-  }
+TEST_F(DBTest, TableOptionsSanitizeTest) {
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  DestroyAndReopen(options);
+  ASSERT_EQ(db_->GetOptions().allow_mmap_reads, false);
 
-  for (int i = 1; i < numkeys; i += 2) {
-    ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND");
-  }
+  options.table_factory.reset(new PlainTableFactory());
+  options.prefix_extractor.reset(NewNoopTransform());
+  Destroy(options);
+  ASSERT_TRUE(!TryReopen(options).IsNotSupported());
 
-  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
-  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
-  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
+  // Test for check of prefix_extractor when hash index is used for
+  // block-based table
+  BlockBasedTableOptions to;
+  to.index_type = BlockBasedTableOptions::kHashSearch;
+  options = CurrentOptions();
+  options.create_if_missing = true;
+  options.table_factory.reset(NewBlockBasedTableFactory(to));
+  ASSERT_TRUE(TryReopen(options).IsInvalidArgument());
+  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
+  ASSERT_OK(TryReopen(options));
+}
 
-  // Now we have three sorted run, L0, L5 and L6 with most files in L6 have
-  // no bloom filter. Most keys be checked bloom filters twice.
-  ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2);
-  ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2);
+TEST_F(DBTest, MmapAndBufferOptions) {
+  Options options = CurrentOptions();
 
-  for (int i = 0; i < numkeys; i += 2) {
-    ASSERT_EQ(Get(1, Key(i)), "val");
-  }
+  // If allow_mmap_reads is on allow_os_buffer must also be on
+  // On Windows you can have either memory mapped file or a file
+  // with unbuffered access.
+#ifndef OS_WIN
+  options.allow_os_buffer = false;
+#endif
+  options.allow_mmap_reads = true;
+  ASSERT_NOK(TryReopen(options));
 
-  // Part 2 (read path): rewrite last level with blooms, then verify they get
-  // cached only if !optimize_filters_for_hits
-  options.disable_auto_compactions = true;
-  options.num_levels = 9;
-  options.optimize_filters_for_hits = false;
-  options.statistics = CreateDBStatistics();
-  bbto.block_cache.reset();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
-  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
-  MoveFilesToLevel(7 /* level */, 1 /* column family index */);
-
-  std::string value = Get(1, Key(0));
-  uint64_t prev_cache_filter_hits =
-      TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
-  value = Get(1, Key(0));
-  ASSERT_EQ(prev_cache_filter_hits + 1,
-            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-
-  // Now that we know the filter blocks exist in the last level files, see if
-  // filter caching is skipped for this optimization
-  options.optimize_filters_for_hits = true;
-  options.statistics = CreateDBStatistics();
-  bbto.block_cache.reset();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
-  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
-
-  value = Get(1, Key(0));
-  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-  ASSERT_EQ(2 /* index and data block */,
-            TestGetTickerCount(options, BLOCK_CACHE_ADD));
-
-  // Check filter block ignored for files preloaded during DB::Open()
-  options.max_open_files = -1;
-  options.statistics = CreateDBStatistics();
-  bbto.block_cache.reset();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
-  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
-
-  uint64_t prev_cache_filter_misses =
-      TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
-  prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
-  Get(1, Key(0));
-  ASSERT_EQ(prev_cache_filter_misses,
-            TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(prev_cache_filter_hits,
-            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-
-  // Check filter block ignored for file trivially-moved to bottom level
-  bbto.block_cache.reset();
-  options.max_open_files = 100;  // setting > -1 makes it not preload all files
-  options.statistics = CreateDBStatistics();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
-  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
-
-  ASSERT_OK(Put(1, Key(numkeys + 1), "val"));
-  ASSERT_OK(Flush(1));
+  // All other combinations are acceptable
+  options.allow_os_buffer = true;
+  ASSERT_OK(TryReopen(options));
 
-  int32_t trivial_move = 0;
-  int32_t non_trivial_move = 0;
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "DBImpl::BackgroundCompaction:TrivialMove",
-      [&](void* arg) { trivial_move++; });
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "DBImpl::BackgroundCompaction:NonTrivial",
-      [&](void* arg) { non_trivial_move++; });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  options.allow_os_buffer = false;
+  options.allow_mmap_reads = false;
+  ASSERT_OK(TryReopen(options));
 
-  CompactRangeOptions compact_options;
-  compact_options.bottommost_level_compaction =
-      BottommostLevelCompaction::kSkip;
-  compact_options.change_level = true;
-  compact_options.target_level = 7;
-  db_->CompactRange(compact_options, handles_[1], nullptr, nullptr);
-
-  ASSERT_EQ(trivial_move, 1);
-  ASSERT_EQ(non_trivial_move, 0);
-
-  prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
-  prev_cache_filter_misses =
-      TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
-  value = Get(1, Key(numkeys + 1));
-  ASSERT_EQ(prev_cache_filter_hits,
-            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-  ASSERT_EQ(prev_cache_filter_misses,
-            TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-
-  // Check filter block not cached for iterator
-  bbto.block_cache.reset();
-  options.statistics = CreateDBStatistics();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
-  ReopenWithColumnFamilies({"default", "mypikachu"}, options);
-
-  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions(), handles_[1]));
-  iter->SeekToFirst();
-  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
-  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
-  ASSERT_EQ(2 /* index and data block */,
-            TestGetTickerCount(options, BLOCK_CACHE_ADD));
+  options.allow_os_buffer = true;
+  ASSERT_OK(TryReopen(options));
 }
-#endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, L0L1L2AndUpHitCounter) {
+TEST_F(DBTest, ConcurrentMemtableNotSupported) {
   Options options = CurrentOptions();
-  options.write_buffer_size = 32 * 1024;
-  options.target_file_size_base = 32 * 1024;
-  options.level0_file_num_compaction_trigger = 2;
-  options.level0_slowdown_writes_trigger = 2;
-  options.level0_stop_writes_trigger = 4;
-  options.max_bytes_for_level_base = 64 * 1024;
-  options.max_write_buffer_number = 2;
-  options.max_background_compactions = 8;
-  options.max_background_flushes = 8;
-  options.statistics = rocksdb::CreateDBStatistics();
-  CreateAndReopenWithCF({"mypikachu"}, options);
+  options.allow_concurrent_memtable_write = true;
+  options.soft_pending_compaction_bytes_limit = 0;
+  options.hard_pending_compaction_bytes_limit = 100;
+  options.create_if_missing = true;
 
-  int numkeys = 20000;
-  for (int i = 0; i < numkeys; i++) {
-    ASSERT_OK(Put(1, Key(i), "val"));
-  }
-  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
-  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
-  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
+  DestroyDB(dbname_, options);
+  options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4));
+  ASSERT_NOK(TryReopen(options));
 
-  ASSERT_OK(Flush(1));
-  dbfull()->TEST_WaitForCompact();
+  options.memtable_factory.reset(new SkipListFactory);
+  ASSERT_OK(TryReopen(options));
 
-  for (int i = 0; i < numkeys; i++) {
-    ASSERT_EQ(Get(1, Key(i)), "val");
-  }
+  ColumnFamilyOptions cf_options(options);
+  cf_options.memtable_factory.reset(
+      NewHashLinkListRepFactory(4, 0, 3, true, 4));
+  ColumnFamilyHandle* handle;
+  ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle));
+}
 
-  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L0), 100);
-  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L1), 100);
-  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L2_AND_UP), 100);
+#endif  // ROCKSDB_LITE
 
-  ASSERT_EQ(numkeys, TestGetTickerCount(options, GET_HIT_L0) +
-                         TestGetTickerCount(options, GET_HIT_L1) +
-                         TestGetTickerCount(options, GET_HIT_L2_AND_UP));
-}
+TEST_F(DBTest, SanitizeNumThreads) {
+  for (int attempt = 0; attempt < 2; attempt++) {
+    const size_t kTotalTasks = 8;
+    test::SleepingBackgroundTask sleeping_tasks[kTotalTasks];
 
-TEST_F(DBTest, EncodeDecompressedBlockSizeTest) {
-  // iter 0 -- zlib
-  // iter 1 -- bzip2
-  // iter 2 -- lz4
-  // iter 3 -- lz4HC
-  CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
-                                    kLZ4Compression,  kLZ4HCCompression};
-  for (int iter = 0; iter < 4; ++iter) {
-    if (!CompressionTypeSupported(compressions[iter])) {
-      continue;
+    Options options = CurrentOptions();
+    if (attempt == 0) {
+      options.max_background_compactions = 3;
+      options.max_background_flushes = 2;
     }
-    // first_table_version 1 -- generate with table_version == 1, read with
-    // table_version == 2
-    // first_table_version 2 -- generate with table_version == 2, read with
-    // table_version == 1
-    for (int first_table_version = 1; first_table_version <= 2;
-         ++first_table_version) {
-      BlockBasedTableOptions table_options;
-      table_options.format_version = first_table_version;
-      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
-      Options options = CurrentOptions();
-      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-      options.create_if_missing = true;
-      options.compression = compressions[iter];
-      DestroyAndReopen(options);
+    options.create_if_missing = true;
+    DestroyAndReopen(options);
 
-      int kNumKeysWritten = 100000;
+    for (size_t i = 0; i < kTotalTasks; i++) {
+      // Insert 5 tasks to low priority queue and 5 tasks to high priority queue
+      env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
+                     &sleeping_tasks[i],
+                     (i < 4) ? Env::Priority::LOW : Env::Priority::HIGH);
+    }
 
-      Random rnd(301);
-      for (int i = 0; i < kNumKeysWritten; ++i) {
-        // compressible string
-        ASSERT_OK(Put(Key(i), RandomString(&rnd, 128) + std::string(128, 'a')));
-      }
+    // Wait 100 milliseconds for they are scheduled.
+    env_->SleepForMicroseconds(100000);
 
-      table_options.format_version = first_table_version == 1 ? 2 : 1;
-      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-      Reopen(options);
-      for (int i = 0; i < kNumKeysWritten; ++i) {
-        auto r = Get(Key(i));
-        ASSERT_EQ(r.substr(128), std::string(128, 'a'));
-      }
+    // pool size 3, total task 4. Queue size should be 1.
+    ASSERT_EQ(1U, options.env->GetThreadPoolQueueLen(Env::Priority::LOW));
+    // pool size 2, total task 4. Queue size should be 2.
+    ASSERT_EQ(2U, options.env->GetThreadPoolQueueLen(Env::Priority::HIGH));
+
+    for (size_t i = 0; i < kTotalTasks; i++) {
+      sleeping_tasks[i].WakeUp();
+      sleeping_tasks[i].WaitUntilDone();
     }
+
+    ASSERT_OK(Put("abc", "def"));
+    ASSERT_EQ("def", Get("abc"));
+    Flush();
+    ASSERT_EQ("def", Get("abc"));
   }
 }
 
-TEST_F(DBTest, MutexWaitStatsDisabledByDefault) {
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  options.statistics = rocksdb::CreateDBStatistics();
-  CreateAndReopenWithCF({"pikachu"}, options);
-  const uint64_t kMutexWaitDelay = 100;
-  ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT,
-                                       kMutexWaitDelay);
-  ASSERT_OK(Put("hello", "rocksdb"));
-  ASSERT_EQ(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), 0);
-  ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0);
-}
+TEST_F(DBTest, WriteSingleThreadEntry) {
+  std::vector<std::thread> threads;
+  dbfull()->TEST_LockMutex();
+  auto w = dbfull()->TEST_BeginWrite();
+  threads.emplace_back([&] { Put("a", "b"); });
+  env_->SleepForMicroseconds(10000);
+  threads.emplace_back([&] { Flush(); });
+  env_->SleepForMicroseconds(10000);
+  dbfull()->TEST_UnlockMutex();
+  dbfull()->TEST_LockMutex();
+  dbfull()->TEST_EndWrite(w);
+  dbfull()->TEST_UnlockMutex();
 
-TEST_F(DBTest, MutexWaitStats) {
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  options.statistics = rocksdb::CreateDBStatistics();
-  options.statistics->stats_level_ = StatsLevel::kAll;
-  CreateAndReopenWithCF({"pikachu"}, options);
-  const uint64_t kMutexWaitDelay = 100;
-  ThreadStatusUtil::TEST_SetStateDelay(
-      ThreadStatus::STATE_MUTEX_WAIT, kMutexWaitDelay);
-  ASSERT_OK(Put("hello", "rocksdb"));
-  ASSERT_GE(TestGetTickerCount(
-            options, DB_MUTEX_WAIT_MICROS), kMutexWaitDelay);
-  ThreadStatusUtil::TEST_SetStateDelay(
-      ThreadStatus::STATE_MUTEX_WAIT, 0);
+  for (auto& t : threads) {
+    t.join();
+  }
 }
 
-#ifndef ROCKSDB_LITE
-// This reproduces a bug where we don't delete a file because when it was
-// supposed to be deleted, it was blocked by pending_outputs
-// Consider:
-// 1. current file_number is 13
-// 2. compaction (1) starts, blocks deletion of all files starting with 13
-// (pending outputs)
-// 3. file 13 is created by compaction (2)
-// 4. file 13 is consumed by compaction (3) and file 15 was created. Since file
-// 13 has no references, it is put into VersionSet::obsolete_files_
-// 5. FindObsoleteFiles() gets file 13 from VersionSet::obsolete_files_. File 13
-// is deleted from obsolete_files_ set.
-// 6. PurgeObsoleteFiles() tries to delete file 13, but this file is blocked by
-// pending outputs since compaction (1) is still running. It is not deleted and
-// it is not present in obsolete_files_ anymore. Therefore, we never delete it.
-TEST_F(DBTest, DeleteObsoleteFilesPendingOutputs) {
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.write_buffer_size = 2 * 1024 * 1024;     // 2 MB
-  options.max_bytes_for_level_base = 1024 * 1024;  // 1 MB
-  options.level0_file_num_compaction_trigger =
-      2;  // trigger compaction when we have 2 files
-  options.max_background_flushes = 2;
-  options.max_background_compactions = 2;
-
-  OnFileDeletionListener* listener = new OnFileDeletionListener();
-  options.listeners.emplace_back(listener);
-
-  Reopen(options);
-
-  Random rnd(301);
-  // Create two 1MB sst files
-  for (int i = 0; i < 2; ++i) {
-    // Create 1MB sst file
-    for (int j = 0; j < 100; ++j) {
-      ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024)));
-    }
-    ASSERT_OK(Flush());
-  }
-  // this should execute both L0->L1 and L1->(move)->L2 compactions
-  dbfull()->TEST_WaitForCompact();
-  ASSERT_EQ("0,0,1", FilesPerLevel(0));
+TEST_F(DBTest, DisableDataSyncTest) {
+  env_->sync_counter_.store(0);
+  // iter 0 -- no sync
+  // iter 1 -- sync
+  for (int iter = 0; iter < 2; ++iter) {
+    Options options = CurrentOptions();
+    options.disableDataSync = iter == 0;
+    options.create_if_missing = true;
+    options.num_levels = 10;
+    options.env = env_;
+    Reopen(options);
+    CreateAndReopenWithCF({"pikachu"}, options);
 
-  test::SleepingBackgroundTask blocking_thread;
-  port::Mutex mutex_;
-  bool already_blocked(false);
+    MakeTables(10, "a", "z");
+    Compact("a", "z");
 
-  // block the flush
-  std::function<void()> block_first_time = [&]() {
-    bool blocking = false;
-    {
-      MutexLock l(&mutex_);
-      if (!already_blocked) {
-        blocking = true;
-        already_blocked = true;
-      }
-    }
-    if (blocking) {
-      blocking_thread.DoSleep();
+    if (iter == 0) {
+      ASSERT_EQ(env_->sync_counter_.load(), 0);
+    } else {
+      ASSERT_GT(env_->sync_counter_.load(), 0);
     }
-  };
-  env_->table_write_callback_ = &block_first_time;
-  // Create 1MB sst file
-  for (int j = 0; j < 256; ++j) {
-    ASSERT_OK(Put(Key(j), RandomString(&rnd, 10 * 1024)));
+    Destroy(options);
   }
-  // this should trigger a flush, which is blocked with block_first_time
-  // pending_file is protecting all the files created after
+}
 
-  ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr));
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, DynamicMemtableOptions) {
+  const uint64_t k64KB = 1 << 16;
+  const uint64_t k128KB = 1 << 17;
+  const uint64_t k5KB = 5 * 1024;
+  const int kNumPutsBeforeWaitForFlush = 64;
+  Options options;
+  options.env = env_;
+  options.create_if_missing = true;
+  options.compression = kNoCompression;
+  options.max_background_compactions = 1;
+  options.write_buffer_size = k64KB;
+  options.arena_block_size = 16 * 1024;
+  options.max_write_buffer_number = 2;
+  // Don't trigger compact/slowdown/stop
+  options.level0_file_num_compaction_trigger = 1024;
+  options.level0_slowdown_writes_trigger = 1024;
+  options.level0_stop_writes_trigger = 1024;
+  DestroyAndReopen(options);
 
-  ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
-  std::vector<LiveFileMetaData> metadata;
-  db_->GetLiveFilesMetaData(&metadata);
-  ASSERT_EQ(metadata.size(), 1U);
-  auto file_on_L2 = metadata[0].name;
-  listener->SetExpectedFileName(dbname_ + file_on_L2);
+  auto gen_l0_kb = [this, kNumPutsBeforeWaitForFlush](int size) {
+    Random rnd(301);
+    for (int i = 0; i < size; i++) {
+      ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
 
-  ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr,
-                                        true /* disallow trivial move */));
-  ASSERT_EQ("0,0,0,0,1", FilesPerLevel(0));
+      // The following condition prevents a race condition between flush jobs
+      // acquiring work and this thread filling up multiple memtables. Without
+      // this, the flush might produce less files than expected because
+      // multiple memtables are flushed into a single L0 file. This race
+      // condition affects assertion (A).
+      if (i % kNumPutsBeforeWaitForFlush == kNumPutsBeforeWaitForFlush - 1) {
+        dbfull()->TEST_WaitForFlushMemTable();
+      }
+    }
+    dbfull()->TEST_WaitForFlushMemTable();
+  };
 
-  // finish the flush!
-  blocking_thread.WakeUp();
-  blocking_thread.WaitUntilDone();
-  dbfull()->TEST_WaitForFlushMemTable();
-  ASSERT_EQ("1,0,0,0,1", FilesPerLevel(0));
+  // Test write_buffer_size
+  gen_l0_kb(64);
+  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
+  ASSERT_LT(SizeAtLevel(0), k64KB + k5KB);
+  ASSERT_GT(SizeAtLevel(0), k64KB - k5KB * 2);
 
-  metadata.clear();
-  db_->GetLiveFilesMetaData(&metadata);
-  ASSERT_EQ(metadata.size(), 2U);
+  // Clean up L0
+  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
 
-  // This file should have been deleted during last compaction
-  ASSERT_EQ(Status::NotFound(), env_->FileExists(dbname_ + file_on_L2));
-  listener->VerifyMatchedCount(1);
-}
-#endif  // ROCKSDB_LITE
+  // Increase buffer size
+  ASSERT_OK(dbfull()->SetOptions({
+      {"write_buffer_size", "131072"},
+  }));
 
-TEST_F(DBTest, CloseSpeedup) {
-  Options options = CurrentOptions();
-  options.compaction_style = kCompactionStyleLevel;
-  options.write_buffer_size = 110 << 10;  // 110KB
-  options.arena_block_size = 4 << 10;
-  options.level0_file_num_compaction_trigger = 2;
-  options.num_levels = 4;
-  options.max_bytes_for_level_base = 400 * 1024;
-  options.max_write_buffer_number = 16;
+  // The existing memtable is still 64KB in size, after it becomes immutable,
+  // the next memtable will be 128KB in size. Write 256KB total, we should
+  // have a 64KB L0 file, a 128KB L0 file, and a memtable with 64KB data
+  gen_l0_kb(256);
+  ASSERT_EQ(NumTableFilesAtLevel(0), 2);  // (A)
+  ASSERT_LT(SizeAtLevel(0), k128KB + k64KB + 2 * k5KB);
+  ASSERT_GT(SizeAtLevel(0), k128KB + k64KB - 4 * k5KB);
 
-  // Block background threads
+  // Test max_write_buffer_number
+  // Block compaction thread, which will also block the flushes because
+  // max_background_flushes == 0, so flushes are getting executed by the
+  // compaction thread
   env_->SetBackgroundThreads(1, Env::LOW);
-  env_->SetBackgroundThreads(1, Env::HIGH);
   test::SleepingBackgroundTask sleeping_task_low;
   env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                  Env::Priority::LOW);
-  test::SleepingBackgroundTask sleeping_task_high;
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
-                 &sleeping_task_high, Env::Priority::HIGH);
-
-  std::vector<std::string> filenames;
-  env_->GetChildren(dbname_, &filenames);
-  // Delete archival files.
-  for (size_t i = 0; i < filenames.size(); ++i) {
-    env_->DeleteFile(dbname_ + "/" + filenames[i]);
-  }
-  env_->DeleteDir(dbname_);
+  // Start from scratch and disable compaction/flush. Flush can only happen
+  // during compaction but trigger is pretty high
+  options.max_background_flushes = 0;
+  options.disable_auto_compactions = true;
   DestroyAndReopen(options);
 
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
-  env_->SetBackgroundThreads(1, Env::LOW);
-  env_->SetBackgroundThreads(1, Env::HIGH);
+  // Put until writes are stopped, bounded by 256 puts. We should see stop at
+  // ~128KB
+  int count = 0;
   Random rnd(301);
-  int key_idx = 0;
 
-  // First three 110KB files are not going to level 2
-  // After that, (100K, 200K)
-  for (int num = 0; num < 5; num++) {
-    GenerateNewFile(&rnd, &key_idx, true);
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::DelayWrite:Wait",
+      [&](void* arg) { sleeping_task_low.WakeUp(); });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  while (!sleeping_task_low.WokenUp() && count < 256) {
+    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
+    count++;
   }
+  ASSERT_GT(static_cast<double>(count), 128 * 0.8);
+  ASSERT_LT(static_cast<double>(count), 128 * 1.2);
 
-  ASSERT_EQ(0, GetSstFileCount(dbname_));
+  sleeping_task_low.WaitUntilDone();
 
-  Close();
-  ASSERT_EQ(0, GetSstFileCount(dbname_));
+  // Increase
+  ASSERT_OK(dbfull()->SetOptions({
+      {"max_write_buffer_number", "8"},
+  }));
+  // Clean up memtable and L0
+  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
 
-  // Unblock background threads
-  sleeping_task_high.WakeUp();
-  sleeping_task_high.WaitUntilDone();
-  sleeping_task_low.WakeUp();
+  sleeping_task_low.Reset();
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
+                 Env::Priority::LOW);
+  count = 0;
+  while (!sleeping_task_low.WokenUp() && count < 1024) {
+    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
+    count++;
+  }
+// Windows fails this test. Will tune in the future and figure out
+// approp number
+#ifndef OS_WIN
+  ASSERT_GT(static_cast<double>(count), 512 * 0.8);
+  ASSERT_LT(static_cast<double>(count), 512 * 1.2);
+#endif
   sleeping_task_low.WaitUntilDone();
 
-  Destroy(options);
-}
+  // Decrease
+  ASSERT_OK(dbfull()->SetOptions({
+      {"max_write_buffer_number", "4"},
+  }));
+  // Clean up memtable and L0
+  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
 
-class DelayedMergeOperator : public MergeOperator {
- private:
-  DBTest* db_test_;
+  sleeping_task_low.Reset();
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
+                 Env::Priority::LOW);
 
- public:
-  explicit DelayedMergeOperator(DBTest* d) : db_test_(d) {}
-  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
-                         const std::deque<std::string>& operand_list,
-                         std::string* new_value,
-                         Logger* logger) const override {
-    db_test_->env_->addon_time_.fetch_add(1000);
-    *new_value = "";
-    return true;
+  count = 0;
+  while (!sleeping_task_low.WokenUp() && count < 1024) {
+    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
+    count++;
   }
+// Windows fails this test. Will tune in the future and figure out
+// approp number
+#ifndef OS_WIN
+  ASSERT_GT(static_cast<double>(count), 256 * 0.8);
+  ASSERT_LT(static_cast<double>(count), 266 * 1.2);
+#endif
+  sleeping_task_low.WaitUntilDone();
 
-  virtual const char* Name() const override { return "DelayedMergeOperator"; }
-};
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+}
+#endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, MergeTestTime) {
-  std::string one, two, three;
-  PutFixed64(&one, 1);
-  PutFixed64(&two, 2);
-  PutFixed64(&three, 3);
+#if ROCKSDB_USING_THREAD_STATUS
+namespace {
+void VerifyOperationCount(Env* env, ThreadStatus::OperationType op_type,
+                          int expected_count) {
+  int op_count = 0;
+  std::vector<ThreadStatus> thread_list;
+  ASSERT_OK(env->GetThreadList(&thread_list));
+  for (auto thread : thread_list) {
+    if (thread.operation_type == op_type) {
+      op_count++;
+    }
+  }
+  ASSERT_EQ(op_count, expected_count);
+}
+}  // namespace
 
-  // Enable time profiling
-  SetPerfLevel(kEnableTime);
-  this->env_->addon_time_.store(0);
-  this->env_->time_elapse_only_sleep_ = true;
-  this->env_->no_sleep_ = true;
-  Options options = CurrentOptions();
-  options.statistics = rocksdb::CreateDBStatistics();
-  options.merge_operator.reset(new DelayedMergeOperator(this));
-  DestroyAndReopen(options);
+TEST_F(DBTest, GetThreadStatus) {
+  Options options;
+  options.env = env_;
+  options.enable_thread_tracking = true;
+  TryReopen(options);
 
-  ASSERT_EQ(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
-  db_->Put(WriteOptions(), "foo", one);
-  ASSERT_OK(Flush());
-  ASSERT_OK(db_->Merge(WriteOptions(), "foo", two));
-  ASSERT_OK(Flush());
-  ASSERT_OK(db_->Merge(WriteOptions(), "foo", three));
-  ASSERT_OK(Flush());
+  std::vector<ThreadStatus> thread_list;
+  Status s = env_->GetThreadList(&thread_list);
+
+  for (int i = 0; i < 2; ++i) {
+    // repeat the test with differet number of high / low priority threads
+    const int kTestCount = 3;
+    const unsigned int kHighPriCounts[kTestCount] = {3, 2, 5};
+    const unsigned int kLowPriCounts[kTestCount] = {10, 15, 3};
+    for (int test = 0; test < kTestCount; ++test) {
+      // Change the number of threads in high / low priority pool.
+      env_->SetBackgroundThreads(kHighPriCounts[test], Env::HIGH);
+      env_->SetBackgroundThreads(kLowPriCounts[test], Env::LOW);
+      // Wait to ensure the all threads has been registered
+      env_->SleepForMicroseconds(100000);
+      s = env_->GetThreadList(&thread_list);
+      ASSERT_OK(s);
+      unsigned int thread_type_counts[ThreadStatus::NUM_THREAD_TYPES];
+      memset(thread_type_counts, 0, sizeof(thread_type_counts));
+      for (auto thread : thread_list) {
+        ASSERT_LT(thread.thread_type, ThreadStatus::NUM_THREAD_TYPES);
+        thread_type_counts[thread.thread_type]++;
+      }
+      // Verify the total number of threades
+      ASSERT_EQ(thread_type_counts[ThreadStatus::HIGH_PRIORITY] +
+                    thread_type_counts[ThreadStatus::LOW_PRIORITY],
+                kHighPriCounts[test] + kLowPriCounts[test]);
+      // Verify the number of high-priority threads
+      ASSERT_EQ(thread_type_counts[ThreadStatus::HIGH_PRIORITY],
+                kHighPriCounts[test]);
+      // Verify the number of low-priority threads
+      ASSERT_EQ(thread_type_counts[ThreadStatus::LOW_PRIORITY],
+                kLowPriCounts[test]);
+    }
+    if (i == 0) {
+      // repeat the test with multiple column families
+      CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
+      env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
+                                                                     true);
+    }
+  }
+  db_->DropColumnFamily(handles_[2]);
+  delete handles_[2];
+  handles_.erase(handles_.begin() + 2);
+  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
+                                                                 true);
+  Close();
+  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
+                                                                 true);
+}
+
+TEST_F(DBTest, DisableThreadStatus) {
+  Options options;
+  options.env = env_;
+  options.enable_thread_tracking = false;
+  TryReopen(options);
+  CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
+  // Verify non of the column family info exists
+  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
+                                                                 false);
+}
 
-  ReadOptions opt;
-  opt.verify_checksums = true;
-  opt.snapshot = nullptr;
-  std::string result;
-  db_->Get(opt, "foo", &result);
+TEST_F(DBTest, ThreadStatusFlush) {
+  Options options;
+  options.env = env_;
+  options.write_buffer_size = 100000;  // Small write buffer
+  options.enable_thread_tracking = true;
+  options = CurrentOptions(options);
 
-  ASSERT_EQ(1000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
+  rocksdb::SyncPoint::GetInstance()->LoadDependency({
+      {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"},
+      {"DBTest::ThreadStatusFlush:2",
+       "FlushJob::LogAndNotifyTableFileCreation()"},
+  });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
 
-  ReadOptions read_options;
-  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
-  int count = 0;
-  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-    ASSERT_OK(iter->status());
-    ++count;
-  }
+  CreateAndReopenWithCF({"pikachu"}, options);
+  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
 
-  ASSERT_EQ(1, count);
-  ASSERT_EQ(2000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
-#if ROCKSDB_USING_THREAD_STATUS
-  ASSERT_GT(TestGetTickerCount(options, FLUSH_WRITE_BYTES), 0);
-#endif  // ROCKSDB_USING_THREAD_STATUS
-  this->env_->time_elapse_only_sleep_ = false;
-}
+  ASSERT_OK(Put(1, "foo", "v1"));
+  ASSERT_EQ("v1", Get(1, "foo"));
+  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
 
-#ifndef ROCKSDB_LITE
-TEST_P(DBTestWithParam, MergeCompactionTimeTest) {
-  SetPerfLevel(kEnableTime);
-  Options options = CurrentOptions();
-  options.compaction_filter_factory = std::make_shared<KeepFilterFactory>();
-  options.statistics = rocksdb::CreateDBStatistics();
-  options.merge_operator.reset(new DelayedMergeOperator(this));
-  options.compaction_style = kCompactionStyleUniversal;
-  options.max_subcompactions = max_subcompactions_;
-  DestroyAndReopen(options);
+  uint64_t num_running_flushes = 0;
+  db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes);
+  ASSERT_EQ(num_running_flushes, 0);
 
-  for (int i = 0; i < 1000; i++) {
-    ASSERT_OK(db_->Merge(WriteOptions(), "foo", "TEST"));
-    ASSERT_OK(Flush());
-  }
-  dbfull()->TEST_WaitForFlushMemTable();
-  dbfull()->TEST_WaitForCompact();
+  Put(1, "k1", std::string(100000, 'x'));  // Fill memtable
+  Put(1, "k2", std::string(100000, 'y'));  // Trigger flush
 
-  ASSERT_NE(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
+  // The first sync point is to make sure there's one flush job
+  // running when we perform VerifyOperationCount().
+  TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1");
+  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1);
+  db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes);
+  ASSERT_EQ(num_running_flushes, 1);
+  // This second sync point is to ensure the flush job will not
+  // be completed until we already perform VerifyOperationCount().
+  TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2");
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
 }
 
-TEST_P(DBTestWithParam, FilterCompactionTimeTest) {
-  Options options = CurrentOptions();
-  options.compaction_filter_factory =
-      std::make_shared<DelayFilterFactory>(this);
-  options.disable_auto_compactions = true;
+TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) {
+  const int kTestKeySize = 16;
+  const int kTestValueSize = 984;
+  const int kEntrySize = kTestKeySize + kTestValueSize;
+  const int kEntriesPerBuffer = 100;
+  Options options;
   options.create_if_missing = true;
-  options.statistics = rocksdb::CreateDBStatistics();
+  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
+  options.compaction_style = kCompactionStyleLevel;
+  options.target_file_size_base = options.write_buffer_size;
+  options.max_bytes_for_level_base = options.target_file_size_base * 2;
+  options.max_bytes_for_level_multiplier = 2;
+  options.compression = kNoCompression;
+  options = CurrentOptions(options);
+  options.env = env_;
+  options.enable_thread_tracking = true;
+  const int kNumL0Files = 4;
+  options.level0_file_num_compaction_trigger = kNumL0Files;
   options.max_subcompactions = max_subcompactions_;
-  DestroyAndReopen(options);
 
-  // put some data
-  for (int table = 0; table < 4; ++table) {
-    for (int i = 0; i < 10 + table; ++i) {
-      Put(ToString(table * 100 + i), "val");
+  rocksdb::SyncPoint::GetInstance()->LoadDependency({
+      {"DBTest::ThreadStatusSingleCompaction:0", "DBImpl::BGWorkCompaction"},
+      {"CompactionJob::Run():Start", "DBTest::ThreadStatusSingleCompaction:1"},
+      {"DBTest::ThreadStatusSingleCompaction:2", "CompactionJob::Run():End"},
+  });
+  for (int tests = 0; tests < 2; ++tests) {
+    DestroyAndReopen(options);
+    rocksdb::SyncPoint::GetInstance()->ClearTrace();
+    rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+    Random rnd(301);
+    // The Put Phase.
+    for (int file = 0; file < kNumL0Files; ++file) {
+      for (int key = 0; key < kEntriesPerBuffer; ++key) {
+        ASSERT_OK(Put(ToString(key + file * kEntriesPerBuffer),
+                      RandomString(&rnd, kTestValueSize)));
+      }
+      Flush();
     }
-    Flush();
-  }
+    // This makes sure a compaction won't be scheduled until
+    // we have done with the above Put Phase.
+    uint64_t num_running_compactions = 0;
+    db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
+                        &num_running_compactions);
+    ASSERT_EQ(num_running_compactions, 0);
+    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0");
+    ASSERT_GE(NumTableFilesAtLevel(0),
+              options.level0_file_num_compaction_trigger);
 
-  CompactRangeOptions cro;
-  cro.exclusive_manual_compaction = exclusive_manual_compaction_;
-  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
-  ASSERT_EQ(0U, CountLiveFiles());
+    // This makes sure at least one compaction is running.
+    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:1");
 
-  Reopen(options);
+    if (options.enable_thread_tracking) {
+      // expecting one single L0 to L1 compaction
+      VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 1);
+    } else {
+      // If thread tracking is not enabled, compaction count should be 0.
+      VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0);
+    }
+    db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
+                        &num_running_compactions);
+    ASSERT_EQ(num_running_compactions, 1);
+    // TODO(yhchiang): adding assert to verify each compaction stage.
+    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:2");
 
-  Iterator* itr = db_->NewIterator(ReadOptions());
-  itr->SeekToFirst();
-  ASSERT_NE(TestGetTickerCount(options, FILTER_OPERATION_TOTAL_TIME), 0);
-  delete itr;
+    // repeat the test with disabling thread tracking.
+    options.enable_thread_tracking = false;
+    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  }
 }
-#endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, TestLogCleanup) {
+TEST_P(DBTestWithParam, PreShutdownManualCompaction) {
   Options options = CurrentOptions();
-  options.write_buffer_size = 64 * 1024;  // very small
-  // only two memtables allowed ==> only two log files
-  options.max_write_buffer_number = 2;
-  Reopen(options);
+  options.max_background_flushes = 0;
+  options.max_subcompactions = max_subcompactions_;
+  CreateAndReopenWithCF({"pikachu"}, options);
 
-  for (int i = 0; i < 100000; ++i) {
-    Put(Key(i), "val");
-    // only 2 memtables will be alive, so logs_to_free needs to always be below
-    // 2
-    ASSERT_LT(dbfull()->TEST_LogsToFreeSize(), static_cast<size_t>(3));
-  }
-}
+  // iter - 0 with 7 levels
+  // iter - 1 with 3 levels
+  for (int iter = 0; iter < 2; ++iter) {
+    MakeTables(3, "p", "q", 1);
+    ASSERT_EQ("1,1,1", FilesPerLevel(1));
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, EmptyCompactedDB) {
-  Options options = CurrentOptions();
-  options.max_open_files = -1;
-  Close();
-  ASSERT_OK(ReadOnlyReopen(options));
-  Status s = Put("new", "value");
-  ASSERT_TRUE(s.IsNotSupported());
-  Close();
-}
-#endif  // ROCKSDB_LITE
+    // Compaction range falls before files
+    Compact(1, "", "c");
+    ASSERT_EQ("1,1,1", FilesPerLevel(1));
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, SuggestCompactRangeTest) {
-  class CompactionFilterFactoryGetContext : public CompactionFilterFactory {
-   public:
-    virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
-        const CompactionFilter::Context& context) override {
-      saved_context = context;
-      std::unique_ptr<CompactionFilter> empty_filter;
-      return empty_filter;
-    }
-    const char* Name() const override {
-      return "CompactionFilterFactoryGetContext";
-    }
-    static bool IsManual(CompactionFilterFactory* compaction_filter_factory) {
-      return reinterpret_cast<CompactionFilterFactoryGetContext*>(
-                 compaction_filter_factory)->saved_context.is_manual_compaction;
-    }
-    CompactionFilter::Context saved_context;
-  };
+    // Compaction range falls after files
+    Compact(1, "r", "z");
+    ASSERT_EQ("1,1,1", FilesPerLevel(1));
 
-  Options options = CurrentOptions();
-  options.memtable_factory.reset(
-      new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile));
-  options.compaction_style = kCompactionStyleLevel;
-  options.compaction_filter_factory.reset(
-      new CompactionFilterFactoryGetContext());
-  options.write_buffer_size = 200 << 10;
-  options.arena_block_size = 4 << 10;
-  options.level0_file_num_compaction_trigger = 4;
-  options.num_levels = 4;
-  options.compression = kNoCompression;
-  options.max_bytes_for_level_base = 450 << 10;
-  options.target_file_size_base = 98 << 10;
-  options.max_grandparent_overlap_factor = 1 << 20;  // inf
+    // Compaction range overlaps files
+    Compact(1, "p1", "p9");
+    ASSERT_EQ("0,0,1", FilesPerLevel(1));
 
-  Reopen(options);
+    // Populate a different range
+    MakeTables(3, "c", "e", 1);
+    ASSERT_EQ("1,1,2", FilesPerLevel(1));
 
-  Random rnd(301);
+    // Compact just the new range
+    Compact(1, "b", "f");
+    ASSERT_EQ("0,0,2", FilesPerLevel(1));
 
-  for (int num = 0; num < 3; num++) {
-    GenerateNewRandomFile(&rnd);
+    // Compact all
+    MakeTables(1, "a", "z", 1);
+    ASSERT_EQ("1,0,2", FilesPerLevel(1));
+    CancelAllBackgroundWork(db_);
+    db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr);
+    ASSERT_EQ("1,0,2", FilesPerLevel(1));
+
+    if (iter == 0) {
+      options = CurrentOptions();
+      options.max_background_flushes = 0;
+      options.num_levels = 3;
+      options.create_if_missing = true;
+      DestroyAndReopen(options);
+      CreateAndReopenWithCF({"pikachu"}, options);
+    }
   }
+}
 
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ("0,4", FilesPerLevel(0));
-  ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual(
-                   options.compaction_filter_factory.get()));
-
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ("1,4", FilesPerLevel(0));
+TEST_F(DBTest, PreShutdownFlush) {
+  Options options = CurrentOptions();
+  options.max_background_flushes = 0;
+  CreateAndReopenWithCF({"pikachu"}, options);
+  ASSERT_OK(Put(1, "key", "value"));
+  CancelAllBackgroundWork(db_);
+  Status s =
+      db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr);
+  ASSERT_TRUE(s.IsShutdownInProgress());
+}
 
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ("2,4", FilesPerLevel(0));
+TEST_P(DBTestWithParam, PreShutdownMultipleCompaction) {
+  const int kTestKeySize = 16;
+  const int kTestValueSize = 984;
+  const int kEntrySize = kTestKeySize + kTestValueSize;
+  const int kEntriesPerBuffer = 40;
+  const int kNumL0Files = 4;
 
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ("3,4", FilesPerLevel(0));
+  const int kHighPriCount = 3;
+  const int kLowPriCount = 5;
+  env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
+  env_->SetBackgroundThreads(kLowPriCount, Env::LOW);
 
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ("0,4,4", FilesPerLevel(0));
+  Options options;
+  options.create_if_missing = true;
+  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
+  options.compaction_style = kCompactionStyleLevel;
+  options.target_file_size_base = options.write_buffer_size;
+  options.max_bytes_for_level_base =
+      options.target_file_size_base * kNumL0Files;
+  options.compression = kNoCompression;
+  options = CurrentOptions(options);
+  options.env = env_;
+  options.enable_thread_tracking = true;
+  options.level0_file_num_compaction_trigger = kNumL0Files;
+  options.max_bytes_for_level_multiplier = 2;
+  options.max_background_compactions = kLowPriCount;
+  options.level0_stop_writes_trigger = 1 << 10;
+  options.level0_slowdown_writes_trigger = 1 << 10;
+  options.max_subcompactions = max_subcompactions_;
 
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ("1,4,4", FilesPerLevel(0));
+  TryReopen(options);
+  Random rnd(301);
 
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ("2,4,4", FilesPerLevel(0));
+  std::vector<ThreadStatus> thread_list;
+  // Delay both flush and compaction
+  rocksdb::SyncPoint::GetInstance()->LoadDependency(
+      {{"FlushJob::FlushJob()", "CompactionJob::Run():Start"},
+       {"CompactionJob::Run():Start",
+        "DBTest::PreShutdownMultipleCompaction:Preshutdown"},
+       {"CompactionJob::Run():Start",
+        "DBTest::PreShutdownMultipleCompaction:VerifyCompaction"},
+       {"DBTest::PreShutdownMultipleCompaction:Preshutdown",
+        "CompactionJob::Run():End"},
+       {"CompactionJob::Run():End",
+        "DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"}});
 
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ("3,4,4", FilesPerLevel(0));
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
 
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ("0,4,8", FilesPerLevel(0));
+  // Make rocksdb busy
+  int key = 0;
+  // check how many threads are doing compaction using GetThreadList
+  int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
+  for (int file = 0; file < 16 * kNumL0Files; ++file) {
+    for (int k = 0; k < kEntriesPerBuffer; ++k) {
+      ASSERT_OK(Put(ToString(key++), RandomString(&rnd, kTestValueSize)));
+    }
 
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ("1,4,8", FilesPerLevel(0));
+    Status s = env_->GetThreadList(&thread_list);
+    for (auto thread : thread_list) {
+      operation_count[thread.operation_type]++;
+    }
 
-  // compact it three times
-  for (int i = 0; i < 3; ++i) {
-    ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr));
-    dbfull()->TEST_WaitForCompact();
+    // Speed up the test
+    if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
+        operation_count[ThreadStatus::OP_COMPACTION] >
+            0.6 * options.max_background_compactions) {
+      break;
+    }
+    if (file == 15 * kNumL0Files) {
+      TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
+    }
   }
 
-  // All files are compacted
-  ASSERT_EQ(0, NumTableFilesAtLevel(0));
-  ASSERT_EQ(0, NumTableFilesAtLevel(1));
-
-  GenerateNewRandomFile(&rnd);
-  ASSERT_EQ(1, NumTableFilesAtLevel(0));
-
-  // nonoverlapping with the file on level 0
-  Slice start("a"), end("b");
-  ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
-  dbfull()->TEST_WaitForCompact();
-
-  // should not compact the level 0 file
-  ASSERT_EQ(1, NumTableFilesAtLevel(0));
-
-  start = Slice("j");
-  end = Slice("m");
-  ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
+  TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
+  ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
+  CancelAllBackgroundWork(db_);
+  TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown");
   dbfull()->TEST_WaitForCompact();
-  ASSERT_TRUE(CompactionFilterFactoryGetContext::IsManual(
-      options.compaction_filter_factory.get()));
-
-  // now it should compact the level 0 file
-  ASSERT_EQ(0, NumTableFilesAtLevel(0));
-  ASSERT_EQ(1, NumTableFilesAtLevel(1));
+  // Record the number of compactions at a time.
+  for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
+    operation_count[i] = 0;
+  }
+  Status s = env_->GetThreadList(&thread_list);
+  for (auto thread : thread_list) {
+    operation_count[thread.operation_type]++;
+  }
+  ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
 }
 
-TEST_F(DBTest, PromoteL0) {
-  Options options = CurrentOptions();
-  options.disable_auto_compactions = true;
-  options.write_buffer_size = 10 * 1024 * 1024;
-  DestroyAndReopen(options);
+TEST_P(DBTestWithParam, PreShutdownCompactionMiddle) {
+  const int kTestKeySize = 16;
+  const int kTestValueSize = 984;
+  const int kEntrySize = kTestKeySize + kTestValueSize;
+  const int kEntriesPerBuffer = 40;
+  const int kNumL0Files = 4;
 
-  // non overlapping ranges
-  std::vector<std::pair<int32_t, int32_t>> ranges = {
-      {81, 160}, {0, 80}, {161, 240}, {241, 320}};
+  const int kHighPriCount = 3;
+  const int kLowPriCount = 5;
+  env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
+  env_->SetBackgroundThreads(kLowPriCount, Env::LOW);
 
-  int32_t value_size = 10 * 1024;  // 10 KB
+  Options options;
+  options.create_if_missing = true;
+  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
+  options.compaction_style = kCompactionStyleLevel;
+  options.target_file_size_base = options.write_buffer_size;
+  options.max_bytes_for_level_base =
+      options.target_file_size_base * kNumL0Files;
+  options.compression = kNoCompression;
+  options = CurrentOptions(options);
+  options.env = env_;
+  options.enable_thread_tracking = true;
+  options.level0_file_num_compaction_trigger = kNumL0Files;
+  options.max_bytes_for_level_multiplier = 2;
+  options.max_background_compactions = kLowPriCount;
+  options.level0_stop_writes_trigger = 1 << 10;
+  options.level0_slowdown_writes_trigger = 1 << 10;
+  options.max_subcompactions = max_subcompactions_;
 
+  TryReopen(options);
   Random rnd(301);
-  std::map<int32_t, std::string> values;
-  for (const auto& range : ranges) {
-    for (int32_t j = range.first; j < range.second; j++) {
-      values[j] = RandomString(&rnd, value_size);
-      ASSERT_OK(Put(Key(j), values[j]));
-    }
-    ASSERT_OK(Flush());
-  }
 
-  int32_t level0_files = NumTableFilesAtLevel(0, 0);
-  ASSERT_EQ(level0_files, ranges.size());
-  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);  // No files in L1
+  std::vector<ThreadStatus> thread_list;
+  // Delay both flush and compaction
+  rocksdb::SyncPoint::GetInstance()->LoadDependency(
+      {{"DBTest::PreShutdownCompactionMiddle:Preshutdown",
+        "CompactionJob::Run():Inprogress"},
+       {"CompactionJob::Run():Start",
+        "DBTest::PreShutdownCompactionMiddle:VerifyCompaction"},
+       {"CompactionJob::Run():Inprogress", "CompactionJob::Run():End"},
+       {"CompactionJob::Run():End",
+        "DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"}});
 
-  // Promote L0 level to L2.
-  ASSERT_OK(experimental::PromoteL0(db_, db_->DefaultColumnFamily(), 2));
-  // We expect that all the files were trivially moved from L0 to L2
-  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
-  ASSERT_EQ(NumTableFilesAtLevel(2, 0), level0_files);
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
 
-  for (const auto& kv : values) {
-    ASSERT_EQ(Get(Key(kv.first)), kv.second);
-  }
-}
+  // Make rocksdb busy
+  int key = 0;
+  // check how many threads are doing compaction using GetThreadList
+  int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
+  for (int file = 0; file < 16 * kNumL0Files; ++file) {
+    for (int k = 0; k < kEntriesPerBuffer; ++k) {
+      ASSERT_OK(Put(ToString(key++), RandomString(&rnd, kTestValueSize)));
+    }
 
-TEST_F(DBTest, PromoteL0Failure) {
-  Options options = CurrentOptions();
-  options.disable_auto_compactions = true;
-  options.write_buffer_size = 10 * 1024 * 1024;
-  DestroyAndReopen(options);
+    Status s = env_->GetThreadList(&thread_list);
+    for (auto thread : thread_list) {
+      operation_count[thread.operation_type]++;
+    }
 
-  // Produce two L0 files with overlapping ranges.
-  ASSERT_OK(Put(Key(0), ""));
-  ASSERT_OK(Put(Key(3), ""));
-  ASSERT_OK(Flush());
-  ASSERT_OK(Put(Key(1), ""));
-  ASSERT_OK(Flush());
+    // Speed up the test
+    if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
+        operation_count[ThreadStatus::OP_COMPACTION] >
+            0.6 * options.max_background_compactions) {
+      break;
+    }
+    if (file == 15 * kNumL0Files) {
+      TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyCompaction");
+    }
+  }
 
-  Status status;
-  // Fails because L0 has overlapping files.
-  status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
-  ASSERT_TRUE(status.IsInvalidArgument());
+  ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
+  CancelAllBackgroundWork(db_);
+  TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:Preshutdown");
+  TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown");
+  dbfull()->TEST_WaitForCompact();
+  // Record the number of compactions at a time.
+  for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
+    operation_count[i] = 0;
+  }
+  Status s = env_->GetThreadList(&thread_list);
+  for (auto thread : thread_list) {
+    operation_count[thread.operation_type]++;
+  }
+  ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
+}
 
-  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  // Now there is a file in L1.
-  ASSERT_GE(NumTableFilesAtLevel(1, 0), 1);
+#endif  // ROCKSDB_USING_THREAD_STATUS
 
-  ASSERT_OK(Put(Key(5), ""));
-  ASSERT_OK(Flush());
-  // Fails because L1 is non-empty.
-  status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
-  ASSERT_TRUE(status.IsInvalidArgument());
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, FlushOnDestroy) {
+  WriteOptions wo;
+  wo.disableWAL = true;
+  ASSERT_OK(Put("foo", "v1", wo));
+  CancelAllBackgroundWork(db_);
 }
-#endif  // ROCKSDB_LITE
 
-// Github issue #596
-TEST_F(DBTest, HugeNumberOfLevels) {
-  Options options = CurrentOptions();
-  options.write_buffer_size = 2 * 1024 * 1024;         // 2MB
-  options.max_bytes_for_level_base = 2 * 1024 * 1024;  // 2MB
-  options.num_levels = 12;
-  options.max_background_compactions = 10;
-  options.max_bytes_for_level_multiplier = 2;
-  options.level_compaction_dynamic_level_bytes = true;
-  DestroyAndReopen(options);
+TEST_F(DBTest, DynamicLevelCompressionPerLevel) {
+  if (!Snappy_Supported()) {
+    return;
+  }
+  const int kNKeys = 120;
+  int keys[kNKeys];
+  for (int i = 0; i < kNKeys; i++) {
+    keys[i] = i;
+  }
+  std::random_shuffle(std::begin(keys), std::end(keys));
 
   Random rnd(301);
-  for (int i = 0; i < 300000; ++i) {
-    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
-  }
+  Options options;
+  options.create_if_missing = true;
+  options.db_write_buffer_size = 20480;
+  options.write_buffer_size = 20480;
+  options.max_write_buffer_number = 2;
+  options.level0_file_num_compaction_trigger = 2;
+  options.level0_slowdown_writes_trigger = 2;
+  options.level0_stop_writes_trigger = 2;
+  options.target_file_size_base = 2048;
+  options.level_compaction_dynamic_level_bytes = true;
+  options.max_bytes_for_level_base = 102400;
+  options.max_bytes_for_level_multiplier = 4;
+  options.max_background_compactions = 1;
+  options.num_levels = 5;
 
-  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-}
+  options.compression_per_level.resize(3);
+  options.compression_per_level[0] = kNoCompression;
+  options.compression_per_level[1] = kNoCompression;
+  options.compression_per_level[2] = kSnappyCompression;
+
+  OnFileDeletionListener* listener = new OnFileDeletionListener();
+  options.listeners.emplace_back(listener);
 
-TEST_F(DBTest, AutomaticConflictsWithManualCompaction) {
-  Options options = CurrentOptions();
-  options.write_buffer_size = 2 * 1024 * 1024;         // 2MB
-  options.max_bytes_for_level_base = 2 * 1024 * 1024;  // 2MB
-  options.num_levels = 12;
-  options.max_background_compactions = 10;
-  options.max_bytes_for_level_multiplier = 2;
-  options.level_compaction_dynamic_level_bytes = true;
   DestroyAndReopen(options);
 
-  Random rnd(301);
-  for (int i = 0; i < 300000; ++i) {
-    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
+  // Insert more than 80K. L4 should be base level. Neither L0 nor L4 should
+  // be compressed, so total data size should be more than 80K.
+  for (int i = 0; i < 20; i++) {
+    ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
   }
+  Flush();
+  dbfull()->TEST_WaitForCompact();
 
-  std::atomic<int> callback_count(0);
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "DBImpl::BackgroundCompaction()::Conflict",
-      [&](void* arg) { callback_count.fetch_add(1); });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
-  CompactRangeOptions croptions;
-  croptions.exclusive_manual_compaction = false;
-  ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr));
-  ASSERT_GE(callback_count.load(), 1);
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
-  for (int i = 0; i < 300000; ++i) {
-    ASSERT_NE("NOT_FOUND", Get(Key(i)));
+  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
+  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
+  ASSERT_EQ(NumTableFilesAtLevel(3), 0);
+  ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(4), 20U * 4000U);
+
+  // Insert 400KB. Some data will be compressed
+  for (int i = 21; i < 120; i++) {
+    ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
   }
-}
+  Flush();
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
+  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
+  ASSERT_LT(SizeAtLevel(0) + SizeAtLevel(3) + SizeAtLevel(4), 120U * 4000U);
+  // Make sure data in files in L3 is not compacted by removing all files
+  // in L4 and calculate number of rows
+  ASSERT_OK(dbfull()->SetOptions({
+      {"disable_auto_compactions", "true"},
+  }));
+  ColumnFamilyMetaData cf_meta;
+  db_->GetColumnFamilyMetaData(&cf_meta);
+  for (auto file : cf_meta.levels[4].files) {
+    listener->SetExpectedFileName(dbname_ + file.name);
+    ASSERT_OK(dbfull()->DeleteFile(file.name));
+  }
+  listener->VerifyMatchedCount(cf_meta.levels[4].files.size());
 
-// Github issue #595
-// Large write batch with column families
-TEST_F(DBTest, LargeBatchWithColumnFamilies) {
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.write_buffer_size = 100000;  // Small write buffer
-  CreateAndReopenWithCF({"pikachu"}, options);
-  int64_t j = 0;
-  for (int i = 0; i < 5; i++) {
-    for (int pass = 1; pass <= 3; pass++) {
-      WriteBatch batch;
-      size_t write_size = 1024 * 1024 * (5 + i);
-      fprintf(stderr, "prepare: %" ROCKSDB_PRIszt " MB, pass:%d\n", (write_size / 1024 / 1024),
-              pass);
-      for (;;) {
-        std::string data(3000, j++ % 127 + 20);
-        data += ToString(j);
-        batch.Put(handles_[0], Slice(data), Slice(data));
-        if (batch.GetDataSize() > write_size) {
-          break;
-        }
-      }
-      fprintf(stderr, "write: %" ROCKSDB_PRIszt " MB\n", (batch.GetDataSize() / 1024 / 1024));
-      ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
-      fprintf(stderr, "done\n");
-    }
+  int num_keys = 0;
+  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    num_keys++;
   }
-  // make sure we can re-open it.
-  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
+  ASSERT_OK(iter->status());
+  ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(3), num_keys * 4000U);
 }
 
-// Make sure that Flushes can proceed in parallel with CompactRange()
-TEST_F(DBTest, FlushesInParallelWithCompactRange) {
-  // iter == 0 -- leveled
-  // iter == 1 -- leveled, but throw in a flush between two levels compacting
-  // iter == 2 -- universal
-  for (int iter = 0; iter < 3; ++iter) {
-    Options options = CurrentOptions();
-    if (iter < 2) {
-      options.compaction_style = kCompactionStyleLevel;
-    } else {
-      options.compaction_style = kCompactionStyleUniversal;
-    }
-    options.write_buffer_size = 110 << 10;
-    options.level0_file_num_compaction_trigger = 4;
-    options.num_levels = 4;
-    options.compression = kNoCompression;
-    options.max_bytes_for_level_base = 450 << 10;
-    options.target_file_size_base = 98 << 10;
-    options.max_write_buffer_number = 2;
-
-    DestroyAndReopen(options);
+TEST_F(DBTest, DynamicLevelCompressionPerLevel2) {
+  if (!Snappy_Supported() || !LZ4_Supported() || !Zlib_Supported()) {
+    return;
+  }
+  const int kNKeys = 500;
+  int keys[kNKeys];
+  for (int i = 0; i < kNKeys; i++) {
+    keys[i] = i;
+  }
+  std::random_shuffle(std::begin(keys), std::end(keys));
 
-    Random rnd(301);
-    for (int num = 0; num < 14; num++) {
-      GenerateNewRandomFile(&rnd);
-    }
+  Random rnd(301);
+  Options options;
+  options.create_if_missing = true;
+  options.db_write_buffer_size = 6000;
+  options.write_buffer_size = 6000;
+  options.max_write_buffer_number = 2;
+  options.level0_file_num_compaction_trigger = 2;
+  options.level0_slowdown_writes_trigger = 2;
+  options.level0_stop_writes_trigger = 2;
+  options.soft_pending_compaction_bytes_limit = 1024 * 1024;
 
-    if (iter == 1) {
-    rocksdb::SyncPoint::GetInstance()->LoadDependency(
-        {{"DBImpl::RunManualCompaction()::1",
-          "DBTest::FlushesInParallelWithCompactRange:1"},
-         {"DBTest::FlushesInParallelWithCompactRange:2",
-          "DBImpl::RunManualCompaction()::2"}});
-    } else {
-      rocksdb::SyncPoint::GetInstance()->LoadDependency(
-          {{"CompactionJob::Run():Start",
-            "DBTest::FlushesInParallelWithCompactRange:1"},
-           {"DBTest::FlushesInParallelWithCompactRange:2",
-            "CompactionJob::Run():End"}});
-    }
-    rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  // Use file size to distinguish levels
+  // L1: 10, L2: 20, L3 40, L4 80
+  // L0 is less than 30
+  options.target_file_size_base = 10;
+  options.target_file_size_multiplier = 2;
 
-    std::vector<std::thread> threads;
-    threads.emplace_back([&]() { Compact("a", "z"); });
+  options.level_compaction_dynamic_level_bytes = true;
+  options.max_bytes_for_level_base = 200;
+  options.max_bytes_for_level_multiplier = 8;
+  options.max_background_compactions = 1;
+  options.num_levels = 5;
+  std::shared_ptr<mock::MockTableFactory> mtf(new mock::MockTableFactory);
+  options.table_factory = mtf;
 
-    TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:1");
+  options.compression_per_level.resize(3);
+  options.compression_per_level[0] = kNoCompression;
+  options.compression_per_level[1] = kLZ4Compression;
+  options.compression_per_level[2] = kZlibCompression;
 
-    // this has to start a flush. if flushes are blocked, this will try to
-    // create
-    // 3 memtables, and that will fail because max_write_buffer_number is 2
-    for (int num = 0; num < 3; num++) {
-      GenerateNewRandomFile(&rnd, /* nowait */ true);
-    }
+  DestroyAndReopen(options);
+  // When base level is L4, L4 is LZ4.
+  std::atomic<int> num_zlib(0);
+  std::atomic<int> num_lz4(0);
+  std::atomic<int> num_no(0);
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
+        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
+        if (compaction->output_level() == 4) {
+          ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
+          num_lz4.fetch_add(1);
+        }
+      });
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
+        auto* compression = reinterpret_cast<CompressionType*>(arg);
+        ASSERT_TRUE(*compression == kNoCompression);
+        num_no.fetch_add(1);
+      });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
 
-    TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:2");
+  for (int i = 0; i < 100; i++) {
+    ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 200)));
 
-    for (auto& t : threads) {
-      t.join();
+    if (i % 25 == 0) {
+      dbfull()->TEST_WaitForFlushMemTable();
     }
-    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
   }
-}
-
-TEST_F(DBTest, DelayedWriteRate) {
-  const int kEntriesPerMemTable = 100;
-  const int kTotalFlushes = 20;
-
-  Options options = CurrentOptions();
-  env_->SetBackgroundThreads(1, Env::LOW);
-  options.env = env_;
-  env_->no_sleep_ = true;
-  options.write_buffer_size = 100000000;
-  options.max_write_buffer_number = 256;
-  options.max_background_compactions = 1;
-  options.level0_file_num_compaction_trigger = 3;
-  options.level0_slowdown_writes_trigger = 3;
-  options.level0_stop_writes_trigger = 999999;
-  options.delayed_write_rate = 20000000;  // Start with 200MB/s
-  options.memtable_factory.reset(
-      new SpecialSkipListFactory(kEntriesPerMemTable));
 
-  CreateAndReopenWithCF({"pikachu"}, options);
+  Flush();
+  dbfull()->TEST_WaitForFlushMemTable();
+  dbfull()->TEST_WaitForCompact();
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
 
-  // Block compactions
-  test::SleepingBackgroundTask sleeping_task_low;
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
-                 Env::Priority::LOW);
+  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
+  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
+  ASSERT_EQ(NumTableFilesAtLevel(3), 0);
+  ASSERT_GT(NumTableFilesAtLevel(4), 0);
+  ASSERT_GT(num_no.load(), 2);
+  ASSERT_GT(num_lz4.load(), 0);
+  int prev_num_files_l4 = NumTableFilesAtLevel(4);
 
-  for (int i = 0; i < 3; i++) {
-    Put(Key(i), std::string(10000, 'x'));
-    Flush();
-  }
+  // After base level turn L4->L3, L3 becomes LZ4 and L4 becomes Zlib
+  num_lz4.store(0);
+  num_no.store(0);
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
+        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
+        if (compaction->output_level() == 4 && compaction->start_level() == 3) {
+          ASSERT_TRUE(compaction->output_compression() == kZlibCompression);
+          num_zlib.fetch_add(1);
+        } else {
+          ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
+          num_lz4.fetch_add(1);
+        }
+      });
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
+        auto* compression = reinterpret_cast<CompressionType*>(arg);
+        ASSERT_TRUE(*compression == kNoCompression);
+        num_no.fetch_add(1);
+      });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
 
-  // These writes will be slowed down to 1KB/s
-  uint64_t estimated_sleep_time = 0;
-  Random rnd(301);
-  Put("", "");
-  uint64_t cur_rate = options.delayed_write_rate;
-  for (int i = 0; i < kTotalFlushes; i++) {
-    uint64_t size_memtable = 0;
-    for (int j = 0; j < kEntriesPerMemTable; j++) {
-      auto rand_num = rnd.Uniform(20);
-      // Spread the size range to more.
-      size_t entry_size = rand_num * rand_num * rand_num;
-      WriteOptions wo;
-      Put(Key(i), std::string(entry_size, 'x'), wo);
-      size_memtable += entry_size + 18;
-      // Occasionally sleep a while
-      if (rnd.Uniform(20) == 6) {
-        env_->SleepForMicroseconds(2666);
-      }
+  for (int i = 101; i < 500; i++) {
+    ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 200)));
+    if (i % 100 == 99) {
+      Flush();
+      dbfull()->TEST_WaitForCompact();
     }
-    dbfull()->TEST_WaitForFlushMemTable();
-    estimated_sleep_time += size_memtable * 1000000u / cur_rate;
-    // Slow down twice. One for memtable switch and one for flush finishes.
-    cur_rate = static_cast<uint64_t>(static_cast<double>(cur_rate) /
-                                     kSlowdownRatio / kSlowdownRatio);
   }
-  // Estimate the total sleep time fall into the rough range.
-  ASSERT_GT(env_->addon_time_.load(),
-            static_cast<int64_t>(estimated_sleep_time / 2));
-  ASSERT_LT(env_->addon_time_.load(),
-            static_cast<int64_t>(estimated_sleep_time * 2));
 
-  env_->no_sleep_ = false;
+  rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
   rocksdb::SyncPoint::GetInstance()->DisableProcessing();
-  sleeping_task_low.WakeUp();
-  sleeping_task_low.WaitUntilDone();
+  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
+  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
+  ASSERT_GT(NumTableFilesAtLevel(3), 0);
+  ASSERT_GT(NumTableFilesAtLevel(4), prev_num_files_l4);
+  ASSERT_GT(num_no.load(), 2);
+  ASSERT_GT(num_lz4.load(), 0);
+  ASSERT_GT(num_zlib.load(), 0);
 }
 
-TEST_F(DBTest, HardLimit) {
-  Options options = CurrentOptions();
+TEST_F(DBTest, DynamicCompactionOptions) {
+  // minimum write buffer size is enforced at 64KB
+  const uint64_t k32KB = 1 << 15;
+  const uint64_t k64KB = 1 << 16;
+  const uint64_t k128KB = 1 << 17;
+  const uint64_t k1MB = 1 << 20;
+  const uint64_t k4KB = 1 << 12;
+  Options options;
   options.env = env_;
-  env_->SetBackgroundThreads(1, Env::LOW);
-  options.max_write_buffer_number = 256;
-  options.write_buffer_size = 110 << 10;  // 110KB
-  options.arena_block_size = 4 * 1024;
-  options.level0_file_num_compaction_trigger = 4;
-  options.level0_slowdown_writes_trigger = 999999;
-  options.level0_stop_writes_trigger = 999999;
-  options.hard_pending_compaction_bytes_limit = 800 << 10;
-  options.max_bytes_for_level_base = 10000000000u;
-  options.max_background_compactions = 1;
-  options.memtable_factory.reset(
-      new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
+  options.create_if_missing = true;
+  options.compression = kNoCompression;
+  options.soft_pending_compaction_bytes_limit = 1024 * 1024;
+  options.write_buffer_size = k64KB;
+  options.arena_block_size = 4 * k4KB;
+  options.max_write_buffer_number = 2;
+  // Compaction related options
+  options.level0_file_num_compaction_trigger = 3;
+  options.level0_slowdown_writes_trigger = 4;
+  options.level0_stop_writes_trigger = 8;
+  options.max_grandparent_overlap_factor = 10;
+  options.expanded_compaction_factor = 25;
+  options.source_compaction_factor = 1;
+  options.target_file_size_base = k64KB;
+  options.target_file_size_multiplier = 1;
+  options.max_bytes_for_level_base = k128KB;
+  options.max_bytes_for_level_multiplier = 4;
 
+  // Block flush thread and disable compaction thread
   env_->SetBackgroundThreads(1, Env::LOW);
-  test::SleepingBackgroundTask sleeping_task_low;
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
-                 Env::Priority::LOW);
+  env_->SetBackgroundThreads(1, Env::HIGH);
+  DestroyAndReopen(options);
 
-  CreateAndReopenWithCF({"pikachu"}, options);
+  auto gen_l0_kb = [this](int start, int size, int stride) {
+    Random rnd(301);
+    for (int i = 0; i < size; i++) {
+      ASSERT_OK(Put(Key(start + stride * i), RandomString(&rnd, 1024)));
+    }
+    dbfull()->TEST_WaitForFlushMemTable();
+  };
 
-  std::atomic<int> callback_count(0);
-  rocksdb::SyncPoint::GetInstance()->SetCallBack("DBImpl::DelayWrite:Wait",
-                                                 [&](void* arg) {
-                                                   callback_count.fetch_add(1);
-                                                   sleeping_task_low.WakeUp();
-                                                 });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  // Write 3 files that have the same key range.
+  // Since level0_file_num_compaction_trigger is 3, compaction should be
+  // triggered. The compaction should result in one L1 file
+  gen_l0_kb(0, 64, 1);
+  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
+  gen_l0_kb(0, 64, 1);
+  ASSERT_EQ(NumTableFilesAtLevel(0), 2);
+  gen_l0_kb(0, 64, 1);
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ("0,1", FilesPerLevel());
+  std::vector<LiveFileMetaData> metadata;
+  db_->GetLiveFilesMetaData(&metadata);
+  ASSERT_EQ(1U, metadata.size());
+  ASSERT_LE(metadata[0].size, k64KB + k4KB);
+  ASSERT_GE(metadata[0].size, k64KB - k4KB);
 
-  Random rnd(301);
-  int key_idx = 0;
-  for (int num = 0; num < 5; num++) {
-    GenerateNewFile(&rnd, &key_idx, true);
-    dbfull()->TEST_WaitForFlushMemTable();
+  // Test compaction trigger and target_file_size_base
+  // Reduce compaction trigger to 2, and reduce L1 file size to 32KB.
+  // Writing to 64KB L0 files should trigger a compaction. Since these
+  // 2 L0 files have the same key range, compaction merge them and should
+  // result in 2 32KB L1 files.
+  ASSERT_OK(dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
+                                  {"target_file_size_base", ToString(k32KB)}}));
+
+  gen_l0_kb(0, 64, 1);
+  ASSERT_EQ("1,1", FilesPerLevel());
+  gen_l0_kb(0, 64, 1);
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ("0,2", FilesPerLevel());
+  metadata.clear();
+  db_->GetLiveFilesMetaData(&metadata);
+  ASSERT_EQ(2U, metadata.size());
+  ASSERT_LE(metadata[0].size, k32KB + k4KB);
+  ASSERT_GE(metadata[0].size, k32KB - k4KB);
+  ASSERT_LE(metadata[1].size, k32KB + k4KB);
+  ASSERT_GE(metadata[1].size, k32KB - k4KB);
+
+  // Test max_bytes_for_level_base
+  // Increase level base size to 256KB and write enough data that will
+  // fill L1 and L2. L1 size should be around 256KB while L2 size should be
+  // around 256KB x 4.
+  ASSERT_OK(
+      dbfull()->SetOptions({{"max_bytes_for_level_base", ToString(k1MB)}}));
+
+  // writing 96 x 64KB => 6 * 1024KB
+  // (L1 + L2) = (1 + 4) * 1024KB
+  for (int i = 0; i < 96; ++i) {
+    gen_l0_kb(i, 64, 96);
   }
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_GT(SizeAtLevel(1), k1MB / 2);
+  ASSERT_LT(SizeAtLevel(1), k1MB + k1MB / 2);
 
-  ASSERT_EQ(0, callback_count.load());
+  // Within (0.5, 1.5) of 4MB.
+  ASSERT_GT(SizeAtLevel(2), 2 * k1MB);
+  ASSERT_LT(SizeAtLevel(2), 6 * k1MB);
 
-  for (int num = 0; num < 5; num++) {
-    GenerateNewFile(&rnd, &key_idx, true);
-    dbfull()->TEST_WaitForFlushMemTable();
+  // Test max_bytes_for_level_multiplier and
+  // max_bytes_for_level_base. Now, reduce both mulitplier and level base,
+  // After filling enough data that can fit in L1 - L3, we should see L1 size
+  // reduces to 128KB from 256KB which was asserted previously. Same for L2.
+  ASSERT_OK(
+      dbfull()->SetOptions({{"max_bytes_for_level_multiplier", "2"},
+                            {"max_bytes_for_level_base", ToString(k128KB)}}));
+
+  // writing 20 x 64KB = 10 x 128KB
+  // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB
+  for (int i = 0; i < 20; ++i) {
+    gen_l0_kb(i, 64, 32);
+  }
+  dbfull()->TEST_WaitForCompact();
+  uint64_t total_size = SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3);
+  ASSERT_TRUE(total_size < k128KB * 7 * 1.5);
+
+  // Test level0_stop_writes_trigger.
+  // Clean up memtable and L0. Block compaction threads. If continue to write
+  // and flush memtables. We should see put stop after 8 memtable flushes
+  // since level0_stop_writes_trigger = 8
+  dbfull()->TEST_FlushMemTable(true);
+  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+  // Block compaction
+  test::SleepingBackgroundTask sleeping_task_low;
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
+                 Env::Priority::LOW);
+  sleeping_task_low.WaitUntilSleeping();
+  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
+  int count = 0;
+  Random rnd(301);
+  WriteOptions wo;
+  while (count < 64) {
+    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
+    dbfull()->TEST_FlushMemTable(true);
+    count++;
+    if (dbfull()->TEST_write_controler().IsStopped()) {
+      sleeping_task_low.WakeUp();
+      break;
+    }
   }
-  ASSERT_GE(callback_count.load(), 1);
-
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  // Stop trigger = 8
+  ASSERT_EQ(count, 8);
+  // Unblock
   sleeping_task_low.WaitUntilDone();
-}
-
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, SoftLimit) {
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.write_buffer_size = 100000;  // Small write buffer
-  options.max_write_buffer_number = 256;
-  options.level0_file_num_compaction_trigger = 1;
-  options.level0_slowdown_writes_trigger = 3;
-  options.level0_stop_writes_trigger = 999999;
-  options.delayed_write_rate = 20000;  // About 200KB/s limited rate
-  options.soft_pending_compaction_bytes_limit = 200000;
-  options.target_file_size_base = 99999999;  // All into one file
-  options.max_bytes_for_level_base = 50000;
-  options.max_bytes_for_level_multiplier = 10;
-  options.max_background_compactions = 1;
-  options.compression = kNoCompression;
 
-  Reopen(options);
-  Put(Key(0), "");
+  // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0.
+  // Block compaction thread again. Perform the put and memtable flushes
+  // until we see the stop after 6 memtable flushes.
+  ASSERT_OK(dbfull()->SetOptions({{"level0_stop_writes_trigger", "6"}}));
+  dbfull()->TEST_FlushMemTable(true);
+  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
 
-  test::SleepingBackgroundTask sleeping_task_low;
-  // Block compactions
+  // Block compaction again
+  sleeping_task_low.Reset();
   env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                  Env::Priority::LOW);
   sleeping_task_low.WaitUntilSleeping();
-
-  // Create 3 L0 files, making score of L0 to be 3.
-  for (int i = 0; i < 3; i++) {
-    Put(Key(i), std::string(5000, 'x'));
-    Put(Key(100 - i), std::string(5000, 'x'));
-    // Flush the file. File size is around 30KB.
-    Flush();
+  count = 0;
+  while (count < 64) {
+    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
+    dbfull()->TEST_FlushMemTable(true);
+    count++;
+    if (dbfull()->TEST_write_controler().IsStopped()) {
+      sleeping_task_low.WakeUp();
+      break;
+    }
   }
-  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
-
-  sleeping_task_low.WakeUp();
+  ASSERT_EQ(count, 6);
+  // Unblock
   sleeping_task_low.WaitUntilDone();
-  sleeping_task_low.Reset();
-  dbfull()->TEST_WaitForCompact();
 
-  // Now there is one L1 file but doesn't trigger soft_rate_limit
-  // The L1 file size is around 30KB.
-  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
-  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
+  // Test disable_auto_compactions
+  // Compaction thread is unblocked but auto compaction is disabled. Write
+  // 4 L0 files and compaction should be triggered. If auto compaction is
+  // disabled, then TEST_WaitForCompact will be waiting for nothing. Number of
+  // L0 files do not change after the call.
+  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "true"}}));
+  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
 
-  // Only allow one compactin going through.
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "BackgroundCallCompaction:0", [&](void* arg) {
-        // Schedule a sleeping task.
-        sleeping_task_low.Reset();
-        env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
-                       &sleeping_task_low, Env::Priority::LOW);
-      });
+  for (int i = 0; i < 4; ++i) {
+    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
+    // Wait for compaction so that put won't stop
+    dbfull()->TEST_FlushMemTable(true);
+  }
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_EQ(NumTableFilesAtLevel(0), 4);
 
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  // Enable auto compaction and perform the same test, # of L0 files should be
+  // reduced after compaction.
+  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}}));
+  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
 
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
-                 Env::Priority::LOW);
-  sleeping_task_low.WaitUntilSleeping();
-  // Create 3 L0 files, making score of L0 to be 3
-  for (int i = 0; i < 3; i++) {
-    Put(Key(10 + i), std::string(5000, 'x'));
-    Put(Key(90 - i), std::string(5000, 'x'));
-    // Flush the file. File size is around 30KB.
-    Flush();
+  for (int i = 0; i < 4; ++i) {
+    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
+    // Wait for compaction so that put won't stop
+    dbfull()->TEST_FlushMemTable(true);
   }
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_LT(NumTableFilesAtLevel(0), 4);
+}
+#endif  // ROCKSDB_LITE
 
-  // Wake up sleep task to enable compaction to run and waits
-  // for it to go to sleep state again to make sure one compaction
-  // goes through.
-  sleeping_task_low.WakeUp();
-  sleeping_task_low.WaitUntilSleeping();
+TEST_F(DBTest, FileCreationRandomFailure) {
+  Options options;
+  options.env = env_;
+  options.create_if_missing = true;
+  options.write_buffer_size = 100000;  // Small write buffer
+  options.target_file_size_base = 200000;
+  options.max_bytes_for_level_base = 1000000;
+  options.max_bytes_for_level_multiplier = 2;
 
-  // Now there is one L1 file (around 60KB) which exceeds 50KB base by 10KB
-  // Given level multiplier 10, estimated pending compaction is around 100KB
-  // doesn't trigger soft_pending_compaction_bytes_limit
-  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
-  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
+  DestroyAndReopen(options);
+  Random rnd(301);
 
-  // Create 3 L0 files, making score of L0 to be 3, higher than L0.
-  for (int i = 0; i < 3; i++) {
-    Put(Key(20 + i), std::string(5000, 'x'));
-    Put(Key(80 - i), std::string(5000, 'x'));
-    // Flush the file. File size is around 30KB.
-    Flush();
+  const int kCDTKeysPerBuffer = 4;
+  const int kTestSize = kCDTKeysPerBuffer * 4096;
+  const int kTotalIteration = 100;
+  // the second half of the test involves in random failure
+  // of file creation.
+  const int kRandomFailureTest = kTotalIteration / 2;
+  std::vector<std::string> values;
+  for (int i = 0; i < kTestSize; ++i) {
+    values.push_back("NOT_FOUND");
+  }
+  for (int j = 0; j < kTotalIteration; ++j) {
+    if (j == kRandomFailureTest) {
+      env_->non_writeable_rate_.store(90);
+    }
+    for (int k = 0; k < kTestSize; ++k) {
+      // here we expect some of the Put fails.
+      std::string value = RandomString(&rnd, 100);
+      Status s = Put(Key(k), Slice(value));
+      if (s.ok()) {
+        // update the latest successful put
+        values[k] = value;
+      }
+      // But everything before we simulate the failure-test should succeed.
+      if (j < kRandomFailureTest) {
+        ASSERT_OK(s);
+      }
+    }
   }
-  // Wake up sleep task to enable compaction to run and waits
-  // for it to go to sleep state again to make sure one compaction
-  // goes through.
-  sleeping_task_low.WakeUp();
-  sleeping_task_low.WaitUntilSleeping();
 
-  // Now there is one L1 file (around 90KB) which exceeds 50KB base by 40KB
-  // Given level multiplier 10, estimated pending compaction is around 400KB
-  // triggerring soft_pending_compaction_bytes_limit
-  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
-  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
+  // If rocksdb does not do the correct job, internal assert will fail here.
+  dbfull()->TEST_WaitForFlushMemTable();
+  dbfull()->TEST_WaitForCompact();
 
-  sleeping_task_low.WakeUp();
-  sleeping_task_low.WaitUntilSleeping();
+  // verify we have the latest successful update
+  for (int k = 0; k < kTestSize; ++k) {
+    auto v = Get(Key(k));
+    ASSERT_EQ(v, values[k]);
+  }
 
-  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
+  // reopen and reverify we have the latest successful update
+  env_->non_writeable_rate_.store(0);
+  Reopen(options);
+  for (int k = 0; k < kTestSize; ++k) {
+    auto v = Get(Key(k));
+    ASSERT_EQ(v, values[k]);
+  }
+}
 
-  // shrink level base so L2 will hit soft limit easier.
-  ASSERT_OK(dbfull()->SetOptions({
-      {"max_bytes_for_level_base", "5000"},
-  }));
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, DynamicMiscOptions) {
+  // Test max_sequential_skip_in_iterations
+  Options options;
+  options.env = env_;
+  options.create_if_missing = true;
+  options.max_sequential_skip_in_iterations = 16;
+  options.compression = kNoCompression;
+  options.statistics = rocksdb::CreateDBStatistics();
+  DestroyAndReopen(options);
 
-  Put("", "");
-  Flush();
-  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
+  auto assert_reseek_count = [this, &options](int key_start, int num_reseek) {
+    int key0 = key_start;
+    int key1 = key_start + 1;
+    int key2 = key_start + 2;
+    Random rnd(301);
+    ASSERT_OK(Put(Key(key0), RandomString(&rnd, 8)));
+    for (int i = 0; i < 10; ++i) {
+      ASSERT_OK(Put(Key(key1), RandomString(&rnd, 8)));
+    }
+    ASSERT_OK(Put(Key(key2), RandomString(&rnd, 8)));
+    std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
+    iter->Seek(Key(key1));
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Key(key1)), 0);
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(iter->key().compare(Key(key2)), 0);
+    ASSERT_EQ(num_reseek,
+              TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
+  };
+  // No reseek
+  assert_reseek_count(100, 0);
 
-  sleeping_task_low.WaitUntilSleeping();
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
-  sleeping_task_low.WakeUp();
-  sleeping_task_low.WaitUntilDone();
+  ASSERT_OK(dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "4"}}));
+  // Clear memtable and make new option effective
+  dbfull()->TEST_FlushMemTable(true);
+  // Trigger reseek
+  assert_reseek_count(200, 1);
+
+  ASSERT_OK(
+      dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "16"}}));
+  // Clear memtable and make new option effective
+  dbfull()->TEST_FlushMemTable(true);
+  // No reseek
+  assert_reseek_count(300, 1);
 }
+#endif  // ROCKSDB_LITE
+
+TEST_F(DBTest, L0L1L2AndUpHitCounter) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 32 * 1024;
+  options.target_file_size_base = 32 * 1024;
+  options.level0_file_num_compaction_trigger = 2;
+  options.level0_slowdown_writes_trigger = 2;
+  options.level0_stop_writes_trigger = 4;
+  options.max_bytes_for_level_base = 64 * 1024;
+  options.max_write_buffer_number = 2;
+  options.max_background_compactions = 8;
+  options.max_background_flushes = 8;
+  options.statistics = rocksdb::CreateDBStatistics();
+  CreateAndReopenWithCF({"mypikachu"}, options);
 
-TEST_F(DBTest, LastWriteBufferDelay) {
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.write_buffer_size = 100000;
-  options.max_write_buffer_number = 4;
-  options.delayed_write_rate = 20000;
-  options.compression = kNoCompression;
-  options.disable_auto_compactions = true;
-  int kNumKeysPerMemtable = 3;
-  options.memtable_factory.reset(
-      new SpecialSkipListFactory(kNumKeysPerMemtable));
+  int numkeys = 20000;
+  for (int i = 0; i < numkeys; i++) {
+    ASSERT_OK(Put(1, Key(i), "val"));
+  }
+  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
+  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
+  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
 
-  Reopen(options);
-  test::SleepingBackgroundTask sleeping_task;
-  // Block flushes
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
-                 Env::Priority::HIGH);
-  sleeping_task.WaitUntilSleeping();
+  ASSERT_OK(Flush(1));
+  dbfull()->TEST_WaitForCompact();
 
-  // Create 3 L0 files, making score of L0 to be 3.
-  for (int i = 0; i < 3; i++) {
-    // Fill one mem table
-    for (int j = 0; j < kNumKeysPerMemtable; j++) {
-      Put(Key(j), "");
-    }
-    ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
+  for (int i = 0; i < numkeys; i++) {
+    ASSERT_EQ(Get(1, Key(i)), "val");
   }
-  // Inserting a new entry would create a new mem table, triggering slow down.
-  Put(Key(0), "");
-  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
 
-  sleeping_task.WakeUp();
-  sleeping_task.WaitUntilDone();
+  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L0), 100);
+  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L1), 100);
+  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L2_AND_UP), 100);
+
+  ASSERT_EQ(numkeys, TestGetTickerCount(options, GET_HIT_L0) +
+                         TestGetTickerCount(options, GET_HIT_L1) +
+                         TestGetTickerCount(options, GET_HIT_L2_AND_UP));
 }
-#endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, FailWhenCompressionNotSupportedTest) {
+TEST_F(DBTest, EncodeDecompressedBlockSizeTest) {
+  // iter 0 -- zlib
+  // iter 1 -- bzip2
+  // iter 2 -- lz4
+  // iter 3 -- lz4HC
   CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
                                     kLZ4Compression,  kLZ4HCCompression};
   for (int iter = 0; iter < 4; ++iter) {
     if (!CompressionTypeSupported(compressions[iter])) {
-      // not supported, we should fail the Open()
+      continue;
+    }
+    // first_table_version 1 -- generate with table_version == 1, read with
+    // table_version == 2
+    // first_table_version 2 -- generate with table_version == 2, read with
+    // table_version == 1
+    for (int first_table_version = 1; first_table_version <= 2;
+         ++first_table_version) {
+      BlockBasedTableOptions table_options;
+      table_options.format_version = first_table_version;
+      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
       Options options = CurrentOptions();
+      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+      options.create_if_missing = true;
       options.compression = compressions[iter];
-      ASSERT_TRUE(!TryReopen(options).ok());
-      // Try if CreateColumnFamily also fails
-      options.compression = kNoCompression;
-      ASSERT_OK(TryReopen(options));
-      ColumnFamilyOptions cf_options(options);
-      cf_options.compression = compressions[iter];
-      ColumnFamilyHandle* handle;
-      ASSERT_TRUE(!db_->CreateColumnFamily(cf_options, "name", &handle).ok());
+      DestroyAndReopen(options);
+
+      int kNumKeysWritten = 100000;
+
+      Random rnd(301);
+      for (int i = 0; i < kNumKeysWritten; ++i) {
+        // compressible string
+        ASSERT_OK(Put(Key(i), RandomString(&rnd, 128) + std::string(128, 'a')));
+      }
+
+      table_options.format_version = first_table_version == 1 ? 2 : 1;
+      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+      Reopen(options);
+      for (int i = 0; i < kNumKeysWritten; ++i) {
+        auto r = Get(Key(i));
+        ASSERT_EQ(r.substr(128), std::string(128, 'a'));
+      }
     }
   }
 }
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, RowCache) {
+TEST_F(DBTest, MutexWaitStatsDisabledByDefault) {
   Options options = CurrentOptions();
+  options.create_if_missing = true;
   options.statistics = rocksdb::CreateDBStatistics();
-  options.row_cache = NewLRUCache(8192);
-  DestroyAndReopen(options);
-
-  ASSERT_OK(Put("foo", "bar"));
-  ASSERT_OK(Flush());
-
-  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
-  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0);
-  ASSERT_EQ(Get("foo"), "bar");
-  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
-  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
-  ASSERT_EQ(Get("foo"), "bar");
-  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
-  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
+  CreateAndReopenWithCF({"pikachu"}, options);
+  const uint64_t kMutexWaitDelay = 100;
+  ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT,
+                                       kMutexWaitDelay);
+  ASSERT_OK(Put("hello", "rocksdb"));
+  ASSERT_EQ(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), 0);
+  ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0);
 }
-#endif  // ROCKSDB_LITE
 
-// TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
-//             return the biggest key which is smaller than the seek key.
-TEST_F(DBTest, PrevAfterMerge) {
-  Options options;
+TEST_F(DBTest, MutexWaitStats) {
+  Options options = CurrentOptions();
   options.create_if_missing = true;
-  options.merge_operator = MergeOperators::CreatePutOperator();
-  DestroyAndReopen(options);
-
-  // write three entries with different keys using Merge()
-  WriteOptions wopts;
-  db_->Merge(wopts, "1", "data1");
-  db_->Merge(wopts, "2", "data2");
-  db_->Merge(wopts, "3", "data3");
-
-  std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
-
-  it->Seek("2");
-  ASSERT_TRUE(it->Valid());
-  ASSERT_EQ("2", it->key().ToString());
-
-  it->Prev();
-  ASSERT_TRUE(it->Valid());
-  ASSERT_EQ("1", it->key().ToString());
+  options.statistics = rocksdb::CreateDBStatistics();
+  options.statistics->stats_level_ = StatsLevel::kAll;
+  CreateAndReopenWithCF({"pikachu"}, options);
+  const uint64_t kMutexWaitDelay = 100;
+  ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT,
+                                       kMutexWaitDelay);
+  ASSERT_OK(Put("hello", "rocksdb"));
+  ASSERT_GE(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), kMutexWaitDelay);
+  ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0);
 }
 
-TEST_F(DBTest, DeletingOldWalAfterDrop) {
-  rocksdb::SyncPoint::GetInstance()->LoadDependency(
-      { { "Test:AllowFlushes", "DBImpl::BGWorkFlush" },
-        { "DBImpl::BGWorkFlush:done", "Test:WaitForFlush"} });
-  rocksdb::SyncPoint::GetInstance()->ClearTrace();
-
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+TEST_F(DBTest, CloseSpeedup) {
   Options options = CurrentOptions();
-  options.max_total_wal_size = 8192;
-  options.compression = kNoCompression;
-  options.write_buffer_size = 1 << 20;
-  options.level0_file_num_compaction_trigger = (1<<30);
-  options.level0_slowdown_writes_trigger = (1<<30);
-  options.level0_stop_writes_trigger = (1<<30);
-  options.disable_auto_compactions = true;
-  DestroyAndReopen(options);
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
-
-  CreateColumnFamilies({"cf1", "cf2"}, options);
-  ASSERT_OK(Put(0, "key1", DummyString(8192)));
-  ASSERT_OK(Put(0, "key2", DummyString(8192)));
-  // the oldest wal should now be getting_flushed
-  ASSERT_OK(db_->DropColumnFamily(handles_[0]));
-  // all flushes should now do nothing because their CF is dropped
-  TEST_SYNC_POINT("Test:AllowFlushes");
-  TEST_SYNC_POINT("Test:WaitForFlush");
-  uint64_t lognum1 = dbfull()->TEST_LogfileNumber();
-  ASSERT_OK(Put(1, "key3", DummyString(8192)));
-  ASSERT_OK(Put(1, "key4", DummyString(8192)));
-  // new wal should have been created
-  uint64_t lognum2 = dbfull()->TEST_LogfileNumber();
-  EXPECT_GT(lognum2, lognum1);
-}
-
-TEST_F(DBTest, DBWithSstFileManager) {
-  std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
-  auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
+  options.compaction_style = kCompactionStyleLevel;
+  options.write_buffer_size = 110 << 10;  // 110KB
+  options.arena_block_size = 4 << 10;
+  options.level0_file_num_compaction_trigger = 2;
+  options.num_levels = 4;
+  options.max_bytes_for_level_base = 400 * 1024;
+  options.max_write_buffer_number = 16;
 
-  int files_added = 0;
-  int files_deleted = 0;
-  int files_moved = 0;
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "SstFileManagerImpl::OnAddFile", [&](void* arg) { files_added++; });
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "SstFileManagerImpl::OnDeleteFile", [&](void* arg) { files_deleted++; });
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "SstFileManagerImpl::OnMoveFile", [&](void* arg) { files_moved++; });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  // Block background threads
+  env_->SetBackgroundThreads(1, Env::LOW);
+  env_->SetBackgroundThreads(1, Env::HIGH);
+  test::SleepingBackgroundTask sleeping_task_low;
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
+                 Env::Priority::LOW);
+  test::SleepingBackgroundTask sleeping_task_high;
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
+                 &sleeping_task_high, Env::Priority::HIGH);
 
-  Options options = CurrentOptions();
-  options.sst_file_manager = sst_file_manager;
+  std::vector<std::string> filenames;
+  env_->GetChildren(dbname_, &filenames);
+  // Delete archival files.
+  for (size_t i = 0; i < filenames.size(); ++i) {
+    env_->DeleteFile(dbname_ + "/" + filenames[i]);
+  }
+  env_->DeleteDir(dbname_);
   DestroyAndReopen(options);
 
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  env_->SetBackgroundThreads(1, Env::LOW);
+  env_->SetBackgroundThreads(1, Env::HIGH);
   Random rnd(301);
-  for (int i = 0; i < 25; i++) {
-    GenerateNewRandomFile(&rnd);
-    ASSERT_OK(Flush());
-    dbfull()->TEST_WaitForFlushMemTable();
-    dbfull()->TEST_WaitForCompact();
-    // Verify that we are tracking all sst files in dbname_
-    ASSERT_EQ(sfm->GetTrackedFiles(), GetAllSSTFiles());
-  }
-  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+  int key_idx = 0;
 
-  auto files_in_db = GetAllSSTFiles();
-  // Verify that we are tracking all sst files in dbname_
-  ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
-  // Verify the total files size
-  uint64_t total_files_size = 0;
-  for (auto& file_to_size : files_in_db) {
-    total_files_size += file_to_size.second;
-  }
-  ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
-  // We flushed at least 25 files
-  ASSERT_GE(files_added, 25);
-  // Compaction must have deleted some files
-  ASSERT_GT(files_deleted, 0);
-  // No files were moved
-  ASSERT_EQ(files_moved, 0);
+  // First three 110KB files are not going to level 2
+  // After that, (100K, 200K)
+  for (int num = 0; num < 5; num++) {
+    GenerateNewFile(&rnd, &key_idx, true);
+  }
 
-  Close();
-  Reopen(options);
-  ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
-  ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
+  ASSERT_EQ(0, GetSstFileCount(dbname_));
 
-  // Verify that we track all the files again after the DB is closed and opened
   Close();
-  sst_file_manager.reset(NewSstFileManager(env_));
-  options.sst_file_manager = sst_file_manager;
-  sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
+  ASSERT_EQ(0, GetSstFileCount(dbname_));
 
-  Reopen(options);
-  ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
-  ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
+  // Unblock background threads
+  sleeping_task_high.WakeUp();
+  sleeping_task_high.WaitUntilDone();
+  sleeping_task_low.WakeUp();
+  sleeping_task_low.WaitUntilDone();
 
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  Destroy(options);
 }
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, RateLimitedDelete) {
-  rocksdb::SyncPoint::GetInstance()->LoadDependency({
-      {"DBTest::RateLimitedDelete:1", "DeleteScheduler::BackgroundEmptyTrash"},
-  });
-
-  std::vector<uint64_t> penalties;
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "DeleteScheduler::BackgroundEmptyTrash:Wait",
-      [&](void* arg) { penalties.push_back(*(static_cast<int*>(arg))); });
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
-
-  Options options = CurrentOptions();
-  options.disable_auto_compactions = true;
-  options.env = env_;
-
-  std::string trash_dir = test::TmpDir(env_) + "/trash";
-  int64_t rate_bytes_per_sec = 1024 * 10;  // 10 Kbs / Sec
-  Status s;
-  options.sst_file_manager.reset(NewSstFileManager(
-      env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s));
-  ASSERT_OK(s);
-  auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
-
-  Destroy(last_options_);
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
-  ASSERT_OK(TryReopen(options));
-  // Create 4 files in L0
-  for (char v = 'a'; v <= 'd'; v++) {
-    ASSERT_OK(Put("Key2", DummyString(1024, v)));
-    ASSERT_OK(Put("Key3", DummyString(1024, v)));
-    ASSERT_OK(Put("Key4", DummyString(1024, v)));
-    ASSERT_OK(Put("Key1", DummyString(1024, v)));
-    ASSERT_OK(Put("Key4", DummyString(1024, v)));
-    ASSERT_OK(Flush());
-  }
-  // We created 4 sst files in L0
-  ASSERT_EQ("4", FilesPerLevel(0));
-
-  std::vector<LiveFileMetaData> metadata;
-  db_->GetLiveFilesMetaData(&metadata);
-
-  // Compaction will move the 4 files in L0 to trash and create 1 L1 file
-  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  ASSERT_EQ("0,1", FilesPerLevel(0));
-
-  uint64_t delete_start_time = env_->NowMicros();
-  // Hold BackgroundEmptyTrash
-  TEST_SYNC_POINT("DBTest::RateLimitedDelete:1");
-  sfm->WaitForEmptyTrash();
-  uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time;
+class DelayedMergeOperator : public MergeOperator {
+ private:
+  DBTest* db_test_;
 
-  uint64_t total_files_size = 0;
-  uint64_t expected_penlty = 0;
-  ASSERT_EQ(penalties.size(), metadata.size());
-  for (size_t i = 0; i < metadata.size(); i++) {
-    total_files_size += metadata[i].size;
-    expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec);
-    ASSERT_EQ(expected_penlty, penalties[i]);
+ public:
+  explicit DelayedMergeOperator(DBTest* d) : db_test_(d) {}
+  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
+                         const std::deque<std::string>& operand_list,
+                         std::string* new_value,
+                         Logger* logger) const override {
+    db_test_->env_->addon_time_.fetch_add(1000);
+    *new_value = "";
+    return true;
   }
-  ASSERT_GT(time_spent_deleting, expected_penlty * 0.9);
 
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
-}
+  virtual const char* Name() const override { return "DelayedMergeOperator"; }
+};
 
-// Create a DB with 2 db_paths, and generate multiple files in the 2
-// db_paths using CompactRangeOptions, make sure that files that were
-// deleted from first db_path were deleted using DeleteScheduler and
-// files in the second path were not.
-TEST_F(DBTest, DeleteSchedulerMultipleDBPaths) {
-  int bg_delete_file = 0;
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "DeleteScheduler::DeleteTrashFile:DeleteFile",
-      [&](void* arg) { bg_delete_file++; });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+TEST_F(DBTest, MergeTestTime) {
+  std::string one, two, three;
+  PutFixed64(&one, 1);
+  PutFixed64(&two, 2);
+  PutFixed64(&three, 3);
 
+  // Enable time profiling
+  SetPerfLevel(kEnableTime);
+  this->env_->addon_time_.store(0);
+  this->env_->time_elapse_only_sleep_ = true;
+  this->env_->no_sleep_ = true;
   Options options = CurrentOptions();
-  options.disable_auto_compactions = true;
-  options.db_paths.emplace_back(dbname_, 1024 * 100);
-  options.db_paths.emplace_back(dbname_ + "_2", 1024 * 100);
-  options.env = env_;
-
-  std::string trash_dir = test::TmpDir(env_) + "/trash";
-  int64_t rate_bytes_per_sec = 1024 * 1024;  // 1 Mb / Sec
-  Status s;
-  options.sst_file_manager.reset(NewSstFileManager(
-      env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s));
-  ASSERT_OK(s);
-  auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
-
+  options.statistics = rocksdb::CreateDBStatistics();
+  options.merge_operator.reset(new DelayedMergeOperator(this));
   DestroyAndReopen(options);
 
-  // Create 4 files in L0
-  for (int i = 0; i < 4; i++) {
-    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A')));
-    ASSERT_OK(Flush());
-  }
-  // We created 4 sst files in L0
-  ASSERT_EQ("4", FilesPerLevel(0));
-  // Compaction will delete files from L0 in first db path and generate a new
-  // file in L1 in second db path
-  CompactRangeOptions compact_options;
-  compact_options.target_path_id = 1;
-  Slice begin("Key0");
-  Slice end("Key3");
-  ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
-  ASSERT_EQ("0,1", FilesPerLevel(0));
-
-  // Create 4 files in L0
-  for (int i = 4; i < 8; i++) {
-    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B')));
-    ASSERT_OK(Flush());
-  }
-  ASSERT_EQ("4,1", FilesPerLevel(0));
-
-  // Compaction will delete files from L0 in first db path and generate a new
-  // file in L1 in second db path
-  begin = "Key4";
-  end  = "Key7";
-  ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
-  ASSERT_EQ("0,2", FilesPerLevel(0));
+  ASSERT_EQ(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
+  db_->Put(WriteOptions(), "foo", one);
+  ASSERT_OK(Flush());
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", two));
+  ASSERT_OK(Flush());
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", three));
+  ASSERT_OK(Flush());
 
-  sfm->WaitForEmptyTrash();
-  ASSERT_EQ(bg_delete_file, 8);
+  ReadOptions opt;
+  opt.verify_checksums = true;
+  opt.snapshot = nullptr;
+  std::string result;
+  db_->Get(opt, "foo", &result);
 
-  compact_options.bottommost_level_compaction =
-      BottommostLevelCompaction::kForce;
-  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
-  ASSERT_EQ("0,1", FilesPerLevel(0));
+  ASSERT_EQ(1000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
 
-  sfm->WaitForEmptyTrash();
-  ASSERT_EQ(bg_delete_file, 8);
+  ReadOptions read_options;
+  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
+  int count = 0;
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    ASSERT_OK(iter->status());
+    ++count;
+  }
 
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  ASSERT_EQ(1, count);
+  ASSERT_EQ(2000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
+#if ROCKSDB_USING_THREAD_STATUS
+  ASSERT_GT(TestGetTickerCount(options, FLUSH_WRITE_BYTES), 0);
+#endif  // ROCKSDB_USING_THREAD_STATUS
+  this->env_->time_elapse_only_sleep_ = false;
 }
 
-TEST_F(DBTest, DestroyDBWithRateLimitedDelete) {
-  int bg_delete_file = 0;
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "DeleteScheduler::DeleteTrashFile:DeleteFile",
-      [&](void* arg) { bg_delete_file++; });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
-
+#ifndef ROCKSDB_LITE
+TEST_P(DBTestWithParam, MergeCompactionTimeTest) {
+  SetPerfLevel(kEnableTime);
   Options options = CurrentOptions();
-  options.disable_auto_compactions = true;
-  options.env = env_;
+  options.compaction_filter_factory = std::make_shared<KeepFilterFactory>();
+  options.statistics = rocksdb::CreateDBStatistics();
+  options.merge_operator.reset(new DelayedMergeOperator(this));
+  options.compaction_style = kCompactionStyleUniversal;
+  options.max_subcompactions = max_subcompactions_;
   DestroyAndReopen(options);
 
-  // Create 4 files in L0
-  for (int i = 0; i < 4; i++) {
-    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A')));
+  for (int i = 0; i < 1000; i++) {
+    ASSERT_OK(db_->Merge(WriteOptions(), "foo", "TEST"));
     ASSERT_OK(Flush());
   }
-  // We created 4 sst files in L0
-  ASSERT_EQ("4", FilesPerLevel(0));
-
-  // Close DB and destroy it using DeleteScheduler
-  Close();
-  std::string trash_dir = test::TmpDir(env_) + "/trash";
-  int64_t rate_bytes_per_sec = 1024 * 1024;  // 1 Mb / Sec
-  Status s;
-  options.sst_file_manager.reset(NewSstFileManager(
-      env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s));
-  ASSERT_OK(s);
-  ASSERT_OK(DestroyDB(dbname_, options));
+  dbfull()->TEST_WaitForFlushMemTable();
+  dbfull()->TEST_WaitForCompact();
 
-  auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
-  sfm->WaitForEmptyTrash();
-  // We have deleted the 4 sst files in the delete_scheduler
-  ASSERT_EQ(bg_delete_file, 4);
+  ASSERT_NE(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
 }
-#endif  // ROCKSDB_LITE
-
-TEST_F(DBTest, DBWithMaxSpaceAllowed) {
-  std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
-  auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
 
+TEST_P(DBTestWithParam, FilterCompactionTimeTest) {
   Options options = CurrentOptions();
-  options.sst_file_manager = sst_file_manager;
+  options.compaction_filter_factory =
+      std::make_shared<DelayFilterFactory>(this);
   options.disable_auto_compactions = true;
+  options.create_if_missing = true;
+  options.statistics = rocksdb::CreateDBStatistics();
+  options.max_subcompactions = max_subcompactions_;
   DestroyAndReopen(options);
 
-  Random rnd(301);
-
-  // Generate a file containing 100 keys.
-  for (int i = 0; i < 100; i++) {
-    ASSERT_OK(Put(Key(i), RandomString(&rnd, 50)));
+  // put some data
+  for (int table = 0; table < 4; ++table) {
+    for (int i = 0; i < 10 + table; ++i) {
+      Put(ToString(table * 100 + i), "val");
+    }
+    Flush();
   }
-  ASSERT_OK(Flush());
 
-  uint64_t first_file_size = 0;
-  auto files_in_db = GetAllSSTFiles(&first_file_size);
-  ASSERT_EQ(sfm->GetTotalSize(), first_file_size);
+  CompactRangeOptions cro;
+  cro.exclusive_manual_compaction = exclusive_manual_compaction_;
+  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
+  ASSERT_EQ(0U, CountLiveFiles());
 
-  // Set the maximum allowed space usage to the current total size
-  sfm->SetMaxAllowedSpaceUsage(first_file_size + 1);
+  Reopen(options);
 
-  ASSERT_OK(Put("key1", "val1"));
-  // This flush will cause bg_error_ and will fail
-  ASSERT_NOK(Flush());
+  Iterator* itr = db_->NewIterator(ReadOptions());
+  itr->SeekToFirst();
+  ASSERT_NE(TestGetTickerCount(options, FILTER_OPERATION_TOTAL_TIME), 0);
+  delete itr;
 }
+#endif  // ROCKSDB_LITE
 
-TEST_F(DBTest, DBWithMaxSpaceAllowedRandomized) {
-  // This test will set a maximum allowed space for the DB, then it will
-  // keep filling the DB until the limit is reached and bg_error_ is set.
-  // When bg_error_ is set we will verify that the DB size is greater
-  // than the limit.
-
-  std::vector<int> max_space_limits_mbs = {1, 2, 4, 8, 10};
-
-  bool bg_error_set = false;
-  uint64_t total_sst_files_size = 0;
-
-  int reached_max_space_on_flush = 0;
-  int reached_max_space_on_compaction = 0;
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached",
-      [&](void* arg) {
-        bg_error_set = true;
-        GetAllSSTFiles(&total_sst_files_size);
-        reached_max_space_on_flush++;
-      });
-
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached",
-      [&](void* arg) {
-        bg_error_set = true;
-        GetAllSSTFiles(&total_sst_files_size);
-        reached_max_space_on_compaction++;
-      });
-
-  for (auto limit_mb : max_space_limits_mbs) {
-    bg_error_set = false;
-    total_sst_files_size = 0;
-    rocksdb::SyncPoint::GetInstance()->ClearTrace();
-    rocksdb::SyncPoint::GetInstance()->EnableProcessing();
-    std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
-    auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
-
-    Options options = CurrentOptions();
-    options.sst_file_manager = sst_file_manager;
-    options.write_buffer_size = 1024 * 512;  // 512 Kb
-    DestroyAndReopen(options);
-    Random rnd(301);
-
-    sfm->SetMaxAllowedSpaceUsage(limit_mb * 1024 * 1024);
+TEST_F(DBTest, TestLogCleanup) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 64 * 1024;  // very small
+  // only two memtables allowed ==> only two log files
+  options.max_write_buffer_number = 2;
+  Reopen(options);
 
-    int keys_written = 0;
-    uint64_t estimated_db_size = 0;
-    while (true) {
-      auto s = Put(RandomString(&rnd, 10), RandomString(&rnd, 50));
-      if (!s.ok()) {
-        break;
-      }
-      keys_written++;
-      // Check the estimated db size vs the db limit just to make sure we
-      // dont run into an infinite loop
-      estimated_db_size = keys_written * 60;  // ~60 bytes per key
-      ASSERT_LT(estimated_db_size, limit_mb * 1024 * 1024 * 2);
-    }
-    ASSERT_TRUE(bg_error_set);
-    ASSERT_GE(total_sst_files_size, limit_mb * 1024 * 1024);
-    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  for (int i = 0; i < 100000; ++i) {
+    Put(Key(i), "val");
+    // only 2 memtables will be alive, so logs_to_free needs to always be below
+    // 2
+    ASSERT_LT(dbfull()->TEST_LogsToFreeSize(), static_cast<size_t>(3));
   }
-
-  ASSERT_GT(reached_max_space_on_flush, 0);
-  ASSERT_GT(reached_max_space_on_compaction, 0);
 }
 
-TEST_F(DBTest, UnsupportedManualSync) {
-  DestroyAndReopen(CurrentOptions());
-  env_->is_wal_sync_thread_safe_.store(false);
-  Status s = db_->SyncWAL();
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, EmptyCompactedDB) {
+  Options options = CurrentOptions();
+  options.max_open_files = -1;
+  Close();
+  ASSERT_OK(ReadOnlyReopen(options));
+  Status s = Put("new", "value");
   ASSERT_TRUE(s.IsNotSupported());
+  Close();
 }
+#endif  // ROCKSDB_LITE
 
 #ifndef ROCKSDB_LITE
-TEST_F(DBTest, OpenDBWithInfiniteMaxOpenFiles) {
-  // Open DB with infinite max open files
-  //  - First iteration use 1 thread to open files
-  //  - Second iteration use 5 threads to open files
-  for (int iter = 0; iter < 2; iter++) {
-    Options options;
-    options.create_if_missing = true;
-    options.write_buffer_size = 100000;
-    options.disable_auto_compactions = true;
-    options.max_open_files = -1;
-    if (iter == 0) {
-      options.max_file_opening_threads = 1;
-    } else {
-      options.max_file_opening_threads = 5;
-    }
-    options = CurrentOptions(options);
-    DestroyAndReopen(options);
-
-    // Create 12 Files in L0 (then move then to L2)
-    for (int i = 0; i < 12; i++) {
-      std::string k = "L2_" + Key(i);
-      ASSERT_OK(Put(k, k + std::string(1000, 'a')));
-      ASSERT_OK(Flush());
-    }
-    CompactRangeOptions compact_options;
-    compact_options.change_level = true;
-    compact_options.target_level = 2;
-    db_->CompactRange(compact_options, nullptr, nullptr);
-
-    // Create 12 Files in L0
-    for (int i = 0; i < 12; i++) {
-      std::string k = "L0_" + Key(i);
-      ASSERT_OK(Put(k, k + std::string(1000, 'a')));
-      ASSERT_OK(Flush());
+TEST_F(DBTest, SuggestCompactRangeTest) {
+  class CompactionFilterFactoryGetContext : public CompactionFilterFactory {
+   public:
+    virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+        const CompactionFilter::Context& context) override {
+      saved_context = context;
+      std::unique_ptr<CompactionFilter> empty_filter;
+      return empty_filter;
     }
-    Close();
-
-    // Reopening the DB will load all exisitng files
-    Reopen(options);
-    ASSERT_EQ("12,0,12", FilesPerLevel(0));
-    std::vector<std::vector<FileMetaData>> files;
-    dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
-
-    for (const auto& level : files) {
-      for (const auto& file : level) {
-        ASSERT_TRUE(file.table_reader_handle != nullptr);
-      }
+    const char* Name() const override {
+      return "CompactionFilterFactoryGetContext";
     }
-
-    for (int i = 0; i < 12; i++) {
-      ASSERT_EQ(Get("L0_" + Key(i)), "L0_" + Key(i) + std::string(1000, 'a'));
-      ASSERT_EQ(Get("L2_" + Key(i)), "L2_" + Key(i) + std::string(1000, 'a'));
+    static bool IsManual(CompactionFilterFactory* compaction_filter_factory) {
+      return reinterpret_cast<CompactionFilterFactoryGetContext*>(
+                 compaction_filter_factory)
+          ->saved_context.is_manual_compaction;
     }
-  }
-}
+    CompactionFilter::Context saved_context;
+  };
 
-TEST_F(DBTest, GetTotalSstFilesSize) {
   Options options = CurrentOptions();
-  options.disable_auto_compactions = true;
+  options.memtable_factory.reset(
+      new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile));
+  options.compaction_style = kCompactionStyleLevel;
+  options.compaction_filter_factory.reset(
+      new CompactionFilterFactoryGetContext());
+  options.write_buffer_size = 200 << 10;
+  options.arena_block_size = 4 << 10;
+  options.level0_file_num_compaction_trigger = 4;
+  options.num_levels = 4;
   options.compression = kNoCompression;
-  DestroyAndReopen(options);
-  // Generate 5 files in L0
-  for (int i = 0; i < 5; i++) {
-    for (int j = 0; j < 10; j++) {
-      std::string val = "val_file_" + ToString(i);
-      ASSERT_OK(Put(Key(j), val));
-    }
-    Flush();
-  }
-  ASSERT_EQ("5", FilesPerLevel(0));
-
-  std::vector<LiveFileMetaData> live_files_meta;
-  dbfull()->GetLiveFilesMetaData(&live_files_meta);
-  ASSERT_EQ(live_files_meta.size(), 5);
-  uint64_t single_file_size = live_files_meta[0].size;
-
-  uint64_t live_sst_files_size = 0;
-  uint64_t total_sst_files_size = 0;
-  for (const auto& file_meta : live_files_meta) {
-    live_sst_files_size += file_meta.size;
-  }
-
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 5
-  // Total SST files = 5
-  ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
-  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
-
-  // hold current version
-  std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));
+  options.max_bytes_for_level_base = 450 << 10;
+  options.target_file_size_base = 98 << 10;
+  options.max_grandparent_overlap_factor = 1 << 20;  // inf
 
-  // Compact 5 files into 1 file in L0
-  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  ASSERT_EQ("0,1", FilesPerLevel(0));
+  Reopen(options);
 
-  live_files_meta.clear();
-  dbfull()->GetLiveFilesMetaData(&live_files_meta);
-  ASSERT_EQ(live_files_meta.size(), 1);
+  Random rnd(301);
 
-  live_sst_files_size = 0;
-  total_sst_files_size = 0;
-  for (const auto& file_meta : live_files_meta) {
-    live_sst_files_size += file_meta.size;
+  for (int num = 0; num < 3; num++) {
+    GenerateNewRandomFile(&rnd);
   }
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 1 (compacted file)
-  // Total SST files = 6 (5 original files + compacted file)
-  ASSERT_EQ(live_sst_files_size, 1 * single_file_size);
-  ASSERT_EQ(total_sst_files_size, 6 * single_file_size);
 
-  // hold current version
-  std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ("0,4", FilesPerLevel(0));
+  ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual(
+      options.compaction_filter_factory.get()));
 
-  // Delete all keys and compact, this will delete all live files
-  for (int i = 0; i < 10; i++) {
-    ASSERT_OK(Delete(Key(i)));
-  }
-  Flush();
-  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  ASSERT_EQ("", FilesPerLevel(0));
-
-  live_files_meta.clear();
-  dbfull()->GetLiveFilesMetaData(&live_files_meta);
-  ASSERT_EQ(live_files_meta.size(), 0);
-
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 0
-  // Total SST files = 6 (5 original files + compacted file)
-  ASSERT_EQ(total_sst_files_size, 6 * single_file_size);
-
-  iter1.reset();
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 0
-  // Total SST files = 1 (compacted file)
-  ASSERT_EQ(total_sst_files_size, 1 * single_file_size);
-
-  iter2.reset();
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 0
-  // Total SST files = 0
-  ASSERT_EQ(total_sst_files_size, 0);
-}
-
-TEST_F(DBTest, GetTotalSstFilesSizeVersionsFilesShared) {
-  Options options = CurrentOptions();
-  options.disable_auto_compactions = true;
-  options.compression = kNoCompression;
-  DestroyAndReopen(options);
-  // Generate 5 files in L0
-  for (int i = 0; i < 5; i++) {
-    ASSERT_OK(Put(Key(i), "val"));
-    Flush();
-  }
-  ASSERT_EQ("5", FilesPerLevel(0));
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ("1,4", FilesPerLevel(0));
 
-  std::vector<LiveFileMetaData> live_files_meta;
-  dbfull()->GetLiveFilesMetaData(&live_files_meta);
-  ASSERT_EQ(live_files_meta.size(), 5);
-  uint64_t single_file_size = live_files_meta[0].size;
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ("2,4", FilesPerLevel(0));
 
-  uint64_t live_sst_files_size = 0;
-  uint64_t total_sst_files_size = 0;
-  for (const auto& file_meta : live_files_meta) {
-    live_sst_files_size += file_meta.size;
-  }
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ("3,4", FilesPerLevel(0));
+
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ("0,4,4", FilesPerLevel(0));
 
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ("1,4,4", FilesPerLevel(0));
 
-  // Live SST files = 5
-  // Total SST files = 5
-  ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
-  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ("2,4,4", FilesPerLevel(0));
 
-  // hold current version
-  std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ("3,4,4", FilesPerLevel(0));
 
-  // Compaction will do trivial move from L0 to L1
-  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  ASSERT_EQ("0,5", FilesPerLevel(0));
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ("0,4,8", FilesPerLevel(0));
 
-  live_files_meta.clear();
-  dbfull()->GetLiveFilesMetaData(&live_files_meta);
-  ASSERT_EQ(live_files_meta.size(), 5);
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ("1,4,8", FilesPerLevel(0));
 
-  live_sst_files_size = 0;
-  total_sst_files_size = 0;
-  for (const auto& file_meta : live_files_meta) {
-    live_sst_files_size += file_meta.size;
+  // compact it three times
+  for (int i = 0; i < 3; ++i) {
+    ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr));
+    dbfull()->TEST_WaitForCompact();
   }
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 5
-  // Total SST files = 5 (used in 2 version)
-  ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
-  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
 
-  // hold current version
-  std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));
+  // All files are compacted
+  ASSERT_EQ(0, NumTableFilesAtLevel(0));
+  ASSERT_EQ(0, NumTableFilesAtLevel(1));
 
-  // Delete all keys and compact, this will delete all live files
-  for (int i = 0; i < 5; i++) {
-    ASSERT_OK(Delete(Key(i)));
-  }
-  Flush();
-  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  ASSERT_EQ("", FilesPerLevel(0));
+  GenerateNewRandomFile(&rnd);
+  ASSERT_EQ(1, NumTableFilesAtLevel(0));
 
-  live_files_meta.clear();
-  dbfull()->GetLiveFilesMetaData(&live_files_meta);
-  ASSERT_EQ(live_files_meta.size(), 0);
+  // nonoverlapping with the file on level 0
+  Slice start("a"), end("b");
+  ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
+  dbfull()->TEST_WaitForCompact();
 
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 0
-  // Total SST files = 5 (used in 2 version)
-  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
+  // should not compact the level 0 file
+  ASSERT_EQ(1, NumTableFilesAtLevel(0));
 
-  iter1.reset();
-  iter2.reset();
+  start = Slice("j");
+  end = Slice("m");
+  ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
+  dbfull()->TEST_WaitForCompact();
+  ASSERT_TRUE(CompactionFilterFactoryGetContext::IsManual(
+      options.compaction_filter_factory.get()));
 
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 0
-  // Total SST files = 0
-  ASSERT_EQ(total_sst_files_size, 0);
+  // now it should compact the level 0 file
+  ASSERT_EQ(0, NumTableFilesAtLevel(0));
+  ASSERT_EQ(1, NumTableFilesAtLevel(1));
 }
 
-TEST_F(DBTest, AddExternalSstFile) {
-  do {
-    std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
-    env_->CreateDir(sst_files_folder);
-    Options options = CurrentOptions();
-    options.env = env_;
-    const ImmutableCFOptions ioptions(options);
-
-    SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
-
-    // file1.sst (0 => 99)
-    std::string file1 = sst_files_folder + "file1.sst";
-    ASSERT_OK(sst_file_writer.Open(file1));
-    for (int k = 0; k < 100; k++) {
-      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
-    }
-    ExternalSstFileInfo file1_info;
-    Status s = sst_file_writer.Finish(&file1_info);
-    ASSERT_TRUE(s.ok()) << s.ToString();
-    ASSERT_EQ(file1_info.file_path, file1);
-    ASSERT_EQ(file1_info.num_entries, 100);
-    ASSERT_EQ(file1_info.smallest_key, Key(0));
-    ASSERT_EQ(file1_info.largest_key, Key(99));
-    // sst_file_writer already finished, cannot add this value
-    s = sst_file_writer.Add(Key(100), "bad_val");
-    ASSERT_FALSE(s.ok()) << s.ToString();
-
-    // file2.sst (100 => 199)
-    std::string file2 = sst_files_folder + "file2.sst";
-    ASSERT_OK(sst_file_writer.Open(file2));
-    for (int k = 100; k < 200; k++) {
-      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
-    }
-    // Cannot add this key because it's not after last added key
-    s = sst_file_writer.Add(Key(99), "bad_val");
-    ASSERT_FALSE(s.ok()) << s.ToString();
-    ExternalSstFileInfo file2_info;
-    s = sst_file_writer.Finish(&file2_info);
-    ASSERT_TRUE(s.ok()) << s.ToString();
-    ASSERT_EQ(file2_info.file_path, file2);
-    ASSERT_EQ(file2_info.num_entries, 100);
-    ASSERT_EQ(file2_info.smallest_key, Key(100));
-    ASSERT_EQ(file2_info.largest_key, Key(199));
-
-    // file3.sst (195 => 299)
-    // This file values overlap with file2 values
-    std::string file3 = sst_files_folder + "file3.sst";
-    ASSERT_OK(sst_file_writer.Open(file3));
-    for (int k = 195; k < 300; k++) {
-      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
-    }
-    ExternalSstFileInfo file3_info;
-    s = sst_file_writer.Finish(&file3_info);
-    ASSERT_TRUE(s.ok()) << s.ToString();
-    ASSERT_EQ(file3_info.file_path, file3);
-    ASSERT_EQ(file3_info.num_entries, 105);
-    ASSERT_EQ(file3_info.smallest_key, Key(195));
-    ASSERT_EQ(file3_info.largest_key, Key(299));
-
-    // file4.sst (30 => 39)
-    // This file values overlap with file1 values
-    std::string file4 = sst_files_folder + "file4.sst";
-    ASSERT_OK(sst_file_writer.Open(file4));
-    for (int k = 30; k < 40; k++) {
-      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
-    }
-    ExternalSstFileInfo file4_info;
-    s = sst_file_writer.Finish(&file4_info);
-    ASSERT_TRUE(s.ok()) << s.ToString();
-    ASSERT_EQ(file4_info.file_path, file4);
-    ASSERT_EQ(file4_info.num_entries, 10);
-    ASSERT_EQ(file4_info.smallest_key, Key(30));
-    ASSERT_EQ(file4_info.largest_key, Key(39));
-
-    // file5.sst (400 => 499)
-    std::string file5 = sst_files_folder + "file5.sst";
-    ASSERT_OK(sst_file_writer.Open(file5));
-    for (int k = 400; k < 500; k++) {
-      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
-    }
-    ExternalSstFileInfo file5_info;
-    s = sst_file_writer.Finish(&file5_info);
-    ASSERT_TRUE(s.ok()) << s.ToString();
-    ASSERT_EQ(file5_info.file_path, file5);
-    ASSERT_EQ(file5_info.num_entries, 100);
-    ASSERT_EQ(file5_info.smallest_key, Key(400));
-    ASSERT_EQ(file5_info.largest_key, Key(499));
-
-    // Cannot create an empty sst file
-    std::string file_empty = sst_files_folder + "file_empty.sst";
-    ExternalSstFileInfo file_empty_info;
-    s = sst_file_writer.Finish(&file_empty_info);
-    ASSERT_NOK(s);
+TEST_F(DBTest, PromoteL0) {
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.write_buffer_size = 10 * 1024 * 1024;
+  DestroyAndReopen(options);
 
-    DestroyAndReopen(options);
-    // Add file using file path
-    s = db_->AddFile(file1);
-    ASSERT_TRUE(s.ok()) << s.ToString();
-    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
-    for (int k = 0; k < 100; k++) {
-      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
-    }
+  // non overlapping ranges
+  std::vector<std::pair<int32_t, int32_t>> ranges = {
+      {81, 160}, {0, 80}, {161, 240}, {241, 320}};
 
-    // Add file while holding a snapshot will fail
-    const Snapshot* s1 = db_->GetSnapshot();
-    if (s1 != nullptr) {
-      ASSERT_NOK(db_->AddFile(&file2_info));
-      db_->ReleaseSnapshot(s1);
-    }
-    // We can add the file after releaseing the snapshot
-    ASSERT_OK(db_->AddFile(&file2_info));
+  int32_t value_size = 10 * 1024;  // 10 KB
 
-    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
-    for (int k = 0; k < 200; k++) {
-      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
+  Random rnd(301);
+  std::map<int32_t, std::string> values;
+  for (const auto& range : ranges) {
+    for (int32_t j = range.first; j < range.second; j++) {
+      values[j] = RandomString(&rnd, value_size);
+      ASSERT_OK(Put(Key(j), values[j]));
     }
+    ASSERT_OK(Flush());
+  }
 
-    // This file have overlapping values with the exisitng data
-    s = db_->AddFile(file3);
-    ASSERT_FALSE(s.ok()) << s.ToString();
-
-    // This file have overlapping values with the exisitng data
-    s = db_->AddFile(&file4_info);
-    ASSERT_FALSE(s.ok()) << s.ToString();
-
-    // Overwrite values of keys divisible by 5
-    for (int k = 0; k < 200; k += 5) {
-      ASSERT_OK(Put(Key(k), Key(k) + "_val_new"));
-    }
-    ASSERT_NE(db_->GetLatestSequenceNumber(), 0U);
+  int32_t level0_files = NumTableFilesAtLevel(0, 0);
+  ASSERT_EQ(level0_files, ranges.size());
+  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);  // No files in L1
 
-    // Key range of file5 (400 => 499) dont overlap with any keys in DB
-    ASSERT_OK(db_->AddFile(file5));
+  // Promote L0 level to L2.
+  ASSERT_OK(experimental::PromoteL0(db_, db_->DefaultColumnFamily(), 2));
+  // We expect that all the files were trivially moved from L0 to L2
+  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
+  ASSERT_EQ(NumTableFilesAtLevel(2, 0), level0_files);
 
-    // Make sure values are correct before and after flush/compaction
-    for (int i = 0; i < 2; i++) {
-      for (int k = 0; k < 200; k++) {
-        std::string value = Key(k) + "_val";
-        if (k % 5 == 0) {
-          value += "_new";
-        }
-        ASSERT_EQ(Get(Key(k)), value);
-      }
-      for (int k = 400; k < 500; k++) {
-        std::string value = Key(k) + "_val";
-        ASSERT_EQ(Get(Key(k)), value);
-      }
-      ASSERT_OK(Flush());
-      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-    }
+  for (const auto& kv : values) {
+    ASSERT_EQ(Get(Key(kv.first)), kv.second);
+  }
+}
 
-    Close();
-    options.disable_auto_compactions = true;
-    Reopen(options);
+TEST_F(DBTest, PromoteL0Failure) {
+  Options options = CurrentOptions();
+  options.disable_auto_compactions = true;
+  options.write_buffer_size = 10 * 1024 * 1024;
+  DestroyAndReopen(options);
 
-    // Delete keys in range (400 => 499)
-    for (int k = 400; k < 500; k++) {
-      ASSERT_OK(Delete(Key(k)));
-    }
-    // We deleted range (400 => 499) but cannot add file5 because
-    // of the range tombstones
-    ASSERT_NOK(db_->AddFile(file5));
+  // Produce two L0 files with overlapping ranges.
+  ASSERT_OK(Put(Key(0), ""));
+  ASSERT_OK(Put(Key(3), ""));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Put(Key(1), ""));
+  ASSERT_OK(Flush());
 
-    // Compacting the DB will remove the tombstones
-    ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+  Status status;
+  // Fails because L0 has overlapping files.
+  status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
+  ASSERT_TRUE(status.IsInvalidArgument());
 
-    // Now we can add the file
-    ASSERT_OK(db_->AddFile(file5));
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+  // Now there is a file in L1.
+  ASSERT_GE(NumTableFilesAtLevel(1, 0), 1);
 
-    // Verify values of file5 in DB
-    for (int k = 400; k < 500; k++) {
-      std::string value = Key(k) + "_val";
-      ASSERT_EQ(Get(Key(k)), value);
-    }
-  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
-                         kSkipFIFOCompaction));
+  ASSERT_OK(Put(Key(5), ""));
+  ASSERT_OK(Flush());
+  // Fails because L1 is non-empty.
+  status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
+  ASSERT_TRUE(status.IsInvalidArgument());
 }
+#endif  // ROCKSDB_LITE
 
-// This test reporduce a bug that can happen in some cases if the DB started
-// purging obsolete files when we are adding an external sst file.
-// This situation may result in deleting the file while it's being added.
-TEST_F(DBTest, AddExternalSstFilePurgeObsoleteFilesBug) {
-  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
-  env_->CreateDir(sst_files_folder);
+// Github issue #596
+TEST_F(DBTest, HugeNumberOfLevels) {
   Options options = CurrentOptions();
-  options.env = env_;
-  const ImmutableCFOptions ioptions(options);
-
-  SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
+  options.write_buffer_size = 2 * 1024 * 1024;         // 2MB
+  options.max_bytes_for_level_base = 2 * 1024 * 1024;  // 2MB
+  options.num_levels = 12;
+  options.max_background_compactions = 10;
+  options.max_bytes_for_level_multiplier = 2;
+  options.level_compaction_dynamic_level_bytes = true;
+  DestroyAndReopen(options);
 
-  // file1.sst (0 => 500)
-  std::string sst_file_path = sst_files_folder + "file1.sst";
-  Status s = sst_file_writer.Open(sst_file_path);
-  ASSERT_OK(s);
-  for (int i = 0; i < 500; i++) {
-    std::string k = Key(i);
-    s = sst_file_writer.Add(k, k + "_val");
-    ASSERT_OK(s);
+  Random rnd(301);
+  for (int i = 0; i < 300000; ++i) {
+    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
   }
 
-  ExternalSstFileInfo sst_file_info;
-  s = sst_file_writer.Finish(&sst_file_info);
-  ASSERT_OK(s);
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+}
 
-  options.delete_obsolete_files_period_micros = 0;
-  options.disable_auto_compactions = true;
+TEST_F(DBTest, AutomaticConflictsWithManualCompaction) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 2 * 1024 * 1024;         // 2MB
+  options.max_bytes_for_level_base = 2 * 1024 * 1024;  // 2MB
+  options.num_levels = 12;
+  options.max_background_compactions = 10;
+  options.max_bytes_for_level_multiplier = 2;
+  options.level_compaction_dynamic_level_bytes = true;
   DestroyAndReopen(options);
 
-  rocksdb::SyncPoint::GetInstance()->SetCallBack(
-      "DBImpl::AddFile:FileCopied", [&](void* arg) {
-        ASSERT_OK(Put("aaa", "bbb"));
-        ASSERT_OK(Flush());
-        ASSERT_OK(Put("aaa", "xxx"));
-        ASSERT_OK(Flush());
-        db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-      });
-  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
-
-  s = db_->AddFile(sst_file_path);
-  ASSERT_OK(s);
-
-  for (int i = 0; i < 500; i++) {
-    std::string k = Key(i);
-    std::string v = k + "_val";
-    ASSERT_EQ(Get(k), v);
+  Random rnd(301);
+  for (int i = 0; i < 300000; ++i) {
+    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
   }
 
+  std::atomic<int> callback_count(0);
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::BackgroundCompaction()::Conflict",
+      [&](void* arg) { callback_count.fetch_add(1); });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+  CompactRangeOptions croptions;
+  croptions.exclusive_manual_compaction = false;
+  ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr));
+  ASSERT_GE(callback_count.load(), 1);
   rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  for (int i = 0; i < 300000; ++i) {
+    ASSERT_NE("NOT_FOUND", Get(Key(i)));
+  }
 }
 
-TEST_F(DBTest, AddExternalSstFileNoCopy) {
-  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
-  env_->CreateDir(sst_files_folder);
+// Github issue #595
+// Large write batch with column families
+TEST_F(DBTest, LargeBatchWithColumnFamilies) {
   Options options = CurrentOptions();
   options.env = env_;
-  const ImmutableCFOptions ioptions(options);
-
-  SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
-
-  // file1.sst (0 => 99)
-  std::string file1 = sst_files_folder + "file1.sst";
-  ASSERT_OK(sst_file_writer.Open(file1));
-  for (int k = 0; k < 100; k++) {
-    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
-  }
-  ExternalSstFileInfo file1_info;
-  Status s = sst_file_writer.Finish(&file1_info);
-  ASSERT_TRUE(s.ok()) << s.ToString();
-  ASSERT_EQ(file1_info.file_path, file1);
-  ASSERT_EQ(file1_info.num_entries, 100);
-  ASSERT_EQ(file1_info.smallest_key, Key(0));
-  ASSERT_EQ(file1_info.largest_key, Key(99));
-
-  // file2.sst (100 => 299)
-  std::string file2 = sst_files_folder + "file2.sst";
-  ASSERT_OK(sst_file_writer.Open(file2));
-  for (int k = 100; k < 300; k++) {
-    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
-  }
-  ExternalSstFileInfo file2_info;
-  s = sst_file_writer.Finish(&file2_info);
-  ASSERT_TRUE(s.ok()) << s.ToString();
-  ASSERT_EQ(file2_info.file_path, file2);
-  ASSERT_EQ(file2_info.num_entries, 200);
-  ASSERT_EQ(file2_info.smallest_key, Key(100));
-  ASSERT_EQ(file2_info.largest_key, Key(299));
-
-  // file3.sst (110 => 124) .. overlap with file2.sst
-  std::string file3 = sst_files_folder + "file3.sst";
-  ASSERT_OK(sst_file_writer.Open(file3));
-  for (int k = 110; k < 125; k++) {
-    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
-  }
-  ExternalSstFileInfo file3_info;
-  s = sst_file_writer.Finish(&file3_info);
-  ASSERT_TRUE(s.ok()) << s.ToString();
-  ASSERT_EQ(file3_info.file_path, file3);
-  ASSERT_EQ(file3_info.num_entries, 15);
-  ASSERT_EQ(file3_info.smallest_key, Key(110));
-  ASSERT_EQ(file3_info.largest_key, Key(124));
-
-  s = db_->AddFile(&file1_info, true /* move file */);
-  ASSERT_TRUE(s.ok()) << s.ToString();
-  ASSERT_EQ(Status::NotFound(), env_->FileExists(file1));
-
-  s = db_->AddFile(&file2_info, false /* copy file */);
-  ASSERT_TRUE(s.ok()) << s.ToString();
-  ASSERT_OK(env_->FileExists(file2));
-
-  // This file have overlapping values with the exisitng data
-  s = db_->AddFile(&file3_info, true /* move file */);
-  ASSERT_FALSE(s.ok()) << s.ToString();
-  ASSERT_OK(env_->FileExists(file3));
-
-  for (int k = 0; k < 300; k++) {
-    ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
-  }
-}
-
-TEST_F(DBTest, AddExternalSstFileMultiThreaded) {
-  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
-  // Bulk load 10 files every file contain 1000 keys
-  int num_files = 10;
-  int keys_per_file = 1000;
-
-  // Generate file names
-  std::vector<std::string> file_names;
-  for (int i = 0; i < num_files; i++) {
-    std::string file_name = "file_" + ToString(i) + ".sst";
-    file_names.push_back(sst_files_folder + file_name);
-  }
-
-  do {
-    env_->CreateDir(sst_files_folder);
-    Options options = CurrentOptions();
-    const ImmutableCFOptions ioptions(options);
-
-    std::atomic<int> thread_num(0);
-    std::function<void()> write_file_func = [&]() {
-      int file_idx = thread_num.fetch_add(1);
-      int range_start = file_idx * keys_per_file;
-      int range_end = range_start + keys_per_file;
-
-      SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
-
-      ASSERT_OK(sst_file_writer.Open(file_names[file_idx]));
-
-      for (int k = range_start; k < range_end; k++) {
-        ASSERT_OK(sst_file_writer.Add(Key(k), Key(k)));
+  options.write_buffer_size = 100000;  // Small write buffer
+  CreateAndReopenWithCF({"pikachu"}, options);
+  int64_t j = 0;
+  for (int i = 0; i < 5; i++) {
+    for (int pass = 1; pass <= 3; pass++) {
+      WriteBatch batch;
+      size_t write_size = 1024 * 1024 * (5 + i);
+      fprintf(stderr, "prepare: %" ROCKSDB_PRIszt " MB, pass:%d\n",
+              (write_size / 1024 / 1024), pass);
+      for (;;) {
+        std::string data(3000, j++ % 127 + 20);
+        data += ToString(j);
+        batch.Put(handles_[0], Slice(data), Slice(data));
+        if (batch.GetDataSize() > write_size) {
+          break;
+        }
       }
-
-      Status s = sst_file_writer.Finish();
-      ASSERT_TRUE(s.ok()) << s.ToString();
-    };
-    // Write num_files files in parallel
-    std::vector<std::thread> sst_writer_threads;
-    for (int i = 0; i < num_files; ++i) {
-      sst_writer_threads.emplace_back(write_file_func);
+      fprintf(stderr, "write: %" ROCKSDB_PRIszt " MB\n",
+              (batch.GetDataSize() / 1024 / 1024));
+      ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
+      fprintf(stderr, "done\n");
     }
+  }
+  // make sure we can re-open it.
+  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
+}
 
-    for (auto& t : sst_writer_threads) {
-      t.join();
+// Make sure that Flushes can proceed in parallel with CompactRange()
+TEST_F(DBTest, FlushesInParallelWithCompactRange) {
+  // iter == 0 -- leveled
+  // iter == 1 -- leveled, but throw in a flush between two levels compacting
+  // iter == 2 -- universal
+  for (int iter = 0; iter < 3; ++iter) {
+    Options options = CurrentOptions();
+    if (iter < 2) {
+      options.compaction_style = kCompactionStyleLevel;
+    } else {
+      options.compaction_style = kCompactionStyleUniversal;
     }
+    options.write_buffer_size = 110 << 10;
+    options.level0_file_num_compaction_trigger = 4;
+    options.num_levels = 4;
+    options.compression = kNoCompression;
+    options.max_bytes_for_level_base = 450 << 10;
+    options.target_file_size_base = 98 << 10;
+    options.max_write_buffer_number = 2;
 
-    fprintf(stderr, "Wrote %d files (%d keys)\n", num_files,
-            num_files * keys_per_file);
-
-    thread_num.store(0);
-    std::atomic<int> files_added(0);
-    std::function<void()> load_file_func = [&]() {
-      // We intentionally add every file twice, and assert that it was added
-      // only once and the other add failed
-      int thread_id = thread_num.fetch_add(1);
-      int file_idx = thread_id / 2;
-      // sometimes we use copy, sometimes link .. the result should be the same
-      bool move_file = (thread_id % 3 == 0);
-
-      Status s = db_->AddFile(file_names[file_idx], move_file);
-      if (s.ok()) {
-        files_added++;
-      }
-    };
-    // Bulk load num_files files in parallel
-    std::vector<std::thread> add_file_threads;
     DestroyAndReopen(options);
-    for (int i = 0; i < num_files * 2; ++i) {
-      add_file_threads.emplace_back(load_file_func);
-    }
-
-    for (auto& t : add_file_threads) {
-      t.join();
-    }
-    ASSERT_EQ(files_added.load(), num_files);
-    fprintf(stderr, "Loaded %d files (%d keys)\n", num_files,
-            num_files * keys_per_file);
-
-    // Overwrite values of keys divisible by 100
-    for (int k = 0; k < num_files * keys_per_file; k += 100) {
-      std::string key = Key(k);
-      Status s = Put(key, key + "_new");
-      ASSERT_TRUE(s.ok());
-    }
 
-    for (int i = 0; i < 2; i++) {
-      // Make sure the values are correct before and after flush/compaction
-      for (int k = 0; k < num_files * keys_per_file; ++k) {
-        std::string key = Key(k);
-        std::string value = (k % 100 == 0) ? (key + "_new") : key;
-        ASSERT_EQ(Get(key), value);
-      }
-      ASSERT_OK(Flush());
-      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+    Random rnd(301);
+    for (int num = 0; num < 14; num++) {
+      GenerateNewRandomFile(&rnd);
     }
 
-    fprintf(stderr, "Verified %d values\n", num_files * keys_per_file);
-  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
-                         kSkipFIFOCompaction));
-}
-
-TEST_F(DBTest, AddExternalSstFileOverlappingRanges) {
-  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
-  Random rnd(301);
-  do {
-    env_->CreateDir(sst_files_folder);
-    Options options = CurrentOptions();
-    DestroyAndReopen(options);
-    const ImmutableCFOptions ioptions(options);
-    SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);
-
-    printf("Option config = %d\n", option_config_);
-    std::vector<std::pair<int, int>> key_ranges;
-    for (int i = 0; i < 500; i++) {
-      int range_start = rnd.Uniform(20000);
-      int keys_per_range = 10 + rnd.Uniform(41);
-
-      key_ranges.emplace_back(range_start, range_start + keys_per_range);
+    if (iter == 1) {
+      rocksdb::SyncPoint::GetInstance()->LoadDependency(
+          {{"DBImpl::RunManualCompaction()::1",
+            "DBTest::FlushesInParallelWithCompactRange:1"},
+           {"DBTest::FlushesInParallelWithCompactRange:2",
+            "DBImpl::RunManualCompaction()::2"}});
+    } else {
+      rocksdb::SyncPoint::GetInstance()->LoadDependency(
+          {{"CompactionJob::Run():Start",
+            "DBTest::FlushesInParallelWithCompactRange:1"},
+           {"DBTest::FlushesInParallelWithCompactRange:2",
+            "CompactionJob::Run():End"}});
     }
+    rocksdb::SyncPoint::GetInstance()->EnableProcessing();
 
-    int memtable_add = 0;
-    int success_add_file = 0;
-    int failed_add_file = 0;
-    std::map<std::string, std::string> true_data;
-    for (size_t i = 0; i < key_ranges.size(); i++) {
-      int range_start = key_ranges[i].first;
-      int range_end = key_ranges[i].second;
-
-      Status s;
-      std::string range_val = "range_" + ToString(i);
-
-      // For 20% of ranges we use DB::Put, for 80% we use DB::AddFile
-      if (i && i % 5 == 0) {
-        // Use DB::Put to insert range (insert into memtable)
-        range_val += "_put";
-        for (int k = range_start; k <= range_end; k++) {
-          s = Put(Key(k), range_val);
-          ASSERT_OK(s);
-        }
-        memtable_add++;
-      } else {
-        // Use DB::AddFile to insert range
-        range_val += "_add_file";
-
-        // Generate the file containing the range
-        std::string file_name = sst_files_folder + env_->GenerateUniqueId();
-        ASSERT_OK(sst_file_writer.Open(file_name));
-        for (int k = range_start; k <= range_end; k++) {
-          s = sst_file_writer.Add(Key(k), range_val);
-          ASSERT_OK(s);
-        }
-        ExternalSstFileInfo file_info;
-        s = sst_file_writer.Finish(&file_info);
-        ASSERT_OK(s);
-
-        // Insert the generated file
-        s = db_->AddFile(&file_info);
-
-        auto it = true_data.lower_bound(Key(range_start));
-        if (it != true_data.end() && it->first <= Key(range_end)) {
-          // This range overlap with data already exist in DB
-          ASSERT_NOK(s);
-          failed_add_file++;
-        } else {
-          ASSERT_OK(s);
-          success_add_file++;
-        }
-      }
-
-      if (s.ok()) {
-        // Update true_data map to include the new inserted data
-        for (int k = range_start; k <= range_end; k++) {
-          true_data[Key(k)] = range_val;
-        }
-      }
+    std::vector<std::thread> threads;
+    threads.emplace_back([&]() { Compact("a", "z"); });
 
-      // Flush / Compact the DB
-      if (i && i % 50 == 0) {
-        Flush();
-      }
-      if (i && i % 75 == 0) {
-        db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-      }
+    TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:1");
+
+    // this has to start a flush. if flushes are blocked, this will try to
+    // create
+    // 3 memtables, and that will fail because max_write_buffer_number is 2
+    for (int num = 0; num < 3; num++) {
+      GenerateNewRandomFile(&rnd, /* nowait */ true);
     }
 
-    printf(
-        "Total: %zu ranges\n"
-        "AddFile()|Success: %d ranges\n"
-        "AddFile()|RangeConflict: %d ranges\n"
-        "Put(): %d ranges\n",
-        key_ranges.size(), success_add_file, failed_add_file, memtable_add);
+    TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:2");
 
-    // Verify the correctness of the data
-    for (const auto& kv : true_data) {
-      ASSERT_EQ(Get(kv.first), kv.second);
+    for (auto& t : threads) {
+      t.join();
     }
-    printf("keys/values verified\n");
-  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
-                         kSkipFIFOCompaction));
+    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  }
 }
 
-#endif  // ROCKSDB_LITE
+TEST_F(DBTest, DelayedWriteRate) {
+  const int kEntriesPerMemTable = 100;
+  const int kTotalFlushes = 20;
 
-// 1 Create some SST files by inserting K-V pairs into DB
-// 2 Close DB and change suffix from ".sst" to ".ldb" for every other SST file
-// 3 Open DB and check if all key can be read
-TEST_F(DBTest, SSTsWithLdbSuffixHandling) {
   Options options = CurrentOptions();
-  options.write_buffer_size = 110 << 10;  // 110KB
-  options.num_levels = 4;
-  DestroyAndReopen(options);
+  env_->SetBackgroundThreads(1, Env::LOW);
+  options.env = env_;
+  env_->no_sleep_ = true;
+  options.write_buffer_size = 100000000;
+  options.max_write_buffer_number = 256;
+  options.max_background_compactions = 1;
+  options.level0_file_num_compaction_trigger = 3;
+  options.level0_slowdown_writes_trigger = 3;
+  options.level0_stop_writes_trigger = 999999;
+  options.delayed_write_rate = 20000000;  // Start with 200MB/s
+  options.memtable_factory.reset(
+      new SpecialSkipListFactory(kEntriesPerMemTable));
 
-  Random rnd(301);
-  int key_id = 0;
-  for (int i = 0; i < 10; ++i) {
-    GenerateNewFile(&rnd, &key_id, false);
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  // Block compactions
+  test::SleepingBackgroundTask sleeping_task_low;
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
+                 Env::Priority::LOW);
+
+  for (int i = 0; i < 3; i++) {
+    Put(Key(i), std::string(10000, 'x'));
+    Flush();
   }
-  Flush();
-  Close();
-  int const num_files = GetSstFileCount(dbname_);
-  ASSERT_GT(num_files, 0);
 
-  std::vector<std::string> filenames;
-  GetSstFiles(dbname_, &filenames);
-  int num_ldb_files = 0;
-  for (size_t i = 0; i < filenames.size(); ++i) {
-    if (i & 1) {
-      continue;
+  // These writes will be slowed down to 1KB/s
+  uint64_t estimated_sleep_time = 0;
+  Random rnd(301);
+  Put("", "");
+  uint64_t cur_rate = options.delayed_write_rate;
+  for (int i = 0; i < kTotalFlushes; i++) {
+    uint64_t size_memtable = 0;
+    for (int j = 0; j < kEntriesPerMemTable; j++) {
+      auto rand_num = rnd.Uniform(20);
+      // Spread the size range to more.
+      size_t entry_size = rand_num * rand_num * rand_num;
+      WriteOptions wo;
+      Put(Key(i), std::string(entry_size, 'x'), wo);
+      size_memtable += entry_size + 18;
+      // Occasionally sleep a while
+      if (rnd.Uniform(20) == 6) {
+        env_->SleepForMicroseconds(2666);
+      }
     }
-    std::string const rdb_name = dbname_ + "/" + filenames[i];
-    std::string const ldb_name = Rocks2LevelTableFileName(rdb_name);
-    ASSERT_TRUE(env_->RenameFile(rdb_name, ldb_name).ok());
-    ++num_ldb_files;
+    dbfull()->TEST_WaitForFlushMemTable();
+    estimated_sleep_time += size_memtable * 1000000u / cur_rate;
+    // Slow down twice. One for memtable switch and one for flush finishes.
+    cur_rate = static_cast<uint64_t>(static_cast<double>(cur_rate) /
+                                     kSlowdownRatio / kSlowdownRatio);
   }
-  ASSERT_GT(num_ldb_files, 0);
-  ASSERT_EQ(num_files, GetSstFileCount(dbname_));
+  // Estimate the total sleep time fall into the rough range.
+  ASSERT_GT(env_->addon_time_.load(),
+            static_cast<int64_t>(estimated_sleep_time / 2));
+  ASSERT_LT(env_->addon_time_.load(),
+            static_cast<int64_t>(estimated_sleep_time * 2));
 
-  Reopen(options);
-  for (int k = 0; k < key_id; ++k) {
-    ASSERT_NE("NOT_FOUND", Get(Key(k)));
-  }
-  Destroy(options);
+  env_->no_sleep_ = false;
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  sleeping_task_low.WakeUp();
+  sleeping_task_low.WaitUntilDone();
 }
 
-TEST_F(DBTest, PinnedDataIteratorRandomized) {
-  enum TestConfig {
-    NORMAL,
-    CLOSE_AND_OPEN,
-    COMPACT_BEFORE_READ,
-    FLUSH_EVERY_1000,
-    MAX
-  };
-
-  // Generate Random data
-  Random rnd(301);
+TEST_F(DBTest, HardLimit) {
+  Options options = CurrentOptions();
+  options.env = env_;
+  env_->SetBackgroundThreads(1, Env::LOW);
+  options.max_write_buffer_number = 256;
+  options.write_buffer_size = 110 << 10;  // 110KB
+  options.arena_block_size = 4 * 1024;
+  options.level0_file_num_compaction_trigger = 4;
+  options.level0_slowdown_writes_trigger = 999999;
+  options.level0_stop_writes_trigger = 999999;
+  options.hard_pending_compaction_bytes_limit = 800 << 10;
+  options.max_bytes_for_level_base = 10000000000u;
+  options.max_background_compactions = 1;
+  options.memtable_factory.reset(
+      new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
 
-  int puts = 100000;
-  int key_pool = static_cast<int>(puts * 0.7);
-  int key_size = 100;
-  int val_size = 1000;
-  int seeks_percentage = 20;   // 20% of keys will be used to test seek()
-  int delete_percentage = 20;  // 20% of keys will be deleted
-  int merge_percentage = 20;   // 20% of keys will be added using Merge()
+  env_->SetBackgroundThreads(1, Env::LOW);
+  test::SleepingBackgroundTask sleeping_task_low;
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
+                 Env::Priority::LOW);
 
-  for (int run_config = 0; run_config < TestConfig::MAX; run_config++) {
-    Options options = CurrentOptions();
-    BlockBasedTableOptions table_options;
-    table_options.use_delta_encoding = false;
-    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-    options.merge_operator = MergeOperators::CreatePutOperator();
-    DestroyAndReopen(options);
+  CreateAndReopenWithCF({"pikachu"}, options);
 
-    std::vector<std::string> generated_keys(key_pool);
-    for (int i = 0; i < key_pool; i++) {
-      generated_keys[i] = RandomString(&rnd, key_size);
-    }
+  std::atomic<int> callback_count(0);
+  rocksdb::SyncPoint::GetInstance()->SetCallBack("DBImpl::DelayWrite:Wait",
+                                                 [&](void* arg) {
+                                                   callback_count.fetch_add(1);
+                                                   sleeping_task_low.WakeUp();
+                                                 });
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
 
-    std::map<std::string, std::string> true_data;
-    std::vector<std::string> random_keys;
-    std::vector<std::string> deleted_keys;
-    for (int i = 0; i < puts; i++) {
-      auto& k = generated_keys[rnd.Next() % key_pool];
-      auto v = RandomString(&rnd, val_size);
-
-      // Insert data to true_data map and to DB
-      true_data[k] = v;
-      if (rnd.OneIn(static_cast<int>(100.0 / merge_percentage))) {
-        ASSERT_OK(db_->Merge(WriteOptions(), k, v));
-      } else {
-        ASSERT_OK(Put(k, v));
-      }
+  Random rnd(301);
+  int key_idx = 0;
+  for (int num = 0; num < 5; num++) {
+    GenerateNewFile(&rnd, &key_idx, true);
+    dbfull()->TEST_WaitForFlushMemTable();
+  }
 
-      // Pick random keys to be used to test Seek()
-      if (rnd.OneIn(static_cast<int>(100.0 / seeks_percentage))) {
-        random_keys.push_back(k);
-      }
+  ASSERT_EQ(0, callback_count.load());
 
-      // Delete some random keys
-      if (rnd.OneIn(static_cast<int>(100.0 / delete_percentage))) {
-        deleted_keys.push_back(k);
-        true_data.erase(k);
-        ASSERT_OK(Delete(k));
-      }
+  for (int num = 0; num < 5; num++) {
+    GenerateNewFile(&rnd, &key_idx, true);
+    dbfull()->TEST_WaitForFlushMemTable();
+  }
+  ASSERT_GE(callback_count.load(), 1);
 
-      if (run_config == TestConfig::FLUSH_EVERY_1000) {
-        if (i && i % 1000 == 0) {
-          Flush();
-        }
-      }
-    }
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  sleeping_task_low.WaitUntilDone();
+}
 
-    if (run_config == TestConfig::CLOSE_AND_OPEN) {
-      Close();
-      Reopen(options);
-    } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
-      db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
-    }
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, SoftLimit) {
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.write_buffer_size = 100000;  // Small write buffer
+  options.max_write_buffer_number = 256;
+  options.level0_file_num_compaction_trigger = 1;
+  options.level0_slowdown_writes_trigger = 3;
+  options.level0_stop_writes_trigger = 999999;
+  options.delayed_write_rate = 20000;  // About 200KB/s limited rate
+  options.soft_pending_compaction_bytes_limit = 200000;
+  options.target_file_size_base = 99999999;  // All into one file
+  options.max_bytes_for_level_base = 50000;
+  options.max_bytes_for_level_multiplier = 10;
+  options.max_background_compactions = 1;
+  options.compression = kNoCompression;
 
-    ReadOptions ro;
-    ro.pin_data = true;
-    auto iter = db_->NewIterator(ro);
+  Reopen(options);
+  Put(Key(0), "");
 
-    {
-      // Test Seek to random keys
-      printf("Testing seek on %zu keys\n", random_keys.size());
-      std::vector<Slice> keys_slices;
-      std::vector<std::string> true_keys;
-      for (auto& k : random_keys) {
-        iter->Seek(k);
-        if (!iter->Valid()) {
-          ASSERT_EQ(true_data.lower_bound(k), true_data.end());
-          continue;
-        }
-        std::string prop_value;
-        ASSERT_OK(
-            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-        ASSERT_EQ("1", prop_value);
-        keys_slices.push_back(iter->key());
-        true_keys.push_back(true_data.lower_bound(k)->first);
-      }
+  test::SleepingBackgroundTask sleeping_task_low;
+  // Block compactions
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
+                 Env::Priority::LOW);
+  sleeping_task_low.WaitUntilSleeping();
 
-      for (size_t i = 0; i < keys_slices.size(); i++) {
-        ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
-      }
-    }
+  // Create 3 L0 files, making score of L0 to be 3.
+  for (int i = 0; i < 3; i++) {
+    Put(Key(i), std::string(5000, 'x'));
+    Put(Key(100 - i), std::string(5000, 'x'));
+    // Flush the file. File size is around 30KB.
+    Flush();
+  }
+  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
 
-    {
-      // Test iterating all data forward
-      printf("Testing iterating forward on all keys\n");
-      std::vector<Slice> all_keys;
-      for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-        std::string prop_value;
-        ASSERT_OK(
-            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-        ASSERT_EQ("1", prop_value);
-        all_keys.push_back(iter->key());
-      }
-      ASSERT_EQ(all_keys.size(), true_data.size());
+  sleeping_task_low.WakeUp();
+  sleeping_task_low.WaitUntilDone();
+  sleeping_task_low.Reset();
+  dbfull()->TEST_WaitForCompact();
 
-      // Verify that all keys slices are valid
-      auto data_iter = true_data.begin();
-      for (size_t i = 0; i < all_keys.size(); i++) {
-        ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
-        data_iter++;
-      }
-    }
+  // Now there is one L1 file but doesn't trigger soft_rate_limit
+  // The L1 file size is around 30KB.
+  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
+  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
 
-    {
-      // Test iterating all data backward
-      printf("Testing iterating backward on all keys\n");
-      std::vector<Slice> all_keys;
-      for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
-        std::string prop_value;
-        ASSERT_OK(
-            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-        ASSERT_EQ("1", prop_value);
-        all_keys.push_back(iter->key());
-      }
-      ASSERT_EQ(all_keys.size(), true_data.size());
+  // Only allow one compactin going through.
+  rocksdb::SyncPoint::GetInstance()->SetCallBack(
+      "BackgroundCallCompaction:0", [&](void* arg) {
+        // Schedule a sleeping task.
+        sleeping_task_low.Reset();
+        env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
+                       &sleeping_task_low, Env::Priority::LOW);
+      });
 
-      // Verify that all keys slices are valid (backward)
-      auto data_iter = true_data.rbegin();
-      for (size_t i = 0; i < all_keys.size(); i++) {
-        ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
-        data_iter++;
-      }
-    }
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
 
-    delete iter;
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
+                 Env::Priority::LOW);
+  sleeping_task_low.WaitUntilSleeping();
+  // Create 3 L0 files, making score of L0 to be 3
+  for (int i = 0; i < 3; i++) {
+    Put(Key(10 + i), std::string(5000, 'x'));
+    Put(Key(90 - i), std::string(5000, 'x'));
+    // Flush the file. File size is around 30KB.
+    Flush();
   }
-}
 
-#ifndef ROCKSDB_LITE
-TEST_F(DBTest, PinnedDataIteratorMultipleFiles) {
-  Options options = CurrentOptions();
-  BlockBasedTableOptions table_options;
-  table_options.use_delta_encoding = false;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  options.disable_auto_compactions = true;
-  options.write_buffer_size = 1024 * 1024 * 10;  // 10 Mb
-  DestroyAndReopen(options);
+  // Wake up sleep task to enable compaction to run and waits
+  // for it to go to sleep state again to make sure one compaction
+  // goes through.
+  sleeping_task_low.WakeUp();
+  sleeping_task_low.WaitUntilSleeping();
 
-  std::map<std::string, std::string> true_data;
+  // Now there is one L1 file (around 60KB) which exceeds 50KB base by 10KB
+  // Given level multiplier 10, estimated pending compaction is around 100KB
+  // doesn't trigger soft_pending_compaction_bytes_limit
+  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
+  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
 
-  // Generate 4 sst files in L2
-  Random rnd(301);
-  for (int i = 1; i <= 1000; i++) {
-    std::string k = Key(i * 3);
-    std::string v = RandomString(&rnd, 100);
-    ASSERT_OK(Put(k, v));
-    true_data[k] = v;
-    if (i % 250 == 0) {
-      ASSERT_OK(Flush());
-    }
-  }
-  ASSERT_EQ(FilesPerLevel(0), "4");
-  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  ASSERT_EQ(FilesPerLevel(0), "0,4");
-
-  // Generate 4 sst files in L0
-  for (int i = 1; i <= 1000; i++) {
-    std::string k = Key(i * 2);
-    std::string v = RandomString(&rnd, 100);
-    ASSERT_OK(Put(k, v));
-    true_data[k] = v;
-    if (i % 250 == 0) {
-      ASSERT_OK(Flush());
-    }
+  // Create 3 L0 files, making score of L0 to be 3, higher than L0.
+  for (int i = 0; i < 3; i++) {
+    Put(Key(20 + i), std::string(5000, 'x'));
+    Put(Key(80 - i), std::string(5000, 'x'));
+    // Flush the file. File size is around 30KB.
+    Flush();
   }
-  ASSERT_EQ(FilesPerLevel(0), "4,4");
+  // Wake up sleep task to enable compaction to run and waits
+  // for it to go to sleep state again to make sure one compaction
+  // goes through.
+  sleeping_task_low.WakeUp();
+  sleeping_task_low.WaitUntilSleeping();
+
+  // Now there is one L1 file (around 90KB) which exceeds 50KB base by 40KB
+  // Given level multiplier 10, estimated pending compaction is around 400KB
+  // triggerring soft_pending_compaction_bytes_limit
+  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
+  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
 
-  // Add some keys/values in memtables
-  for (int i = 1; i <= 1000; i++) {
-    std::string k = Key(i);
-    std::string v = RandomString(&rnd, 100);
-    ASSERT_OK(Put(k, v));
-    true_data[k] = v;
-  }
-  ASSERT_EQ(FilesPerLevel(0), "4,4");
+  sleeping_task_low.WakeUp();
+  sleeping_task_low.WaitUntilSleeping();
 
-  ReadOptions ro;
-  ro.pin_data = true;
-  auto iter = db_->NewIterator(ro);
+  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
 
-  std::vector<std::pair<Slice, std::string>> results;
-  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-    std::string prop_value;
-    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-    ASSERT_EQ("1", prop_value);
-    results.emplace_back(iter->key(), iter->value().ToString());
-  }
+  // shrink level base so L2 will hit soft limit easier.
+  ASSERT_OK(dbfull()->SetOptions({
+      {"max_bytes_for_level_base", "5000"},
+  }));
 
-  ASSERT_EQ(results.size(), true_data.size());
-  auto data_iter = true_data.begin();
-  for (size_t i = 0; i < results.size(); i++, data_iter++) {
-    auto& kv = results[i];
-    ASSERT_EQ(kv.first, data_iter->first);
-    ASSERT_EQ(kv.second, data_iter->second);
-  }
+  Put("", "");
+  Flush();
+  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
 
-  delete iter;
+  sleeping_task_low.WaitUntilSleeping();
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  sleeping_task_low.WakeUp();
+  sleeping_task_low.WaitUntilDone();
 }
-#endif
 
-TEST_F(DBTest, PinnedDataIteratorMergeOperator) {
+TEST_F(DBTest, LastWriteBufferDelay) {
   Options options = CurrentOptions();
-  BlockBasedTableOptions table_options;
-  table_options.use_delta_encoding = false;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  options.merge_operator = MergeOperators::CreateUInt64AddOperator();
-  DestroyAndReopen(options);
-
-  std::string numbers[7];
-  for (int val = 0; val <= 6; val++) {
-    PutFixed64(numbers + val, val);
-  }
-
-  // +1 all keys in range [ 0 => 999]
-  for (int i = 0; i < 1000; i++) {
-    WriteOptions wo;
-    ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
-  }
+  options.env = env_;
+  options.write_buffer_size = 100000;
+  options.max_write_buffer_number = 4;
+  options.delayed_write_rate = 20000;
+  options.compression = kNoCompression;
+  options.disable_auto_compactions = true;
+  int kNumKeysPerMemtable = 3;
+  options.memtable_factory.reset(
+      new SpecialSkipListFactory(kNumKeysPerMemtable));
 
-  // +2 all keys divisible by 2 in range [ 0 => 999]
-  for (int i = 0; i < 1000; i += 2) {
-    WriteOptions wo;
-    ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
-  }
+  Reopen(options);
+  test::SleepingBackgroundTask sleeping_task;
+  // Block flushes
+  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
+                 Env::Priority::HIGH);
+  sleeping_task.WaitUntilSleeping();
 
-  // +3 all keys divisible by 5 in range [ 0 => 999]
-  for (int i = 0; i < 1000; i += 5) {
-    WriteOptions wo;
-    ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
+  // Create 3 L0 files, making score of L0 to be 3.
+  for (int i = 0; i < 3; i++) {
+    // Fill one mem table
+    for (int j = 0; j < kNumKeysPerMemtable; j++) {
+      Put(Key(j), "");
+    }
+    ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
   }
+  // Inserting a new entry would create a new mem table, triggering slow down.
+  Put(Key(0), "");
+  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
 
-  ReadOptions ro;
-  ro.pin_data = true;
-  auto iter = db_->NewIterator(ro);
+  sleeping_task.WakeUp();
+  sleeping_task.WaitUntilDone();
+}
+#endif  // ROCKSDB_LITE
 
-  std::vector<std::pair<Slice, std::string>> results;
-  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-    std::string prop_value;
-    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-    ASSERT_EQ("1", prop_value);
-    results.emplace_back(iter->key(), iter->value().ToString());
-  }
-
-  ASSERT_EQ(results.size(), 1000);
-  for (size_t i = 0; i < results.size(); i++) {
-    auto& kv = results[i];
-    ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
-    int expected_val = 1;
-    if (i % 2 == 0) {
-      expected_val += 2;
-    }
-    if (i % 5 == 0) {
-      expected_val += 3;
+TEST_F(DBTest, FailWhenCompressionNotSupportedTest) {
+  CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
+                                    kLZ4Compression, kLZ4HCCompression};
+  for (int iter = 0; iter < 4; ++iter) {
+    if (!CompressionTypeSupported(compressions[iter])) {
+      // not supported, we should fail the Open()
+      Options options = CurrentOptions();
+      options.compression = compressions[iter];
+      ASSERT_TRUE(!TryReopen(options).ok());
+      // Try if CreateColumnFamily also fails
+      options.compression = kNoCompression;
+      ASSERT_OK(TryReopen(options));
+      ColumnFamilyOptions cf_options(options);
+      cf_options.compression = compressions[iter];
+      ColumnFamilyHandle* handle;
+      ASSERT_TRUE(!db_->CreateColumnFamily(cf_options, "name", &handle).ok());
     }
-    ASSERT_EQ(kv.second, numbers[expected_val]);
   }
-
-  delete iter;
 }
 
-TEST_F(DBTest, PinnedDataIteratorReadAfterUpdate) {
+#ifndef ROCKSDB_LITE
+TEST_F(DBTest, RowCache) {
   Options options = CurrentOptions();
-  BlockBasedTableOptions table_options;
-  table_options.use_delta_encoding = false;
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  options.write_buffer_size = 100000;
+  options.statistics = rocksdb::CreateDBStatistics();
+  options.row_cache = NewLRUCache(8192);
   DestroyAndReopen(options);
 
-  Random rnd(301);
-
-  std::map<std::string, std::string> true_data;
-  for (int i = 0; i < 1000; i++) {
-    std::string k = RandomString(&rnd, 10);
-    std::string v = RandomString(&rnd, 1000);
-    ASSERT_OK(Put(k, v));
-    true_data[k] = v;
-  }
+  ASSERT_OK(Put("foo", "bar"));
+  ASSERT_OK(Flush());
 
-  ReadOptions ro;
-  ro.pin_data = true;
-  auto iter = db_->NewIterator(ro);
+  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
+  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0);
+  ASSERT_EQ(Get("foo"), "bar");
+  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
+  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
+  ASSERT_EQ(Get("foo"), "bar");
+  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
+  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
+}
+#endif  // ROCKSDB_LITE
 
-  // Delete 50% of the keys and update the other 50%
-  for (auto& kv : true_data) {
-    if (rnd.OneIn(2)) {
-      ASSERT_OK(Delete(kv.first));
-    } else {
-      std::string new_val = RandomString(&rnd, 1000);
-      ASSERT_OK(Put(kv.first, new_val));
-    }
-  }
+TEST_F(DBTest, DeletingOldWalAfterDrop) {
+  rocksdb::SyncPoint::GetInstance()->LoadDependency(
+      {{"Test:AllowFlushes", "DBImpl::BGWorkFlush"},
+       {"DBImpl::BGWorkFlush:done", "Test:WaitForFlush"}});
+  rocksdb::SyncPoint::GetInstance()->ClearTrace();
 
-  std::vector<std::pair<Slice, std::string>> results;
-  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-    std::string prop_value;
-    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
-    ASSERT_EQ("1", prop_value);
-    results.emplace_back(iter->key(), iter->value().ToString());
-  }
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+  Options options = CurrentOptions();
+  options.max_total_wal_size = 8192;
+  options.compression = kNoCompression;
+  options.write_buffer_size = 1 << 20;
+  options.level0_file_num_compaction_trigger = (1 << 30);
+  options.level0_slowdown_writes_trigger = (1 << 30);
+  options.level0_stop_writes_trigger = (1 << 30);
+  options.disable_auto_compactions = true;
+  DestroyAndReopen(options);
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
 
-  auto data_iter = true_data.begin();
-  for (size_t i = 0; i < results.size(); i++, data_iter++) {
-    auto& kv = results[i];
-    ASSERT_EQ(kv.first, data_iter->first);
-    ASSERT_EQ(kv.second, data_iter->second);
-  }
+  CreateColumnFamilies({"cf1", "cf2"}, options);
+  ASSERT_OK(Put(0, "key1", DummyString(8192)));
+  ASSERT_OK(Put(0, "key2", DummyString(8192)));
+  // the oldest wal should now be getting_flushed
+  ASSERT_OK(db_->DropColumnFamily(handles_[0]));
+  // all flushes should now do nothing because their CF is dropped
+  TEST_SYNC_POINT("Test:AllowFlushes");
+  TEST_SYNC_POINT("Test:WaitForFlush");
+  uint64_t lognum1 = dbfull()->TEST_LogfileNumber();
+  ASSERT_OK(Put(1, "key3", DummyString(8192)));
+  ASSERT_OK(Put(1, "key4", DummyString(8192)));
+  // new wal should have been created
+  uint64_t lognum2 = dbfull()->TEST_LogfileNumber();
+  EXPECT_GT(lognum2, lognum1);
+}
 
-  delete iter;
+TEST_F(DBTest, UnsupportedManualSync) {
+  DestroyAndReopen(CurrentOptions());
+  env_->is_wal_sync_thread_safe_.store(false);
+  Status s = db_->SyncWAL();
+  ASSERT_TRUE(s.IsNotSupported());
 }
 
 INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam,
@@ -10138,326 +5919,8 @@ TEST_F(DBTest, PauseBackgroundWorkTest) {
   ASSERT_EQ(true, done.load());
 }
 
-class SliceTransformLimitedDomain : public SliceTransform {
-  const char* Name() const override { return "SliceTransformLimitedDomain"; }
-
-  Slice Transform(const Slice& src) const override {
-    return Slice(src.data(), 5);
-  }
-
-  bool InDomain(const Slice& src) const override {
-    // prefix will be x????
-    return src.size() >= 5 && src[0] == 'x';
-  }
-
-  bool InRange(const Slice& dst) const override {
-    // prefix will be x????
-    return dst.size() == 5 && dst[0] == 'x';
-  }
-};
-
-TEST_F(DBTest, PrefixExtractorFullFilter) {
-  BlockBasedTableOptions bbto;
-  // Full Filter Block
-  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
-  bbto.whole_key_filtering = false;
-
-  Options options = CurrentOptions();
-  options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
-  DestroyAndReopen(options);
-
-  ASSERT_OK(Put("x1111_AAAA", "val1"));
-  ASSERT_OK(Put("x1112_AAAA", "val2"));
-  ASSERT_OK(Put("x1113_AAAA", "val3"));
-  ASSERT_OK(Put("x1114_AAAA", "val4"));
-  // Not in domain, wont be added to filter
-  ASSERT_OK(Put("zzzzz_AAAA", "val5"));
-
-  ASSERT_OK(Flush());
-
-  ASSERT_EQ(Get("x1111_AAAA"), "val1");
-  ASSERT_EQ(Get("x1112_AAAA"), "val2");
-  ASSERT_EQ(Get("x1113_AAAA"), "val3");
-  ASSERT_EQ(Get("x1114_AAAA"), "val4");
-  // Was not added to filter but rocksdb will try to read it from the filter
-  ASSERT_EQ(Get("zzzzz_AAAA"), "val5");
-}
-
-TEST_F(DBTest, PrefixExtractorBlockFilter) {
-  BlockBasedTableOptions bbto;
-  // Block Filter Block
-  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
-
-  Options options = CurrentOptions();
-  options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
-  DestroyAndReopen(options);
-
-  ASSERT_OK(Put("x1113_AAAA", "val3"));
-  ASSERT_OK(Put("x1114_AAAA", "val4"));
-  // Not in domain, wont be added to filter
-  ASSERT_OK(Put("zzzzz_AAAA", "val1"));
-  ASSERT_OK(Put("zzzzz_AAAB", "val2"));
-  ASSERT_OK(Put("zzzzz_AAAC", "val3"));
-  ASSERT_OK(Put("zzzzz_AAAD", "val4"));
-
-  ASSERT_OK(Flush());
-
-  std::vector<std::string> iter_res;
-  auto iter = db_->NewIterator(ReadOptions());
-  // Seek to a key that was not in Domain
-  for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) {
-    iter_res.emplace_back(iter->value().ToString());
-  }
-
-  std::vector<std::string> expected_res = {"val1", "val2", "val3", "val4"};
-  ASSERT_EQ(iter_res, expected_res);
-  delete iter;
-}
-
-TEST_F(DBTest, IteratorWithLocalStatistics) {
-  Options options = CurrentOptions();
-  options.statistics = rocksdb::CreateDBStatistics();
-  DestroyAndReopen(options);
-
-  Random rnd(301);
-  for (int i = 0; i < 1000; i++) {
-    // Key 10 bytes / Value 10 bytes
-    ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10)));
-  }
-
-  std::atomic<uint64_t> total_next(0);
-  std::atomic<uint64_t> total_next_found(0);
-  std::atomic<uint64_t> total_prev(0);
-  std::atomic<uint64_t> total_prev_found(0);
-  std::atomic<uint64_t> total_bytes(0);
-
-  std::vector<std::thread> threads;
-  std::function<void()> reader_func_next = [&]() {
-    Iterator* iter = db_->NewIterator(ReadOptions());
-
-    iter->SeekToFirst();
-    // Seek will bump ITER_BYTES_READ
-    total_bytes += iter->key().size();
-    total_bytes += iter->value().size();
-    while (true) {
-      iter->Next();
-      total_next++;
-
-      if (!iter->Valid()) {
-        break;
-      }
-      total_next_found++;
-      total_bytes += iter->key().size();
-      total_bytes += iter->value().size();
-    }
-
-    delete iter;
-  };
-
-  std::function<void()> reader_func_prev = [&]() {
-    Iterator* iter = db_->NewIterator(ReadOptions());
-
-    iter->SeekToLast();
-    // Seek will bump ITER_BYTES_READ
-    total_bytes += iter->key().size();
-    total_bytes += iter->value().size();
-    while (true) {
-      iter->Prev();
-      total_prev++;
-
-      if (!iter->Valid()) {
-        break;
-      }
-      total_prev_found++;
-      total_bytes += iter->key().size();
-      total_bytes += iter->value().size();
-    }
-
-    delete iter;
-  };
-
-  for (int i = 0; i < 10; i++) {
-    threads.emplace_back(reader_func_next);
-  }
-  for (int i = 0; i < 15; i++) {
-    threads.emplace_back(reader_func_prev);
-  }
-
-  for (auto& t : threads) {
-    t.join();
-  }
-
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), total_next);
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
-            total_next_found);
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), total_prev);
-  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
-            total_prev_found);
-  ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), total_bytes);
-}
-
-#ifndef ROCKSDB_LITE
-class BloomStatsTestWithParam
-    : public DBTest,
-      public testing::WithParamInterface<std::tuple<bool, bool>> {
- public:
-  BloomStatsTestWithParam() {
-    use_block_table_ = std::get<0>(GetParam());
-    use_block_based_builder_ = std::get<1>(GetParam());
-
-    options_.create_if_missing = true;
-    options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
-    options_.memtable_prefix_bloom_bits = 8 * 1024;
-    if (use_block_table_) {
-      BlockBasedTableOptions table_options;
-      table_options.hash_index_allow_collision = false;
-      table_options.filter_policy.reset(
-          NewBloomFilterPolicy(10, use_block_based_builder_));
-      options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
-    } else {
-      PlainTableOptions table_options;
-      options_.table_factory.reset(NewPlainTableFactory(table_options));
-    }
-
-    perf_context.Reset();
-    DestroyAndReopen(options_);
-  }
-
-  ~BloomStatsTestWithParam() {
-    perf_context.Reset();
-    Destroy(options_);
-  }
-
-  // Required if inheriting from testing::WithParamInterface<>
-  static void SetUpTestCase() {}
-  static void TearDownTestCase() {}
-
-  bool use_block_table_;
-  bool use_block_based_builder_;
-  Options options_;
-};
-
-// 1 Insert 2 K-V pairs into DB
-// 2 Call Get() for both keys - expext memtable bloom hit stat to be 2
-// 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1
-// 4 Call Flush() to create SST
-// 5 Call Get() for both keys - expext SST bloom hit stat to be 2
-// 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1
-// Test both: block and plain SST
-TEST_P(BloomStatsTestWithParam, BloomStatsTest) {
-  std::string key1("AAAA");
-  std::string key2("RXDB");  // not in DB
-  std::string key3("ZBRA");
-  std::string value1("Value1");
-  std::string value3("Value3");
-
-  ASSERT_OK(Put(key1, value1, WriteOptions()));
-  ASSERT_OK(Put(key3, value3, WriteOptions()));
-
-  // check memtable bloom stats
-  ASSERT_EQ(value1, Get(key1));
-  ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
-  ASSERT_EQ(value3, Get(key3));
-  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
-  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
-
-  ASSERT_EQ("NOT_FOUND", Get(key2));
-  ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
-  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
-
-  // sanity checks
-  ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
-  ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
-
-  Flush();
-
-  // sanity checks
-  ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
-  ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
-
-  // check SST bloom stats
-  // NOTE: hits per get differs because of code paths differences
-  // in BlockBasedTable::Get()
-  int hits_per_get = use_block_table_ && !use_block_based_builder_ ? 2 : 1;
-  ASSERT_EQ(value1, Get(key1));
-  ASSERT_EQ(hits_per_get, perf_context.bloom_sst_hit_count);
-  ASSERT_EQ(value3, Get(key3));
-  ASSERT_EQ(2 * hits_per_get, perf_context.bloom_sst_hit_count);
-
-  ASSERT_EQ("NOT_FOUND", Get(key2));
-  ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
-}
-
-// Same scenario as in BloomStatsTest but using an iterator
-TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
-  std::string key1("AAAA");
-  std::string key2("RXDB");  // not in DB
-  std::string key3("ZBRA");
-  std::string value1("Value1");
-  std::string value3("Value3");
-
-  ASSERT_OK(Put(key1, value1, WriteOptions()));
-  ASSERT_OK(Put(key3, value3, WriteOptions()));
-
-  unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions()));
-
-  // check memtable bloom stats
-  iter->Seek(key1);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(iter->Valid());
-  ASSERT_EQ(value1, iter->value().ToString());
-  ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
-  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
-
-  iter->Seek(key3);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(iter->Valid());
-  ASSERT_EQ(value3, iter->value().ToString());
-  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
-  ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
-
-  iter->Seek(key2);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(!iter->Valid());
-  ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
-  ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
-
-  Flush();
-
-  iter.reset(dbfull()->NewIterator(ReadOptions()));
-
-  // Check SST bloom stats
-  iter->Seek(key1);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(iter->Valid());
-  ASSERT_EQ(value1, iter->value().ToString());
-  ASSERT_EQ(1, perf_context.bloom_sst_hit_count);
-
-  iter->Seek(key3);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(iter->Valid());
-  ASSERT_EQ(value3, iter->value().ToString());
-  ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
-
-  iter->Seek(key2);
-  ASSERT_OK(iter->status());
-  ASSERT_TRUE(!iter->Valid());
-  ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
-  ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
-}
-
-INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam,
-                        ::testing::Values(std::make_tuple(true, true),
-                                          std::make_tuple(true, false),
-                                          std::make_tuple(false, false)));
-#endif  // ROCKSDB_LITE
 }  // namespace rocksdb
 
-
 int main(int argc, char** argv) {
   rocksdb::port::InstallStackTraceHandler();
   ::testing::InitGoogleTest(&argc, argv);
diff --git a/db/db_test2.cc b/db/db_test2.cc
index 7c02a6ffe..32487b1c4 100644
--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@@ -13,11 +13,6 @@
 
 namespace rocksdb {
 
-static uint64_t TestGetTickerCount(const Options& options,
-                                   Tickers ticker_type) {
-  return options.statistics->getTickerCount(ticker_type);
-}
-
 class DBTest2 : public DBTestBase {
  public:
   DBTest2() : DBTestBase("/db_test2") {}
diff --git a/db/db_test_util.cc b/db/db_test_util.cc
index 1af09e286..7e2adaa78 100644
--- a/db/db_test_util.cc
+++ b/db/db_test_util.cc
@@ -1043,4 +1043,34 @@ std::unordered_map<std::string, uint64_t> DBTestBase::GetAllSSTFiles(
   return res;
 }
 
+std::vector<std::uint64_t> DBTestBase::ListTableFiles(Env* env,
+                                                      const std::string& path) {
+  std::vector<std::string> files;
+  std::vector<uint64_t> file_numbers;
+  env->GetChildren(path, &files);
+  uint64_t number;
+  FileType type;
+  for (size_t i = 0; i < files.size(); ++i) {
+    if (ParseFileName(files[i], &number, &type)) {
+      if (type == kTableFile) {
+        file_numbers.push_back(number);
+      }
+    }
+  }
+  return file_numbers;
+}
+
+#ifndef ROCKSDB_LITE
+uint64_t DBTestBase::GetNumberOfSstFilesForColumnFamily(
+    DB* db, std::string column_family_name) {
+  std::vector<LiveFileMetaData> metadata;
+  db->GetLiveFilesMetaData(&metadata);
+  uint64_t result = 0;
+  for (auto& fileMetadata : metadata) {
+    result += (fileMetadata.column_family_name == column_family_name);
+  }
+  return result;
+}
+#endif  // ROCKSDB_LITE
+
 }  // namespace rocksdb
diff --git a/db/db_test_util.h b/db/db_test_util.h
index ca2b466e9..adda26230 100644
--- a/db/db_test_util.h
+++ b/db/db_test_util.h
@@ -39,6 +39,7 @@
 #include "rocksdb/filter_policy.h"
 #include "rocksdb/options.h"
 #include "rocksdb/slice.h"
+#include "rocksdb/statistics.h"
 #include "rocksdb/table.h"
 #include "rocksdb/utilities/checkpoint.h"
 #include "table/block_based_table_factory.h"
@@ -491,6 +492,31 @@ class SpecialEnv : public EnvWrapper {
   std::atomic<bool> is_wal_sync_thread_safe_{true};
 };
 
+class OnFileDeletionListener : public EventListener {
+ public:
+  OnFileDeletionListener() : matched_count_(0), expected_file_name_("") {}
+
+  void SetExpectedFileName(const std::string file_name) {
+    expected_file_name_ = file_name;
+  }
+
+  void VerifyMatchedCount(size_t expected_value) {
+    ASSERT_EQ(matched_count_, expected_value);
+  }
+
+  void OnTableFileDeleted(const TableFileDeletionInfo& info) override {
+    if (expected_file_name_ != "") {
+      ASSERT_EQ(expected_file_name_, info.file_path);
+      expected_file_name_ = "";
+      matched_count_++;
+    }
+  }
+
+ private:
+  size_t matched_count_;
+  std::string expected_file_name_;
+};
+
 class DBTestBase : public testing::Test {
  protected:
   // Sequence of option configurations to try
@@ -756,6 +782,17 @@ class DBTestBase : public testing::Test {
 
   std::unordered_map<std::string, uint64_t> GetAllSSTFiles(
       uint64_t* total_size = nullptr);
+
+  std::vector<std::uint64_t> ListTableFiles(Env* env, const std::string& path);
+
+#ifndef ROCKSDB_LITE
+  uint64_t GetNumberOfSstFilesForColumnFamily(DB* db,
+                                              std::string column_family_name);
+#endif  // ROCKSDB_LITE
+
+  uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) {
+    return options.statistics->getTickerCount(ticker_type);
+  }
 };
 
 }  // namespace rocksdb
diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc
index ded8ed9b4..868039a5c 100644
--- a/db/db_universal_compaction_test.cc
+++ b/db/db_universal_compaction_test.cc
@@ -14,11 +14,6 @@
 
 namespace rocksdb {
 
-static uint64_t TestGetTickerCount(const Options& options,
-                                   Tickers ticker_type) {
-  return options.statistics->getTickerCount(ticker_type);
-}
-
 static std::string CompressibleString(Random* rnd, int len) {
   std::string r;
   test::CompressibleString(rnd, 0.8, len, &r);
diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc
index 14b9e2ffd..bb944735d 100644
--- a/db/db_wal_test.cc
+++ b/db/db_wal_test.cc
@@ -127,6 +127,634 @@ TEST_F(DBWALTest, SyncWALNotWaitWrite) {
   ASSERT_EQ(Get("foo2"), "bar2");
   rocksdb::SyncPoint::GetInstance()->DisableProcessing();
 }
+
+TEST_F(DBWALTest, Recover) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    ASSERT_OK(Put(1, "foo", "v1"));
+    ASSERT_OK(Put(1, "baz", "v5"));
+
+    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
+    ASSERT_EQ("v1", Get(1, "foo"));
+
+    ASSERT_EQ("v1", Get(1, "foo"));
+    ASSERT_EQ("v5", Get(1, "baz"));
+    ASSERT_OK(Put(1, "bar", "v2"));
+    ASSERT_OK(Put(1, "foo", "v3"));
+
+    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
+    ASSERT_EQ("v3", Get(1, "foo"));
+    ASSERT_OK(Put(1, "foo", "v4"));
+    ASSERT_EQ("v4", Get(1, "foo"));
+    ASSERT_EQ("v2", Get(1, "bar"));
+    ASSERT_EQ("v5", Get(1, "baz"));
+  } while (ChangeOptions());
+}
+
+TEST_F(DBWALTest, RecoverWithTableHandle) {
+  do {
+    Options options = CurrentOptions();
+    options.create_if_missing = true;
+    options.disable_auto_compactions = true;
+    DestroyAndReopen(options);
+    CreateAndReopenWithCF({"pikachu"}, options);
+
+    ASSERT_OK(Put(1, "foo", "v1"));
+    ASSERT_OK(Put(1, "bar", "v2"));
+    ASSERT_OK(Flush(1));
+    ASSERT_OK(Put(1, "foo", "v3"));
+    ASSERT_OK(Put(1, "bar", "v4"));
+    ASSERT_OK(Flush(1));
+    ASSERT_OK(Put(1, "big", std::string(100, 'a')));
+    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
+
+    std::vector<std::vector<FileMetaData>> files;
+    dbfull()->TEST_GetFilesMetaData(handles_[1], &files);
+    size_t total_files = 0;
+    for (const auto& level : files) {
+      total_files += level.size();
+    }
+    ASSERT_EQ(total_files, 3);
+    for (const auto& level : files) {
+      for (const auto& file : level) {
+        if (kInfiniteMaxOpenFiles == option_config_) {
+          ASSERT_TRUE(file.table_reader_handle != nullptr);
+        } else {
+          ASSERT_TRUE(file.table_reader_handle == nullptr);
+        }
+      }
+    }
+  } while (ChangeOptions());
+}
+
+TEST_F(DBWALTest, IgnoreRecoveredLog) {
+  std::string backup_logs = dbname_ + "/backup_logs";
+
+  // delete old files in backup_logs directory
+  env_->CreateDirIfMissing(backup_logs);
+  std::vector<std::string> old_files;
+  env_->GetChildren(backup_logs, &old_files);
+  for (auto& file : old_files) {
+    if (file != "." && file != "..") {
+      env_->DeleteFile(backup_logs + "/" + file);
+    }
+  }
+
+  do {
+    Options options = CurrentOptions();
+    options.create_if_missing = true;
+    options.merge_operator = MergeOperators::CreateUInt64AddOperator();
+    options.wal_dir = dbname_ + "/logs";
+    DestroyAndReopen(options);
+
+    // fill up the DB
+    std::string one, two;
+    PutFixed64(&one, 1);
+    PutFixed64(&two, 2);
+    ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one)));
+    ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one)));
+    ASSERT_OK(db_->Merge(WriteOptions(), Slice("bar"), Slice(one)));
+
+    // copy the logs to backup
+    std::vector<std::string> logs;
+    env_->GetChildren(options.wal_dir, &logs);
+    for (auto& log : logs) {
+      if (log != ".." && log != ".") {
+        CopyFile(options.wal_dir + "/" + log, backup_logs + "/" + log);
+      }
+    }
+
+    // recover the DB
+    Reopen(options);
+    ASSERT_EQ(two, Get("foo"));
+    ASSERT_EQ(one, Get("bar"));
+    Close();
+
+    // copy the logs from backup back to wal dir
+    for (auto& log : logs) {
+      if (log != ".." && log != ".") {
+        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
+      }
+    }
+    // this should ignore the log files, recovery should not happen again
+    // if the recovery happens, the same merge operator would be called twice,
+    // leading to incorrect results
+    Reopen(options);
+    ASSERT_EQ(two, Get("foo"));
+    ASSERT_EQ(one, Get("bar"));
+    Close();
+    Destroy(options);
+    Reopen(options);
+    Close();
+
+    // copy the logs from backup back to wal dir
+    env_->CreateDirIfMissing(options.wal_dir);
+    for (auto& log : logs) {
+      if (log != ".." && log != ".") {
+        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
+      }
+    }
+    // assert that we successfully recovered only from logs, even though we
+    // destroyed the DB
+    Reopen(options);
+    ASSERT_EQ(two, Get("foo"));
+    ASSERT_EQ(one, Get("bar"));
+
+    // Recovery will fail if DB directory doesn't exist.
+    Destroy(options);
+    // copy the logs from backup back to wal dir
+    env_->CreateDirIfMissing(options.wal_dir);
+    for (auto& log : logs) {
+      if (log != ".." && log != ".") {
+        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
+        // we won't be needing this file no more
+        env_->DeleteFile(backup_logs + "/" + log);
+      }
+    }
+    Status s = TryReopen(options);
+    ASSERT_TRUE(!s.ok());
+  } while (ChangeOptions(kSkipHashCuckoo));
+}
+
+TEST_F(DBWALTest, RecoveryWithEmptyLog) {
+  do {
+    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+    ASSERT_OK(Put(1, "foo", "v1"));
+    ASSERT_OK(Put(1, "foo", "v2"));
+    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
+    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
+    ASSERT_OK(Put(1, "foo", "v3"));
+    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
+    ASSERT_EQ("v3", Get(1, "foo"));
+  } while (ChangeOptions());
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBWALTest, RecoverWithLargeLog) {
+  do {
+    {
+      Options options = CurrentOptions();
+      CreateAndReopenWithCF({"pikachu"}, options);
+      ASSERT_OK(Put(1, "big1", std::string(200000, '1')));
+      ASSERT_OK(Put(1, "big2", std::string(200000, '2')));
+      ASSERT_OK(Put(1, "small3", std::string(10, '3')));
+      ASSERT_OK(Put(1, "small4", std::string(10, '4')));
+      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
+    }
+
+    // Make sure that if we re-open with a small write buffer size that
+    // we flush table files in the middle of a large log file.
+    Options options;
+    options.write_buffer_size = 100000;
+    options = CurrentOptions(options);
+    ReopenWithColumnFamilies({"default", "pikachu"}, options);
+    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 3);
+    ASSERT_EQ(std::string(200000, '1'), Get(1, "big1"));
+    ASSERT_EQ(std::string(200000, '2'), Get(1, "big2"));
+    ASSERT_EQ(std::string(10, '3'), Get(1, "small3"));
+    ASSERT_EQ(std::string(10, '4'), Get(1, "small4"));
+    ASSERT_GT(NumTableFilesAtLevel(0, 1), 1);
+  } while (ChangeCompactOptions());
+}
+
+// In https://reviews.facebook.net/D20661 we change
+// recovery behavior: previously for each log file each column family
+// memtable was flushed, even it was empty. Now it's changed:
+// we try to create the smallest number of table files by merging
+// updates from multiple logs
+TEST_F(DBWALTest, RecoverCheckFileAmountWithSmallWriteBuffer) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 5000000;
+  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
+
+  // Since we will reopen DB with smaller write_buffer_size,
+  // each key will go to new SST file
+  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
+  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
+  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
+  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
+
+  ASSERT_OK(Put(3, Key(10), DummyString(1)));
+  // Make 'dobrynia' to be flushed and new WAL file to be created
+  ASSERT_OK(Put(2, Key(10), DummyString(7500000)));
+  ASSERT_OK(Put(2, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
+  {
+    auto tables = ListTableFiles(env_, dbname_);
+    ASSERT_EQ(tables.size(), static_cast<size_t>(1));
+    // Make sure 'dobrynia' was flushed: check sst files amount
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(1));
+  }
+  // New WAL file
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(3, Key(10), DummyString(1)));
+  ASSERT_OK(Put(3, Key(10), DummyString(1)));
+  ASSERT_OK(Put(3, Key(10), DummyString(1)));
+
+  options.write_buffer_size = 4096;
+  options.arena_block_size = 4096;
+  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
+                           options);
+  {
+    // No inserts => default is empty
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
+              static_cast<uint64_t>(0));
+    // First 4 keys goes to separate SSTs + 1 more SST for 2 smaller keys
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
+              static_cast<uint64_t>(5));
+    // 1 SST for big key + 1 SST for small one
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(2));
+    // 1 SST for all keys
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(1));
+  }
+}
+
+// In https://reviews.facebook.net/D20661 we change
+// recovery behavior: previously for each log file each column family
+// memtable was flushed, even it wasn't empty. Now it's changed:
+// we try to create the smallest number of table files by merging
+// updates from multiple logs
+TEST_F(DBWALTest, RecoverCheckFileAmount) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 100000;
+  options.arena_block_size = 4 * 1024;
+  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
+
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(2, Key(1), DummyString(1)));
+
+  // Make 'nikitich' memtable to be flushed
+  ASSERT_OK(Put(3, Key(10), DummyString(1002400)));
+  ASSERT_OK(Put(3, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
+  // 4 memtable are not flushed, 1 sst file
+  {
+    auto tables = ListTableFiles(env_, dbname_);
+    ASSERT_EQ(tables.size(), static_cast<size_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(1));
+  }
+  // Memtable for 'nikitich' has flushed, new WAL file has opened
+  // 4 memtable still not flushed
+
+  // Write to new WAL file
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(2, Key(1), DummyString(1)));
+
+  // Fill up 'nikitich' one more time
+  ASSERT_OK(Put(3, Key(10), DummyString(1002400)));
+  // make it flush
+  ASSERT_OK(Put(3, Key(1), DummyString(1)));
+  dbfull()->TEST_WaitForFlushMemTable(handles_[3]);
+  // There are still 4 memtable not flushed, and 2 sst tables
+  ASSERT_OK(Put(0, Key(1), DummyString(1)));
+  ASSERT_OK(Put(1, Key(1), DummyString(1)));
+  ASSERT_OK(Put(2, Key(1), DummyString(1)));
+
+  {
+    auto tables = ListTableFiles(env_, dbname_);
+    ASSERT_EQ(tables.size(), static_cast<size_t>(2));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(2));
+  }
+
+  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
+                           options);
+  {
+    std::vector<uint64_t> table_files = ListTableFiles(env_, dbname_);
+    // Check, that records for 'default', 'dobrynia' and 'pikachu' from
+    // first, second and third WALs  went to the same SST.
+    // So, there is 6 SSTs: three  for 'nikitich', one for 'default', one for
+    // 'dobrynia', one for 'pikachu'
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
+              static_cast<uint64_t>(3));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
+              static_cast<uint64_t>(1));
+    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
+              static_cast<uint64_t>(1));
+  }
+}
+
+TEST_F(DBWALTest, SyncMultipleLogs) {
+  const uint64_t kNumBatches = 2;
+  const int kBatchSize = 1000;
+
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  options.write_buffer_size = 4096;
+  Reopen(options);
+
+  WriteBatch batch;
+  WriteOptions wo;
+  wo.sync = true;
+
+  for (uint64_t b = 0; b < kNumBatches; b++) {
+    batch.Clear();
+    for (int i = 0; i < kBatchSize; i++) {
+      batch.Put(Key(i), DummyString(128));
+    }
+
+    dbfull()->Write(wo, &batch);
+  }
+
+  ASSERT_OK(dbfull()->SyncWAL());
+}
+
+//
+// Test WAL recovery for the various modes available
+//
+class RecoveryTestHelper {
+ public:
+  // Number of WAL files to generate
+  static const int kWALFilesCount = 10;
+  // Starting number for the WAL file name like 00010.log
+  static const int kWALFileOffset = 10;
+  // Keys to be written per WAL file
+  static const int kKeysPerWALFile = 1024;
+  // Size of the value
+  static const int kValueSize = 10;
+
+  // Create WAL files with values filled in
+  static void FillData(DBWALTest* test, const Options& options,
+                       const size_t wal_count, size_t* count) {
+    const DBOptions& db_options = options;
+
+    *count = 0;
+
+    shared_ptr<Cache> table_cache = NewLRUCache(50000, 16);
+    EnvOptions env_options;
+    WriteBuffer write_buffer(db_options.db_write_buffer_size);
+
+    unique_ptr<VersionSet> versions;
+    unique_ptr<WalManager> wal_manager;
+    WriteController write_controller;
+
+    versions.reset(new VersionSet(test->dbname_, &db_options, env_options,
+                                  table_cache.get(), &write_buffer,
+                                  &write_controller));
+
+    wal_manager.reset(new WalManager(db_options, env_options));
+
+    std::unique_ptr<log::Writer> current_log_writer;
+
+    for (size_t j = kWALFileOffset; j < wal_count + kWALFileOffset; j++) {
+      uint64_t current_log_number = j;
+      std::string fname = LogFileName(test->dbname_, current_log_number);
+      unique_ptr<WritableFile> file;
+      ASSERT_OK(db_options.env->NewWritableFile(fname, &file, env_options));
+      unique_ptr<WritableFileWriter> file_writer(
+          new WritableFileWriter(std::move(file), env_options));
+      current_log_writer.reset(
+          new log::Writer(std::move(file_writer), current_log_number,
+                          db_options.recycle_log_file_num > 0));
+
+      for (int i = 0; i < kKeysPerWALFile; i++) {
+        std::string key = "key" + ToString((*count)++);
+        std::string value = test->DummyString(kValueSize);
+        assert(current_log_writer.get() != nullptr);
+        uint64_t seq = versions->LastSequence() + 1;
+        WriteBatch batch;
+        batch.Put(key, value);
+        WriteBatchInternal::SetSequence(&batch, seq);
+        current_log_writer->AddRecord(WriteBatchInternal::Contents(&batch));
+        versions->SetLastSequence(seq);
+      }
+    }
+  }
+
+  // Recreate and fill the store with some data
+  static size_t FillData(DBWALTest* test, Options* options) {
+    options->create_if_missing = true;
+    test->DestroyAndReopen(*options);
+    test->Close();
+
+    size_t count = 0;
+    FillData(test, *options, kWALFilesCount, &count);
+    return count;
+  }
+
+  // Read back all the keys we wrote and return the number of keys found
+  static size_t GetData(DBWALTest* test) {
+    size_t count = 0;
+    for (size_t i = 0; i < kWALFilesCount * kKeysPerWALFile; i++) {
+      if (test->Get("key" + ToString(i)) != "NOT_FOUND") {
+        ++count;
+      }
+    }
+    return count;
+  }
+
+  // Manuall corrupt the specified WAL
+  static void CorruptWAL(DBWALTest* test, const Options& options,
+                         const double off, const double len,
+                         const int wal_file_id, const bool trunc = false) {
+    Env* env = options.env;
+    std::string fname = LogFileName(test->dbname_, wal_file_id);
+    uint64_t size;
+    ASSERT_OK(env->GetFileSize(fname, &size));
+    ASSERT_GT(size, 0);
+#ifdef OS_WIN
+    // Windows disk cache behaves differently. When we truncate
+    // the original content is still in the cache due to the original
+    // handle is still open. Generally, in Windows, one prohibits
+    // shared access to files and it is not needed for WAL but we allow
+    // it to induce corruption at various tests.
+    test->Close();
+#endif
+    if (trunc) {
+      ASSERT_EQ(0, truncate(fname.c_str(), static_cast<int64_t>(size * off)));
+    } else {
+      InduceCorruption(fname, static_cast<size_t>(size * off),
+                       static_cast<size_t>(size * len));
+    }
+  }
+
+  // Overwrite data with 'a' from offset for length len
+  static void InduceCorruption(const std::string& filename, size_t offset,
+                               size_t len) {
+    ASSERT_GT(len, 0U);
+
+    int fd = open(filename.c_str(), O_RDWR);
+
+    ASSERT_GT(fd, 0);
+    ASSERT_EQ(offset, lseek(fd, offset, SEEK_SET));
+
+    void* buf = alloca(len);
+    memset(buf, 'a', len);
+    ASSERT_EQ(len, write(fd, buf, static_cast<unsigned int>(len)));
+
+    close(fd);
+  }
+};
+
+// Test scope:
+// - We expect to open the data store when there is incomplete trailing writes
+// at the end of any of the logs
+// - We do not expect to open the data store for corruption
+TEST_F(DBWALTest, kTolerateCorruptedTailRecords) {
+  const int jstart = RecoveryTestHelper::kWALFileOffset;
+  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
+
+  for (auto trunc : {true, false}) {        /* Corruption style */
+    for (int i = 0; i < 4; i++) {           /* Corruption offset position */
+      for (int j = jstart; j < jend; j++) { /* WAL file */
+        // Fill data for testing
+        Options options = CurrentOptions();
+        const size_t row_count = RecoveryTestHelper::FillData(this, &options);
+        // test checksum failure or parsing
+        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
+                                       /*len%=*/.1, /*wal=*/j, trunc);
+
+        if (trunc) {
+          options.wal_recovery_mode =
+              WALRecoveryMode::kTolerateCorruptedTailRecords;
+          options.create_if_missing = false;
+          ASSERT_OK(TryReopen(options));
+          const size_t recovered_row_count = RecoveryTestHelper::GetData(this);
+          ASSERT_TRUE(i == 0 || recovered_row_count > 0);
+          ASSERT_LT(recovered_row_count, row_count);
+        } else {
+          options.wal_recovery_mode =
+              WALRecoveryMode::kTolerateCorruptedTailRecords;
+          ASSERT_NOK(TryReopen(options));
+        }
+      }
+    }
+  }
+}
+
+// Test scope:
+// We don't expect the data store to be opened if there is any corruption
+// (leading, middle or trailing -- incomplete writes or corruption)
+TEST_F(DBWALTest, kAbsoluteConsistency) {
+  const int jstart = RecoveryTestHelper::kWALFileOffset;
+  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
+
+  // Verify clean slate behavior
+  Options options = CurrentOptions();
+  const size_t row_count = RecoveryTestHelper::FillData(this, &options);
+  options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency;
+  options.create_if_missing = false;
+  ASSERT_OK(TryReopen(options));
+  ASSERT_EQ(RecoveryTestHelper::GetData(this), row_count);
+
+  for (auto trunc : {true, false}) { /* Corruption style */
+    for (int i = 0; i < 4; i++) {    /* Corruption offset position */
+      if (trunc && i == 0) {
+        continue;
+      }
+
+      for (int j = jstart; j < jend; j++) { /* wal files */
+        // fill with new date
+        RecoveryTestHelper::FillData(this, &options);
+        // corrupt the wal
+        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
+                                       /*len%=*/.1, j, trunc);
+        // verify
+        options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency;
+        options.create_if_missing = false;
+        ASSERT_NOK(TryReopen(options));
+      }
+    }
+  }
+}
+
+// Test scope:
+// - We expect to open data store under all circumstances
+// - We expect only data upto the point where the first error was encountered
+TEST_F(DBWALTest, kPointInTimeRecovery) {
+  const int jstart = RecoveryTestHelper::kWALFileOffset;
+  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
+  const int maxkeys =
+      RecoveryTestHelper::kWALFilesCount * RecoveryTestHelper::kKeysPerWALFile;
+
+  for (auto trunc : {true, false}) {        /* Corruption style */
+    for (int i = 0; i < 4; i++) {           /* Offset of corruption */
+      for (int j = jstart; j < jend; j++) { /* WAL file */
+        // Fill data for testing
+        Options options = CurrentOptions();
+        const size_t row_count = RecoveryTestHelper::FillData(this, &options);
+
+        // Corrupt the wal
+        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
+                                       /*len%=*/.1, j, trunc);
+
+        // Verify
+        options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
+        options.create_if_missing = false;
+        ASSERT_OK(TryReopen(options));
+
+        // Probe data for invariants
+        size_t recovered_row_count = RecoveryTestHelper::GetData(this);
+        ASSERT_LT(recovered_row_count, row_count);
+
+        bool expect_data = true;
+        for (size_t k = 0; k < maxkeys; ++k) {
+          bool found = Get("key" + ToString(i)) != "NOT_FOUND";
+          if (expect_data && !found) {
+            expect_data = false;
+          }
+          ASSERT_EQ(found, expect_data);
+        }
+
+        const size_t min = RecoveryTestHelper::kKeysPerWALFile *
+                           (j - RecoveryTestHelper::kWALFileOffset);
+        ASSERT_GE(recovered_row_count, min);
+        if (!trunc && i != 0) {
+          const size_t max = RecoveryTestHelper::kKeysPerWALFile *
+                             (j - RecoveryTestHelper::kWALFileOffset + 1);
+          ASSERT_LE(recovered_row_count, max);
+        }
+      }
+    }
+  }
+}
+
+// Test scope:
+// - We expect to open the data store under all scenarios
+// - We expect to have recovered records past the corruption zone
+TEST_F(DBWALTest, kSkipAnyCorruptedRecords) {
+  const int jstart = RecoveryTestHelper::kWALFileOffset;
+  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
+
+  for (auto trunc : {true, false}) {        /* Corruption style */
+    for (int i = 0; i < 4; i++) {           /* Corruption offset */
+      for (int j = jstart; j < jend; j++) { /* wal files */
+        // Fill data for testing
+        Options options = CurrentOptions();
+        const size_t row_count = RecoveryTestHelper::FillData(this, &options);
+
+        // Corrupt the WAL
+        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
+                                       /*len%=*/.1, j, trunc);
+
+        // Verify behavior
+        options.wal_recovery_mode = WALRecoveryMode::kSkipAnyCorruptedRecords;
+        options.create_if_missing = false;
+        ASSERT_OK(TryReopen(options));
+
+        // Probe data for invariants
+        size_t recovered_row_count = RecoveryTestHelper::GetData(this);
+        ASSERT_LT(recovered_row_count, row_count);
+
+        if (!trunc) {
+          ASSERT_TRUE(i != 0 || recovered_row_count > 0);
+        }
+      }
+    }
+  }
+}
+
+#endif  // ROCKSDB_LITE
+
 }  // namespace rocksdb
 
 int main(int argc, char** argv) {
diff --git a/src.mk b/src.mk
index 9f3671305..c53627cef 100644
--- a/src.mk
+++ b/src.mk
@@ -198,13 +198,17 @@ TEST_BENCH_SOURCES =                                                    \
   db/dbformat_test.cc                                                   \
   db/db_iter_test.cc                                                    \
   db/db_test.cc                                                         \
+	db/db_block_cache_test.cc																							\
+  db/db_bloom_filter_test.cc                                            \
   db/db_compaction_filter_test.cc                                       \
   db/db_compaction_test.cc                                              \
   db/db_dynamic_level_test.cc                                           \
   db/db_inplace_update_test.cc                                          \
+	db/db_iterator_test.cc																								\
   db/db_log_iter_test.cc                                                \
-  db/db_universal_compaction_test.cc                                    \
+	db/db_sst_test.cc																											\
   db/db_tailing_iter_test.cc                                            \
+  db/db_universal_compaction_test.cc                                    \
   db/db_wal_test.cc                                                     \
   db/db_table_properties_test.cc                                        \
   db/deletefile_test.cc                                                 \