diff --git a/.gitignore b/.gitignore index a3a70ee31..995046089 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ build_config.mk *.jar *.*jnilib* *.d-e +*.o-* ldb manifest_dump diff --git a/HISTORY.md b/HISTORY.md index 831d3ccb1..ca65e8c18 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,6 +9,7 @@ * Column family support ### Public API changes +* Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes ## 2.8.0 (04/04/2014) diff --git a/Makefile b/Makefile index 80b819304..3e62211f4 100644 --- a/Makefile +++ b/Makefile @@ -195,7 +195,7 @@ check: $(PROGRAMS) $(TESTS) $(TOOLS) ldb_tests: all $(PROGRAMS) $(TESTS) $(TOOLS) python tools/ldb_test.py -crash_test: blackbox_crash_test whitebox_crash_test +crash_test: whitebox_crash_test blackbox_crash_test blackbox_crash_test: db_stress python -u tools/db_crashtest.py diff --git a/db/c.cc b/db/c.cc index e3a0a29a0..915a3e80e 100644 --- a/db/c.cc +++ b/db/c.cc @@ -1230,23 +1230,12 @@ void rocksdb_readoptions_set_fill_cache( opt->rep.fill_cache = v; } -void rocksdb_readoptions_set_prefix_seek( - rocksdb_readoptions_t* opt, unsigned char v) { - opt->rep.prefix_seek = v; -} - void rocksdb_readoptions_set_snapshot( rocksdb_readoptions_t* opt, const rocksdb_snapshot_t* snap) { opt->rep.snapshot = (snap ? 
snap->rep : nullptr); } -void rocksdb_readoptions_set_prefix( - rocksdb_readoptions_t* opt, const char* key, size_t keylen) { - Slice prefix = Slice(key, keylen); - opt->rep.prefix = &prefix; -} - void rocksdb_readoptions_set_read_tier( rocksdb_readoptions_t* opt, int v) { opt->rep.read_tier = static_cast(v); diff --git a/db/c_test.c b/db/c_test.c index cd9299bec..8ebce9085 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -461,8 +461,6 @@ int main(int argc, char** argv) { rocksdb_put(db, woptions, "bar3", 4, "bar", 3, &err); CheckNoError(err); - rocksdb_readoptions_set_prefix_seek(roptions, 1); - rocksdb_iterator_t* iter = rocksdb_create_iterator(db, roptions); CheckCondition(!rocksdb_iter_valid(iter)); diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index b7ec66d96..a8700bbbc 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -180,7 +180,8 @@ bool CompactionPicker::ExpandWhileOverlapping(Compaction* c) { int parent_index = -1; if (c->inputs_[0].empty()) { Log(options_->info_log, - "ExpandWhileOverlapping() failure because zero input files"); + "[%s] ExpandWhileOverlapping() failure because zero input files", + c->column_family_data()->GetName().c_str()); } if (c->inputs_[0].empty() || FilesInCompaction(c->inputs_[0]) || (c->level() != c->output_level() && @@ -275,9 +276,10 @@ void CompactionPicker::SetupOtherInputs(Compaction* c) { if (expanded1.size() == c->inputs_[1].size() && !FilesInCompaction(expanded1)) { Log(options_->info_log, - "Expanding@%lu %lu+%lu (%lu+%lu bytes) to %lu+%lu (%lu+%lu bytes)" - "\n", - (unsigned long)level, (unsigned long)(c->inputs_[0].size()), + "[%s] Expanding@%lu %lu+%lu (%lu+%lu bytes) to %lu+%lu (%lu+%lu " + "bytes)\n", + c->column_family_data()->GetName().c_str(), (unsigned long)level, + (unsigned long)(c->inputs_[0].size()), (unsigned long)(c->inputs_[1].size()), (unsigned long)inputs0_size, (unsigned long)inputs1_size, (unsigned long)(expanded0.size()), (unsigned long)(expanded1.size()), (unsigned 
long)expanded0_size, @@ -345,7 +347,9 @@ Compaction* CompactionPicker::CompactRange(Version* version, int input_level, c->inputs_[0] = inputs; if (ExpandWhileOverlapping(c) == false) { delete c; - Log(options_->info_log, "Could not compact due to expansion failure.\n"); + Log(options_->info_log, + "[%s] Could not compact due to expansion failure.\n", + version->cfd_->GetName().c_str()); return nullptr; } @@ -515,10 +519,6 @@ Compaction* LevelCompactionPicker::PickCompactionBySize(Version* version, nextIndex = i; } - //if (i > Version::number_of_files_to_sort_) { - // Log(options_->info_log, "XXX Looking at index %d", i); - //} - // Do not pick this file if its parents at level+1 are being compacted. // Maybe we can avoid redoing this work in SetupOtherInputs int parent_index = -1; @@ -553,19 +553,21 @@ Compaction* UniversalCompactionPicker::PickCompaction(Version* version, if ((version->files_[level].size() < (unsigned int)options_->level0_file_num_compaction_trigger)) { - LogToBuffer(log_buffer, "Universal: nothing to do\n"); + LogToBuffer(log_buffer, "[%s] Universal: nothing to do\n", + version->cfd_->GetName().c_str()); return nullptr; } Version::FileSummaryStorage tmp; - LogToBuffer(log_buffer, "Universal: candidate files(%zu): %s\n", - version->files_[level].size(), + LogToBuffer(log_buffer, "[%s] Universal: candidate files(%zu): %s\n", + version->cfd_->GetName().c_str(), version->files_[level].size(), version->LevelFileSummary(&tmp, 0)); // Check for size amplification first. Compaction* c; if ((c = PickCompactionUniversalSizeAmp(version, score, log_buffer)) != nullptr) { - LogToBuffer(log_buffer, "Universal: compacting for size amp\n"); + LogToBuffer(log_buffer, "[%s] Universal: compacting for size amp\n", + version->cfd_->GetName().c_str()); } else { // Size amplification is within limits. Try reducing read // amplification while maintaining file size ratios. 
@@ -573,7 +575,8 @@ Compaction* UniversalCompactionPicker::PickCompaction(Version* version, if ((c = PickCompactionUniversalReadAmp(version, score, ratio, UINT_MAX, log_buffer)) != nullptr) { - LogToBuffer(log_buffer, "Universal: compacting for size ratio\n"); + LogToBuffer(log_buffer, "[%s] Universal: compacting for size ratio\n", + version->cfd_->GetName().c_str()); } else { // Size amplification and file size ratios are within configured limits. // If max read amplification is exceeding configured limits, then force @@ -583,7 +586,8 @@ Compaction* UniversalCompactionPicker::PickCompaction(Version* version, options_->level0_file_num_compaction_trigger; if ((c = PickCompactionUniversalReadAmp( version, score, UINT_MAX, num_files, log_buffer)) != nullptr) { - LogToBuffer(log_buffer, "Universal: compacting for file num\n"); + LogToBuffer(log_buffer, "[%s] Universal: compacting for file num\n", + version->cfd_->GetName().c_str()); } } } @@ -671,9 +675,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( candidate_count = 1; break; } - LogToBuffer(log_buffer, - "Universal: file %lu[%d] being compacted, skipping", - (unsigned long)f->number, loop); + LogToBuffer( + log_buffer, "[%s] Universal: file %lu[%d] being compacted, skipping", + version->cfd_->GetName().c_str(), (unsigned long)f->number, loop); f = nullptr; } @@ -681,8 +685,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( // first candidate to be compacted. uint64_t candidate_size = f != nullptr? f->file_size : 0; if (f != nullptr) { - LogToBuffer(log_buffer, "Universal: Possible candidate file %lu[%d].", - (unsigned long)f->number, loop); + LogToBuffer( + log_buffer, "[%s] Universal: Possible candidate file %lu[%d].", + version->cfd_->GetName().c_str(), (unsigned long)f->number, loop); } // Check if the suceeding files need compaction. 
@@ -733,9 +738,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( int index = file_by_time[i]; FileMetaData* f = version->files_[level][index]; LogToBuffer(log_buffer, - "Universal: Skipping file %lu[%d] with size %lu %d\n", - (unsigned long)f->number, i, (unsigned long)f->file_size, - f->being_compacted); + "[%s] Universal: Skipping file %lu[%d] with size %lu %d\n", + version->cfd_->GetName().c_str(), (unsigned long)f->number, + i, (unsigned long)f->file_size, f->being_compacted); } } } @@ -769,8 +774,10 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp( int index = file_by_time[i]; FileMetaData* f = c->input_version_->files_[level][index]; c->inputs_[0].push_back(f); - LogToBuffer(log_buffer, "Universal: Picking file %lu[%d] with size %lu\n", - (unsigned long)f->number, i, (unsigned long)f->file_size); + LogToBuffer(log_buffer, + "[%s] Universal: Picking file %lu[%d] with size %lu\n", + version->cfd_->GetName().c_str(), (unsigned long)f->number, i, + (unsigned long)f->file_size); } return c; } @@ -806,17 +813,19 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( start_index = loop; // Consider this as the first candidate. 
break; } - LogToBuffer(log_buffer, "Universal: skipping file %lu[%d] compacted %s", - (unsigned long)f->number, loop, - " cannot be a candidate to reduce size amp.\n"); + LogToBuffer(log_buffer, + "[%s] Universal: skipping file %lu[%d] compacted %s", + version->cfd_->GetName().c_str(), (unsigned long)f->number, + loop, " cannot be a candidate to reduce size amp.\n"); f = nullptr; } if (f == nullptr) { return nullptr; // no candidate files } - LogToBuffer(log_buffer, "Universal: First candidate file %lu[%d] %s", - (unsigned long)f->number, start_index, " to reduce size amp.\n"); + LogToBuffer(log_buffer, "[%s] Universal: First candidate file %lu[%d] %s", + version->cfd_->GetName().c_str(), (unsigned long)f->number, + start_index, " to reduce size amp.\n"); // keep adding up all the remaining files for (unsigned int loop = start_index; loop < file_by_time.size() - 1; @@ -825,8 +834,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( f = version->files_[level][index]; if (f->being_compacted) { LogToBuffer( - log_buffer, "Universal: Possible candidate file %lu[%d] %s.", - (unsigned long)f->number, loop, + log_buffer, "[%s] Universal: Possible candidate file %lu[%d] %s.", + version->cfd_->GetName().c_str(), (unsigned long)f->number, loop, " is already being compacted. No size amp reduction possible.\n"); return nullptr; } @@ -843,17 +852,18 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( // size amplification = percentage of additional size if (candidate_size * 100 < ratio * earliest_file_size) { - LogToBuffer(log_buffer, - "Universal: size amp not needed. newer-files-total-size %lu " - "earliest-file-size %lu", - (unsigned long)candidate_size, - (unsigned long)earliest_file_size); + LogToBuffer( + log_buffer, + "[%s] Universal: size amp not needed. 
newer-files-total-size %lu " + "earliest-file-size %lu", + version->cfd_->GetName().c_str(), (unsigned long)candidate_size, + (unsigned long)earliest_file_size); return nullptr; } else { LogToBuffer(log_buffer, - "Universal: size amp needed. newer-files-total-size %lu " + "[%s] Universal: size amp needed. newer-files-total-size %lu " "earliest-file-size %lu", - (unsigned long)candidate_size, + version->cfd_->GetName().c_str(), (unsigned long)candidate_size, (unsigned long)earliest_file_size); } assert(start_index >= 0 && start_index < file_by_time.size() - 1); @@ -869,8 +879,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp( f = c->input_version_->files_[level][index]; c->inputs_[0].push_back(f); LogToBuffer(log_buffer, - "Universal: size amp picking file %lu[%d] with size %lu", - (unsigned long)f->number, index, (unsigned long)f->file_size); + "[%s] Universal: size amp picking file %lu[%d] with size %lu", + version->cfd_->GetName().c_str(), (unsigned long)f->number, + index, (unsigned long)f->file_size); } return c; } diff --git a/db/corruption_test.cc b/db/corruption_test.cc index 18da2621a..4726e92b9 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -40,7 +40,7 @@ class CorruptionTest { CorruptionTest() { tiny_cache_ = NewLRUCache(100); options_.env = &env_; - dbname_ = test::TmpDir() + "/db_test"; + dbname_ = test::TmpDir() + "/corruption_test"; DestroyDB(dbname_, options_); db_ = nullptr; @@ -127,24 +127,7 @@ class CorruptionTest { ASSERT_GE(max_expected, correct); } - void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) { - // Pick file to corrupt - std::vector filenames; - ASSERT_OK(env_.GetChildren(dbname_, &filenames)); - uint64_t number; - FileType type; - std::string fname; - int picked_number = -1; - for (unsigned int i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type) && - type == filetype && - int(number) > picked_number) { // Pick latest file - fname = dbname_ + 
"/" + filenames[i]; - picked_number = number; - } - } - ASSERT_TRUE(!fname.empty()) << filetype; - + void CorruptFile(const std::string fname, int offset, int bytes_to_corrupt) { struct stat sbuf; if (stat(fname.c_str(), &sbuf) != 0) { const char* msg = strerror(errno); @@ -177,6 +160,42 @@ class CorruptionTest { ASSERT_TRUE(s.ok()) << s.ToString(); } + void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) { + // Pick file to corrupt + std::vector filenames; + ASSERT_OK(env_.GetChildren(dbname_, &filenames)); + uint64_t number; + FileType type; + std::string fname; + int picked_number = -1; + for (unsigned int i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type) && + type == filetype && + static_cast(number) > picked_number) { // Pick latest file + fname = dbname_ + "/" + filenames[i]; + picked_number = number; + } + } + ASSERT_TRUE(!fname.empty()) << filetype; + + CorruptFile(fname, offset, bytes_to_corrupt); + } + + // corrupts exactly one file at level `level`. 
if no file found at level, + // asserts + void CorruptTableFileAtLevel(int level, int offset, int bytes_to_corrupt) { + std::vector metadata; + db_->GetLiveFilesMetaData(&metadata); + for (const auto& m : metadata) { + if (m.level == level) { + CorruptFile(dbname_ + "/" + m.name, offset, bytes_to_corrupt); + return; + } + } + ASSERT_TRUE(false) << "no file found at level"; + } + + int Property(const std::string& name) { std::string property; int result; @@ -331,19 +350,23 @@ TEST(CorruptionTest, CompactionInputErrorParanoid) { Reopen(&options); DBImpl* dbi = reinterpret_cast(db_); - // Fill levels >= 1 so memtable compaction outputs to level 1 + // Fill levels >= 1 so memtable flush outputs to level 0 for (int level = 1; level < dbi->NumberLevels(); level++) { dbi->Put(WriteOptions(), "", "begin"); dbi->Put(WriteOptions(), "~", "end"); dbi->TEST_FlushMemTable(); } + options.max_mem_compaction_level = 0; + Reopen(&options); + + dbi = reinterpret_cast(db_); Build(10); dbi->TEST_FlushMemTable(); dbi->TEST_WaitForCompact(); ASSERT_EQ(1, Property("rocksdb.num-files-at-level0")); - Corrupt(kTableFile, 100, 1); + CorruptTableFileAtLevel(0, 100, 1); Check(9, 9); // Write must eventually fail because of corrupted table diff --git a/db/db_bench.cc b/db/db_bench.cc index ecf40b943..345821596 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -28,11 +28,11 @@ #include "rocksdb/statistics.h" #include "rocksdb/perf_context.h" #include "port/port.h" +#include "port/stack_trace.h" #include "util/crc32c.h" #include "util/histogram.h" #include "util/mutexlock.h" #include "util/random.h" -#include "util/stack_trace.h" #include "util/string_util.h" #include "util/statistics.h" #include "util/testutil.h" @@ -1944,7 +1944,6 @@ class Benchmark { void IteratorCreation(ThreadState* thread) { Duration duration(FLAGS_duration, reads_); ReadOptions options(FLAGS_verify_checksum, true); - options.prefix_seek = (FLAGS_prefix_size > 0); while (!duration.Done(1)) { DB* db = SelectDB(thread); 
Iterator* iter = db->NewIterator(options); @@ -1966,7 +1965,6 @@ class Benchmark { int64_t found = 0; ReadOptions options(FLAGS_verify_checksum, true); options.tailing = FLAGS_use_tailing_iterator; - options.prefix_seek = (FLAGS_prefix_size > 0); Iterator* single_iter = nullptr; std::vector multi_iters; @@ -2528,7 +2526,7 @@ class Benchmark { } // namespace rocksdb int main(int argc, char** argv) { - rocksdb::InstallStackTraceHandler(); + rocksdb::port::InstallStackTraceHandler(); google::SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) + " [OPTIONS]..."); google::ParseCommandLineFlags(&argc, &argv, true); diff --git a/db/db_impl.cc b/db/db_impl.cc index 44f18fb48..3aa2a2256 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -33,7 +33,6 @@ #include "db/memtable_list.h" #include "db/merge_context.h" #include "db/merge_helper.h" -#include "db/prefix_filter_iterator.h" #include "db/table_cache.h" #include "db/table_properties_collector.h" #include "db/tailing_iter.h" @@ -1339,12 +1338,12 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem, FileMetaData meta; meta.number = versions_->NewFileNumber(); pending_outputs_.insert(meta.number); - Iterator* iter = mem->NewIterator(); + Iterator* iter = mem->NewIterator(ReadOptions(), true); const SequenceNumber newest_snapshot = snapshots_.GetNewest(); const SequenceNumber earliest_seqno_in_memtable = mem->GetFirstSequenceNumber(); - Log(options_.info_log, "Level-0 table #%lu: started", - (unsigned long) meta.number); + Log(options_.info_log, "[%s] Level-0 table #%lu: started", + cfd->GetName().c_str(), (unsigned long)meta.number); Status s; { @@ -1357,10 +1356,9 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem, mutex_.Lock(); } - Log(options_.info_log, "Level-0 table #%lu: %lu bytes %s", - (unsigned long) meta.number, - (unsigned long) meta.file_size, - s.ToString().c_str()); + Log(options_.info_log, "[%s] Level-0 table #%lu: %lu bytes %s", + 
cfd->GetName().c_str(), (unsigned long)meta.number, + (unsigned long)meta.file_size, s.ToString().c_str()); delete iter; pending_outputs_.erase(meta.number); @@ -1404,15 +1402,14 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd, log_buffer->FlushBufferToLog(); std::vector memtables; for (MemTable* m : mems) { - Log(options_.info_log, - "[CF %u] Flushing memtable with next log file: %lu\n", cfd->GetID(), - (unsigned long)m->GetNextLogNumber()); - memtables.push_back(m->NewIterator()); + Log(options_.info_log, "[%s] Flushing memtable with next log file: %lu\n", + cfd->GetName().c_str(), (unsigned long)m->GetNextLogNumber()); + memtables.push_back(m->NewIterator(ReadOptions(), true)); } Iterator* iter = NewMergingIterator(&cfd->internal_comparator(), &memtables[0], memtables.size()); - Log(options_.info_log, "Level-0 flush table #%lu: started", - (unsigned long)meta.number); + Log(options_.info_log, "[%s] Level-0 flush table #%lu: started", + cfd->GetName().c_str(), (unsigned long)meta.number); s = BuildTable(dbname_, env_, *cfd->options(), storage_options_, cfd->table_cache(), iter, &meta, cfd->internal_comparator(), @@ -1420,10 +1417,13 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd, GetCompressionFlush(*cfd->options())); LogFlush(options_.info_log); delete iter; - Log(options_.info_log, "Level-0 flush table #%lu: %lu bytes %s", - (unsigned long) meta.number, - (unsigned long) meta.file_size, - s.ToString().c_str()); + Log(options_.info_log, "[%s] Level-0 flush table #%lu: %lu bytes %s", + cfd->GetName().c_str(), (unsigned long)meta.number, + (unsigned long)meta.file_size, s.ToString().c_str()); + + Version::LevelSummaryStorage tmp; + Log(options_.info_log, "[%s] Level summary: %s\n", cfd->GetName().c_str(), + cfd->current()->LevelSummary(&tmp)); if (!options_.disableDataSync) { db_directory_->Fsync(); } @@ -1483,7 +1483,8 @@ Status DBImpl::FlushMemTableToOutputFile(ColumnFamilyData* cfd, autovector mems; cfd->imm()->PickMemtablesToFlush(&mems); if 
(mems.empty()) { - LogToBuffer(log_buffer, "Nothing in memstore to flush"); + LogToBuffer(log_buffer, "[%s] Nothing in memtable to flush", + cfd->GetName().c_str()); return Status::OK(); } @@ -1644,7 +1645,7 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { Status status; if (to_level < level) { - Log(options_.info_log, "Before refitting:\n%s", + Log(options_.info_log, "[%s] Before refitting:\n%s", cfd->GetName().c_str(), cfd->current()->DebugString().data()); VersionEdit edit; @@ -1654,18 +1655,19 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { edit.AddFile(to_level, f->number, f->file_size, f->smallest, f->largest, f->smallest_seqno, f->largest_seqno); } - Log(options_.info_log, "Apply version edit:\n%s", - edit.DebugString().data()); + Log(options_.info_log, "[%s] Apply version edit:\n%s", + cfd->GetName().c_str(), edit.DebugString().data()); status = versions_->LogAndApply(cfd, &edit, &mutex_, db_directory_.get()); superversion_to_free = cfd->InstallSuperVersion(new_superversion, &mutex_); new_superversion = nullptr; - Log(options_.info_log, "LogAndApply: %s\n", status.ToString().data()); + Log(options_.info_log, "[%s] LogAndApply: %s\n", cfd->GetName().c_str(), + status.ToString().data()); if (status.ok()) { - Log(options_.info_log, "After refitting:\n%s", - cfd->current()->DebugString().data()); + Log(options_.info_log, "[%s] After refitting:\n%s", + cfd->GetName().c_str(), cfd->current()->DebugString().data()); } } @@ -1752,12 +1754,14 @@ Status DBImpl::RunManualCompaction(ColumnFamilyData* cfd, int input_level, ++bg_manual_only_; while (bg_compaction_scheduled_ > 0) { Log(options_.info_log, - "Manual compaction waiting for all other scheduled background " - "compactions to finish"); + "[%s] Manual compaction waiting for all other scheduled background " + "compactions to finish", + cfd->GetName().c_str()); bg_cv_.Wait(); } - Log(options_.info_log, "Manual compaction starting"); + 
Log(options_.info_log, "[%s] Manual compaction starting", + cfd->GetName().c_str()); while (!manual.done && !shutting_down_.Acquire_Load() && bg_error_.ok()) { assert(bg_manual_only_ > 0); @@ -1874,8 +1878,9 @@ Status DBImpl::BackgroundFlush(bool* madeProgress, LogToBuffer( log_buffer, "BackgroundCallFlush doing FlushMemTableToOutputFile with column " - "family %u, flush slots available %d", - cfd->GetID(), options_.max_background_flushes - bg_flush_scheduled_); + "family [%s], flush slots available %d", + cfd->GetName().c_str(), + options_.max_background_flushes - bg_flush_scheduled_); flush_status = FlushMemTableToOutputFile(cfd, madeProgress, deletion_state, log_buffer); } @@ -1963,8 +1968,6 @@ void DBImpl::BackgroundCallCompaction() { LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, options_.info_log.get()); { MutexLock l(&mutex_); - // Log(options_.info_log, "XXX BG Thread %llx process new work item", - // pthread_self()); assert(bg_compaction_scheduled_); Status s; if (!shutting_down_.Acquire_Load()) { @@ -2086,16 +2089,15 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, if (!c) { m->done = true; } - LogToBuffer( - log_buffer, - "Manual compaction from level-%d to level-%d from %s .. %s; will stop " - "at %s\n", - m->input_level, m->output_level, - (m->begin ? m->begin->DebugString().c_str() : "(begin)"), - (m->end ? m->end->DebugString().c_str() : "(end)"), - ((m->done || manual_end == nullptr) - ? "(end)" - : manual_end->DebugString().c_str())); + LogToBuffer(log_buffer, + "[%s] Manual compaction from level-%d to level-%d from %s .. " + "%s; will stop at %s\n", + m->cfd->GetName().c_str(), m->input_level, m->output_level, + (m->begin ? m->begin->DebugString().c_str() : "(begin)"), + (m->end ? m->end->DebugString().c_str() : "(end)"), + ((m->done || manual_end == nullptr) + ? 
"(end)" + : manual_end->DebugString().c_str())); } else { // no need to refcount in iteration since it's always under a mutex for (auto cfd : *versions_->GetColumnFamilySet()) { @@ -2128,10 +2130,12 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, InstallSuperVersion(c->column_family_data(), deletion_state); Version::LevelSummaryStorage tmp; - LogToBuffer(log_buffer, "Moved #%lld to level-%d %lld bytes %s: %s\n", - static_cast(f->number), c->level() + 1, - static_cast(f->file_size), - status.ToString().c_str(), c->input_version()->LevelSummary(&tmp)); + LogToBuffer(log_buffer, "[%s] Moved #%lld to level-%d %lld bytes %s: %s\n", + c->column_family_data()->GetName().c_str(), + static_cast(f->number), c->level() + 1, + static_cast(f->file_size), + status.ToString().c_str(), + c->input_version()->LevelSummary(&tmp)); c->ReleaseCompactionFiles(status); *madeProgress = true; } else { @@ -2235,7 +2239,6 @@ void DBImpl::ReleaseCompactionUnusedFileNumbers(CompactionState* compact) { mutex_.AssertHeld(); for (const auto file_number : compact->allocated_file_numbers) { pending_outputs_.erase(file_number); - // Log(options_.info_log, "XXX releasing unused file num %d", file_number); } } @@ -2334,11 +2337,9 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, s = iter->status(); delete iter; if (s.ok()) { - Log(options_.info_log, - "Generated table #%lu: %lu keys, %lu bytes", - (unsigned long) output_number, - (unsigned long) current_entries, - (unsigned long) current_bytes); + Log(options_.info_log, "[%s] Generated table #%lu: %lu keys, %lu bytes", + cfd->GetName().c_str(), (unsigned long)output_number, + (unsigned long)current_entries, (unsigned long)current_bytes); } } return s; @@ -2354,15 +2355,16 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact, // This ensures that a concurrent compaction did not erroneously // pick the same files to compact. 
if (!versions_->VerifyCompactionFileConsistency(compact->compaction)) { - Log(options_.info_log, "Compaction %d@%d + %d@%d files aborted", - compact->compaction->num_input_files(0), - compact->compaction->level(), - compact->compaction->num_input_files(1), - compact->compaction->output_level()); + Log(options_.info_log, "[%s] Compaction %d@%d + %d@%d files aborted", + compact->compaction->column_family_data()->GetName().c_str(), + compact->compaction->num_input_files(0), compact->compaction->level(), + compact->compaction->num_input_files(1), + compact->compaction->output_level()); return Status::Corruption("Compaction input files inconsistent"); } - LogToBuffer(log_buffer, "Compacted %d@%d + %d@%d files => %lld bytes", + LogToBuffer(log_buffer, "[%s] Compacted %d@%d + %d@%d files => %lld bytes", + compact->compaction->column_family_data()->GetName().c_str(), compact->compaction->num_input_files(0), compact->compaction->level(), compact->compaction->num_input_files(1), @@ -2620,16 +2622,6 @@ Status DBImpl::ProcessKeyValueCompaction( last_sequence_for_key = ikey.sequence; visible_in_snapshot = visible; } -#if 0 - Log(options_.info_log, - " Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, " - "%d smallest_snapshot: %d level: %d bottommost %d", - ikey.user_key.ToString().c_str(), - (int)ikey.sequence, ikey.type, kTypeValue, drop, - compact->compaction->IsBaseLevelForKey(ikey.user_key), - (int)last_sequence_for_key, (int)earliest_snapshot, - compact->compaction->level(), bottommost_level); -#endif if (!drop) { // We may write a single key (e.g.: for Put/Delete or successful merge). 
@@ -2801,14 +2793,15 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, ColumnFamilyData* cfd = compact->compaction->column_family_data(); LogToBuffer( log_buffer, - "[CF %u] Compacting %d@%d + %d@%d files, score %.2f slots available %d", - cfd->GetID(), compact->compaction->num_input_files(0), + "[%s] Compacting %d@%d + %d@%d files, score %.2f slots available %d", + cfd->GetName().c_str(), compact->compaction->num_input_files(0), compact->compaction->level(), compact->compaction->num_input_files(1), compact->compaction->output_level(), compact->compaction->score(), options_.max_background_compactions - bg_compaction_scheduled_); char scratch[2345]; compact->compaction->Summary(scratch, sizeof(scratch)); - LogToBuffer(log_buffer, "Compaction start summary: %s\n", scratch); + LogToBuffer(log_buffer, "[%s] Compaction start summary: %s\n", + cfd->GetName().c_str(), scratch); assert(cfd->current()->NumLevelFiles(compact->compaction->level()) > 0); assert(compact->builder == nullptr); @@ -2886,8 +2879,8 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, } if (!ParseInternalKey(key, &ikey)) { // log error - Log(options_.info_log, "Failed to parse key: %s", - key.ToString().c_str()); + Log(options_.info_log, "[%s] Failed to parse key: %s", + cfd->GetName().c_str(), key.ToString().c_str()); continue; } else { // If the prefix remains the same, keep buffering @@ -3068,10 +3061,10 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, Version::LevelSummaryStorage tmp; LogToBuffer( log_buffer, - "compacted to: %s, %.1f MB/sec, level %d, files in(%d, %d) out(%d) " + "[%s] compacted to: %s, %.1f MB/sec, level %d, files in(%d, %d) out(%d) " "MB in(%.1f, %.1f) out(%.1f), read-write-amplify(%.1f) " "write-amplify(%.1f) %s\n", - cfd->current()->LevelSummary(&tmp), + cfd->GetName().c_str(), cfd->current()->LevelSummary(&tmp), (stats.bytes_readn + stats.bytes_readnp1 + stats.bytes_written) / (double)stats.micros, compact->compaction->output_level(), 
stats.files_in_leveln, @@ -3409,10 +3402,10 @@ Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& options, assert(cfd != nullptr); delete cfd->InstallSuperVersion(new SuperVersion(), &mutex_); *handle = new ColumnFamilyHandleImpl(cfd, this, &mutex_); - Log(options_.info_log, "Created column family \"%s\" (ID %u)", + Log(options_.info_log, "Created column family [%s] (ID %u)", column_family_name.c_str(), (unsigned)cfd->GetID()); } else { - Log(options_.info_log, "Creating column family \"%s\" FAILED -- %s", + Log(options_.info_log, "Creating column family [%s] FAILED -- %s", column_family_name.c_str(), s.ToString().c_str()); } return s; @@ -3500,12 +3493,6 @@ Iterator* DBImpl::NewIterator(const ReadOptions& options, cfd->user_comparator(), iter, snapshot); } - if (options.prefix) { - // use extra wrapper to exclude any keys from the results which - // don't begin with the prefix - iter = new PrefixFilterIterator(iter, *options.prefix, - cfd->options()->prefix_extractor.get()); - } return iter; } @@ -3513,12 +3500,6 @@ Status DBImpl::NewIterators( const ReadOptions& options, const std::vector& column_families, std::vector* iterators) { - - if (options.prefix) { - return Status::NotSupported( - "NewIterators doesn't support ReadOptions::prefix"); - } - iterators->clear(); iterators->reserve(column_families.size()); SequenceNumber latest_snapshot = 0; @@ -3626,10 +3607,13 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { Status status; // refcounting cfd in iteration bool dead_cfd = false; + autovector superversions_to_free; + autovector logs_to_free; for (auto cfd : *versions_->GetColumnFamilySet()) { cfd->Ref(); // May temporarily unlock and wait. 
- status = MakeRoomForWrite(cfd, my_batch == nullptr); + status = MakeRoomForWrite(cfd, my_batch == nullptr, &superversions_to_free, + &logs_to_free); if (cfd->Unref()) { dead_cfd = true; } @@ -3742,6 +3726,14 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { writers_.front()->cv.Signal(); } mutex_.Unlock(); + + for (auto& sv : superversions_to_free) { + delete sv; + } + for (auto& log : logs_to_free) { + delete log; + } + PERF_TIMER_STOP(write_pre_and_post_process_time); return status; } @@ -3828,7 +3820,10 @@ uint64_t DBImpl::SlowdownAmount(int n, double bottom, double top) { // REQUIRES: mutex_ is held // REQUIRES: this thread is currently at the front of the writer queue -Status DBImpl::MakeRoomForWrite(ColumnFamilyData* cfd, bool force) { +Status DBImpl::MakeRoomForWrite( + ColumnFamilyData* cfd, bool force, + autovector* superversions_to_free, + autovector* logs_to_free) { mutex_.AssertHeld(); assert(!writers_.empty()); bool allow_delay = !force; @@ -3878,7 +3873,8 @@ Status DBImpl::MakeRoomForWrite(ColumnFamilyData* cfd, bool force) { // We have filled up the current memtable, but the previous // ones are still being flushed, so we wait. DelayLoggingAndReset(); - Log(options_.info_log, "wait for memtable flush...\n"); + Log(options_.info_log, "[%s] wait for memtable flush...\n", + cfd->GetName().c_str()); MaybeScheduleFlushOrCompaction(); uint64_t stall; { @@ -3895,7 +3891,8 @@ Status DBImpl::MakeRoomForWrite(ColumnFamilyData* cfd, bool force) { cfd->options()->level0_stop_writes_trigger) { // There are too many level-0 files. 
DelayLoggingAndReset(); - Log(options_.info_log, "wait for fewer level0 files...\n"); + Log(options_.info_log, "[%s] wait for fewer level0 files...\n", + cfd->GetName().c_str()); uint64_t stall; { StopWatch sw(env_, options_.statistics.get(), @@ -3996,8 +3993,7 @@ Status DBImpl::MakeRoomForWrite(ColumnFamilyData* cfd, bool force) { if (creating_new_log) { logfile_number_ = new_log_number; assert(new_log != nullptr); - // TODO(icanadi) delete outside of mutex - delete log_.release(); + logs_to_free->push_back(log_.release()); log_.reset(new_log); log_empty_ = true; alive_log_files_.push_back(logfile_number_); @@ -4019,13 +4015,12 @@ Status DBImpl::MakeRoomForWrite(ColumnFamilyData* cfd, bool force) { } new_mem->Ref(); cfd->SetMemtable(new_mem); - Log(options_.info_log, - "[CF %" PRIu32 "] New memtable created with log file: #%lu\n", - cfd->GetID(), (unsigned long)logfile_number_); + Log(options_.info_log, "[%s] New memtable created with log file: #%lu\n", + cfd->GetName().c_str(), (unsigned long)logfile_number_); force = false; // Do not force another compaction if have room MaybeScheduleFlushOrCompaction(); - // TODO(icanadi) delete outside of mutex - delete cfd->InstallSuperVersion(new_superversion, &mutex_); + superversions_to_free->push_back( + cfd->InstallSuperVersion(new_superversion, &mutex_)); } } return s; diff --git a/db/db_impl.h b/db/db_impl.h index 5bc495400..b66d4e558 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -325,7 +325,9 @@ class DBImpl : public DB { // TODO(icanadi) free superversion_to_free and old_log outside of mutex Status MakeRoomForWrite(ColumnFamilyData* cfd, - bool force /* flush even if there is room? */); + bool force /* flush even if there is room? 
*/, + autovector* superversions_to_free, + autovector* logs_to_free); void BuildBatchGroup(Writer** last_writer, autovector* write_batch_group); diff --git a/db/db_impl_debug.cc b/db/db_impl_debug.cc index 346692beb..3dcde6c40 100644 --- a/db/db_impl_debug.cc +++ b/db/db_impl_debug.cc @@ -33,7 +33,6 @@ Iterator* DBImpl::TEST_NewInternalIterator(ColumnFamilyHandle* column_family) { SuperVersion* super_version = cfd->GetSuperVersion()->Ref(); mutex_.Unlock(); ReadOptions roptions; - roptions.prefix_seek = true; return NewInternalIterator(roptions, cfd, super_version); } diff --git a/db/db_test.cc b/db/db_test.cc index f2c665af3..188cfff3d 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -306,7 +306,8 @@ class DBTest { kSkipUniversalCompaction = 2, kSkipMergePut = 4, kSkipPlainTable = 8, - kSkipHashIndex = 16 + kSkipHashIndex = 16, + kSkipNoSeekToLast = 32 }; DBTest() : option_config_(kDefault), @@ -341,6 +342,11 @@ class DBTest { if ((skip_mask & kSkipMergePut) && option_config_ == kMergePut) { continue; } + if ((skip_mask & kSkipNoSeekToLast) && + (option_config_ == kHashLinkList || + option_config_ == kHashSkipList)) {; + continue; + } if ((skip_mask & kSkipPlainTable) && (option_config_ == kPlainTableAllBytesPrefix || option_config_ == kPlainTableFirstBytePrefix)) { @@ -862,10 +868,11 @@ class DBTest { void VerifyIterLast(std::string expected_key, int cf = 0) { Iterator* iter; + ReadOptions ro; if (cf == 0) { - iter = db_->NewIterator(ReadOptions()); + iter = db_->NewIterator(ro); } else { - iter = db_->NewIterator(ReadOptions(), handles_[cf]); + iter = db_->NewIterator(ro, handles_[cf]); } iter->SeekToLast(); ASSERT_EQ(IterStatus(iter), expected_key); @@ -1009,12 +1016,28 @@ TEST(DBTest, Empty) { options.write_buffer_size = 100000; // Small write buffer CreateAndReopenWithCF({"pikachu"}, &options); + std::string num; + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ("0", num); + ASSERT_OK(Put(1, 
"foo", "v1")); ASSERT_EQ("v1", Get(1, "foo")); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ("1", num); env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls Put(1, "k1", std::string(100000, 'x')); // Fill memtable + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ("2", num); + Put(1, "k2", std::string(100000, 'y')); // Trigger compaction + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ("1", num); + ASSERT_EQ("v1", Get(1, "foo")); env_->delay_sstable_sync_.Release_Store(nullptr); // Release sync calls } while (ChangeOptions()); @@ -1447,7 +1470,7 @@ TEST(DBTest, NonBlockingIteration) { // This test verifies block cache behaviors, which is not used by plain // table format. - } while (ChangeOptions(kSkipPlainTable)); + } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast)); } // A delete is skipped for key if KeyMayExist(key) returns False @@ -1891,19 +1914,23 @@ TEST(DBTest, IterSmallAndLargeMix) { TEST(DBTest, IterMultiWithDelete) { do { CreateAndReopenWithCF({"pikachu"}); - ASSERT_OK(Put(1, "a", "va")); - ASSERT_OK(Put(1, "b", "vb")); - ASSERT_OK(Put(1, "c", "vc")); - ASSERT_OK(Delete(1, "b")); - ASSERT_EQ("NOT_FOUND", Get(1, "b")); + ASSERT_OK(Put(1, "ka", "va")); + ASSERT_OK(Put(1, "kb", "vb")); + ASSERT_OK(Put(1, "kc", "vc")); + ASSERT_OK(Delete(1, "kb")); + ASSERT_EQ("NOT_FOUND", Get(1, "kb")); Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]); - iter->Seek("c"); - ASSERT_EQ(IterStatus(iter), "c->vc"); + iter->Seek("kc"); + ASSERT_EQ(IterStatus(iter), "kc->vc"); if (!CurrentOptions().merge_operator) { // TODO: merge operator does not support backward iteration yet - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); + if (kPlainTableAllBytesPrefix != option_config_&& + kBlockBasedTableWithWholeKeyHashIndex != option_config_ && + 
kHashLinkList != option_config_) { + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "ka->va"); + } } delete iter; } while (ChangeOptions()); @@ -1936,7 +1963,7 @@ TEST(DBTest, IterPrevMaxSkip) { ASSERT_OK(Delete(1, "key1")); VerifyIterLast("(invalid)", 1); - } while (ChangeOptions(kSkipMergePut)); + } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast)); } TEST(DBTest, IterWithSnapshot) { @@ -1961,15 +1988,19 @@ TEST(DBTest, IterWithSnapshot) { ASSERT_EQ(IterStatus(iter), "key5->val5"); if (!CurrentOptions().merge_operator) { // TODO: merge operator does not support backward iteration yet - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "key4->val4"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "key3->val3"); + if (kPlainTableAllBytesPrefix != option_config_&& + kBlockBasedTableWithWholeKeyHashIndex != option_config_ && + kHashLinkList != option_config_) { + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "key4->val4"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "key3->val3"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "key4->val4"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "key5->val5"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "key4->val4"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "key5->val5"); + } iter->Next(); ASSERT_TRUE(!iter->Valid()); } @@ -2225,6 +2256,9 @@ TEST(DBTest, NumImmutableMemTable) { ASSERT_TRUE(dbfull()->GetProperty(handles_[1], "rocksdb.num-immutable-mem-table", &num)); ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ(num, "1"); perf_context.Reset(); Get(1, "k1"); ASSERT_EQ(1, (int) perf_context.get_from_memtable_count); @@ -2233,6 +2267,13 @@ TEST(DBTest, NumImmutableMemTable) { ASSERT_TRUE(dbfull()->GetProperty(handles_[1], "rocksdb.num-immutable-mem-table", &num)); ASSERT_EQ(num, "1"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ(num, "1"); + 
ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-imm-mem-tables", &num)); + ASSERT_EQ(num, "1"); + perf_context.Reset(); Get(1, "k1"); ASSERT_EQ(2, (int) perf_context.get_from_memtable_count); @@ -2246,6 +2287,12 @@ TEST(DBTest, NumImmutableMemTable) { ASSERT_TRUE(dbfull()->GetProperty(handles_[1], "rocksdb.num-immutable-mem-table", &num)); ASSERT_EQ(num, "2"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-active-mem-table", &num)); + ASSERT_EQ(num, "1"); + ASSERT_TRUE(dbfull()->GetProperty( + handles_[1], "rocksdb.num-entries-imm-mem-tables", &num)); + ASSERT_EQ(num, "2"); perf_context.Reset(); Get(1, "k2"); ASSERT_EQ(2, (int) perf_context.get_from_memtable_count); @@ -4374,6 +4421,8 @@ TEST(DBTest, HiddenValuesAreRemoved) { TEST(DBTest, CompactBetweenSnapshots) { do { + Options options = CurrentOptions(); + options.disable_auto_compactions = true; CreateAndReopenWithCF({"pikachu"}); Random rnd(301); FillLevels("a", "z", 1); @@ -5912,7 +5961,7 @@ TEST(DBTest, GroupCommitTest) { ASSERT_TRUE(!itr->Valid()); delete itr; - } while (ChangeOptions()); + } while (ChangeOptions(kSkipNoSeekToLast)); } namespace { @@ -6281,7 +6330,7 @@ TEST(DBTest, Randomized) { } if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); - } while (ChangeOptions(kSkipDeletesFilterFirst)); + } while (ChangeOptions(kSkipDeletesFilterFirst | kSkipNoSeekToLast)); } TEST(DBTest, MultiGetSimple) { @@ -6397,7 +6446,6 @@ void PrefixScanInit(DBTest *dbtest) { } // namespace TEST(DBTest, PrefixScan) { - ReadOptions ro = ReadOptions(); int count; Slice prefix; Slice key; @@ -6418,45 +6466,9 @@ TEST(DBTest, PrefixScan) { options.max_background_compactions = 2; options.create_if_missing = true; options.disable_seek_compaction = true; - // Tricky: options.prefix_extractor will be released by - // NewHashSkipListRepFactory after use. 
options.memtable_factory.reset(NewHashSkipListRepFactory()); - // prefix specified, with blooms: 2 RAND I/Os - // SeekToFirst - DestroyAndReopen(&options); - PrefixScanInit(this); - count = 0; - env_->random_read_counter_.Reset(); - ro.prefix = &prefix; - iter = db_->NewIterator(ro); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - assert(iter->key().starts_with(prefix)); - count++; - } - ASSERT_OK(iter->status()); - delete iter; - ASSERT_EQ(count, 2); - ASSERT_EQ(env_->random_read_counter_.Read(), 2); - - // prefix specified, with blooms: 2 RAND I/Os - // Seek - DestroyAndReopen(&options); - PrefixScanInit(this); - count = 0; - env_->random_read_counter_.Reset(); - ro.prefix = &prefix; - iter = db_->NewIterator(ro); - for (iter->Seek(key); iter->Valid(); iter->Next()) { - assert(iter->key().starts_with(prefix)); - count++; - } - ASSERT_OK(iter->status()); - delete iter; - ASSERT_EQ(count, 2); - ASSERT_EQ(env_->random_read_counter_.Read(), 2); - - // no prefix specified: 11 RAND I/Os + // 11 RAND I/Os DestroyAndReopen(&options); PrefixScanInit(this); count = 0; @@ -6471,7 +6483,7 @@ TEST(DBTest, PrefixScan) { ASSERT_OK(iter->status()); delete iter; ASSERT_EQ(count, 2); - ASSERT_EQ(env_->random_read_counter_.Read(), 11); + ASSERT_EQ(env_->random_read_counter_.Read(), 2); Close(); delete options.filter_policy; } @@ -6620,7 +6632,6 @@ TEST(DBTest, TailingIteratorDeletes) { TEST(DBTest, TailingIteratorPrefixSeek) { ReadOptions read_options; read_options.tailing = true; - read_options.prefix_seek = true; Options options = CurrentOptions(); options.env = env_; diff --git a/db/dbformat.h b/db/dbformat.h index 27a082284..1c86b127a 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -280,7 +280,7 @@ class IterKey { delete[] key_; } key_ = space_; - buf_size_ = sizeof(buf_size_); + buf_size_ = sizeof(space_); key_size_ = 0; } diff --git a/db/internal_stats.cc b/db/internal_stats.cc index fb5e9b229..e8b22a7f8 100644 --- a/db/internal_stats.cc +++ 
b/db/internal_stats.cc @@ -37,6 +37,10 @@ DBPropertyType GetPropertyType(const Slice& property) { return kBackgroundErrors; } else if (in == "cur-size-active-mem-table") { return kCurSizeActiveMemTable; + } else if (in == "num-entries-active-mem-table") { + return kNumEntriesInMutableMemtable; + } else if (in == "num-entries-imm-mem-tables") { + return kNumEntriesInImmutableMemtable; } return kUnknown; } @@ -349,6 +353,14 @@ bool InternalStats::GetProperty(DBPropertyType property_type, // Current size of the active memtable *value = std::to_string(cfd->mem()->ApproximateMemoryUsage()); return true; + case kNumEntriesInMutableMemtable: + // Current size of the active memtable + *value = std::to_string(cfd->mem()->GetNumEntries()); + return true; + case kNumEntriesInImmutableMemtable: + // Current size of the active memtable + *value = std::to_string(cfd->imm()->current()->GetTotalNumEntries()); + return true; default: return false; } diff --git a/db/internal_stats.h b/db/internal_stats.h index 616b6cc0d..2a743593d 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -28,13 +28,16 @@ enum DBPropertyType { kLevelStats, // Return number of files and total sizes of each level kStats, // Return general statitistics of DB kSsTables, // Return a human readable string of current SST files - kNumImmutableMemTable, // Return number of immutable mem tables - kMemtableFlushPending, // Return 1 if mem table flushing is pending, - // otherwise - // 0. - kCompactionPending, // Return 1 if a compaction is pending. Otherwise 0. - kBackgroundErrors, // Return accumulated background errors encountered. + kNumImmutableMemTable, // Return number of immutable mem tables + kMemtableFlushPending, // Return 1 if mem table flushing is pending, + // otherwise 0. + kCompactionPending, // Return 1 if a compaction is pending. Otherwise 0. + kBackgroundErrors, // Return accumulated background errors encountered. 
kCurSizeActiveMemTable, // Return current size of the active memtable + kNumEntriesInMutableMemtable, // Return number of entries in the mutable + // memtable. + kNumEntriesInImmutableMemtable, // Return sum of number of entries in all + // the immutable mem tables. kUnknown, }; diff --git a/db/memtable.cc b/db/memtable.cc index 2d12708c3..b13b9f294 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -29,8 +29,7 @@ namespace rocksdb { -MemTable::MemTable(const InternalKeyComparator& cmp, - const Options& options) +MemTable::MemTable(const InternalKeyComparator& cmp, const Options& options) : comparator_(cmp), refs_(0), kArenaBlockSize(OptimizeBlockSize(options.arena_block_size)), @@ -38,6 +37,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp, arena_(options.arena_block_size), table_(options.memtable_factory->CreateMemTableRep( comparator_, &arena_, options.prefix_extractor.get())), + num_entries_(0), flush_in_progress_(false), flush_completed_(false), file_number_(0), @@ -159,14 +159,12 @@ const char* EncodeKey(std::string* scratch, const Slice& target) { class MemTableIterator: public Iterator { public: - MemTableIterator(const MemTable& mem, const ReadOptions& options) + MemTableIterator(const MemTable& mem, const ReadOptions& options, + bool enforce_total_order) : bloom_(nullptr), prefix_extractor_(mem.prefix_extractor_), - iter_(), valid_(false) { - if (options.prefix) { - iter_.reset(mem.table_->GetPrefixIterator(*options.prefix)); - } else if (options.prefix_seek) { + if (prefix_extractor_ != nullptr && !enforce_total_order) { bloom_ = mem.prefix_bloom_.get(); iter_.reset(mem.table_->GetDynamicPrefixIterator()); } else { @@ -217,7 +215,7 @@ class MemTableIterator: public Iterator { private: DynamicBloom* bloom_; const SliceTransform* const prefix_extractor_; - std::shared_ptr iter_; + std::unique_ptr iter_; bool valid_; // No copying allowed @@ -225,8 +223,9 @@ class MemTableIterator: public Iterator { void operator=(const MemTableIterator&); }; 
-Iterator* MemTable::NewIterator(const ReadOptions& options) { - return new MemTableIterator(*this, options); +Iterator* MemTable::NewIterator(const ReadOptions& options, + bool enforce_total_order) { + return new MemTableIterator(*this, options, enforce_total_order); } port::RWMutex* MemTable::GetLock(const Slice& key) { @@ -260,6 +259,7 @@ void MemTable::Add(SequenceNumber s, ValueType type, memcpy(p, value.data(), val_size); assert((unsigned)(p + val_size - buf) == (unsigned)encoded_len); table_->Insert(handle); + num_entries_++; if (prefix_bloom_) { assert(prefix_extractor_); @@ -477,7 +477,7 @@ bool MemTable::UpdateCallback(SequenceNumber seq, LookupKey lkey(key, seq); Slice memkey = lkey.memtable_key(); - std::shared_ptr iter( + std::unique_ptr iter( table_->GetIterator(lkey.user_key())); iter->Seek(lkey.internal_key(), memkey.data()); diff --git a/db/memtable.h b/db/memtable.h index 3d392820c..a4700f731 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -75,14 +75,10 @@ class MemTable { // iterator are internal keys encoded by AppendInternalKey in the // db/dbformat.{h,cc} module. // - // If options.prefix is supplied, it is passed to the underlying MemTableRep - // as a hint that the iterator only need to support access to keys with that - // specific prefix. - // If options.prefix is not supplied and options.prefix_seek is set, the - // iterator is not bound to a specific prefix. However, the semantics of - // Seek is changed - the result might only include keys with the same prefix - // as the seek-key. - Iterator* NewIterator(const ReadOptions& options = ReadOptions()); + // By default, it returns an iterator for prefix seek if prefix_extractor + // is configured in Options. + Iterator* NewIterator(const ReadOptions& options, + bool enforce_total_order = false); // Add an entry into memtable that maps key to value at the // specified sequence number and with the specified type. @@ -132,6 +128,9 @@ class MemTable { // key in the memtable. 
size_t CountSuccessiveMergeEntries(const LookupKey& key); + // Get total number of entries in the mem table. + uint64_t GetNumEntries() const { return num_entries_; } + // Returns the edits area that is needed for flushing the memtable VersionEdit* GetEdits() { return &edit_; } @@ -174,6 +173,8 @@ class MemTable { Arena arena_; unique_ptr table_; + uint64_t num_entries_; + // These are used to manage memtable flushes to storage bool flush_in_progress_; // started the flush bool flush_completed_; // finished the flush diff --git a/db/memtable_list.cc b/db/memtable_list.cc index 655ded7f1..235421962 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -78,6 +78,14 @@ void MemTableListVersion::AddIterators(const ReadOptions& options, } } +uint64_t MemTableListVersion::GetTotalNumEntries() const { + uint64_t total_num = 0; + for (auto& m : memlist_) { + total_num += m->GetNumEntries(); + } + return total_num; +} + // caller is responsible for referencing m void MemTableListVersion::Add(MemTable* m) { assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable @@ -176,8 +184,8 @@ Status MemTableList::InstallMemtableFlushResults( break; } - LogToBuffer(log_buffer, "Level-0 commit table #%lu started", - (unsigned long)m->file_number_); + LogToBuffer(log_buffer, "[%s] Level-0 commit table #%lu started", + cfd->GetName().c_str(), (unsigned long)m->file_number_); // this can release and reacquire the mutex. s = vset->LogAndApply(cfd, &m->edit_, mu, db_directory); @@ -191,8 +199,10 @@ Status MemTableList::InstallMemtableFlushResults( uint64_t mem_id = 1; // how many memtables has been flushed. 
do { if (s.ok()) { // commit new state - LogToBuffer(log_buffer, "Level-0 commit table #%lu: memtable #%lu done", - (unsigned long)m->file_number_, (unsigned long)mem_id); + LogToBuffer(log_buffer, + "[%s] Level-0 commit table #%lu: memtable #%lu done", + cfd->GetName().c_str(), (unsigned long)m->file_number_, + (unsigned long)mem_id); current_->Remove(m); assert(m->file_number_ > 0); diff --git a/db/memtable_list.h b/db/memtable_list.h index 903305779..d85380b55 100644 --- a/db/memtable_list.h +++ b/db/memtable_list.h @@ -49,6 +49,8 @@ class MemTableListVersion { void AddIterators(const ReadOptions& options, std::vector* iterator_list); + uint64_t GetTotalNumEntries() const; + private: // REQUIRE: m is mutable memtable void Add(MemTable* m); diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index 9f836b76e..4cff95952 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -47,7 +47,6 @@ class PlainTableDBTest { public: PlainTableDBTest() : env_(Env::Default()) { - ro_.prefix_seek = true; dbname_ = test::TmpDir() + "/plain_table_db_test"; ASSERT_OK(DestroyDB(dbname_, Options())); db_ = nullptr; @@ -59,8 +58,6 @@ class PlainTableDBTest { ASSERT_OK(DestroyDB(dbname_, Options())); } - ReadOptions ro_; - // Return the current option configuration. 
Options CurrentOptions() { Options options; @@ -123,7 +120,7 @@ class PlainTableDBTest { } std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) { - ReadOptions options = ro_; + ReadOptions options; options.snapshot = snapshot; std::string result; Status s = db_->Get(options, k, &result); @@ -190,7 +187,7 @@ class TestPlainTableReader : public PlainTableReader { file_size, bloom_bits_per_key, hash_table_ratio, index_sparseness, table_properties), expect_bloom_not_match_(expect_bloom_not_match) { - Status s = PopulateIndex(); + Status s = PopulateIndex(const_cast(table_properties)); ASSERT_TRUE(s.ok()); } @@ -265,6 +262,19 @@ TEST(PlainTableDBTest, Flush) { ASSERT_OK(Put("0000000000000bar", "v2")); ASSERT_OK(Put("1000000000000foo", "v3")); dbfull()->TEST_FlushMemTable(); + + TablePropertiesCollection ptc; + reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc); + ASSERT_EQ(1, ptc.size()); + auto row = ptc.begin(); + auto tp = row->second; + ASSERT_EQ( + total_order ? "4" : "12", + (tp->user_collected_properties).at("plain_table_hash_table_size")); + ASSERT_EQ( + total_order ? 
"9" : "0", + (tp->user_collected_properties).at("plain_table_sub_index_size")); + ASSERT_EQ("v3", Get("1000000000000foo")); ASSERT_EQ("v2", Get("0000000000000bar")); } @@ -356,7 +366,7 @@ TEST(PlainTableDBTest, Iterator) { dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v1", Get("1000000000foo001")); ASSERT_EQ("v__3", Get("1000000000foo003")); - Iterator* iter = dbfull()->NewIterator(ro_); + Iterator* iter = dbfull()->NewIterator(ReadOptions()); iter->Seek("1000000000foo000"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo001", iter->key().ToString()); @@ -458,7 +468,7 @@ TEST(PlainTableDBTest, IteratorLargeKeys) { dbfull()->TEST_FlushMemTable(); - Iterator* iter = dbfull()->NewIterator(ro_); + Iterator* iter = dbfull()->NewIterator(ReadOptions()); iter->Seek(key_list[0]); for (size_t i = 0; i < 7; i++) { @@ -522,7 +532,7 @@ TEST(PlainTableDBTest, IteratorReverseSuffixComparator) { dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v1", Get("1000000000foo001")); ASSERT_EQ("v__3", Get("1000000000foo003")); - Iterator* iter = dbfull()->NewIterator(ro_); + Iterator* iter = dbfull()->NewIterator(ReadOptions()); iter->Seek("1000000000foo009"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo008", iter->key().ToString()); @@ -753,7 +763,7 @@ TEST(PlainTableDBTest, NonExistingKeyToNonEmptyBucket) { ASSERT_EQ("NOT_FOUND", Get("8000000000000bar")); ASSERT_EQ("NOT_FOUND", Get("1000000000000bar")); - Iterator* iter = dbfull()->NewIterator(ro_); + Iterator* iter = dbfull()->NewIterator(ReadOptions()); iter->Seek("5000000000000bar"); ASSERT_TRUE(iter->Valid()); diff --git a/db/prefix_filter_iterator.h b/db/prefix_filter_iterator.h deleted file mode 100644 index e868c7a54..000000000 --- a/db/prefix_filter_iterator.h +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. 
An additional grant -// of patent rights can be found in the PATENTS file in the same directory. -// -// Wrap an underlying iterator, but exclude any results not starting -// with a given prefix. Seeking to keys not beginning with the prefix -// is invalid, and SeekToLast is not implemented (that would be -// non-trivial), but otherwise this iterator will behave just like the -// underlying iterator would if there happened to be no non-matching -// keys in the dataset. - -#pragma once -#include "rocksdb/iterator.h" -#include "rocksdb/slice.h" -#include "rocksdb/slice_transform.h" - -namespace rocksdb { - -class PrefixFilterIterator : public Iterator { - private: - Iterator* iter_; - const Slice &prefix_; - const SliceTransform *prefix_extractor_; - Status status_; - - public: - PrefixFilterIterator(Iterator* iter, - const Slice &prefix, - const SliceTransform* prefix_extractor) - : iter_(iter), prefix_(prefix), - prefix_extractor_(prefix_extractor), - status_(Status::OK()) { - if (prefix_extractor == nullptr) { - status_ = Status::InvalidArgument("A prefix filter may not be used " - "unless a function is also defined " - "for extracting prefixes"); - } else if (!prefix_extractor_->InRange(prefix)) { - status_ = Status::InvalidArgument("Must provide a slice for prefix which" - "is a prefix for some key"); - } - } - ~PrefixFilterIterator() { - delete iter_; - } - Slice key() const { return iter_->key(); } - Slice value() const { return iter_->value(); } - Status status() const { - if (!status_.ok()) { - return status_; - } - return iter_->status(); - } - void Next() { iter_->Next(); } - void Prev() { iter_->Prev(); } - void Seek(const Slice& k) { - if (prefix_extractor_->Transform(k) == prefix_) { - iter_->Seek(k); - } else { - status_ = Status::InvalidArgument("Seek must begin with target prefix"); - } - } - void SeekToFirst() { - Seek(prefix_); - } - void SeekToLast() { - status_ = Status::NotSupported("SeekToLast is incompatible with prefixes"); - } - bool Valid() 
const { - return (status_.ok() && iter_->Valid() && - prefix_extractor_->Transform(iter_->key()) == prefix_); - } -}; - -} // namespace rocksdb diff --git a/db/prefix_test.cc b/db/prefix_test.cc index c73cf00a6..18036bb93 100644 --- a/db/prefix_test.cc +++ b/db/prefix_test.cc @@ -17,7 +17,6 @@ #include "util/stop_watch.h" #include "util/testharness.h" -DEFINE_bool(use_prefix_hash_memtable, true, ""); DEFINE_bool(trigger_deadlock, false, "issue delete in range scan to trigger PrefixHashMap deadlock"); DEFINE_uint64(bucket_count, 100000, "number of buckets"); @@ -208,7 +207,6 @@ TEST(PrefixTest, TestResult) { auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; - read_options.prefix_seek = true; // 1. Insert one row. Slice v16("v16"); @@ -371,43 +369,6 @@ TEST(PrefixTest, TestResult) { } } -TEST(PrefixTest, FullIterator) { - while (NextOptions(1000000)) { - DestroyDB(kDbName, Options()); - auto db = OpenDb(); - WriteOptions write_options; - - std::vector prefixes; - for (uint64_t i = 0; i < 100; ++i) { - prefixes.push_back(i); - } - std::random_shuffle(prefixes.begin(), prefixes.end()); - - for (auto prefix : prefixes) { - for (uint64_t i = 0; i < 200; ++i) { - TestKey test_key(prefix, i); - Slice key = TestKeyToSlice(test_key); - ASSERT_OK(db->Put(write_options, key, Slice("0"))); - } - } - - auto func = [](void* db_void) { - auto db = reinterpret_cast(db_void); - std::unique_ptr iter(db->NewIterator(ReadOptions())); - iter->SeekToFirst(); - for (int i = 0; i < 3; ++i) { - iter->Next(); - } - }; - - auto env = Env::Default(); - for (int i = 0; i < 16; ++i) { - env->StartThread(func, reinterpret_cast(db.get())); - } - env->WaitForJoin(); - } -} - TEST(PrefixTest, DynamicPrefixIterator) { while (NextOptions(FLAGS_bucket_count)) { std::cout << "*** Mem table: " << options.memtable_factory->Name() @@ -452,9 +413,6 @@ TEST(PrefixTest, DynamicPrefixIterator) { HistogramImpl hist_seek_time; HistogramImpl hist_seek_comparison; - if 
(FLAGS_use_prefix_hash_memtable) { - read_options.prefix_seek = true; - } std::unique_ptr iter(db->NewIterator(read_options)); for (auto prefix : prefixes) { @@ -464,14 +422,15 @@ TEST(PrefixTest, DynamicPrefixIterator) { perf_context.Reset(); StopWatchNano timer(Env::Default(), true); + auto key_prefix = options.prefix_extractor->Transform(key); uint64_t total_keys = 0; - for (iter->Seek(key); iter->Valid(); iter->Next()) { + for (iter->Seek(key); + iter->Valid() && iter->key().starts_with(key_prefix); + iter->Next()) { if (FLAGS_trigger_deadlock) { std::cout << "Behold the deadlock!\n"; db->Delete(write_options, iter->key()); } - auto test_key = SliceToTestKey(iter->key()); - if (test_key->prefix != prefix) break; total_keys++; } hist_seek_time.Add(timer.ElapsedNanos()); @@ -509,116 +468,6 @@ TEST(PrefixTest, DynamicPrefixIterator) { } } -TEST(PrefixTest, PrefixHash) { - while (NextOptions(FLAGS_bucket_count)) { - std::cout << "*** Mem table: " << options.memtable_factory->Name() - << std::endl; - DestroyDB(kDbName, Options()); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - - std::vector prefixes; - for (uint64_t i = 0; i < FLAGS_total_prefixes; ++i) { - prefixes.push_back(i); - } - - if (FLAGS_random_prefix) { - std::random_shuffle(prefixes.begin(), prefixes.end()); - } - - // insert x random prefix, each with y continuous element. 
- HistogramImpl hist_put_time; - HistogramImpl hist_put_comparison; - - for (auto prefix : prefixes) { - for (uint64_t sorted = 0; sorted < FLAGS_items_per_prefix; sorted++) { - TestKey test_key(prefix, sorted); - - Slice key = TestKeyToSlice(test_key); - std::string value = "v" + std::to_string(sorted); - - perf_context.Reset(); - StopWatchNano timer(Env::Default(), true); - ASSERT_OK(db->Put(write_options, key, value)); - hist_put_time.Add(timer.ElapsedNanos()); - hist_put_comparison.Add(perf_context.user_key_comparison_count); - } - } - - std::cout << "Put key comparison: \n" << hist_put_comparison.ToString() - << "Put time: \n" << hist_put_time.ToString(); - - - // test seek existing keys - HistogramImpl hist_seek_time; - HistogramImpl hist_seek_comparison; - - for (auto prefix : prefixes) { - TestKey test_key(prefix, 0); - Slice key = TestKeyToSlice(test_key); - std::string value = "v" + std::to_string(0); - - Slice key_prefix; - if (FLAGS_use_prefix_hash_memtable) { - key_prefix = options.prefix_extractor->Transform(key); - read_options.prefix = &key_prefix; - } - std::unique_ptr iter(db->NewIterator(read_options)); - - perf_context.Reset(); - StopWatchNano timer(Env::Default(), true); - uint64_t total_keys = 0; - for (iter->Seek(key); iter->Valid(); iter->Next()) { - if (FLAGS_trigger_deadlock) { - std::cout << "Behold the deadlock!\n"; - db->Delete(write_options, iter->key()); - } - auto test_key = SliceToTestKey(iter->key()); - if (test_key->prefix != prefix) break; - total_keys++; - } - hist_seek_time.Add(timer.ElapsedNanos()); - hist_seek_comparison.Add(perf_context.user_key_comparison_count); - ASSERT_EQ(total_keys, FLAGS_items_per_prefix); - } - - std::cout << "Seek key comparison: \n" - << hist_seek_comparison.ToString() - << "Seek time: \n" - << hist_seek_time.ToString(); - - // test non-existing keys - HistogramImpl hist_no_seek_time; - HistogramImpl hist_no_seek_comparison; - - for (auto prefix = FLAGS_total_prefixes; - prefix < 
FLAGS_total_prefixes + 100; - prefix++) { - TestKey test_key(prefix, 0); - Slice key = TestKeyToSlice(test_key); - - if (FLAGS_use_prefix_hash_memtable) { - Slice key_prefix = options.prefix_extractor->Transform(key); - read_options.prefix = &key_prefix; - } - std::unique_ptr iter(db->NewIterator(read_options)); - - perf_context.Reset(); - StopWatchNano timer(Env::Default(), true); - iter->Seek(key); - hist_no_seek_time.Add(timer.ElapsedNanos()); - hist_no_seek_comparison.Add(perf_context.user_key_comparison_count); - ASSERT_TRUE(!iter->Valid()); - } - - std::cout << "non-existing Seek key comparison: \n" - << hist_no_seek_comparison.ToString() - << "non-existing Seek time: \n" - << hist_no_seek_time.ToString(); - } -} - } int main(int argc, char** argv) { diff --git a/db/repair.cc b/db/repair.cc index c154c04ac..8ae64b219 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -231,7 +231,8 @@ class Repairer { // since ExtractMetaData() will also generate edits. FileMetaData meta; meta.number = next_file_number_++; - Iterator* iter = mem->NewIterator(); + ReadOptions ro; + Iterator* iter = mem->NewIterator(ro, true /* enforce_total_order */); status = BuildTable(dbname_, env_, options_, storage_options_, table_cache_, iter, &meta, icmp_, 0, 0, kNoCompression); delete iter; diff --git a/db/simple_table_db_test.cc b/db/simple_table_db_test.cc index a67114663..affa61465 100644 --- a/db/simple_table_db_test.cc +++ b/db/simple_table_db_test.cc @@ -83,8 +83,6 @@ public: unique_ptr && file, uint64_t file_size, unique_ptr* table_reader); - bool PrefixMayMatch(const Slice& internal_prefix) override; - Iterator* NewIterator(const ReadOptions&) override; Status Get(const ReadOptions&, const Slice& key, void* arg, @@ -220,10 +218,6 @@ std::shared_ptr SimpleTableReader::GetTableProperties() return rep_->table_properties; } -bool SimpleTableReader::PrefixMayMatch(const Slice& internal_prefix) { - return true; -} - Iterator* SimpleTableReader::NewIterator(const ReadOptions& options) { 
return new SimpleTableIterator(this); } diff --git a/db/table_cache.cc b/db/table_cache.cc index 395951324..2321d035a 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -190,33 +190,6 @@ Status TableCache::GetTableProperties( return s; } -bool TableCache::PrefixMayMatch(const ReadOptions& options, - const InternalKeyComparator& icomparator, - const FileMetaData& file_meta, - const Slice& internal_prefix, bool* table_io) { - bool may_match = true; - auto table_reader = file_meta.table_reader; - Cache::Handle* table_handle = nullptr; - if (table_reader == nullptr) { - // Need to get table handle from file number - Status s = FindTable(storage_options_, icomparator, file_meta.number, - file_meta.file_size, &table_handle, table_io); - if (!s.ok()) { - return may_match; - } - table_reader = GetTableReaderFromHandle(table_handle); - } - - may_match = table_reader->PrefixMayMatch(internal_prefix); - - if (table_handle != nullptr) { - // Need to release handle if it is generated from here. - ReleaseHandle(table_handle); - } - - return may_match; -} - void TableCache::Evict(Cache* cache, uint64_t file_number) { cache->Erase(GetSliceForFileNumber(&file_number)); } diff --git a/db/table_cache.h b/db/table_cache.h index 97e0f6a27..e8cd7ea2e 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -56,13 +56,6 @@ class TableCache { const Slice&, bool), bool* table_io, void (*mark_key_may_exist)(void*) = nullptr); - // Determine whether the table may contain the specified prefix. 
If - // the table index or blooms are not in memory, this may cause an I/O - bool PrefixMayMatch(const ReadOptions& options, - const InternalKeyComparator& internal_comparator, - const FileMetaData& file_meta, - const Slice& internal_prefix, bool* table_io); - // Evict any entry for the specified file number static void Evict(Cache* cache, uint64_t file_number); diff --git a/db/tailing_iter.cc b/db/tailing_iter.cc index 41d2b225a..67b59b2c9 100644 --- a/db/tailing_iter.cc +++ b/db/tailing_iter.cc @@ -60,8 +60,8 @@ void TailingIterator::Seek(const Slice& target) { // need to do a seek if 'target' belongs to that interval (i.e. immutable_ is // already at the correct position)! // - // If options.prefix_seek is used and immutable_ is not valid, seek if target - // has a different prefix than prev_key. + // If prefix seek is used and immutable_ is not valid, seek if target has a + // different prefix than prev_key. // // prev_key_ is updated by Next(). SeekImmutable() sets prev_key_ to // 'target' -- in this case, prev_key_ is included in the interval, so @@ -70,7 +70,7 @@ void TailingIterator::Seek(const Slice& target) { const Comparator* cmp = cfd_->user_comparator(); if (!is_prev_set_ || cmp->Compare(prev_key_, target) >= !is_prev_inclusive_ || (immutable_->Valid() && cmp->Compare(target, immutable_->key()) > 0) || - (read_options_.prefix_seek && !IsSamePrefix(target))) { + (cfd_->options()->prefix_extractor != nullptr && !IsSamePrefix(target))) { SeekImmutable(target); } diff --git a/db/tailing_iter.h b/db/tailing_iter.h index a66a85bc5..6b9c51375 100644 --- a/db/tailing_iter.h +++ b/db/tailing_iter.h @@ -2,9 +2,10 @@ // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
-#ifndef ROCKSDB_LITE #pragma once +#ifndef ROCKSDB_LITE + #include #include "rocksdb/db.h" @@ -79,7 +80,7 @@ class TailingIterator : public Iterator { bool IsCurrentVersion() const; // check if SeekImmutable() is needed due to target having a different prefix - // than prev_key_ (used when options.prefix_seek is set) + // than prev_key_ (used when in prefix seek mode) bool IsSamePrefix(const Slice& target) const; // creates mutable_ and immutable_ iterators and updates version_number_ diff --git a/db/version_set.cc b/db/version_set.cc index 40a096253..b85094d91 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -31,6 +31,7 @@ #include "table/merger.h" #include "table/two_level_iterator.h" #include "table/format.h" +#include "table/plain_table_factory.h" #include "table/meta_blocks.h" #include "util/coding.h" #include "util/logging.h" @@ -217,58 +218,43 @@ class Version::LevelFileNumIterator : public Iterator { mutable EncodedFileMetaData current_value_; }; -static Iterator* GetFileIterator(void* arg, const ReadOptions& options, - const EnvOptions& soptions, - const InternalKeyComparator& icomparator, - const Slice& file_value, bool for_compaction) { - TableCache* cache = reinterpret_cast(arg); - if (file_value.size() != sizeof(EncodedFileMetaData)) { - return NewErrorIterator( - Status::Corruption("FileReader invoked with unexpected value")); - } else { - ReadOptions options_copy; - if (options.prefix) { - // suppress prefix filtering since we have already checked the - // filters once at this point - options_copy = options; - options_copy.prefix = nullptr; - } - - const EncodedFileMetaData* encoded_meta = - reinterpret_cast(file_value.data()); - FileMetaData meta(encoded_meta->number, encoded_meta->file_size); - meta.table_reader = encoded_meta->table_reader; - return cache->NewIterator( - options.prefix ? 
options_copy : options, soptions, icomparator, meta, - nullptr /* don't need reference to table*/, for_compaction); +class Version::LevelFileIteratorState : public TwoLevelIteratorState { + public: + LevelFileIteratorState(TableCache* table_cache, + const ReadOptions& read_options, const EnvOptions& env_options, + const InternalKeyComparator& icomparator, bool for_compaction, + bool prefix_enabled) + : TwoLevelIteratorState(prefix_enabled), + table_cache_(table_cache), read_options_(read_options), + env_options_(env_options), icomparator_(icomparator), + for_compaction_(for_compaction) {} + + Iterator* NewSecondaryIterator(const Slice& meta_handle) override { + if (meta_handle.size() != sizeof(EncodedFileMetaData)) { + return NewErrorIterator( + Status::Corruption("FileReader invoked with unexpected value")); + } else { + const EncodedFileMetaData* encoded_meta = + reinterpret_cast(meta_handle.data()); + FileMetaData meta(encoded_meta->number, encoded_meta->file_size); + meta.table_reader = encoded_meta->table_reader; + return table_cache_->NewIterator(read_options_, env_options_, + icomparator_, meta, nullptr /* don't need reference to table*/, + for_compaction_); + } } -} -bool Version::PrefixMayMatch(const ReadOptions& options, - const EnvOptions& soptions, - const Slice& internal_prefix, - Iterator* level_iter) const { - bool may_match = true; - level_iter->Seek(internal_prefix); - if (!level_iter->Valid()) { - // we're past end of level - may_match = false; - } else if (ExtractUserKey(level_iter->key()).starts_with( - ExtractUserKey(internal_prefix))) { - // TODO(tylerharter): do we need this case? Or are we guaranteed - // key() will always be the biggest value for this SST? 
- may_match = true; - } else { - const EncodedFileMetaData* encoded_meta = - reinterpret_cast( - level_iter->value().data()); - FileMetaData meta(encoded_meta->number, encoded_meta->file_size); - meta.table_reader = encoded_meta->table_reader; - may_match = cfd_->table_cache()->PrefixMayMatch( - options, cfd_->internal_comparator(), meta, internal_prefix, nullptr); - } - return may_match; -} + bool PrefixMayMatch(const Slice& internal_key) override { + return true; + } + + private: + TableCache* table_cache_; + const ReadOptions read_options_; + const EnvOptions& env_options_; + const InternalKeyComparator& icomparator_; + bool for_compaction_; +}; Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) { auto table_cache = cfd_->table_cache(); @@ -323,31 +309,13 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) { return Status::OK(); } -Iterator* Version::NewConcatenatingIterator(const ReadOptions& options, - const EnvOptions& soptions, - int level) const { - Iterator* level_iter = - new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level]); - if (options.prefix) { - InternalKey internal_prefix(*options.prefix, 0, kTypeValue); - if (!PrefixMayMatch(options, soptions, - internal_prefix.Encode(), level_iter)) { - delete level_iter; - // nothing in this level can match the prefix - return NewEmptyIterator(); - } - } - return NewTwoLevelIterator(level_iter, &GetFileIterator, cfd_->table_cache(), - options, soptions, cfd_->internal_comparator()); -} - -void Version::AddIterators(const ReadOptions& options, +void Version::AddIterators(const ReadOptions& read_options, const EnvOptions& soptions, std::vector* iters) { // Merge all level zero files together since they may overlap for (const FileMetaData* file : files_[0]) { iters->push_back(cfd_->table_cache()->NewIterator( - options, soptions, cfd_->internal_comparator(), *file)); + read_options, soptions, cfd_->internal_comparator(), *file)); } // For levels > 0, 
we can use a concatenating iterator that sequentially @@ -355,7 +323,11 @@ void Version::AddIterators(const ReadOptions& options, // lazily. for (int level = 1; level < num_levels_; level++) { if (!files_[level].empty()) { - iters->push_back(NewConcatenatingIterator(options, soptions, level)); + iters->push_back(NewTwoLevelIterator(new LevelFileIteratorState( + cfd_->table_cache(), read_options, soptions, + cfd_->internal_comparator(), false /* for_compaction */, + cfd_->options()->prefix_extractor != nullptr), + new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level]))); } } } @@ -767,16 +739,11 @@ void Version::ComputeCompactionScore( // If we are slowing down writes, then we better compact that first if (numfiles >= cfd_->options()->level0_stop_writes_trigger) { score = 1000000; - // Log(options_->info_log, "XXX score l0 = 1000000000 max"); } else if (numfiles >= cfd_->options()->level0_slowdown_writes_trigger) { score = 10000; - // Log(options_->info_log, "XXX score l0 = 1000000 medium"); } else { score = static_cast(numfiles) / cfd_->options()->level0_file_num_compaction_trigger; - if (score >= 1) { - // Log(options_->info_log, "XXX score l0 = %d least", (int)score); - } } } else { // Compute the ratio of current size to size limit. 
@@ -784,9 +751,6 @@ void Version::ComputeCompactionScore( TotalFileSize(files_[level]) - size_being_compacted[level]; score = static_cast(level_bytes) / cfd_->compaction_picker()->MaxBytesForLevel(level); - if (score > 1) { - // Log(options_->info_log, "XXX score l%d = %d ", level, (int)score); - } if (max_score < score) { max_score = score; max_score_level = level; @@ -1823,8 +1787,9 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, manifest_file_size_ = new_manifest_file_size; prev_log_number_ = edit->prev_log_number_; } else { - Log(options_->info_log, "Error in committing version %lu", - (unsigned long)v->GetVersionNumber()); + Log(options_->info_log, "Error in committing version %lu to [%s]", + (unsigned long)v->GetVersionNumber(), + column_family_data->GetName().c_str()); delete v; if (new_descriptor_log) { descriptor_log_.reset(); @@ -1916,7 +1881,7 @@ Status VersionSet::Recover( return Status::Corruption("CURRENT file corrupted"); } - Log(options_->info_log, "Recovering from manifest file:%s\n", + Log(options_->info_log, "Recovering from manifest file: %s\n", manifest_filename.c_str()); manifest_filename = dbname_ + "/" + manifest_filename; @@ -2162,8 +2127,8 @@ Status VersionSet::Recover( for (auto cfd : *column_family_set_) { Log(options_->info_log, - "Column family \"%s\", log number is %" PRIu64 "\n", - cfd->GetName().c_str(), cfd->GetLogNumber()); + "Column family [%s] (ID %u), log number is %" PRIu64 "\n", + cfd->GetName().c_str(), cfd->GetID(), cfd->GetLogNumber()); } } @@ -2663,10 +2628,11 @@ void VersionSet::AddLiveFiles(std::vector* live_list) { } Iterator* VersionSet::MakeInputIterator(Compaction* c) { - ReadOptions options; - options.verify_checksums = - c->column_family_data()->options()->verify_checksums_in_compaction; - options.fill_cache = false; + auto cfd = c->column_family_data(); + ReadOptions read_options; + read_options.verify_checksums = + cfd->options()->verify_checksums_in_compaction; + read_options.fill_cache 
= false; // Level-0 files have to be merged together. For other levels, // we will make a concatenating iterator per level. @@ -2678,20 +2644,19 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) { if (!c->inputs(which)->empty()) { if (c->level() + which == 0) { for (const auto& file : *c->inputs(which)) { - list[num++] = c->column_family_data()->table_cache()->NewIterator( - options, storage_options_compactions_, - c->column_family_data()->internal_comparator(), *file, nullptr, + list[num++] = cfd->table_cache()->NewIterator( + read_options, storage_options_compactions_, + cfd->internal_comparator(), *file, nullptr, true /* for compaction */); } } else { // Create concatenating iterator for the files from this level - list[num++] = NewTwoLevelIterator( - new Version::LevelFileNumIterator( - c->column_family_data()->internal_comparator(), - c->inputs(which)), - &GetFileIterator, c->column_family_data()->table_cache(), options, - storage_options_, c->column_family_data()->internal_comparator(), - true /* for compaction */); + list[num++] = NewTwoLevelIterator(new Version::LevelFileIteratorState( + cfd->table_cache(), read_options, storage_options_, + cfd->internal_comparator(), true /* for_compaction */, + false /* prefix enabled */), + new Version::LevelFileNumIterator(cfd->internal_comparator(), + c->inputs(which))); } } } @@ -2708,7 +2673,9 @@ bool VersionSet::VerifyCompactionFileConsistency(Compaction* c) { #ifndef NDEBUG Version* version = c->column_family_data()->current(); if (c->input_version() != version) { - Log(options_->info_log, "VerifyCompactionFileConsistency version mismatch"); + Log(options_->info_log, + "[%s] VerifyCompactionFileConsistency version mismatch", + c->column_family_data()->GetName().c_str()); } // verify files in level diff --git a/db/version_set.h b/db/version_set.h index 8076e6bc6..c8297f8ec 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -219,11 +219,10 @@ class Version { friend class UniversalCompactionPicker; class 
LevelFileNumIterator; - Iterator* NewConcatenatingIterator(const ReadOptions&, - const EnvOptions& soptions, - int level) const; - bool PrefixMayMatch(const ReadOptions& options, const EnvOptions& soptions, - const Slice& internal_prefix, Iterator* level_iter) const; + struct LevelFileIteratorState; + + bool PrefixMayMatch(const ReadOptions& options, Iterator* level_iter, + const Slice& internal_prefix) const; // Sort all files for this version based on their file size and // record results in files_by_size_. The largest files are listed first. diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc index c2f412c59..febd35c05 100644 --- a/db/write_batch_test.cc +++ b/db/write_batch_test.cc @@ -31,7 +31,7 @@ static std::string PrintContents(WriteBatch* b) { ColumnFamilyMemTablesDefault cf_mems_default(mem, &options); Status s = WriteBatchInternal::InsertInto(b, &cf_mems_default); int count = 0; - Iterator* iter = mem->NewIterator(); + Iterator* iter = mem->NewIterator(ReadOptions()); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ParsedInternalKey ikey; memset((void *)&ikey, 0, sizeof(ikey)); @@ -283,7 +283,7 @@ TEST(WriteBatchTest, PutGatherSlices) { namespace { class ColumnFamilyHandleImplDummy : public ColumnFamilyHandleImpl { public: - ColumnFamilyHandleImplDummy(int id) + explicit ColumnFamilyHandleImplDummy(int id) : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), id_(id) {} uint32_t GetID() const override { return id_; } diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 7d4a374d9..7f2c082d0 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -463,13 +463,9 @@ extern void rocksdb_readoptions_set_verify_checksums( unsigned char); extern void rocksdb_readoptions_set_fill_cache( rocksdb_readoptions_t*, unsigned char); -extern void rocksdb_readoptions_set_prefix_seek( - rocksdb_readoptions_t*, unsigned char); extern void rocksdb_readoptions_set_snapshot( rocksdb_readoptions_t*, const rocksdb_snapshot_t*); -extern void 
rocksdb_readoptions_set_prefix( - rocksdb_readoptions_t*, const char* key, size_t keylen); extern void rocksdb_readoptions_set_read_tier( rocksdb_readoptions_t*, int); extern void rocksdb_readoptions_set_tailing( diff --git a/include/rocksdb/memtablerep.h b/include/rocksdb/memtablerep.h index 0d251a9a6..d23f41b62 100644 --- a/include/rocksdb/memtablerep.h +++ b/include/rocksdb/memtablerep.h @@ -148,13 +148,6 @@ class MemTableRep { // GetIterator(). virtual Iterator* GetIterator(const Slice& user_key) { return GetIterator(); } - // Return an iterator over at least the keys with the specified prefix. The - // iterator may also allow access to other keys, but doesn't have to. Default: - // GetIterator(). - virtual Iterator* GetPrefixIterator(const Slice& prefix) { - return GetIterator(); - } - // Return an iterator that has a special Seek semantics. The result of // a Seek might only include keys with the same prefix as the target key. virtual Iterator* GetDynamicPrefixIterator() { return GetIterator(); } diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 7f1bf39a9..e95fb557e 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -817,7 +817,10 @@ struct ReadOptions { // If this option is set and memtable implementation allows, Seek // might only return keys with the same prefix as the seek-key - bool prefix_seek; + // + // ! DEPRECATED: prefix_seek is on by default when prefix_extractor + // is configured + // bool prefix_seek; // If "snapshot" is non-nullptr, read as of the supplied snapshot // (which must belong to the DB that is being read and which must @@ -837,7 +840,9 @@ struct ReadOptions { // prefix, and SeekToLast() is not supported. prefix filter with this // option will sometimes reduce the number of read IOPs. // Default: nullptr - const Slice* prefix; + // + // ! DEPRECATED + // const Slice* prefix; // Specify if this read request should process data that ALREADY // resides on a particular cache. 
If the required data is not @@ -856,17 +861,13 @@ struct ReadOptions { ReadOptions() : verify_checksums(true), fill_cache(true), - prefix_seek(false), snapshot(nullptr), - prefix(nullptr), read_tier(kReadAllTier), tailing(false) {} ReadOptions(bool cksum, bool cache) : verify_checksums(cksum), fill_cache(cache), - prefix_seek(false), snapshot(nullptr), - prefix(nullptr), read_tier(kReadAllTier), tailing(false) {} }; diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index b50007a32..53bf18e87 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -62,9 +62,7 @@ struct BlockBasedTableOptions { kBinarySearch, // The hash index, if enabled, will do the hash lookup when - // `ReadOption.prefix_seek == true`. User should also specify - // `Options.prefix_extractor` to allow the index block to correctly - // extract the prefix of the given key and perform hash table lookup. + // `Options.prefix_extractor` is provided. kHashSearch, }; diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 55b83f441..aa8b8a0b8 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -23,7 +23,7 @@ namespace rocksdb { // ++pos) { // ... // } -typedef std::map UserCollectedProperties; +typedef std::map UserCollectedProperties; // TableProperties contains a bunch of read-only properties of its associated // table. 
diff --git a/include/utilities/backupable_db.h b/include/utilities/backupable_db.h index 80f82154d..7c34e08e1 100644 --- a/include/utilities/backupable_db.h +++ b/include/utilities/backupable_db.h @@ -117,6 +117,29 @@ struct BackupInfo { : backup_id(_backup_id), timestamp(_timestamp), size(_size) {} }; +class BackupEngineReadOnly { + public: + virtual ~BackupEngineReadOnly() {} + + static BackupEngineReadOnly* NewReadOnlyBackupEngine( + Env* db_env, const BackupableDBOptions& options); + + // You can GetBackupInfo safely, even with other BackupEngine performing + // backups on the same directory + virtual void GetBackupInfo(std::vector* backup_info) = 0; + + // Restoring DB from backup is NOT safe when there is another BackupEngine + // running that might call DeleteBackup() or PurgeOldBackups(). It is caller's + // responsibility to synchronize the operation, i.e. don't delete the backup + // when you're restoring from it + virtual Status RestoreDBFromBackup( + BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, + const RestoreOptions& restore_options = RestoreOptions()) = 0; + virtual Status RestoreDBFromLatestBackup( + const std::string& db_dir, const std::string& wal_dir, + const RestoreOptions& restore_options = RestoreOptions()) = 0; +}; + // Please see the documentation in BackupableDB and RestoreBackupableDB class BackupEngine { public: diff --git a/java/RocksDBSample.java b/java/RocksDBSample.java index f8c1c6038..2e27e9377 100644 --- a/java/RocksDBSample.java +++ b/java/RocksDBSample.java @@ -248,6 +248,5 @@ public class RocksDBSample { // be sure to dispose c++ pointers options.dispose(); readOptions.dispose(); - filter.dispose(); } } diff --git a/java/org/rocksdb/Filter.java b/java/org/rocksdb/Filter.java index 0de392ac6..d16dedc69 100644 --- a/java/org/rocksdb/Filter.java +++ b/java/org/rocksdb/Filter.java @@ -20,7 +20,7 @@ public abstract class Filter { /** * Deletes underlying C++ filter pointer. 
*/ - public synchronized void dispose() { + protected synchronized void dispose() { if(nativeHandle_ != 0) { dispose0(nativeHandle_); } diff --git a/java/org/rocksdb/Options.java b/java/org/rocksdb/Options.java index ff289b776..cfb3c4a3f 100644 --- a/java/org/rocksdb/Options.java +++ b/java/org/rocksdb/Options.java @@ -146,15 +146,21 @@ public class Options { /** * Use the specified filter policy to reduce disk reads. + * + * Note that the caller should not dispose the input filter as + * Options.dispose() will dispose this filter. + * * @param Filter policy java instance. * @return the instance of the current Options. * @see RocksDB.open() */ public Options setFilter(Filter filter) { assert(isInitialized()); - setFilter0(nativeHandle_, filter); + setFilterHandle(nativeHandle_, filter.nativeHandle_); + filter_ = filter; return this; } + private native void setFilterHandle(long optHandle, long filterHandle); /* * Disable compaction triggered by seek. @@ -786,7 +792,8 @@ public class Options { long handle, int limit); /** - * The following two fields affect how archived logs will be deleted. + * WalTtlSeconds() and walSizeLimitMB() affect how archived logs + * will be deleted. * 1. If both set to 0, logs will be deleted asap and will not get into * the archive. * 2. If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, @@ -800,6 +807,7 @@ public class Options { * checks will be performed with ttl being first. * * @return the wal-ttl seconds + * @see walSizeLimitMB() */ public long walTtlSeconds() { assert(isInitialized()); @@ -808,7 +816,8 @@ public class Options { private native long walTtlSeconds(long handle); /** - * The following two fields affect how archived logs will be deleted. + * WalTtlSeconds() and walSizeLimitMB() affect how archived logs + * will be deleted. * 1. If both set to 0, logs will be deleted asap and will not get into * the archive. * 2. 
If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, @@ -823,13 +832,64 @@ public class Options { * * @param walTtlSeconds the ttl seconds * @return the reference to the current option. + * @see setWalSizeLimitMB() + */ + public Options setWalTtlSeconds(long walTtlSeconds) { + assert(isInitialized()); + setWalTtlSeconds(nativeHandle_, walTtlSeconds); + return this; + } + private native void setWalTtlSeconds(long handle, long walTtlSeconds); + + /** + * WalTtlSeconds() and walSizeLimitMB() affect how archived logs + * will be deleted. + * 1. If both set to 0, logs will be deleted asap and will not get into + * the archive. + * 2. If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, + * WAL files will be checked every 10 min and if total size is greater + * then WAL_size_limit_MB, they will be deleted starting with the + * earliest until size_limit is met. All empty files will be deleted. + * 3. If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then + * WAL files will be checked every WAL_ttl_secondsi / 2 and those that + * are older than WAL_ttl_seconds will be deleted. + * 4. If both are not 0, WAL files will be checked every 10 min and both + * checks will be performed with ttl being first. + * + * @return size limit in mega-bytes. + * @see walSizeLimitMB() + */ + public long walSizeLimitMB() { + assert(isInitialized()); + return walSizeLimitMB(nativeHandle_); + } + private native long walSizeLimitMB(long handle); + + /** + * WalTtlSeconds() and walSizeLimitMB() affect how archived logs + * will be deleted. + * 1. If both set to 0, logs will be deleted asap and will not get into + * the archive. + * 2. If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, + * WAL files will be checked every 10 min and if total size is greater + * then WAL_size_limit_MB, they will be deleted starting with the + * earliest until size_limit is met. All empty files will be deleted. + * 3. 
If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then + * WAL files will be checked every WAL_ttl_secondsi / 2 and those that + * are older than WAL_ttl_seconds will be deleted. + * 4. If both are not 0, WAL files will be checked every 10 min and both + * checks will be performed with ttl being first. + * + * @param sizeLimitMB size limit in mega-bytes. + * @return the reference to the current option. + * @see setWalSizeLimitMB() */ - public Options setWALTtlSeconds(long walTtlSeconds) { + public Options setWalSizeLimitMB(long sizeLimitMB) { assert(isInitialized()); - setWALTtlSeconds(nativeHandle_, walTtlSeconds); + setWalSizeLimitMB(nativeHandle_, sizeLimitMB); return this; } - private native void setWALTtlSeconds(long handle, long walTtlSeconds); + private native void setWalSizeLimitMB(long handle, long sizeLimitMB); /** * Number of bytes to preallocate (via fallocate) the manifest @@ -1199,6 +1259,1054 @@ public class Options { return this; } +/////////////////////////////////////////////////////////////////////// + /** + * Number of keys between restart points for delta encoding of keys. + * This parameter can be changed dynamically. Most clients should + * leave this parameter alone. + * Default: 16 + * + * @return the number of keys between restart points. + */ + public int blockRestartInterval() { + return blockRestartInterval(nativeHandle_); + } + private native int blockRestartInterval(long handle); + + /** + * Number of keys between restart points for delta encoding of keys. + * This parameter can be changed dynamically. Most clients should + * leave this parameter alone. + * Default: 16 + * + * @param blockRestartInterval the number of keys between restart points. + * @return the reference to the current option. 
+ */ + public Options setBlockRestartInterval(int blockRestartInterval) { + setBlockRestartInterval(nativeHandle_, blockRestartInterval); + return this; + } + private native void setBlockRestartInterval( + long handle, int blockRestartInterval); + + /** + * If true, place whole keys in the filter (not just prefixes). + * This must generally be true for gets to be efficient. + * Default: true + * + * @return if true, then whole-key-filtering is on. + */ + public boolean wholeKeyFiltering() { + return wholeKeyFiltering(nativeHandle_); + } + private native boolean wholeKeyFiltering(long handle); + + /** + * If true, place whole keys in the filter (not just prefixes). + * This must generally be true for gets to be efficient. + * Default: true + * + * @param wholeKeyFiltering if true, then whole-key-filtering is on. + * @return the reference to the current option. + */ + public Options setWholeKeyFiltering(boolean wholeKeyFiltering) { + setWholeKeyFiltering(nativeHandle_, wholeKeyFiltering); + return this; + } + private native void setWholeKeyFiltering( + long handle, boolean wholeKeyFiltering); + + /** + * If level-styled compaction is used, then this number determines + * the total number of levels. + * + * @return the number of levels. + */ + public int numLevels() { + return numLevels(nativeHandle_); + } + private native int numLevels(long handle); + + /** + * Set the number of levels for this database + * If level-styled compaction is used, then this number determines + * the total number of levels. + * + * @param numLevels the number of levels. + * @return the reference to the current option. + */ + public Options setNumLevels(int numLevels) { + setNumLevels(nativeHandle_, numLevels); + return this; + } + private native void setNumLevels( + long handle, int numLevels); + + /** + * The number of files in leve 0 to trigger compaction from level-0 to + * level-1. A value < 0 means that level-0 compaction will not be + * triggered by number of files at all. 
+ * Default: 4 + * + * @return the number of files in level 0 to trigger compaction. + */ + public int levelZeroFileNumCompactionTrigger() { + return levelZeroFileNumCompactionTrigger(nativeHandle_); + } + private native int levelZeroFileNumCompactionTrigger(long handle); + + /** + * Number of files to trigger level-0 compaction. A value <0 means that + * level-0 compaction will not be triggered by number of files at all. + * Default: 4 + * + * @param numFiles the number of files in level-0 to trigger compaction. + * @return the reference to the current option. + */ + public Options setLevelZeroFileNumCompactionTrigger( + int numFiles) { + setLevelZeroFileNumCompactionTrigger( + nativeHandle_, numFiles); + return this; + } + private native void setLevelZeroFileNumCompactionTrigger( + long handle, int numFiles); + + /** + * Soft limit on the number of level-0 files. We start slowing down writes + * at this point. A value < 0 means that no writing slow down will be + * triggered by number of files in level-0. + * + * @return the soft limit on the number of level-0 files. + */ + public int levelZeroSlowdownWritesTrigger() { + return levelZeroSlowdownWritesTrigger(nativeHandle_); + } + private native int levelZeroSlowdownWritesTrigger(long handle); + + /** + * Soft limit on number of level-0 files. We start slowing down writes at this + * point. A value <0 means that no writing slow down will be triggered by + * number of files in level-0. + * + * @param numFiles soft limit on number of level-0 files. + * @return the reference to the current option. + */ + public Options setLevelZeroSlowdownWritesTrigger( + int numFiles) { + setLevelZeroSlowdownWritesTrigger(nativeHandle_, numFiles); + return this; + } + private native void setLevelZeroSlowdownWritesTrigger( + long handle, int numFiles); + + /** + * Maximum number of level-0 files. We stop writes at this point. + * + * @return the hard limit of the number of level-0 file. 
+ */ + public int levelZeroStopWritesTrigger() { + return levelZeroStopWritesTrigger(nativeHandle_); + } + private native int levelZeroStopWritesTrigger(long handle); + + /** + * Maximum number of level-0 files. We stop writes at this point. + * + * @param numFiles the hard limit of the number of level-0 files. + * @return the reference to the current option. + */ + public Options setLevelZeroStopWritesTrigger(int numFiles) { + setLevelZeroStopWritesTrigger(nativeHandle_, numFiles); + return this; + } + private native void setLevelZeroStopWritesTrigger( + long handle, int numFiles); + + /** + * The highest level to which a new compacted memtable is pushed if it + * does not create overlap. We try to push to level 2 to avoid the + * relatively expensive level 0=>1 compactions and to avoid some + * expensive manifest file operations. We do not push all the way to + * the largest level since that can generate a lot of wasted disk + * space if the same key space is being repeatedly overwritten. + * + * @return the highest level where a new compacted memtable will be pushed. + */ + public int maxMemCompactionLevel() { + return maxMemCompactionLevel(nativeHandle_); + } + private native int maxMemCompactionLevel(long handle); + + /** + * The highest level to which a new compacted memtable is pushed if it + * does not create overlap. We try to push to level 2 to avoid the + * relatively expensive level 0=>1 compactions and to avoid some + * expensive manifest file operations. We do not push all the way to + * the largest level since that can generate a lot of wasted disk + * space if the same key space is being repeatedly overwritten. + * + * @param maxMemCompactionLevel the highest level to which a new compacted + * mem-table will be pushed. + * @return the reference to the current option. 
+ */ + public Options setMaxMemCompactionLevel(int maxMemCompactionLevel) { + setMaxMemCompactionLevel(nativeHandle_, maxMemCompactionLevel); + return this; + } + private native void setMaxMemCompactionLevel( + long handle, int maxMemCompactionLevel); + + /** + * The target file size for compaction. + * This targetFileSizeBase determines a level-1 file size. + * Target file size for level L can be calculated by + * targetFileSizeBase * (targetFileSizeMultiplier ^ (L-1)) + * For example, if targetFileSizeBase is 2MB and + * target_file_size_multiplier is 10, then each file on level-1 will + * be 2MB, and each file on level 2 will be 20MB, + * and each file on level-3 will be 200MB. + * by default targetFileSizeBase is 2MB. + * + * @return the target size of a level-0 file. + * + * @see targetFileSizeMultiplier() + */ + public int targetFileSizeBase() { + return targetFileSizeBase(nativeHandle_); + } + private native int targetFileSizeBase(long handle); + + /** + * The target file size for compaction. + * This targetFileSizeBase determines a level-1 file size. + * Target file size for level L can be calculated by + * targetFileSizeBase * (targetFileSizeMultiplier ^ (L-1)) + * For example, if targetFileSizeBase is 2MB and + * target_file_size_multiplier is 10, then each file on level-1 will + * be 2MB, and each file on level 2 will be 20MB, + * and each file on level-3 will be 200MB. + * by default targetFileSizeBase is 2MB. + * + * @param targetFileSizeBase the target size of a level-0 file. + * @return the reference to the current option. + * + * @see setTargetFileSizeMultiplier() + */ + public Options setTargetFileSizeBase(int targetFileSizeBase) { + setTargetFileSizeBase(nativeHandle_, targetFileSizeBase); + return this; + } + private native void setTargetFileSizeBase( + long handle, int targetFileSizeBase); + + /** + * targetFileSizeMultiplier defines the size ratio between a + * level-(L+1) file and level-L file. 
+ * By default targetFileSizeMultiplier is 1, meaning + * files in different levels have the same target. + * + * @return the size ratio between a level-(L+1) file and level-L file. + */ + public int targetFileSizeMultiplier() { + return targetFileSizeMultiplier(nativeHandle_); + } + private native int targetFileSizeMultiplier(long handle); + + /** + * targetFileSizeMultiplier defines the size ratio between a + * level-L file and level-(L+1) file. + * By default target_file_size_multiplier is 1, meaning + * files in different levels have the same target. + * + * @param multiplier the size ratio between a level-(L+1) file + * and level-L file. + * @return the reference to the current option. + */ + public Options setTargetFileSizeMultiplier(int multiplier) { + setTargetFileSizeMultiplier(nativeHandle_, multiplier); + return this; + } + private native void setTargetFileSizeMultiplier( + long handle, int multiplier); + + /** + * The upper-bound of the total size of level-1 files in bytes. + * Maximum number of bytes for level L can be calculated as + * (maxBytesForLevelBase) * (maxBytesForLevelMultiplier ^ (L-1)) + * For example, if maxBytesForLevelBase is 20MB, and if + * max_bytes_for_level_multiplier is 10, total data size for level-1 + * will be 20MB, total file size for level-2 will be 200MB, + * and total file size for level-3 will be 2GB. + * by default 'maxBytesForLevelBase' is 10MB. + * + * @return the upper-bound of the total size of leve-1 files in bytes. + * @see maxBytesForLevelMultiplier() + */ + public long maxBytesForLevelBase() { + return maxBytesForLevelBase(nativeHandle_); + } + private native long maxBytesForLevelBase(long handle); + + /** + * The upper-bound of the total size of level-1 files in bytes. 
+ * Maximum number of bytes for level L can be calculated as + * (maxBytesForLevelBase) * (maxBytesForLevelMultiplier ^ (L-1)) + * For example, if maxBytesForLevelBase is 20MB, and if + * max_bytes_for_level_multiplier is 10, total data size for level-1 + * will be 20MB, total file size for level-2 will be 200MB, + * and total file size for level-3 will be 2GB. + * by default 'maxBytesForLevelBase' is 10MB. + * + * @param maxBytesForLevelBase the upper-bound of the total size of + * level-1 files in bytes. + * @return the reference to the current option. + * @see setMaxBytesForLevelMultiplier() + */ + public Options setMaxBytesForLevelBase(long maxBytesForLevelBase) { + setMaxBytesForLevelBase(nativeHandle_, maxBytesForLevelBase); + return this; + } + private native void setMaxBytesForLevelBase( + long handle, long maxBytesForLevelBase); + + /** + * The ratio between the total size of level-(L+1) files and the total + * size of level-L files for all L. + * DEFAULT: 10 + * + * @return the ratio between the total size of level-(L+1) files and + * the total size of level-L files for all L. + * @see maxBytesForLevelBase() + */ + public int maxBytesForLevelMultiplier() { + return maxBytesForLevelMultiplier(nativeHandle_); + } + private native int maxBytesForLevelMultiplier(long handle); + + /** + * The ratio between the total size of level-(L+1) files and the total + * size of level-L files for all L. + * DEFAULT: 10 + * + * @param multiplier the ratio between the total size of level-(L+1) + * files and the total size of level-L files for all L. + * @return the reference to the current option. + * @see setMaxBytesForLevelBase() + */ + public Options setMaxBytesForLevelMultiplier(int multiplier) { + setMaxBytesForLevelMultiplier(nativeHandle_, multiplier); + return this; + } + private native void setMaxBytesForLevelMultiplier( + long handle, int multiplier); + + /** + * Maximum number of bytes in all compacted files. 
We avoid expanding + * the lower level file set of a compaction if it would make the + * total compaction cover more than + * (expanded_compaction_factor * targetFileSizeLevel()) many bytes. + * + * @return the maximum number of bytes in all compacted files. + * @see sourceCompactionFactor() + */ + public int expandedCompactionFactor() { + return expandedCompactionFactor(nativeHandle_); + } + private native int expandedCompactionFactor(long handle); + + /** + * Maximum number of bytes in all compacted files. We avoid expanding + * the lower level file set of a compaction if it would make the + * total compaction cover more than + * (expanded_compaction_factor * targetFileSizeLevel()) many bytes. + * + * @param expandedCompactionFactor the maximum number of bytes in all + * compacted files. + * @return the reference to the current option. + * @see setSourceCompactionFactor() + */ + public Options setExpandedCompactionFactor(int expandedCompactionFactor) { + setExpandedCompactionFactor(nativeHandle_, expandedCompactionFactor); + return this; + } + private native void setExpandedCompactionFactor( + long handle, int expandedCompactionFactor); + + /** + * Maximum number of bytes in all source files to be compacted in a + * single compaction run. We avoid picking too many files in the + * source level so that we do not exceed the total source bytes + * for compaction to exceed + * (source_compaction_factor * targetFileSizeLevel()) many bytes. + * Default:1, i.e. pick maxfilesize amount of data as the source of + * a compaction. + * + * @return the maximum number of bytes in all source files to be compacted. + * @see expandedCompactionFactor() + */ + public int sourceCompactionFactor() { + return sourceCompactionFactor(nativeHandle_); + } + private native int sourceCompactionFactor(long handle); + + /** + * Maximum number of bytes in all source files to be compacted in a + * single compaction run. 
We avoid picking too many files in the + * source level so that we do not exceed the total source bytes + * for compaction to exceed + * (source_compaction_factor * targetFileSizeLevel()) many bytes. + * Default:1, i.e. pick maxfilesize amount of data as the source of + * a compaction. + * + * @param sourceCompactionFactor the maximum number of bytes in all + * source files to be compacted in a single compaction run. + * @return the reference to the current option. + * @see setExpandedCompactionFactor() + */ + public Options setSourceCompactionFactor(int sourceCompactionFactor) { + setSourceCompactionFactor(nativeHandle_, sourceCompactionFactor); + return this; + } + private native void setSourceCompactionFactor( + long handle, int sourceCompactionFactor); + + /** + * Control maximum bytes of overlaps in grandparent (i.e., level+2) before we + * stop building a single file in a level->level+1 compaction. + * + * @return maximum bytes of overlaps in "grandparent" level. + */ + public int maxGrandparentOverlapFactor() { + return maxGrandparentOverlapFactor(nativeHandle_); + } + private native int maxGrandparentOverlapFactor(long handle); + + /** + * Control maximum bytes of overlaps in grandparent (i.e., level+2) before we + * stop building a single file in a level->level+1 compaction. + * + * @param maxGrandparentOverlapFactor maximum bytes of overlaps in + * "grandparent" level. + * @return the reference to the current option. + */ + public Options setMaxGrandparentOverlapFactor( + int maxGrandparentOverlapFactor) { + setMaxGrandparentOverlapFactor(nativeHandle_, maxGrandparentOverlapFactor); + return this; + } + private native void setMaxGrandparentOverlapFactor( + long handle, int maxGrandparentOverlapFactor); + + /** + * Puts are delayed 0-1 ms when any level has a compaction score that exceeds + * soft_rate_limit. This is ignored when == 0.0. + * CONSTRAINT: soft_rate_limit <= hard_rate_limit. 
If this constraint does not + * hold, RocksDB will set soft_rate_limit = hard_rate_limit + * Default: 0 (disabled) + * + * @return soft-rate-limit for put delay. + */ + public double softRateLimit() { + return softRateLimit(nativeHandle_); + } + private native double softRateLimit(long handle); + + /** + * Puts are delayed 0-1 ms when any level has a compaction score that exceeds + * soft_rate_limit. This is ignored when == 0.0. + * CONSTRAINT: soft_rate_limit <= hard_rate_limit. If this constraint does not + * hold, RocksDB will set soft_rate_limit = hard_rate_limit + * Default: 0 (disabled) + * + * @param softRateLimit the soft-rate-limit of a compaction score + * for put delay. + * @return the reference to the current option. + */ + public Options setSoftRateLimit(double softRateLimit) { + setSoftRateLimit(nativeHandle_, softRateLimit); + return this; + } + private native void setSoftRateLimit( + long handle, double softRateLimit); + + /** + * Puts are delayed 1ms at a time when any level has a compaction score that + * exceeds hard_rate_limit. This is ignored when <= 1.0. + * Default: 0 (disabled) + * + * @return the hard-rate-limit of a compaction score for put delay. + */ + public double hardRateLimit() { + return hardRateLimit(nativeHandle_); + } + private native double hardRateLimit(long handle); + + /** + * Puts are delayed 1ms at a time when any level has a compaction score that + * exceeds hard_rate_limit. This is ignored when <= 1.0. + * Default: 0 (disabled) + * + * @param hardRateLimit the hard-rate-limit of a compaction score for put + * delay. + * @return the reference to the current option. + */ + public Options setHardRateLimit(double hardRateLimit) { + setHardRateLimit(nativeHandle_, hardRateLimit); + return this; + } + private native void setHardRateLimit( + long handle, double hardRateLimit); + + /** + * The maximum time interval a put will be stalled when hard_rate_limit + * is enforced. If 0, then there is no limit. 
+ * Default: 1000 + * + * @return the maximum time interval a put will be stalled when + * hard_rate_limit is enforced. + */ + public int rateLimitDelayMaxMilliseconds() { + return rateLimitDelayMaxMilliseconds(nativeHandle_); + } + private native int rateLimitDelayMaxMilliseconds(long handle); + + /** + * The maximum time interval a put will be stalled when hard_rate_limit + * is enforced. If 0, then there is no limit. + * Default: 1000 + * + * @param rateLimitDelayMaxMilliseconds the maximum time interval a put + * will be stalled. + * @return the reference to the current option. + */ + public Options setRateLimitDelayMaxMilliseconds( + int rateLimitDelayMaxMilliseconds) { + setRateLimitDelayMaxMilliseconds( + nativeHandle_, rateLimitDelayMaxMilliseconds); + return this; + } + private native void setRateLimitDelayMaxMilliseconds( + long handle, int rateLimitDelayMaxMilliseconds); + + /** + * Disable block cache. If this is set to true, + * then no block cache should be used, and the block_cache should + * point to a nullptr object. + * Default: false + * + * @return true if block cache is disabled. + */ + public boolean noBlockCache() { + return noBlockCache(nativeHandle_); + } + private native boolean noBlockCache(long handle); + + /** + * Disable block cache. If this is set to true, + * then no block cache should be used, and the block_cache should + * point to a nullptr object. + * Default: false + * + * @param noBlockCache true if block-cache is disabled. + * @return the reference to the current option. + */ + public Options setNoBlockCache(boolean noBlockCache) { + setNoBlockCache(nativeHandle_, noBlockCache); + return this; + } + private native void setNoBlockCache( + long handle, boolean noBlockCache); + + /** + * The size of one block in arena memory allocation. + * If <= 0, a proper value is automatically calculated (usually 1/10 of + * writer_buffer_size). 
+ * + * There are two additional restrictions on the specified size: + * (1) size should be in the range of [4096, 2 << 30] and + * (2) be the multiple of the CPU word (which helps with the memory + * alignment). + * + * We'll automatically check and adjust the size number to make sure it + * conforms to the restrictions. + * Default: 0 + * + * @return the size of an arena block + */ + public long arenaBlockSize() { + return arenaBlockSize(nativeHandle_); + } + private native long arenaBlockSize(long handle); + + /** + * The size of one block in arena memory allocation. + * If <= 0, a proper value is automatically calculated (usually 1/10 of + * writer_buffer_size). + * + * There are two additional restrictions on the specified size: + * (1) size should be in the range of [4096, 2 << 30] and + * (2) be the multiple of the CPU word (which helps with the memory + * alignment). + * + * We'll automatically check and adjust the size number to make sure it + * conforms to the restrictions. + * Default: 0 + * + * @param arenaBlockSize the size of an arena block + * @return the reference to the current option. + */ + public Options setArenaBlockSize(long arenaBlockSize) { + setArenaBlockSize(nativeHandle_, arenaBlockSize); + return this; + } + private native void setArenaBlockSize( + long handle, long arenaBlockSize); + + /** + * Disable automatic compactions. Manual compactions can still + * be issued on this column family + * + * @return true if auto-compactions are disabled. + */ + public boolean disableAutoCompactions() { + return disableAutoCompactions(nativeHandle_); + } + private native boolean disableAutoCompactions(long handle); + + /** + * Disable automatic compactions. Manual compactions can still + * be issued on this column family + * + * @param disableAutoCompactions true if auto-compactions are disabled. + * @return the reference to the current option. 
+ */ + public Options setDisableAutoCompactions(boolean disableAutoCompactions) { + setDisableAutoCompactions(nativeHandle_, disableAutoCompactions); + return this; + } + private native void setDisableAutoCompactions( + long handle, boolean disableAutoCompactions); + + /** + * Purge duplicate/deleted keys when a memtable is flushed to storage. + * Default: true + * + * @return true if purging keys is disabled. + */ + public boolean purgeRedundantKvsWhileFlush() { + return purgeRedundantKvsWhileFlush(nativeHandle_); + } + private native boolean purgeRedundantKvsWhileFlush(long handle); + + /** + * Purge duplicate/deleted keys when a memtable is flushed to storage. + * Default: true + * + * @param purgeRedundantKvsWhileFlush true if purging keys is disabled. + * @return the reference to the current option. + */ + public Options setPurgeRedundantKvsWhileFlush( + boolean purgeRedundantKvsWhileFlush) { + setPurgeRedundantKvsWhileFlush( + nativeHandle_, purgeRedundantKvsWhileFlush); + return this; + } + private native void setPurgeRedundantKvsWhileFlush( + long handle, boolean purgeRedundantKvsWhileFlush); + + /** + * This is used to close a block before it reaches the configured + * 'block_size'. If the percentage of free space in the current block is less + * than this specified number and adding a new record to the block will + * exceed the configured block size, then this block will be closed and the + * new record will be written to the next block. + * Default is 10. + * + * @return the target block size + */ + public int blockSizeDeviation() { + return blockSizeDeviation(nativeHandle_); + } + private native int blockSizeDeviation(long handle); + + /** + * This is used to close a block before it reaches the configured + * 'block_size'. 
If the percentage of free space in the current block is less + * than this specified number and adding a new record to the block will + * exceed the configured block size, then this block will be closed and the + * new record will be written to the next block. + * Default is 10. + * + * @param blockSizeDeviation the target block size + * @return the reference to the current option. + */ + public Options setBlockSizeDeviation(int blockSizeDeviation) { + setBlockSizeDeviation(nativeHandle_, blockSizeDeviation); + return this; + } + private native void setBlockSizeDeviation( + long handle, int blockSizeDeviation); + + /** + * If true, compaction will verify checksum on every read that happens + * as part of compaction + * Default: true + * + * @return true if compaction verifies checksum on every read. + */ + public boolean verifyChecksumsInCompaction() { + return verifyChecksumsInCompaction(nativeHandle_); + } + private native boolean verifyChecksumsInCompaction(long handle); + + /** + * If true, compaction will verify checksum on every read that happens + * as part of compaction + * Default: true + * + * @param verifyChecksumsInCompaction true if compaction verifies + * checksum on every read. + * @return the reference to the current option. + */ + public Options setVerifyChecksumsInCompaction( + boolean verifyChecksumsInCompaction) { + setVerifyChecksumsInCompaction( + nativeHandle_, verifyChecksumsInCompaction); + return this; + } + private native void setVerifyChecksumsInCompaction( + long handle, boolean verifyChecksumsInCompaction); + + /** + * Use KeyMayExist API to filter deletes when this is true. + * If KeyMayExist returns false, i.e. the key definitely does not exist, then + * the delete is a noop. KeyMayExist only incurs in-memory look up. + * This optimization avoids writing the delete to storage when appropriate. + * Default: false + * + * @return true if filter-deletes behavior is on. 
+ */ + public boolean filterDeletes() { + return filterDeletes(nativeHandle_); + } + private native boolean filterDeletes(long handle); + + /** + * Use KeyMayExist API to filter deletes when this is true. + * If KeyMayExist returns false, i.e. the key definitely does not exist, then + * the delete is a noop. KeyMayExist only incurs in-memory look up. + * This optimization avoids writing the delete to storage when appropriate. + * Default: false + * + * @param filterDeletes true if filter-deletes behavior is on. + * @return the reference to the current option. + */ + public Options setFilterDeletes(boolean filterDeletes) { + setFilterDeletes(nativeHandle_, filterDeletes); + return this; + } + private native void setFilterDeletes( + long handle, boolean filterDeletes); + + /** + * An iteration->Next() sequentially skips over keys with the same + * user-key unless this option is set. This number specifies the number + * of keys (with the same userkey) that will be sequentially + * skipped before a reseek is issued. + * Default: 8 + * + * @return the number of keys could be skipped in a iteration. + */ + public long maxSequentialSkipInIterations() { + return maxSequentialSkipInIterations(nativeHandle_); + } + private native long maxSequentialSkipInIterations(long handle); + + /** + * An iteration->Next() sequentially skips over keys with the same + * user-key unless this option is set. This number specifies the number + * of keys (with the same userkey) that will be sequentially + * skipped before a reseek is issued. + * Default: 8 + * + * @param maxSequentialSkipInIterations the number of keys could + * be skipped in a iteration. + * @return the reference to the current option. 
+ */ + public Options setMaxSequentialSkipInIterations(long maxSequentialSkipInIterations) { + setMaxSequentialSkipInIterations(nativeHandle_, maxSequentialSkipInIterations); + return this; + } + private native void setMaxSequentialSkipInIterations( + long handle, long maxSequentialSkipInIterations); + + /** + * Allows thread-safe inplace updates. + * If inplace_callback function is not set, + * Put(key, new_value) will update inplace the existing_value iff + * * key exists in current memtable + * * new sizeof(new_value) <= sizeof(existing_value) + * * existing_value for that key is a put i.e. kTypeValue + * If inplace_callback function is set, check doc for inplace_callback. + * Default: false. + * + * @return true if thread-safe inplace updates are allowed. + */ + public boolean inplaceUpdateSupport() { + return inplaceUpdateSupport(nativeHandle_); + } + private native boolean inplaceUpdateSupport(long handle); + + /** + * Allows thread-safe inplace updates. + * If inplace_callback function is not set, + * Put(key, new_value) will update inplace the existing_value iff + * * key exists in current memtable + * * new sizeof(new_value) <= sizeof(existing_value) + * * existing_value for that key is a put i.e. kTypeValue + * If inplace_callback function is set, check doc for inplace_callback. + * Default: false. + * + * @param inplaceUpdateSupport true if thread-safe inplace updates + * are allowed. + * @return the reference to the current option. + */ + public Options setInplaceUpdateSupport(boolean inplaceUpdateSupport) { + setInplaceUpdateSupport(nativeHandle_, inplaceUpdateSupport); + return this; + } + private native void setInplaceUpdateSupport( + long handle, boolean inplaceUpdateSupport); + + /** + * Number of locks used for inplace update + * Default: 10000, if inplace_update_support = true, else 0. + * + * @return the number of locks used for inplace update. 
+ */ + public long inplaceUpdateNumLocks() { + return inplaceUpdateNumLocks(nativeHandle_); + } + private native long inplaceUpdateNumLocks(long handle); + + /** + * Number of locks used for inplace update + * Default: 10000, if inplace_update_support = true, else 0. + * + * @param inplaceUpdateNumLocks the number of locks used for + * inplace updates. + * @return the reference to the current option. + */ + public Options setInplaceUpdateNumLocks(long inplaceUpdateNumLocks) { + setInplaceUpdateNumLocks(nativeHandle_, inplaceUpdateNumLocks); + return this; + } + private native void setInplaceUpdateNumLocks( + long handle, long inplaceUpdateNumLocks); + + /** + * Returns the number of bits used in the prefix bloom filter. + * + * This value will be used only when a prefix-extractor is specified. + * + * @return the number of bloom-bits. + * @see useFixedLengthPrefixExtractor() + */ + public int memtablePrefixBloomBits() { + return memtablePrefixBloomBits(nativeHandle_); + } + private native int memtablePrefixBloomBits(long handle); + + /** + * Sets the number of bits used in the prefix bloom filter. + * + * This value will be used only when a prefix-extractor is specified. + * + * @param memtablePrefixBloomBits the number of bits used in the + * prefix bloom filter. + * @return the reference to the current option. + */ + public Options setMemtablePrefixBloomBits(int memtablePrefixBloomBits) { + setMemtablePrefixBloomBits(nativeHandle_, memtablePrefixBloomBits); + return this; + } + private native void setMemtablePrefixBloomBits( + long handle, int memtablePrefixBloomBits); + + /** + * The number of hash probes per key used in the mem-table. + * + * @return the number of hash probes per key. + */ + public int memtablePrefixBloomProbes() { + return memtablePrefixBloomProbes(nativeHandle_); + } + private native int memtablePrefixBloomProbes(long handle); + + /** + * The number of hash probes per key used in the mem-table. 
+ * + * @param memtablePrefixBloomProbes the number of hash probes per key. + * @return the reference to the current option. + */ + public Options setMemtablePrefixBloomProbes(int memtablePrefixBloomProbes) { + setMemtablePrefixBloomProbes(nativeHandle_, memtablePrefixBloomProbes); + return this; + } + private native void setMemtablePrefixBloomProbes( + long handle, int memtablePrefixBloomProbes); + + /** + * Control locality of bloom filter probes to improve cache miss rate. + * This option only applies to memtable prefix bloom and plaintable + * prefix bloom. It essentially limits the max number of cache lines each + * bloom filter check can touch. + * This optimization is turned off when set to 0. The number should never + * be greater than number of probes. This option can boost performance + * for in-memory workload but should use with care since it can cause + * higher false positive rate. + * Default: 0 + * + * @return the level of locality of bloom-filter probes. + * @see setMemTablePrefixBloomProbes + */ + public int bloomLocality() { + return bloomLocality(nativeHandle_); + } + private native int bloomLocality(long handle); + + /** + * Control locality of bloom filter probes to improve cache miss rate. + * This option only applies to memtable prefix bloom and plaintable + * prefix bloom. It essentially limits the max number of cache lines each + * bloom filter check can touch. + * This optimization is turned off when set to 0. The number should never + * be greater than number of probes. This option can boost performance + * for in-memory workload but should use with care since it can cause + * higher false positive rate. + * Default: 0 + * + * @param bloomLocality the level of locality of bloom-filter probes. + * @return the reference to the current option. 
+ */ + public Options setBloomLocality(int bloomLocality) { + setBloomLocality(nativeHandle_, bloomLocality); + return this; + } + private native void setBloomLocality( + long handle, int bloomLocality); + + /** + * Maximum number of successive merge operations on a key in the memtable. + * + * When a merge operation is added to the memtable and the maximum number of + * successive merges is reached, the value of the key will be calculated and + * inserted into the memtable instead of the merge operation. This will + * ensure that there are never more than max_successive_merges merge + * operations in the memtable. + * + * Default: 0 (disabled) + * + * @return the maximum number of successive merges. + */ + public long maxSuccessiveMerges() { + return maxSuccessiveMerges(nativeHandle_); + } + private native long maxSuccessiveMerges(long handle); + + /** + * Maximum number of successive merge operations on a key in the memtable. + * + * When a merge operation is added to the memtable and the maximum number of + * successive merges is reached, the value of the key will be calculated and + * inserted into the memtable instead of the merge operation. This will + * ensure that there are never more than max_successive_merges merge + * operations in the memtable. + * + * Default: 0 (disabled) + * + * @param maxSuccessiveMerges the maximum number of successive merges. + * @return the reference to the current option. + */ + public Options setMaxSuccessiveMerges(long maxSuccessiveMerges) { + setMaxSuccessiveMerges(nativeHandle_, maxSuccessiveMerges); + return this; + } + private native void setMaxSuccessiveMerges( + long handle, long maxSuccessiveMerges); + + /** + * The minimum number of write buffers that will be merged together + * before writing to storage. If set to 1, then + * all write buffers are fushed to L0 as individual files and this increases + * read amplification because a get request has to check in all of these + * files. 
Also, an in-memory merge may result in writing lesser + * data to storage if there are duplicate records in each of these + * individual write buffers. Default: 1 + * + * @return the minimum number of write buffers that will be merged together. + */ + public int minWriteBufferNumberToMerge() { + return minWriteBufferNumberToMerge(nativeHandle_); + } + private native int minWriteBufferNumberToMerge(long handle); + + /** + * The minimum number of write buffers that will be merged together + * before writing to storage. If set to 1, then + * all write buffers are flushed to L0 as individual files and this increases + * read amplification because a get request has to check in all of these + * files. Also, an in-memory merge may result in writing lesser + * data to storage if there are duplicate records in each of these + * individual write buffers. Default: 1 + * + * @param minWriteBufferNumberToMerge the minimum number of write buffers + * that will be merged together. + * @return the reference to the current option. + */ + public Options setMinWriteBufferNumberToMerge(int minWriteBufferNumberToMerge) { + setMinWriteBufferNumberToMerge(nativeHandle_, minWriteBufferNumberToMerge); + return this; + } + private native void setMinWriteBufferNumberToMerge( + long handle, int minWriteBufferNumberToMerge); + + /** + * The number of partial merge operands to accumulate before partial + * merge will be performed. Partial merge will not be called + * if the list of values to merge is less than min_partial_merge_operands. + * + * If min_partial_merge_operands < 2, then it will be treated as 2. + * + * Default: 2 + * + * @return the number of partial merge operands to accumulate before + * partial merge is performed. + */ + public int minPartialMergeOperands() { + return minPartialMergeOperands(nativeHandle_); + } + private native int minPartialMergeOperands(long handle); + + /** + * The number of partial merge operands to accumulate before partial + * merge will be performed. 
Partial merge will not be called + * if the list of values to merge is less than min_partial_merge_operands. + * + * If min_partial_merge_operands < 2, then it will be treated as 2. + * + * Default: 2 + * + * @param minPartialMergeOperands + * @return the reference to the current option. + */ + public Options setMinPartialMergeOperands(int minPartialMergeOperands) { + setMinPartialMergeOperands(nativeHandle_, minPartialMergeOperands); + return this; + } + private native void setMinPartialMergeOperands( + long handle, int minPartialMergeOperands); + /** * Release the memory allocated for the current instance * in the c++ side. @@ -1250,8 +2358,7 @@ public class Options { private native void useFixedLengthPrefixExtractor( long handle, int prefixLength); - private native void setFilter0(long optHandle, Filter fp); - long nativeHandle_; long cacheSize_; + Filter filter_; } diff --git a/java/org/rocksdb/RocksDB.java b/java/org/rocksdb/RocksDB.java index 649433b8c..728798ade 100644 --- a/java/org/rocksdb/RocksDB.java +++ b/java/org/rocksdb/RocksDB.java @@ -39,6 +39,7 @@ public class RocksDB { // the c++ one. Options options = new Options(); db.open(options.nativeHandle_, options.cacheSize_, path); + db.transferCppRawPointersOwnershipFrom(options); options.dispose(); return db; } @@ -49,8 +50,12 @@ public class RocksDB { */ public static RocksDB open(Options options, String path) throws RocksDBException { + // when non-default Options is used, keeping an Options reference + // in RocksDB can prevent Java to GC during the life-time of + // the currently-created RocksDB. 
RocksDB db = new RocksDB(); db.open(options.nativeHandle_, options.cacheSize_, path); + db.transferCppRawPointersOwnershipFrom(options); return db; } @@ -253,6 +258,17 @@ public class RocksDB { nativeHandle_ = 0; } + /** + * Transfer the ownership of all c++ raw-pointers from Options + * to RocksDB to ensure the life-time of those raw-pointers + * will be at least as long as the life-time of any RocksDB + * that uses these raw-pointers. + */ + protected void transferCppRawPointersOwnershipFrom(Options opt) { + filter_ = opt.filter_; + opt.filter_ = null; + } + // native methods protected native void open( long optionsHandle, long cacheSize, String path) throws RocksDBException; @@ -289,4 +305,5 @@ public class RocksDB { protected native void close0(); protected long nativeHandle_; + protected Filter filter_; } diff --git a/java/org/rocksdb/benchmark/DbBenchmark.java b/java/org/rocksdb/benchmark/DbBenchmark.java index 0106413cf..37b08bc15 100644 --- a/java/org/rocksdb/benchmark/DbBenchmark.java +++ b/java/org/rocksdb/benchmark/DbBenchmark.java @@ -54,6 +54,10 @@ class Stats { StringBuilder message_; boolean excludeFromMerge_; + // TODO(yhchiang): use the following arguments: + // (Long)Flag.stats_interval + // (Integer)Flag.stats_per_interval + Stats(int id) { id_ = id; nextReport_ = 100; @@ -163,6 +167,7 @@ public class DbBenchmark { } abstract class BenchmarkTask implements Callable { + // TODO(yhchiang): use (Integer)Flag.perf_level. 
public BenchmarkTask( int tid, long randSeed, long numEntries, long keyRange) { tid_ = tid; @@ -311,13 +316,73 @@ public class DbBenchmark { } } + class WriteUniqueRandomTask extends WriteTask { + static final int MAX_BUFFER_SIZE = 10000000; + public WriteUniqueRandomTask( + int tid, long randSeed, long numEntries, long keyRange, + WriteOptions writeOpt, long entriesPerBatch) { + super(tid, randSeed, numEntries, keyRange, + writeOpt, entriesPerBatch); + initRandomKeySequence(); + } + public WriteUniqueRandomTask( + int tid, long randSeed, long numEntries, long keyRange, + WriteOptions writeOpt, long entriesPerBatch, + long maxWritesPerSecond) { + super(tid, randSeed, numEntries, keyRange, + writeOpt, entriesPerBatch, + maxWritesPerSecond); + initRandomKeySequence(); + } + @Override protected void getKey(byte[] key, long id, long range) { + generateKeyFromLong(key, nextUniqueRandom()); + } + + protected void initRandomKeySequence() { + bufferSize_ = MAX_BUFFER_SIZE; + if (bufferSize_ > keyRange_) { + bufferSize_ = (int) keyRange_; + } + currentKeyCount_ = bufferSize_; + keyBuffer_ = new long[MAX_BUFFER_SIZE]; + for (int k = 0; k < bufferSize_; ++k) { + keyBuffer_[k] = k; + } + } + + /** + * Semi-randomly return the next unique key. It is guaranteed to be + * fully random if keyRange_ <= MAX_BUFFER_SIZE. + */ + long nextUniqueRandom() { + if (bufferSize_ == 0) { + System.err.println("bufferSize_ == 0."); + return 0; + } + int r = rand_.nextInt(bufferSize_); + // randomly pick one from the keyBuffer + long randKey = keyBuffer_[r]; + if (currentKeyCount_ < keyRange_) { + // if we have not yet inserted all keys, insert next new key to [r]. + keyBuffer_[r] = currentKeyCount_++; + } else { + // move the last element to [r] and decrease the size by 1. 
+ keyBuffer_[r] = keyBuffer_[--bufferSize_]; + } + return randKey; + } + + int bufferSize_; + long currentKeyCount_; + long[] keyBuffer_; + } + class ReadRandomTask extends BenchmarkTask { public ReadRandomTask( int tid, long randSeed, long numEntries, long keyRange) { super(tid, randSeed, numEntries, keyRange); } @Override public void runTask() throws RocksDBException { - stats_.found_ = 0; byte[] key = new byte[keySize_]; byte[] value = new byte[valueSize_]; for (long i = 0; i < numEntries_; i++) { @@ -338,18 +403,22 @@ public class DbBenchmark { class ReadSequentialTask extends BenchmarkTask { public ReadSequentialTask( - int tid, long randSeed, long numEntries, long keyRange, long initId) { + int tid, long randSeed, long numEntries, long keyRange) { super(tid, randSeed, numEntries, keyRange); - initId_ = initId; } @Override public void runTask() throws RocksDBException { - // make sure we have enough things to read in sequential - if (numEntries_ > keyRange_ - initId_) { - numEntries_ = keyRange_ - initId_; + org.rocksdb.Iterator iter = db_.newIterator(); + long i; + for (iter.seekToFirst(), i = 0; + iter.isValid() && i < numEntries_; + iter.next(), ++i) { + stats_.found_++; + stats_.finishedSingleOp(iter.key().length + iter.value().length); + if (isFinished()) { + return; + } } - throw new UnsupportedOperationException(); } - private long initId_; } public DbBenchmark(Map flags) throws Exception { @@ -360,22 +429,33 @@ public class DbBenchmark { flags.get(Flag.num) : flags.get(Flag.reads)); keySize_ = (Integer) flags.get(Flag.key_size); valueSize_ = (Integer) flags.get(Flag.value_size); - writeBufferSize_ = (Integer) flags.get(Flag.write_buffer_size) > 0 ? 
- (Integer) flags.get(Flag.write_buffer_size) : 0; compressionRatio_ = (Double) flags.get(Flag.compression_ratio); useExisting_ = (Boolean) flags.get(Flag.use_existing_db); randSeed_ = (Long) flags.get(Flag.seed); databaseDir_ = (String) flags.get(Flag.db); writesPerSeconds_ = (Integer) flags.get(Flag.writes_per_second); cacheSize_ = (Long) flags.get(Flag.cache_size); - gen_ = new RandomGenerator(compressionRatio_); + gen_ = new RandomGenerator(randSeed_, compressionRatio_); memtable_ = (String) flags.get(Flag.memtablerep); maxWriteBufferNumber_ = (Integer) flags.get(Flag.max_write_buffer_number); prefixSize_ = (Integer) flags.get(Flag.prefix_size); keysPerPrefix_ = (Integer) flags.get(Flag.keys_per_prefix); hashBucketCount_ = (Long) flags.get(Flag.hash_bucket_count); usePlainTable_ = (Boolean) flags.get(Flag.use_plain_table); + flags_ = flags; finishLock_ = new Object(); + // options.setPrefixSize((Integer)flags_.get(Flag.prefix_size)); + // options.setKeysPerPrefix((Long)flags_.get(Flag.keys_per_prefix)); + } + + private void prepareReadOptions(ReadOptions options) { + options.setVerifyChecksums((Boolean)flags_.get(Flag.verify_checksum)); + options.setTailing((Boolean)flags_.get(Flag.use_tailing_iterator)); + } + + private void prepareWriteOptions(WriteOptions options) { + options.setSync((Boolean)flags_.get(Flag.sync)); + options.setDisableWAL((Boolean)flags_.get(Flag.disable_wal)); } private void prepareOptions(Options options) { @@ -405,9 +485,119 @@ public class DbBenchmark { options.memTableFactoryName()); } if (usePlainTable_) { - options.setSstFormatConfig( + options.setTableFormatConfig( new PlainTableConfig().setKeySize(keySize_)); } + options.setMaxWriteBufferNumber( + (Integer)flags_.get(Flag.max_write_buffer_number)); + options.setMaxBackgroundCompactions( + (Integer)flags_.get(Flag.max_background_compactions)); + options.setMaxBackgroundFlushes( + (Integer)flags_.get(Flag.max_background_flushes)); + options.setCacheSize( + 
(Long)flags_.get(Flag.cache_size)); + options.setBlockSize( + (Long)flags_.get(Flag.block_size)); + options.setMaxOpenFiles( + (Integer)flags_.get(Flag.open_files)); + options.setCreateIfMissing( + !(Boolean)flags_.get(Flag.use_existing_db)); + options.setTableCacheRemoveScanCountLimit( + (Integer)flags_.get(Flag.cache_remove_scan_count_limit)); + options.setDisableDataSync( + (Boolean)flags_.get(Flag.disable_data_sync)); + options.setUseFsync( + (Boolean)flags_.get(Flag.use_fsync)); + options.setWalDir( + (String)flags_.get(Flag.wal_dir)); + options.setDisableSeekCompaction( + (Boolean)flags_.get(Flag.disable_seek_compaction)); + options.setDeleteObsoleteFilesPeriodMicros( + (Long)flags_.get(Flag.delete_obsolete_files_period_micros)); + options.setTableCacheNumshardbits( + (Integer)flags_.get(Flag.table_cache_numshardbits)); + options.setAllowMmapReads( + (Boolean)flags_.get(Flag.mmap_read)); + options.setAllowMmapWrites( + (Boolean)flags_.get(Flag.mmap_write)); + options.setAdviseRandomOnOpen( + (Boolean)flags_.get(Flag.advise_random_on_open)); + options.setUseAdaptiveMutex( + (Boolean)flags_.get(Flag.use_adaptive_mutex)); + options.setBytesPerSync( + (Long)flags_.get(Flag.bytes_per_sync)); + options.setBloomLocality( + (Integer)flags_.get(Flag.bloom_locality)); + options.setMinWriteBufferNumberToMerge( + (Integer)flags_.get(Flag.min_write_buffer_number_to_merge)); + options.setMemtablePrefixBloomBits( + (Integer)flags_.get(Flag.memtable_bloom_bits)); + options.setNumLevels( + (Integer)flags_.get(Flag.num_levels)); + options.setTargetFileSizeBase( + (Integer)flags_.get(Flag.target_file_size_base)); + options.setTargetFileSizeMultiplier( + (Integer)flags_.get(Flag.target_file_size_multiplier)); + options.setMaxBytesForLevelBase( + (Integer)flags_.get(Flag.max_bytes_for_level_base)); + options.setMaxBytesForLevelMultiplier( + (Integer)flags_.get(Flag.max_bytes_for_level_multiplier)); + options.setLevelZeroStopWritesTrigger( + 
(Integer)flags_.get(Flag.level0_stop_writes_trigger)); + options.setLevelZeroSlowdownWritesTrigger( + (Integer)flags_.get(Flag.level0_slowdown_writes_trigger)); + options.setLevelZeroFileNumCompactionTrigger( + (Integer)flags_.get(Flag.level0_file_num_compaction_trigger)); + options.setSoftRateLimit( + (Double)flags_.get(Flag.soft_rate_limit)); + options.setHardRateLimit( + (Double)flags_.get(Flag.hard_rate_limit)); + options.setRateLimitDelayMaxMilliseconds( + (Integer)flags_.get(Flag.rate_limit_delay_max_milliseconds)); + options.setMaxGrandparentOverlapFactor( + (Integer)flags_.get(Flag.max_grandparent_overlap_factor)); + options.setDisableAutoCompactions( + (Boolean)flags_.get(Flag.disable_auto_compactions)); + options.setSourceCompactionFactor( + (Integer)flags_.get(Flag.source_compaction_factor)); + options.setFilterDeletes( + (Boolean)flags_.get(Flag.filter_deletes)); + options.setMaxSuccessiveMerges( + (Integer)flags_.get(Flag.max_successive_merges)); + options.setWalTtlSeconds((Long)flags_.get(Flag.wal_ttl_seconds)); + options.setWalSizeLimitMB((Long)flags_.get(Flag.wal_size_limit_MB)); + int bloomBits = (Integer)flags_.get(Flag.bloom_bits); + if (bloomBits > 0) { + // Internally, options will keep a reference to this BloomFilter. + // This will disallow Java to GC this BloomFilter. In addition, + // options.dispose() will release the c++ object of this BloomFilter. + // As a result, the caller should not directly call + // BloomFilter.dispose(). 
+ options.setFilter(new BloomFilter(bloomBits)); + } + /* TODO(yhchiang): enable the following parameters + options.setCompressionType((String)flags_.get(Flag.compression_type)); + options.setCompressionLevel((Integer)flags_.get(Flag.compression_level)); + options.setMinLevelToCompress((Integer)flags_.get(Flag.min_level_to_compress)); + options.setHdfs((String)flags_.get(Flag.hdfs)); // env + options.setCacheNumshardbits((Integer)flags_.get(Flag.cache_numshardbits)); + options.setStatistics((Boolean)flags_.get(Flag.statistics)); + options.setUniversalSizeRatio( + (Integer)flags_.get(Flag.universal_size_ratio)); + options.setUniversalMinMergeWidth( + (Integer)flags_.get(Flag.universal_min_merge_width)); + options.setUniversalMaxMergeWidth( + (Integer)flags_.get(Flag.universal_max_merge_width)); + options.setUniversalMaxSizeAmplificationPercent( + (Integer)flags_.get(Flag.universal_max_size_amplification_percent)); + options.setUniversalCompressionSizePercent( + (Integer)flags_.get(Flag.universal_compression_size_percent)); + // TODO(yhchiang): add RocksDB.openForReadOnly() to enable Flag.readonly + // TODO(yhchiang): enable Flag.merge_operator by switch + options.setAccessHintOnCompactionStart( + (String)flags_.get(Flag.compaction_fadvice)); + // available values of fadvice are "NONE", "NORMAL", "SEQUENTIAL", "WILLNEED" for fadvice + */ } private void run() throws RocksDBException { @@ -424,6 +614,9 @@ public class DbBenchmark { List> tasks = new ArrayList>(); List> bgTasks = new ArrayList>(); WriteOptions writeOpt = new WriteOptions(); + prepareWriteOptions(writeOpt); + ReadOptions readOpt = new ReadOptions(); + prepareReadOptions(readOpt); int currentTaskId = 0; boolean known = true; @@ -436,6 +629,9 @@ public class DbBenchmark { } else if (benchmark.equals("fillrandom")) { tasks.add(new WriteRandomTask( currentTaskId++, randSeed_, num_, num_, writeOpt, 1)); + } else if (benchmark.equals("filluniquerandom")) { + tasks.add(new WriteUniqueRandomTask( + 
currentTaskId++, randSeed_, num_, num_, writeOpt, 1)); } else if (benchmark.equals("fillsync")) { writeOpt.setSync(true); tasks.add(new WriteRandomTask( @@ -444,13 +640,12 @@ public class DbBenchmark { } else if (benchmark.equals("readseq")) { for (int t = 0; t < threadNum_; ++t) { tasks.add(new ReadSequentialTask( - currentTaskId++, randSeed_, reads_ / threadNum_, - num_, (num_ / threadNum_) * t)); + currentTaskId++, randSeed_, reads_, num_)); } } else if (benchmark.equals("readrandom")) { for (int t = 0; t < threadNum_; ++t) { tasks.add(new ReadRandomTask( - currentTaskId++, randSeed_, reads_ / threadNum_, num_)); + currentTaskId++, randSeed_, reads_, num_)); } } else if (benchmark.equals("readwhilewriting")) { WriteTask writeTask = new WriteRandomTask( @@ -508,6 +703,7 @@ public class DbBenchmark { } } writeOpt.dispose(); + readOpt.dispose(); } options.dispose(); db_.close(); @@ -573,7 +769,7 @@ public class DbBenchmark { System.out.printf( "%-16s : %11.5f micros/op; %6.1f MB/s; %d / %d task(s) finished.\n", - benchmark, elapsedSeconds * 1e6 / num_, + benchmark, elapsedSeconds * 1e6 / stats.done_, (stats.bytes_ / 1048576.0) / elapsedSeconds, taskFinishedCount, concurrentThreads); } @@ -616,14 +812,13 @@ public class DbBenchmark { static void printHelp() { System.out.println("usage:"); for (Flag flag : Flag.values()) { - System.out.format(" --%s%n %s%n", + System.out.format(" --%s%n\t%s%n", flag.name(), flag.desc()); if (flag.getDefaultValue() != null) { - System.out.format(" DEFAULT: %s%n", + System.out.format("\tDEFAULT: %s%n", flag.getDefaultValue().toString()); } - System.out.println(""); } } @@ -677,30 +872,28 @@ public class DbBenchmark { "\t\tfillseq -- write N values in sequential key order in async mode.\n" + "\t\tfillrandom -- write N values in random key order in async mode.\n" + "\t\tfillbatch -- write N/1000 batch where each batch has 1000 values\n" + - "\t\t in random key order in sync mode.\n" + + "\t\t in random key order in sync mode.\n" + 
"\t\tfillsync -- write N/100 values in random key order in sync mode.\n" + "\t\tfill100K -- write N/1000 100K values in random order in async mode.\n" + "\t\treadseq -- read N times sequentially.\n" + "\t\treadrandom -- read N times in random order.\n" + "\t\treadhot -- read N times in random order from 1% section of DB.\n" + "\t\treadwhilewriting -- measure the read performance of multiple readers\n" + - "\t\t with a bg single writer. The write rate of the bg\n" + - "\t\t is capped by --writes_per_second.\n" + + "\t\t with a bg single writer. The write rate of the bg\n" + + "\t\t is capped by --writes_per_second.\n" + "\tMeta Operations:\n" + "\t\tdelete -- delete DB") { @Override public Object parseValue(String value) { return new ArrayList(Arrays.asList(value.split(","))); } }, - compression_ratio(0.5d, "Arrange to generate values that shrink to this fraction of\n" + - "\ttheir original size after compression") { + "\ttheir original size after compression.") { @Override public Object parseValue(String value) { return Double.parseDouble(value); } }, - use_existing_db(false, "If true, do not destroy the existing database. If you set this\n" + "\tflag and also specify a benchmark that wants a fresh database,\n" + @@ -709,51 +902,43 @@ public class DbBenchmark { return Boolean.parseBoolean(value); } }, - num(1000000, "Number of key/values to place in database.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, - threads(1, "Number of concurrent threads to run.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, - reads(null, "Number of read operations to do. 
If negative, do --nums reads.") { - @Override - public Object parseValue(String value) { + @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, - key_size(16, "The size of each key in bytes.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, - value_size(100, "The size of each value in bytes.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, - write_buffer_size(4 << 20, "Number of bytes to buffer in memtable before compacting\n" + "\t(initialized to default value by 'main'.)") { @Override public Object parseValue(String value) { - return Integer.parseInt(value); + return Long.parseLong(value); } }, - max_write_buffer_number(2, "The number of in-memory memtables. Each memtable is of size\n" + "\twrite_buffer_size.") { @@ -761,14 +946,12 @@ public class DbBenchmark { return Integer.parseInt(value); } }, - prefix_size(0, "Controls the prefix size for HashSkipList, HashLinkedList,\n" + "\tand plain table.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, - keys_per_prefix(0, "Controls the average number of keys generated\n" + "\tper prefix, 0 means no special handling of the prefix,\n" + "\ti.e. use the prefix comes with the generated random number.") { @@ -776,7 +959,6 @@ public class DbBenchmark { return Integer.parseInt(value); } }, - memtablerep("skip_list", "The memtable format. Available options are\n" + "\tskip_list,\n" + @@ -787,7 +969,6 @@ public class DbBenchmark { return value; } }, - hash_bucket_count(SizeUnit.MB, "The number of hash buckets used in the hash-bucket-based\n" + "\tmemtables. Memtables that currently support this argument are\n" + @@ -796,7 +977,6 @@ public class DbBenchmark { return Long.parseLong(value); } }, - writes_per_second(10000, "The write-rate of the background writer used in the\n" + "\t`readwhilewriting` benchmark. 
Non-positive number indicates\n" + @@ -805,14 +985,12 @@ public class DbBenchmark { return Integer.parseInt(value); } }, - use_plain_table(false, "Use plain-table sst format.") { @Override public Object parseValue(String value) { return Boolean.parseBoolean(value); } }, - cache_size(-1L, "Number of bytes to use as a cache of uncompressed data.\n" + "\tNegative means use default settings.") { @@ -820,15 +998,445 @@ public class DbBenchmark { return Long.parseLong(value); } }, - seed(0L, "Seed base for random number generators.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, - - + num_levels(7, + "The total number of levels.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + numdistinct(1000, + "Number of distinct keys to use. Used in RandomWithVerify to\n" + + "\tread/write on fewer keys so that gets are more likely to find the\n" + + "\tkey and puts are more likely to update the same key.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + merge_keys(-1, + "Number of distinct keys to use for MergeRandom and\n" + + "\tReadRandomMergeRandom.\n" + + "\tIf negative, there will be FLAGS_num keys.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + bloom_locality(0,"Control bloom filter probes locality.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + duration(0,"Time in seconds for the random-ops tests to run.\n" + + "\tWhen 0 then num & reads determine the test duration.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + num_multi_db(0, + "Number of DBs used in the benchmark. 
0 means single DB.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + histogram(false,"Print histogram of operation timings.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + min_write_buffer_number_to_merge( + defaultOptions_.minWriteBufferNumberToMerge(), + "The minimum number of write buffers that will be merged together\n" + + "\tbefore writing to storage. This is cheap because it is an\n" + + "\tin-memory merge. If this feature is not enabled, then all these\n" + + "\twrite buffers are flushed to L0 as separate files and this\n" + + "\tincreases read amplification because a get request has to check\n" + + "\tin all of these files. Also, an in-memory merge may result in\n" + + "\twriting less data to storage if there are duplicate records\n" + + "\tin each of these individual write buffers.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + max_background_compactions( + defaultOptions_.maxBackgroundCompactions(), + "The maximum number of concurrent background compactions\n" + + "\tthat can occur in parallel.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + max_background_flushes( + defaultOptions_.maxBackgroundFlushes(), + "The maximum number of concurrent background flushes\n" + + "\tthat can occur in parallel.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + /* TODO(yhchiang): enable the following + compaction_style((int32_t) defaultOptions_.compactionStyle(), + "style of compaction: level-based vs universal.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + },*/ + universal_size_ratio(0, + "Percentage flexibility while comparing file size\n" + + "\t(for universal compaction only).") { + @Override public Object parseValue(String value) { + return 
Integer.parseInt(value); + } + }, + universal_min_merge_width(0,"The minimum number of files in a\n" + + "\tsingle compaction run (for universal compaction only).") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + universal_max_merge_width(0,"The max number of files to compact\n" + + "\tin universal style compaction.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + universal_max_size_amplification_percent(0, + "The max size amplification for universal style compaction.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + universal_compression_size_percent(-1, + "The percentage of the database to compress for universal\n" + + "\tcompaction. -1 means compress everything.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + block_size(defaultOptions_.blockSize(), + "Number of bytes in a block.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + compressed_cache_size(-1, + "Number of bytes to use as a cache of compressed data.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + open_files(defaultOptions_.maxOpenFiles(), + "Maximum number of files to keep open at the same time\n" + + "\t(use default if == 0)") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + bloom_bits(-1,"Bloom filter bits per key. 
Negative means\n" + + "\tuse default settings.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + memtable_bloom_bits(0,"Bloom filter bits per key for memtable.\n" + + "\tNegative means no bloom filter.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + cache_numshardbits(-1,"Number of shards for the block cache\n" + + "\tis 2 ** cache_numshardbits. Negative means use default settings.\n" + + "\tThis is applied only if FLAGS_cache_size is non-negative.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + cache_remove_scan_count_limit(32,"") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + verify_checksum(false,"Verify checksum for every block read\n" + + "\tfrom storage.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + statistics(false,"Database statistics.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + writes(-1,"Number of write operations to do. 
If negative, do\n" + + "\t--num reads.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + sync(false,"Sync all writes to disk.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + disable_data_sync(false,"If true, do not wait until data is\n" + + "\tsynced to disk.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + use_fsync(false,"If true, issue fsync instead of fdatasync.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + disable_wal(false,"If true, do not write WAL for write.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + wal_dir("", "If not empty, use the given dir for WAL.") { + @Override public Object parseValue(String value) { + return value; + } + }, + target_file_size_base(2 * 1048576,"Target file size at level-1") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + target_file_size_multiplier(1, + "A multiplier to compute target level-N file size (N >= 2)") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + max_bytes_for_level_base(10 * 1048576, + "Max bytes for level-1") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + max_bytes_for_level_multiplier(10, + "A multiplier to compute max bytes for level-N (N >= 2)") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + level0_stop_writes_trigger(12,"Number of files in level-0\n" + + "\tthat will trigger put stop.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + level0_slowdown_writes_trigger(8,"Number of files in level-0\n" + + "\tthat will slow down writes.") { + @Override public Object 
parseValue(String value) { + return Integer.parseInt(value); + } + }, + level0_file_num_compaction_trigger(4,"Number of files in level-0\n" + + "\twhen compactions start.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + readwritepercent(90,"Ratio of reads to reads/writes (expressed\n" + + "\tas percentage) for the ReadRandomWriteRandom workload. The\n" + + "\tdefault value 90 means 90% operations out of all reads and writes\n" + + "\toperations are reads. In other words, 9 gets for every 1 put.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + mergereadpercent(70,"Ratio of merges to merges&reads (expressed\n" + + "\tas percentage) for the ReadRandomMergeRandom workload. The\n" + + "\tdefault value 70 means 70% out of all read and merge operations\n" + + "\tare merges. In other words, 7 merges for every 3 gets.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + deletepercent(2,"Percentage of deletes out of reads/writes/\n" + + "\tdeletes (used in RandomWithVerify only). RandomWithVerify\n" + + "\tcalculates writepercent as (100 - FLAGS_readwritepercent -\n" + + "\tdeletepercent), so deletepercent must be smaller than (100 -\n" + + "\tFLAGS_readwritepercent)") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + disable_seek_compaction(false,"Option to disable compaction\n" + + "\ttriggered by read.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + delete_obsolete_files_period_micros(0L,"Option to delete\n" + + "\tobsolete files periodically. 0 means that obsolete files are\n" + + "\tdeleted after every compaction run.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + compression_level(-1, + "Compression level. 
For zlib this should be -1 for the\n" + + "\tdefault level, or between 0 and 9.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + min_level_to_compress(-1,"If non-negative, compression starts\n" + + "\tfrom this level. Levels with number < min_level_to_compress are\n" + + "\tnot compressed. Otherwise, apply compression_type to\n" + + "\tall levels.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + table_cache_numshardbits(4,"") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + stats_interval(0,"Stats are reported every N operations when\n" + + "\tthis is greater than zero. When 0 the interval grows over time.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + stats_per_interval(0,"Reports additional stats per interval when\n" + + "\tthis is greater than 0.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + perf_level(0,"Level of perf collection.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + soft_rate_limit(0.0,"") { + @Override public Object parseValue(String value) { + return Double.parseDouble(value); + } + }, + hard_rate_limit(0.0,"When not equal to 0 this make threads\n" + + "\tsleep at each stats reporting interval until the compaction\n" + + "\tscore for all levels is less than or equal to this value.") { + @Override public Object parseValue(String value) { + return Double.parseDouble(value); + } + }, + rate_limit_delay_max_milliseconds(1000, + "When hard_rate_limit is set then this is the max time a put will\n" + + "\tbe stalled.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + max_grandparent_overlap_factor(10,"Control maximum bytes of\n" + + "\toverlaps in grandparent (i.e., level+2) before we stop 
building a\n" + + "\tsingle file in a level->level+1 compaction.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + readonly(false,"Run read only benchmarks.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + disable_auto_compactions(false,"Do not auto trigger compactions.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + source_compaction_factor(1,"Cap the size of data in level-K for\n" + + "\ta compaction run that compacts Level-K with Level-(K+1) (for\n" + + "\tK >= 1)") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, + wal_ttl_seconds(0L,"Set the TTL for the WAL Files in seconds.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + wal_size_limit_MB(0L,"Set the size limit for the WAL Files\n" + + "\tin MB.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + /* TODO(yhchiang): enable the following + bufferedio(rocksdb::EnvOptions().use_os_buffer, + "Allow buffered io using OS buffers.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + */ + mmap_read(false, + "Allow reads to occur via mmap-ing files.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + mmap_write(false, + "Allow writes to occur via mmap-ing files.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + advise_random_on_open(defaultOptions_.adviseRandomOnOpen(), + "Advise random access on table file open.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + compaction_fadvice("NORMAL", + "Access pattern advice when a file is compacted.") { + @Override public Object parseValue(String value) 
{ + return value; + } + }, + use_tailing_iterator(false, + "Use tailing iterator to access a series of keys instead of get.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + use_adaptive_mutex(defaultOptions_.useAdaptiveMutex(), + "Use adaptive mutex.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + bytes_per_sync(defaultOptions_.bytesPerSync(), + "Allows OS to incrementally sync files to disk while they are\n" + + "\tbeing written, in the background. Issue one request for every\n" + + "\tbytes_per_sync written. 0 turns it off.") { + @Override public Object parseValue(String value) { + return Long.parseLong(value); + } + }, + filter_deletes(false," On true, deletes use bloom-filter and drop\n" + + "\tthe delete if key not present.") { + @Override public Object parseValue(String value) { + return Boolean.parseBoolean(value); + } + }, + max_successive_merges(0,"Maximum number of successive merge\n" + + "\toperations on a key in the memtable.") { + @Override public Object parseValue(String value) { + return Integer.parseInt(value); + } + }, db("/tmp/rocksdbjni-bench", "Use the db with the following name.") { @Override public Object parseValue(String value) { @@ -859,25 +1467,23 @@ public class DbBenchmark { private final byte[] data_; private int dataLength_; private int position_; + Random rand_; - private RandomGenerator(double compressionRatio) { + private RandomGenerator(long seed, double compressionRatio) { // We use a limited amount of data over and over again and ensure // that it is larger than the compression window (32KB), and also // large enough to serve all typical value sizes we want to write. - Random rand = new Random(301); + rand_ = new Random(seed); dataLength_ = 1048576 + 100; data_ = new byte[dataLength_]; // TODO(yhchiang): mimic test::CompressibleString? 
for (int i = 0; i < dataLength_; ++i) { - data_[i] = (byte) (' ' + rand.nextInt(95)); + data_[i] = (byte) (' ' + rand_.nextInt(95)); } } private byte[] generate(int length) { - if (position_ + length > data_.length) { - position_ = 0; - assert (length < data_.length); - } + position_ = rand_.nextInt(data_.length - length); return Arrays.copyOfRange(data_, position_, position_ + length); } } @@ -911,7 +1517,6 @@ public class DbBenchmark { long startTime_; // memtable related - final int writeBufferSize_; final int maxWriteBufferNumber_; final int prefixSize_; final int keysPerPrefix_; @@ -923,4 +1528,8 @@ public class DbBenchmark { Object finishLock_; boolean isFinished_; + Map flags_; + // as the scope of a static member equals to the scope of the problem, + // we let its c++ pointer to be disposed in its finalizer. + static Options defaultOptions_ = new Options(); } diff --git a/java/org/rocksdb/test/OptionsTest.java b/java/org/rocksdb/test/OptionsTest.java index 7c13db6d1..cd3ba785d 100644 --- a/java/org/rocksdb/test/OptionsTest.java +++ b/java/org/rocksdb/test/OptionsTest.java @@ -123,9 +123,9 @@ public class OptionsTest { assert(opt.tableCacheRemoveScanCountLimit() == intValue); } - { // WALTtlSeconds test + { // WalTtlSeconds test long longValue = rand.nextLong(); - opt.setWALTtlSeconds(longValue); + opt.setWalTtlSeconds(longValue); assert(opt.walTtlSeconds() == longValue); } @@ -195,6 +195,228 @@ public class OptionsTest { assert(opt.allowThreadLocal() == boolValue); } + { // WriteBufferSize test + long longValue = rand.nextLong(); + opt.setWriteBufferSize(longValue); + assert(opt.writeBufferSize() == longValue); + } + + { // MaxWriteBufferNumber test + int intValue = rand.nextInt(); + opt.setMaxWriteBufferNumber(intValue); + assert(opt.maxWriteBufferNumber() == intValue); + } + + { // MinWriteBufferNumberToMerge test + int intValue = rand.nextInt(); + opt.setMinWriteBufferNumberToMerge(intValue); + assert(opt.minWriteBufferNumberToMerge() == intValue); + } + 
+ { // BlockSize test + long longValue = rand.nextLong(); + opt.setBlockSize(longValue); + assert(opt.blockSize() == longValue); + } + + { // BlockRestartInterval test + int intValue = rand.nextInt(); + opt.setBlockRestartInterval(intValue); + assert(opt.blockRestartInterval() == intValue); + } + + { // WholeKeyFiltering test + boolean boolValue = rand.nextBoolean(); + opt.setWholeKeyFiltering(boolValue); + assert(opt.wholeKeyFiltering() == boolValue); + } + + { // NumLevels test + int intValue = rand.nextInt(); + opt.setNumLevels(intValue); + assert(opt.numLevels() == intValue); + } + + { // LevelFileNumCompactionTrigger test + int intValue = rand.nextInt(); + opt.setLevelZeroFileNumCompactionTrigger(intValue); + assert(opt.levelZeroFileNumCompactionTrigger() == intValue); + } + + { // LevelSlowdownWritesTrigger test + int intValue = rand.nextInt(); + opt.setLevelZeroSlowdownWritesTrigger(intValue); + assert(opt.levelZeroSlowdownWritesTrigger() == intValue); + } + + { // LevelStopWritesTrigger test + int intValue = rand.nextInt(); + opt.setLevelZeroStopWritesTrigger(intValue); + assert(opt.levelZeroStopWritesTrigger() == intValue); + } + + { // MaxMemCompactionLevel test + int intValue = rand.nextInt(); + opt.setMaxMemCompactionLevel(intValue); + assert(opt.maxMemCompactionLevel() == intValue); + } + + { // TargetFileSizeBase test + int intValue = rand.nextInt(); + opt.setTargetFileSizeBase(intValue); + assert(opt.targetFileSizeBase() == intValue); + } + + { // TargetFileSizeMultiplier test + int intValue = rand.nextInt(); + opt.setTargetFileSizeMultiplier(intValue); + assert(opt.targetFileSizeMultiplier() == intValue); + } + + { // MaxBytesForLevelBase test + long longValue = rand.nextLong(); + opt.setMaxBytesForLevelBase(longValue); + assert(opt.maxBytesForLevelBase() == longValue); + } + + { // MaxBytesForLevelMultiplier test + int intValue = rand.nextInt(); + opt.setMaxBytesForLevelMultiplier(intValue); + assert(opt.maxBytesForLevelMultiplier() == intValue); + 
} + + { // ExpandedCompactionFactor test + int intValue = rand.nextInt(); + opt.setExpandedCompactionFactor(intValue); + assert(opt.expandedCompactionFactor() == intValue); + } + + { // SourceCompactionFactor test + int intValue = rand.nextInt(); + opt.setSourceCompactionFactor(intValue); + assert(opt.sourceCompactionFactor() == intValue); + } + + { // MaxGrandparentOverlapFactor test + int intValue = rand.nextInt(); + opt.setMaxGrandparentOverlapFactor(intValue); + assert(opt.maxGrandparentOverlapFactor() == intValue); + } + + { // DisableSeekCompaction test + boolean boolValue = rand.nextBoolean(); + opt.setDisableSeekCompaction(boolValue); + assert(opt.disableSeekCompaction() == boolValue); + } + + { // SoftRateLimit test + double doubleValue = rand.nextDouble(); + opt.setSoftRateLimit(doubleValue); + assert(opt.softRateLimit() == doubleValue); + } + + { // HardRateLimit test + double doubleValue = rand.nextDouble(); + opt.setHardRateLimit(doubleValue); + assert(opt.hardRateLimit() == doubleValue); + } + + { // RateLimitDelayMaxMilliseconds test + int intValue = rand.nextInt(); + opt.setRateLimitDelayMaxMilliseconds(intValue); + assert(opt.rateLimitDelayMaxMilliseconds() == intValue); + } + + { // NoBlockCache test + boolean boolValue = rand.nextBoolean(); + opt.setNoBlockCache(boolValue); + assert(opt.noBlockCache() == boolValue); + } + + { // ArenaBlockSize test + long longValue = rand.nextLong(); + opt.setArenaBlockSize(longValue); + assert(opt.arenaBlockSize() == longValue); + } + + { // DisableAutoCompactions test + boolean boolValue = rand.nextBoolean(); + opt.setDisableAutoCompactions(boolValue); + assert(opt.disableAutoCompactions() == boolValue); + } + + { // PurgeRedundantKvsWhileFlush test + boolean boolValue = rand.nextBoolean(); + opt.setPurgeRedundantKvsWhileFlush(boolValue); + assert(opt.purgeRedundantKvsWhileFlush() == boolValue); + } + + { // BlockSizeDeviation test + int intValue = rand.nextInt(); + opt.setBlockSizeDeviation(intValue); + 
assert(opt.blockSizeDeviation() == intValue); + } + + { // VerifyChecksumsInCompaction test + boolean boolValue = rand.nextBoolean(); + opt.setVerifyChecksumsInCompaction(boolValue); + assert(opt.verifyChecksumsInCompaction() == boolValue); + } + + { // FilterDeletes test + boolean boolValue = rand.nextBoolean(); + opt.setFilterDeletes(boolValue); + assert(opt.filterDeletes() == boolValue); + } + + { // MaxSequentialSkipInIterations test + long longValue = rand.nextLong(); + opt.setMaxSequentialSkipInIterations(longValue); + assert(opt.maxSequentialSkipInIterations() == longValue); + } + + { // InplaceUpdateSupport test + boolean boolValue = rand.nextBoolean(); + opt.setInplaceUpdateSupport(boolValue); + assert(opt.inplaceUpdateSupport() == boolValue); + } + + { // InplaceUpdateNumLocks test + long longValue = rand.nextLong(); + opt.setInplaceUpdateNumLocks(longValue); + assert(opt.inplaceUpdateNumLocks() == longValue); + } + + { // MemtablePrefixBloomBits test + int intValue = rand.nextInt(); + opt.setMemtablePrefixBloomBits(intValue); + assert(opt.memtablePrefixBloomBits() == intValue); + } + + { // MemtablePrefixBloomProbes test + int intValue = rand.nextInt(); + opt.setMemtablePrefixBloomProbes(intValue); + assert(opt.memtablePrefixBloomProbes() == intValue); + } + + { // BloomLocality test + int intValue = rand.nextInt(); + opt.setBloomLocality(intValue); + assert(opt.bloomLocality() == intValue); + } + + { // MaxSuccessiveMerges test + long longValue = rand.nextLong(); + opt.setMaxSuccessiveMerges(longValue); + assert(opt.maxSuccessiveMerges() == longValue); + } + + { // MinPartialMergeOperands test + int intValue = rand.nextInt(); + opt.setMinPartialMergeOperands(intValue); + assert(opt.minPartialMergeOperands() == intValue); + } + opt.dispose(); System.out.println("Passed OptionsTest"); } diff --git a/java/rocksjni/options.cc b/java/rocksjni/options.cc index d9aef3d74..a05a74e7a 100644 --- a/java/rocksjni/options.cc +++ b/java/rocksjni/options.cc @@ -122,13 
+122,13 @@ jlong Java_org_rocksdb_Options_statisticsPtr( /* * Class: org_rocksdb_Options - * Method: setFilter0 + * Method: setFilterHandle * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setFilter0( - JNIEnv* env, jobject jobj, jlong jopt_handle, jobject jfp) { +void Java_org_rocksdb_Options_setFilterHandle( + JNIEnv* env, jobject jobj, jlong jopt_handle, jlong jfilter_handle) { reinterpret_cast(jopt_handle)->filter_policy = - rocksdb::FilterJni::getHandle(env, jfp); + reinterpret_cast(jfilter_handle); } /* @@ -602,15 +602,36 @@ jlong Java_org_rocksdb_Options_walTtlSeconds( /* * Class: org_rocksdb_Options - * Method: setWALTtlSeconds + * Method: setWalTtlSeconds * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setWALTtlSeconds( +void Java_org_rocksdb_Options_setWalTtlSeconds( JNIEnv* env, jobject jobj, jlong jhandle, jlong WAL_ttl_seconds) { reinterpret_cast(jhandle)->WAL_ttl_seconds = static_cast(WAL_ttl_seconds); } +/* + * Class: org_rocksdb_Options + * Method: walTtlSeconds + * Signature: (J)J + */ +jlong Java_org_rocksdb_Options_walSizeLimitMB( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->WAL_size_limit_MB; +} + +/* + * Class: org_rocksdb_Options + * Method: setWalSizeLimitMB + * Signature: (JJ)V + */ +void Java_org_rocksdb_Options_setWalSizeLimitMB( + JNIEnv* env, jobject jobj, jlong jhandle, jlong WAL_size_limit_MB) { + reinterpret_cast(jhandle)->WAL_size_limit_MB = + static_cast(WAL_size_limit_MB); +} + /* * Class: org_rocksdb_Options * Method: manifestPreallocationSize @@ -870,6 +891,764 @@ jstring Java_org_rocksdb_Options_tableFactoryName( return env->NewStringUTF(tf->Name()); } + +/* + * Class: org_rocksdb_Options + * Method: minWriteBufferNumberToMerge + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_minWriteBufferNumberToMerge( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->min_write_buffer_number_to_merge; +} + +/* + * Class: org_rocksdb_Options + * Method: 
setMinWriteBufferNumberToMerge + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setMinWriteBufferNumberToMerge( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jmin_write_buffer_number_to_merge) { + reinterpret_cast( + jhandle)->min_write_buffer_number_to_merge = + static_cast(jmin_write_buffer_number_to_merge); +} + +/* + * Class: org_rocksdb_Options + * Method: blockRestartInterval + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_blockRestartInterval( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->block_restart_interval; +} + +/* + * Class: org_rocksdb_Options + * Method: setBlockRestartInterval + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setBlockRestartInterval( + JNIEnv* env, jobject jobj, jlong jhandle, jint jblock_restart_interval) { + reinterpret_cast(jhandle)->block_restart_interval = + static_cast(jblock_restart_interval); +} + +/* + * Class: org_rocksdb_Options + * Method: wholeKeyFiltering + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_Options_wholeKeyFiltering( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->whole_key_filtering; +} + +/* + * Class: org_rocksdb_Options + * Method: setWholeKeyFiltering + * Signature: (JZ)V + */ +void Java_org_rocksdb_Options_setWholeKeyFiltering( + JNIEnv* env, jobject jobj, jlong jhandle, jboolean jwhole_key_filtering) { + reinterpret_cast(jhandle)->whole_key_filtering = + static_cast(jwhole_key_filtering); +} + +/* + * Class: org_rocksdb_Options + * Method: numLevels + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_numLevels( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->num_levels; +} + +/* + * Class: org_rocksdb_Options + * Method: setNumLevels + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setNumLevels( + JNIEnv* env, jobject jobj, jlong jhandle, jint jnum_levels) { + reinterpret_cast(jhandle)->num_levels = + static_cast(jnum_levels); +} + +/* + * Class: 
org_rocksdb_Options + * Method: levelZeroFileNumCompactionTrigger + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_levelZeroFileNumCompactionTrigger( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->level0_file_num_compaction_trigger; +} + +/* + * Class: org_rocksdb_Options + * Method: setLevelZeroFileNumCompactionTrigger + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setLevelZeroFileNumCompactionTrigger( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jlevel0_file_num_compaction_trigger) { + reinterpret_cast( + jhandle)->level0_file_num_compaction_trigger = + static_cast(jlevel0_file_num_compaction_trigger); +} + +/* + * Class: org_rocksdb_Options + * Method: levelZeroSlowdownWritesTrigger + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_levelZeroSlowdownWritesTrigger( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->level0_slowdown_writes_trigger; +} + +/* + * Class: org_rocksdb_Options + * Method: setLevelSlowdownWritesTrigger + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setLevelZeroSlowdownWritesTrigger( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jlevel0_slowdown_writes_trigger) { + reinterpret_cast( + jhandle)->level0_slowdown_writes_trigger = + static_cast(jlevel0_slowdown_writes_trigger); +} + +/* + * Class: org_rocksdb_Options + * Method: levelZeroStopWritesTrigger + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_levelZeroStopWritesTrigger( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->level0_stop_writes_trigger; +} + +/* + * Class: org_rocksdb_Options + * Method: setLevelStopWritesTrigger + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setLevelZeroStopWritesTrigger( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jlevel0_stop_writes_trigger) { + reinterpret_cast(jhandle)->level0_stop_writes_trigger = + static_cast(jlevel0_stop_writes_trigger); +} + +/* + * Class: org_rocksdb_Options 
+ * Method: maxMemCompactionLevel + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_maxMemCompactionLevel( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->max_mem_compaction_level; +} + +/* + * Class: org_rocksdb_Options + * Method: setMaxMemCompactionLevel + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setMaxMemCompactionLevel( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jmax_mem_compaction_level) { + reinterpret_cast(jhandle)->max_mem_compaction_level = + static_cast(jmax_mem_compaction_level); +} + +/* + * Class: org_rocksdb_Options + * Method: targetFileSizeBase + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_targetFileSizeBase( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->target_file_size_base; +} + +/* + * Class: org_rocksdb_Options + * Method: setTargetFileSizeBase + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setTargetFileSizeBase( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jtarget_file_size_base) { + reinterpret_cast(jhandle)->target_file_size_base = + static_cast(jtarget_file_size_base); +} + +/* + * Class: org_rocksdb_Options + * Method: targetFileSizeMultiplier + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_targetFileSizeMultiplier( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->target_file_size_multiplier; +} + +/* + * Class: org_rocksdb_Options + * Method: setTargetFileSizeMultiplier + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setTargetFileSizeMultiplier( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jtarget_file_size_multiplier) { + reinterpret_cast( + jhandle)->target_file_size_multiplier = + static_cast(jtarget_file_size_multiplier); +} + +/* + * Class: org_rocksdb_Options + * Method: maxBytesForLevelBase + * Signature: (J)J + */ +jlong Java_org_rocksdb_Options_maxBytesForLevelBase( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + 
jhandle)->max_bytes_for_level_base; +} + +/* + * Class: org_rocksdb_Options + * Method: setMaxBytesForLevelBase + * Signature: (JJ)V + */ +void Java_org_rocksdb_Options_setMaxBytesForLevelBase( + JNIEnv* env, jobject jobj, jlong jhandle, + jlong jmax_bytes_for_level_base) { + reinterpret_cast( + jhandle)->max_bytes_for_level_base = + static_cast(jmax_bytes_for_level_base); +} + +/* + * Class: org_rocksdb_Options + * Method: maxBytesForLevelMultiplier + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_maxBytesForLevelMultiplier( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->max_bytes_for_level_multiplier; +} + +/* + * Class: org_rocksdb_Options + * Method: setMaxBytesForLevelMultiplier + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setMaxBytesForLevelMultiplier( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jmax_bytes_for_level_multiplier) { + reinterpret_cast( + jhandle)->max_bytes_for_level_multiplier = + static_cast(jmax_bytes_for_level_multiplier); +} + +/* + * Class: org_rocksdb_Options + * Method: expandedCompactionFactor + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_expandedCompactionFactor( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->expanded_compaction_factor; +} + +/* + * Class: org_rocksdb_Options + * Method: setExpandedCompactionFactor + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setExpandedCompactionFactor( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jexpanded_compaction_factor) { + reinterpret_cast( + jhandle)->expanded_compaction_factor = + static_cast(jexpanded_compaction_factor); +} + +/* + * Class: org_rocksdb_Options + * Method: sourceCompactionFactor + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_sourceCompactionFactor( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->source_compaction_factor; +} + +/* + * Class: org_rocksdb_Options + * Method: setSourceCompactionFactor + * 
Signature: (JI)V + */ +void Java_org_rocksdb_Options_setSourceCompactionFactor( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jsource_compaction_factor) { + reinterpret_cast( + jhandle)->source_compaction_factor = + static_cast(jsource_compaction_factor); +} + +/* + * Class: org_rocksdb_Options + * Method: maxGrandparentOverlapFactor + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_maxGrandparentOverlapFactor( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->max_grandparent_overlap_factor; +} + +/* + * Class: org_rocksdb_Options + * Method: setMaxGrandparentOverlapFactor + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setMaxGrandparentOverlapFactor( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jmax_grandparent_overlap_factor) { + reinterpret_cast( + jhandle)->max_grandparent_overlap_factor = + static_cast(jmax_grandparent_overlap_factor); +} + +/* + * Class: org_rocksdb_Options + * Method: softRateLimit + * Signature: (J)D + */ +jdouble Java_org_rocksdb_Options_softRateLimit( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->soft_rate_limit; +} + +/* + * Class: org_rocksdb_Options + * Method: setSoftRateLimit + * Signature: (JD)V + */ +void Java_org_rocksdb_Options_setSoftRateLimit( + JNIEnv* env, jobject jobj, jlong jhandle, jdouble jsoft_rate_limit) { + reinterpret_cast(jhandle)->soft_rate_limit = + static_cast(jsoft_rate_limit); +} + +/* + * Class: org_rocksdb_Options + * Method: hardRateLimit + * Signature: (J)D + */ +jdouble Java_org_rocksdb_Options_hardRateLimit( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->hard_rate_limit; +} + +/* + * Class: org_rocksdb_Options + * Method: setHardRateLimit + * Signature: (JD)V + */ +void Java_org_rocksdb_Options_setHardRateLimit( + JNIEnv* env, jobject jobj, jlong jhandle, jdouble jhard_rate_limit) { + reinterpret_cast(jhandle)->hard_rate_limit = + static_cast(jhard_rate_limit); +} + +/* + * 
Class: org_rocksdb_Options + * Method: rateLimitDelayMaxMilliseconds + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_rateLimitDelayMaxMilliseconds( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->rate_limit_delay_max_milliseconds; +} + +/* + * Class: org_rocksdb_Options + * Method: setRateLimitDelayMaxMilliseconds + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setRateLimitDelayMaxMilliseconds( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jrate_limit_delay_max_milliseconds) { + reinterpret_cast( + jhandle)->rate_limit_delay_max_milliseconds = + static_cast(jrate_limit_delay_max_milliseconds); +} + +/* + * Class: org_rocksdb_Options + * Method: noBlockCache + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_Options_noBlockCache( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->no_block_cache; +} + +/* + * Class: org_rocksdb_Options + * Method: setNoBlockCache + * Signature: (JZ)V + */ +void Java_org_rocksdb_Options_setNoBlockCache( + JNIEnv* env, jobject jobj, jlong jhandle, jboolean jno_block_cache) { + reinterpret_cast(jhandle)->no_block_cache = + static_cast(jno_block_cache); +} + +/* + * Class: org_rocksdb_Options + * Method: arenaBlockSize + * Signature: (J)J + */ +jlong Java_org_rocksdb_Options_arenaBlockSize( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->arena_block_size; +} + +/* + * Class: org_rocksdb_Options + * Method: setArenaBlockSize + * Signature: (JJ)V + */ +void Java_org_rocksdb_Options_setArenaBlockSize( + JNIEnv* env, jobject jobj, jlong jhandle, jlong jarena_block_size) { + reinterpret_cast(jhandle)->arena_block_size = + static_cast(jarena_block_size); +} + +/* + * Class: org_rocksdb_Options + * Method: disableAutoCompactions + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_Options_disableAutoCompactions( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->disable_auto_compactions; 
+} + +/* + * Class: org_rocksdb_Options + * Method: setDisableAutoCompactions + * Signature: (JZ)V + */ +void Java_org_rocksdb_Options_setDisableAutoCompactions( + JNIEnv* env, jobject jobj, jlong jhandle, + jboolean jdisable_auto_compactions) { + reinterpret_cast( + jhandle)->disable_auto_compactions = + static_cast(jdisable_auto_compactions); +} + +/* + * Class: org_rocksdb_Options + * Method: purgeRedundantKvsWhileFlush + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_Options_purgeRedundantKvsWhileFlush( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->purge_redundant_kvs_while_flush; +} + +/* + * Class: org_rocksdb_Options + * Method: setPurgeRedundantKvsWhileFlush + * Signature: (JZ)V + */ +void Java_org_rocksdb_Options_setPurgeRedundantKvsWhileFlush( + JNIEnv* env, jobject jobj, jlong jhandle, + jboolean jpurge_redundant_kvs_while_flush) { + reinterpret_cast( + jhandle)->purge_redundant_kvs_while_flush = + static_cast(jpurge_redundant_kvs_while_flush); +} + +/* + * Class: org_rocksdb_Options + * Method: blockSizeDeviation + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_blockSizeDeviation( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->block_size_deviation; +} + +/* + * Class: org_rocksdb_Options + * Method: setBlockSizeDeviation + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setBlockSizeDeviation( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jblock_size_deviation) { + reinterpret_cast(jhandle)->block_size_deviation = + static_cast(jblock_size_deviation); +} + +/* + * Class: org_rocksdb_Options + * Method: verifyChecksumsInCompaction + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_Options_verifyChecksumsInCompaction( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->verify_checksums_in_compaction; +} + +/* + * Class: org_rocksdb_Options + * Method: setVerifyChecksumsInCompaction + * Signature: (JZ)V + */ +void 
Java_org_rocksdb_Options_setVerifyChecksumsInCompaction( + JNIEnv* env, jobject jobj, jlong jhandle, + jboolean jverify_checksums_in_compaction) { + reinterpret_cast( + jhandle)->verify_checksums_in_compaction = + static_cast(jverify_checksums_in_compaction); +} + +/* + * Class: org_rocksdb_Options + * Method: filterDeletes + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_Options_filterDeletes( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->filter_deletes; +} + +/* + * Class: org_rocksdb_Options + * Method: setFilterDeletes + * Signature: (JZ)V + */ +void Java_org_rocksdb_Options_setFilterDeletes( + JNIEnv* env, jobject jobj, jlong jhandle, jboolean jfilter_deletes) { + reinterpret_cast(jhandle)->filter_deletes = + static_cast(jfilter_deletes); +} + +/* + * Class: org_rocksdb_Options + * Method: maxSequentialSkipInIterations + * Signature: (J)J + */ +jlong Java_org_rocksdb_Options_maxSequentialSkipInIterations( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->max_sequential_skip_in_iterations; +} + +/* + * Class: org_rocksdb_Options + * Method: setMaxSequentialSkipInIterations + * Signature: (JJ)V + */ +void Java_org_rocksdb_Options_setMaxSequentialSkipInIterations( + JNIEnv* env, jobject jobj, jlong jhandle, + jlong jmax_sequential_skip_in_iterations) { + reinterpret_cast( + jhandle)->max_sequential_skip_in_iterations = + static_cast(jmax_sequential_skip_in_iterations); +} + +/* + * Class: org_rocksdb_Options + * Method: inplaceUpdateSupport + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_Options_inplaceUpdateSupport( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->inplace_update_support; +} + +/* + * Class: org_rocksdb_Options + * Method: setInplaceUpdateSupport + * Signature: (JZ)V + */ +void Java_org_rocksdb_Options_setInplaceUpdateSupport( + JNIEnv* env, jobject jobj, jlong jhandle, + jboolean jinplace_update_support) { + reinterpret_cast( + 
jhandle)->inplace_update_support = + static_cast(jinplace_update_support); +} + +/* + * Class: org_rocksdb_Options + * Method: inplaceUpdateNumLocks + * Signature: (J)J + */ +jlong Java_org_rocksdb_Options_inplaceUpdateNumLocks( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->inplace_update_num_locks; +} + +/* + * Class: org_rocksdb_Options + * Method: setInplaceUpdateNumLocks + * Signature: (JJ)V + */ +void Java_org_rocksdb_Options_setInplaceUpdateNumLocks( + JNIEnv* env, jobject jobj, jlong jhandle, + jlong jinplace_update_num_locks) { + reinterpret_cast( + jhandle)->inplace_update_num_locks = + static_cast(jinplace_update_num_locks); +} + +/* + * Class: org_rocksdb_Options + * Method: memtablePrefixBloomBits + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_memtablePrefixBloomBits( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->memtable_prefix_bloom_bits; +} + +/* + * Class: org_rocksdb_Options + * Method: setMemtablePrefixBloomBits + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setMemtablePrefixBloomBits( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jmemtable_prefix_bloom_bits) { + reinterpret_cast( + jhandle)->memtable_prefix_bloom_bits = + static_cast(jmemtable_prefix_bloom_bits); +} + +/* + * Class: org_rocksdb_Options + * Method: memtablePrefixBloomProbes + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_memtablePrefixBloomProbes( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->memtable_prefix_bloom_probes; +} + +/* + * Class: org_rocksdb_Options + * Method: setMemtablePrefixBloomProbes + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setMemtablePrefixBloomProbes( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jmemtable_prefix_bloom_probes) { + reinterpret_cast( + jhandle)->memtable_prefix_bloom_probes = + static_cast(jmemtable_prefix_bloom_probes); +} + +/* + * Class: org_rocksdb_Options + * Method: 
bloomLocality + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_bloomLocality( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->bloom_locality; +} + +/* + * Class: org_rocksdb_Options + * Method: setBloomLocality + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setBloomLocality( + JNIEnv* env, jobject jobj, jlong jhandle, jint jbloom_locality) { + reinterpret_cast(jhandle)->bloom_locality = + static_cast(jbloom_locality); +} + +/* + * Class: org_rocksdb_Options + * Method: maxSuccessiveMerges + * Signature: (J)J + */ +jlong Java_org_rocksdb_Options_maxSuccessiveMerges( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast(jhandle)->max_successive_merges; +} + +/* + * Class: org_rocksdb_Options + * Method: setMaxSuccessiveMerges + * Signature: (JJ)V + */ +void Java_org_rocksdb_Options_setMaxSuccessiveMerges( + JNIEnv* env, jobject jobj, jlong jhandle, + jlong jmax_successive_merges) { + reinterpret_cast(jhandle)->max_successive_merges = + static_cast(jmax_successive_merges); +} + +/* + * Class: org_rocksdb_Options + * Method: minPartialMergeOperands + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_minPartialMergeOperands( + JNIEnv* env, jobject jobj, jlong jhandle) { + return reinterpret_cast( + jhandle)->min_partial_merge_operands; +} + +/* + * Class: org_rocksdb_Options + * Method: setMinPartialMergeOperands + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setMinPartialMergeOperands( + JNIEnv* env, jobject jobj, jlong jhandle, + jint jmin_partial_merge_operands) { + reinterpret_cast( + jhandle)->min_partial_merge_operands = + static_cast(jmin_partial_merge_operands); +} + ////////////////////////////////////////////////////////////////////////////// // WriteOptions diff --git a/port/stack_trace.cc b/port/stack_trace.cc index aa01fd0cf..76866e63c 100644 --- a/port/stack_trace.cc +++ b/port/stack_trace.cc @@ -3,9 +3,19 @@ // LICENSE file in the root directory of this source tree. 
An additional grant // of patent rights can be found in the PATENTS file in the same directory. // -#include "util/stack_trace.h" +#include "port/stack_trace.h" -#ifdef OS_LINUX +namespace rocksdb { +namespace port { + +#if defined(ROCKSDB_LITE) || !(defined(OS_LINUX) || defined(OS_MACOSX)) + +// noop + +void InstallStackTraceHandler() {} +void PrintStack(int first_frames_to_skip) {} + +#else #include #include @@ -13,11 +23,12 @@ #include #include #include +#include -namespace rocksdb { +namespace { -static const char* GetExecutableName() -{ +#ifdef OS_LINUX +const char* GetExecutableName() { static char name[1024]; char link[1024]; @@ -31,38 +42,68 @@ static const char* GetExecutableName() } } +void PrintStackTraceLine(const char* symbol, void* frame) { + static const char* executable = GetExecutableName(); + if (symbol) { + fprintf(stderr, "%s ", symbol); + } + if (executable) { + // out source to addr2line, for the address translation + const int kLineMax = 256; + char cmd[kLineMax]; + snprintf(cmd, kLineMax, "addr2line %p -e %s -f -C 2>&1", frame, executable); + auto f = popen(cmd, "r"); + if (f) { + char line[kLineMax]; + while (fgets(line, sizeof(line), f)) { + line[strlen(line) - 1] = 0; // remove newline + fprintf(stderr, "%s\t", line); + } + pclose(f); + } + } else { + fprintf(stderr, " %p", frame); + } + + fprintf(stderr, "\n"); +} +#elif OS_MACOSX + +void PrintStackTraceLine(const char* symbol, void* frame) { + static int pid = getpid(); + // out source to atos, for the address translation + const int kLineMax = 256; + char cmd[kLineMax]; + snprintf(cmd, kLineMax, "xcrun atos %p -p %d 2>&1", frame, pid); + auto f = popen(cmd, "r"); + if (f) { + char line[kLineMax]; + while (fgets(line, sizeof(line), f)) { + line[strlen(line) - 1] = 0; // remove newline + fprintf(stderr, "%s\t", line); + } + pclose(f); + } else if (symbol) { + fprintf(stderr, "%s ", symbol); + } + + fprintf(stderr, "\n"); +} + +#endif + +} // namespace + void PrintStack(int 
first_frames_to_skip) { const int kMaxFrames = 100; - void *frames[kMaxFrames]; + void* frames[kMaxFrames]; auto num_frames = backtrace(frames, kMaxFrames); auto symbols = backtrace_symbols(frames, num_frames); - auto executable = GetExecutableName(); - for (int i = first_frames_to_skip; i < num_frames; ++i) { fprintf(stderr, "#%-2d ", i - first_frames_to_skip); - if (symbols) { - fprintf(stderr, "%s ", symbols[i]); - } - if (executable) { - // out source to addr2line, for the address translation - const int kLineMax = 256; - char cmd[kLineMax]; - sprintf(cmd, "addr2line %p -e %s -f -C 2>&1", frames[i], executable); - auto f = popen(cmd, "r"); - if (f) { - char line[kLineMax]; - while (fgets(line, sizeof(line), f)) { - line[strlen(line) - 1] = 0; // remove newline - fprintf(stderr, "%s\t", line); - } - pclose(f); - } - } else { - fprintf(stderr, " %p", frames[i]); - } - fprintf(stderr, "\n"); + PrintStackTraceLine((symbols != nullptr) ? symbols[i] : nullptr, frames[i]); } } @@ -83,20 +124,9 @@ void InstallStackTraceHandler() { signal(SIGSEGV, StackTraceHandler); signal(SIGBUS, StackTraceHandler); signal(SIGABRT, StackTraceHandler); - - printf("Installed stack trace handler for SIGILL SIGSEGV SIGBUS SIGABRT\n"); - } -} // namespace rocksdb - -#else // no-op for non-linux system for now - -namespace rocksdb { - -void InstallStackTraceHandler() {} -void PrintStack(int first_frames_to_skip) {} - -} +#endif -#endif // OS_LINUX +} // namespace port +} // namespace rocksdb diff --git a/util/stack_trace.h b/port/stack_trace.h similarity index 90% rename from util/stack_trace.h rename to port/stack_trace.h index 3b06e1df0..8bc6c7d2e 100644 --- a/util/stack_trace.h +++ b/port/stack_trace.h @@ -5,6 +5,7 @@ // #pragma once namespace rocksdb { +namespace port { // Install a signal handler to print callstack on the following signals: // SIGILL SIGSEGV SIGBUS SIGABRT @@ -14,4 +15,5 @@ void InstallStackTraceHandler(); // Prints stack, skips skip_first_frames frames void 
PrintStack(int first_frames_to_skip = 0); -} // namespace rocksdb +} // namespace port +} // namespace rocksdb diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 35f6a194c..f75726108 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -642,94 +642,6 @@ FilterBlockReader* BlockBasedTable::ReadFilter ( rep->options, block.data, block.heap_allocated); } -// Convert an index iterator value (i.e., an encoded BlockHandle) -// into an iterator over the contents of the corresponding block. -Iterator* BlockBasedTable::DataBlockReader(void* arg, - const ReadOptions& options, - const Slice& index_value, - bool* didIO, bool for_compaction) { - const bool no_io = (options.read_tier == kBlockCacheTier); - BlockBasedTable* table = reinterpret_cast(arg); - Cache* block_cache = table->rep_->options.block_cache.get(); - Cache* block_cache_compressed = table->rep_->options. - block_cache_compressed.get(); - CachableEntry block; - - BlockHandle handle; - Slice input = index_value; - // We intentionally allow extra stuff in index_value so that we - // can add more features in the future. - Status s = handle.DecodeFrom(&input); - - if (!s.ok()) { - return NewErrorIterator(s); - } - - // If either block cache is enabled, we'll try to read from it. 
- if (block_cache != nullptr || block_cache_compressed != nullptr) { - Statistics* statistics = table->rep_->options.statistics.get(); - char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; - char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; - Slice key, /* key to the block cache */ - ckey /* key to the compressed block cache */; - - // create key for block cache - if (block_cache != nullptr) { - key = GetCacheKey(table->rep_->cache_key_prefix, - table->rep_->cache_key_prefix_size, handle, cache_key); - } - - if (block_cache_compressed != nullptr) { - ckey = GetCacheKey(table->rep_->compressed_cache_key_prefix, - table->rep_->compressed_cache_key_prefix_size, handle, - compressed_cache_key); - } - - s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed, - statistics, options, &block); - - if (block.value == nullptr && !no_io && options.fill_cache) { - Histograms histogram = for_compaction ? - READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS; - Block* raw_block = nullptr; - { - StopWatch sw(table->rep_->options.env, statistics, histogram); - s = ReadBlockFromFile(table->rep_->file.get(), options, handle, - &raw_block, table->rep_->options.env, didIO, - block_cache_compressed == nullptr); - } - - if (s.ok()) { - s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed, - options, statistics, &block, raw_block); - } - } - } - - // Didn't get any data from block caches. 
- if (block.value == nullptr) { - if (no_io) { - // Could not read from block_cache and can't do IO - return NewErrorIterator(Status::Incomplete("no blocking io")); - } - s = ReadBlockFromFile(table->rep_->file.get(), options, handle, - &block.value, table->rep_->options.env, didIO); - } - - Iterator* iter; - if (block.value != nullptr) { - iter = block.value->NewIterator(&table->rep_->internal_comparator); - if (block.cache_handle != nullptr) { - iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, - block.cache_handle); - } else { - iter->RegisterCleanup(&DeleteHeldResource, block.value, nullptr); - } - } else { - iter = NewErrorIterator(s); - } - return iter; -} BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( bool no_io) const { @@ -838,13 +750,115 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) { return iter; } -Iterator* BlockBasedTable::DataBlockReader( - void* arg, const ReadOptions& options, const EnvOptions& soptions, - const InternalKeyComparator& icomparator, const Slice& index_value, - bool for_compaction) { - return DataBlockReader(arg, options, index_value, nullptr, for_compaction); +// Convert an index iterator value (i.e., an encoded BlockHandle) +// into an iterator over the contents of the corresponding block. +Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep, + const ReadOptions& ro, bool* didIO, const Slice& index_value) { + const bool no_io = (ro.read_tier == kBlockCacheTier); + Cache* block_cache = rep->options.block_cache.get(); + Cache* block_cache_compressed = rep->options. + block_cache_compressed.get(); + CachableEntry block; + + BlockHandle handle; + Slice input = index_value; + // We intentionally allow extra stuff in index_value so that we + // can add more features in the future. + Status s = handle.DecodeFrom(&input); + + if (!s.ok()) { + return NewErrorIterator(s); + } + + // If either block cache is enabled, we'll try to read from it. 
+ if (block_cache != nullptr || block_cache_compressed != nullptr) { + Statistics* statistics = rep->options.statistics.get(); + char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; + char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; + Slice key, /* key to the block cache */ + ckey /* key to the compressed block cache */; + + // create key for block cache + if (block_cache != nullptr) { + key = GetCacheKey(rep->cache_key_prefix, + rep->cache_key_prefix_size, handle, cache_key); + } + + if (block_cache_compressed != nullptr) { + ckey = GetCacheKey(rep->compressed_cache_key_prefix, + rep->compressed_cache_key_prefix_size, handle, + compressed_cache_key); + } + + s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed, + statistics, ro, &block); + + if (block.value == nullptr && !no_io && ro.fill_cache) { + Histograms histogram = READ_BLOCK_GET_MICROS; + Block* raw_block = nullptr; + { + StopWatch sw(rep->options.env, statistics, histogram); + s = ReadBlockFromFile(rep->file.get(), ro, handle, + &raw_block, rep->options.env, didIO, + block_cache_compressed == nullptr); + } + + if (s.ok()) { + s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed, + ro, statistics, &block, raw_block); + } + } + } + + // Didn't get any data from block caches. 
+ if (block.value == nullptr) { + if (no_io) { + // Could not read from block_cache and can't do IO + return NewErrorIterator(Status::Incomplete("no blocking io")); + } + s = ReadBlockFromFile(rep->file.get(), ro, handle, + &block.value, rep->options.env, didIO); + } + + Iterator* iter; + if (block.value != nullptr) { + iter = block.value->NewIterator(&rep->internal_comparator); + if (block.cache_handle != nullptr) { + iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, + block.cache_handle); + } else { + iter->RegisterCleanup(&DeleteHeldResource, block.value, nullptr); + } + } else { + iter = NewErrorIterator(s); + } + return iter; } +class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState { + public: + BlockEntryIteratorState(BlockBasedTable* table, + const ReadOptions& read_options, bool* did_io) + : TwoLevelIteratorState(table->rep_->options.prefix_extractor != nullptr), + table_(table), read_options_(read_options), did_io_(did_io) {} + + Iterator* NewSecondaryIterator(const Slice& index_value) override { + return NewDataBlockIterator(table_->rep_, read_options_, did_io_, + index_value); + } + + bool PrefixMayMatch(const Slice& internal_key) override { + return table_->PrefixMayMatch(internal_key); + } + + private: + // Don't own table_ + BlockBasedTable* table_; + const ReadOptions read_options_; + // Don't own did_io_ + bool* did_io_; +}; + // This will be broken if the user specifies an unusual implementation // of Options.comparator, or if the user specifies an unusual // definition of prefixes in Options.filter_policy. In particular, we @@ -857,7 +871,13 @@ Iterator* BlockBasedTable::DataBlockReader( // Otherwise, this method guarantees no I/O will be incurred. // // REQUIRES: this method shouldn't be called while the DB lock is held. 
-bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) { +bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) { + assert(rep_->options.prefix_extractor != nullptr); + auto prefix = rep_->options.prefix_extractor->Transform( + ExtractUserKey(internal_key)); + InternalKey internal_key_prefix(prefix, 0, kTypeValue); + auto internal_prefix = internal_key_prefix.Encode(); + bool may_match = true; Status s; @@ -918,20 +938,10 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) { return may_match; } -Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) { - if (options.prefix) { - InternalKey internal_prefix(*options.prefix, 0, kTypeValue); - if (!PrefixMayMatch(internal_prefix.Encode())) { - // nothing in this file can match the prefix, so we should not - // bother doing I/O to this file when iterating. - return NewEmptyIterator(); - } - } - - return NewTwoLevelIterator(NewIndexIterator(options), - &BlockBasedTable::DataBlockReader, - const_cast(this), options, - rep_->soptions, rep_->internal_comparator); +Iterator* BlockBasedTable::NewIterator(const ReadOptions& read_options) { + return NewTwoLevelIterator(new BlockEntryIteratorState(this, read_options, + nullptr), + NewIndexIterator(read_options)); } Status BlockBasedTable::Get( @@ -962,7 +972,7 @@ Status BlockBasedTable::Get( } else { bool didIO = false; unique_ptr block_iter( - DataBlockReader(this, read_options, iiter->value(), &didIO)); + NewDataBlockIterator(rep_, read_options, &didIO, iiter->value())); if (read_options.read_tier && block_iter->status().IsIncomplete()) { // couldn't get block from block_cache @@ -1059,10 +1069,8 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) { return HashIndexReader::Create( file, index_handle, env, comparator, [&](Iterator* index_iter) { - return NewTwoLevelIterator( - index_iter, &BlockBasedTable::DataBlockReader, - const_cast(this), ReadOptions(), - rep_->soptions, rep_->internal_comparator); + 
return NewTwoLevelIterator(new BlockEntryIteratorState(this, + ReadOptions(), nullptr), index_iter); }, rep_->internal_prefix_transform.get(), index_reader); } diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index d48c5d2c7..fbe47272e 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -63,7 +63,7 @@ class BlockBasedTable : public TableReader { unique_ptr&& file, uint64_t file_size, unique_ptr* table_reader); - bool PrefixMayMatch(const Slice& internal_prefix) override; + bool PrefixMayMatch(const Slice& internal_key); // Returns a new iterator over the table contents. // The result of NewIterator() is initially invalid (caller must @@ -111,13 +111,9 @@ class BlockBasedTable : public TableReader { Rep* rep_; bool compaction_optimized_; - static Iterator* DataBlockReader(void*, const ReadOptions&, - const EnvOptions& soptions, - const InternalKeyComparator& icomparator, - const Slice&, bool for_compaction); - - static Iterator* DataBlockReader(void*, const ReadOptions&, const Slice&, - bool* didIO, bool for_compaction = false); + struct BlockEntryIteratorState; + static Iterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro, + bool* didIO, const Slice& index_value); // For the following two functions: // if `no_io == true`, we will not try to read filter/index from sst file diff --git a/table/plain_table_factory.h b/table/plain_table_factory.h index b23620785..84af22fb9 100644 --- a/table/plain_table_factory.h +++ b/table/plain_table_factory.h @@ -2,8 +2,9 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#ifndef ROCKSDB_LITE #pragma once + +#ifndef ROCKSDB_LITE #include #include diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index ac0505a45..196201730 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -104,8 +104,8 @@ PlainTableReader::PlainTableReader( kHashTableRatio(hash_table_ratio), kBloomBitsPerKey(bloom_bits_per_key), kIndexIntervalForSamePrefixKeys(index_sparseness), - table_properties_(table_properties), - data_end_offset_(table_properties_->data_size), + table_properties_(nullptr), + data_end_offset_(table_properties->data_size), user_key_len_(table_properties->fixed_key_len) { assert(kHashTableRatio >= 0.0); } @@ -137,7 +137,7 @@ Status PlainTableReader::Open( bloom_bits_per_key, hash_table_ratio, index_sparseness, props)); // -- Populate Index - s = new_reader->PopulateIndex(); + s = new_reader->PopulateIndex(props); if (!s.ok()) { return s; } @@ -149,12 +149,8 @@ Status PlainTableReader::Open( void PlainTableReader::SetupForCompaction() { } -bool PlainTableReader::PrefixMayMatch(const Slice& internal_prefix) { - return true; -} - Iterator* PlainTableReader::NewIterator(const ReadOptions& options) { - return new PlainTableIterator(this, options.prefix_seek); + return new PlainTableIterator(this, options_.prefix_extractor != nullptr); } struct PlainTableReader::IndexRecord { @@ -364,7 +360,10 @@ void PlainTableReader::FillIndexes( index_size_, kSubIndexSize); } -Status PlainTableReader::PopulateIndex() { +Status PlainTableReader::PopulateIndex(TableProperties* props) { + assert(props != nullptr); + table_properties_.reset(props); + // options.prefix_extractor is requried for a hash-based look-up. if (options_.prefix_extractor.get() == nullptr && kHashTableRatio != 0) { return Status::NotSupported( @@ -409,6 +408,14 @@ Status PlainTableReader::PopulateIndex() { // From the temp data structure, populate indexes. 
FillIndexes(sub_index_size_needed, hash_to_offsets, entries_per_bucket); + // Fill two table properties. + // TODO(sdong): after we have the feature of storing index in file, this + // properties need to be populated to index_size instead. + props->user_collected_properties["plain_table_hash_table_size"] = + std::to_string(index_size_ * 4U); + props->user_collected_properties["plain_table_sub_index_size"] = + std::to_string(sub_index_size_needed); + return Status::OK(); } diff --git a/table/plain_table_reader.h b/table/plain_table_reader.h index debb88372..756439b5c 100644 --- a/table/plain_table_reader.h +++ b/table/plain_table_reader.h @@ -2,8 +2,9 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#ifndef ROCKSDB_LITE #pragma once + +#ifndef ROCKSDB_LITE #include #include #include @@ -53,8 +54,6 @@ class PlainTableReader: public TableReader { const int bloom_bits_per_key, double hash_table_ratio, size_t index_sparseness); - bool PrefixMayMatch(const Slice& internal_prefix); - Iterator* NewIterator(const ReadOptions&); Status Get(const ReadOptions&, const Slice& key, void* arg, @@ -87,6 +86,9 @@ class PlainTableReader: public TableReader { // PopulateIndex() builds index of keys. It must be called before any query // to the table. // + // props: the table properties object that need to be stored. Ownership of + // the object will be passed. + // // index_ contains buckets size of index_size_, each is a // 32-bit integer. The lower 31 bits contain an offset value (explained below) // and the first bit of the integer indicates type of the offset. @@ -122,7 +124,7 @@ class PlainTableReader: public TableReader { // .... 
// record N file offset: fixedint32 // - Status PopulateIndex(); + Status PopulateIndex(TableProperties* props); private: struct IndexRecord; diff --git a/table/table_reader.h b/table/table_reader.h index 3d2738c9c..02a2d16dc 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -25,12 +25,6 @@ class TableReader { public: virtual ~TableReader() {} - // Determine whether there is a chance that the current table file - // contains the key a key starting with iternal_prefix. The specific - // table implementation can use bloom filter and/or other heuristic - // to filter out this table as a whole. - virtual bool PrefixMayMatch(const Slice& internal_prefix) = 0; - // Returns a new iterator over the table contents. // The result of NewIterator() is initially invalid (caller must // call one of the Seek methods on the iterator before using it). diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index 32f6ee618..a0ff0d7f0 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -68,8 +68,6 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, bool through_db, bool measured_by_nanosecond) { rocksdb::InternalKeyComparator ikc(opts.comparator); - Slice prefix = Slice(); - std::string file_name = test::TmpDir() + "/rocksdb_table_reader_benchmark"; std::string dbname = test::TmpDir() + "/rocksdb_table_reader_bench_db"; @@ -156,10 +154,6 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, } std::string start_key = MakeKey(r1, r2, through_db); std::string end_key = MakeKey(r1, r2 + r2_len, through_db); - if (prefix_len < 16) { - prefix = Slice(start_key.data(), prefix_len); - read_options.prefix = &prefix; - } uint64_t total_time = 0; uint64_t start_time = Now(env, measured_by_nanosecond); port::MemoryBarrier(); @@ -254,7 +248,6 @@ int main(int argc, char** argv) { options.compression = rocksdb::CompressionType::kNoCompression; if (FLAGS_plain_table) { - ro.prefix_seek = true; 
options.allow_mmap_reads = true; env_options.use_mmap_reads = true; tf = new rocksdb::PlainTableFactory(16, (FLAGS_prefix_len == 16) ? 0 : 8, diff --git a/table/table_test.cc b/table/table_test.cc index 0426122ff..2e21c5064 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -307,11 +307,9 @@ class KeyConvertingIterator: public Iterator { class TableConstructor: public Constructor { public: explicit TableConstructor(const Comparator* cmp, - bool convert_to_internal_key = false, - bool prefix_seek = false) + bool convert_to_internal_key = false) : Constructor(cmp), - convert_to_internal_key_(convert_to_internal_key), - prefix_seek_(prefix_seek) {} + convert_to_internal_key_(convert_to_internal_key) {} ~TableConstructor() { Reset(); } virtual Status FinishImpl(const Options& options, @@ -352,9 +350,6 @@ class TableConstructor: public Constructor { virtual Iterator* NewIterator() const { ReadOptions ro; - if (prefix_seek_) { - ro.prefix_seek = true; - } Iterator* iter = table_reader_->NewIterator(ro); if (convert_to_internal_key_) { return new KeyConvertingIterator(iter); @@ -388,7 +383,6 @@ class TableConstructor: public Constructor { source_.reset(); } bool convert_to_internal_key_; - bool prefix_seek_; uint64_t uniq_id_; unique_ptr sink_; @@ -434,7 +428,7 @@ class MemTableConstructor: public Constructor { return Status::OK(); } virtual Iterator* NewIterator() const { - return new KeyConvertingIterator(memtable_->NewIterator()); + return new KeyConvertingIterator(memtable_->NewIterator(ReadOptions())); } private: @@ -699,7 +693,7 @@ class Harness { options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2)); options_.allow_mmap_reads = true; options_.table_factory.reset(NewPlainTableFactory()); - constructor_ = new TableConstructor(options_.comparator, true, true); + constructor_ = new TableConstructor(options_.comparator, true); internal_comparator_.reset( new InternalKeyComparator(options_.comparator)); break; @@ -709,7 +703,7 @@ class Harness { 
options_.prefix_extractor.reset(NewNoopTransform()); options_.allow_mmap_reads = true; options_.table_factory.reset(NewPlainTableFactory()); - constructor_ = new TableConstructor(options_.comparator, true, true); + constructor_ = new TableConstructor(options_.comparator, true); internal_comparator_.reset( new InternalKeyComparator(options_.comparator)); break; @@ -719,7 +713,7 @@ class Harness { options_.prefix_extractor = nullptr; options_.allow_mmap_reads = true; options_.table_factory.reset(NewTotalOrderPlainTableFactory()); - constructor_ = new TableConstructor(options_.comparator, true, false); + constructor_ = new TableConstructor(options_.comparator, true); internal_comparator_.reset( new InternalKeyComparator(options_.comparator)); break; @@ -1667,7 +1661,7 @@ TEST(MemTableTest, Simple) { ColumnFamilyMemTablesDefault cf_mems_default(memtable, &options); ASSERT_TRUE(WriteBatchInternal::InsertInto(&batch, &cf_mems_default).ok()); - Iterator* iter = memtable->NewIterator(); + Iterator* iter = memtable->NewIterator(ReadOptions()); iter->SeekToFirst(); while (iter->Valid()) { fprintf(stderr, "key: '%s' -> '%s'\n", diff --git a/table/two_level_iterator.cc b/table/two_level_iterator.cc index 65a58ad93..990f18184 100644 --- a/table/two_level_iterator.cc +++ b/table/two_level_iterator.cc @@ -13,26 +13,17 @@ #include "rocksdb/table.h" #include "table/block.h" #include "table/format.h" -#include "table/iterator_wrapper.h" namespace rocksdb { namespace { -typedef Iterator* (*BlockFunction)(void*, const ReadOptions&, - const EnvOptions& soptions, - const InternalKeyComparator& icomparator, - const Slice&, bool for_compaction); - class TwoLevelIterator: public Iterator { public: - TwoLevelIterator(Iterator* index_iter, BlockFunction block_function, - void* arg, const ReadOptions& options, - const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, - bool for_compaction); + explicit TwoLevelIterator(TwoLevelIteratorState* state, + Iterator* 
first_level_iter); - virtual ~TwoLevelIterator(); + virtual ~TwoLevelIterator() {} virtual void Seek(const Slice& target); virtual void SeekToFirst(); @@ -41,22 +32,23 @@ class TwoLevelIterator: public Iterator { virtual void Prev(); virtual bool Valid() const { - return data_iter_.Valid(); + return second_level_iter_.Valid(); } virtual Slice key() const { assert(Valid()); - return data_iter_.key(); + return second_level_iter_.key(); } virtual Slice value() const { assert(Valid()); - return data_iter_.value(); + return second_level_iter_.value(); } virtual Status status() const { // It'd be nice if status() returned a const Status& instead of a Status - if (!index_iter_.status().ok()) { - return index_iter_.status(); - } else if (data_iter_.iter() != nullptr && !data_iter_.status().ok()) { - return data_iter_.status(); + if (!first_level_iter_.status().ok()) { + return first_level_iter_.status(); + } else if (second_level_iter_.iter() != nullptr && + !second_level_iter_.status().ok()) { + return second_level_iter_.status(); } else { return status_; } @@ -68,135 +60,131 @@ class TwoLevelIterator: public Iterator { } void SkipEmptyDataBlocksForward(); void SkipEmptyDataBlocksBackward(); - void SetDataIterator(Iterator* data_iter); + void SetSecondLevelIterator(Iterator* iter); void InitDataBlock(); - BlockFunction block_function_; - void* arg_; - const ReadOptions options_; - const EnvOptions& soptions_; - const InternalKeyComparator& internal_comparator_; + std::unique_ptr state_; + IteratorWrapper first_level_iter_; + IteratorWrapper second_level_iter_; // May be nullptr Status status_; - IteratorWrapper index_iter_; - IteratorWrapper data_iter_; // May be nullptr - // If data_iter_ is non-nullptr, then "data_block_handle_" holds the - // "index_value" passed to block_function_ to create the data_iter_. + // If second_level_iter is non-nullptr, then "data_block_handle_" holds the + // "index_value" passed to block_function_ to create the second_level_iter. 
std::string data_block_handle_; - bool for_compaction_; }; -TwoLevelIterator::TwoLevelIterator( - Iterator* index_iter, BlockFunction block_function, void* arg, - const ReadOptions& options, const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, bool for_compaction) - : block_function_(block_function), - arg_(arg), - options_(options), - soptions_(soptions), - internal_comparator_(internal_comparator), - index_iter_(index_iter), - data_iter_(nullptr), - for_compaction_(for_compaction) {} - -TwoLevelIterator::~TwoLevelIterator() { -} +TwoLevelIterator::TwoLevelIterator(TwoLevelIteratorState* state, + Iterator* first_level_iter) + : state_(state), first_level_iter_(first_level_iter) {} void TwoLevelIterator::Seek(const Slice& target) { - index_iter_.Seek(target); + if (state_->check_prefix_may_match && + !state_->PrefixMayMatch(target)) { + SetSecondLevelIterator(nullptr); + return; + } + first_level_iter_.Seek(target); + InitDataBlock(); - if (data_iter_.iter() != nullptr) data_iter_.Seek(target); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.Seek(target); + } SkipEmptyDataBlocksForward(); } void TwoLevelIterator::SeekToFirst() { - index_iter_.SeekToFirst(); + first_level_iter_.SeekToFirst(); InitDataBlock(); - if (data_iter_.iter() != nullptr) data_iter_.SeekToFirst(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToFirst(); + } SkipEmptyDataBlocksForward(); } void TwoLevelIterator::SeekToLast() { - index_iter_.SeekToLast(); + first_level_iter_.SeekToLast(); InitDataBlock(); - if (data_iter_.iter() != nullptr) data_iter_.SeekToLast(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToLast(); + } SkipEmptyDataBlocksBackward(); } void TwoLevelIterator::Next() { assert(Valid()); - data_iter_.Next(); + second_level_iter_.Next(); SkipEmptyDataBlocksForward(); } void TwoLevelIterator::Prev() { assert(Valid()); - data_iter_.Prev(); + second_level_iter_.Prev(); 
SkipEmptyDataBlocksBackward(); } void TwoLevelIterator::SkipEmptyDataBlocksForward() { - while (data_iter_.iter() == nullptr || (!data_iter_.Valid() && - !data_iter_.status().IsIncomplete())) { + while (second_level_iter_.iter() == nullptr || + (!second_level_iter_.Valid() && + !second_level_iter_.status().IsIncomplete())) { // Move to next block - if (!index_iter_.Valid()) { - SetDataIterator(nullptr); + if (!first_level_iter_.Valid()) { + SetSecondLevelIterator(nullptr); return; } - index_iter_.Next(); + first_level_iter_.Next(); InitDataBlock(); - if (data_iter_.iter() != nullptr) data_iter_.SeekToFirst(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToFirst(); + } } } void TwoLevelIterator::SkipEmptyDataBlocksBackward() { - while (data_iter_.iter() == nullptr || (!data_iter_.Valid() && - !data_iter_.status().IsIncomplete())) { + while (second_level_iter_.iter() == nullptr || + (!second_level_iter_.Valid() && + !second_level_iter_.status().IsIncomplete())) { // Move to next block - if (!index_iter_.Valid()) { - SetDataIterator(nullptr); + if (!first_level_iter_.Valid()) { + SetSecondLevelIterator(nullptr); return; } - index_iter_.Prev(); + first_level_iter_.Prev(); InitDataBlock(); - if (data_iter_.iter() != nullptr) data_iter_.SeekToLast(); + if (second_level_iter_.iter() != nullptr) { + second_level_iter_.SeekToLast(); + } } } -void TwoLevelIterator::SetDataIterator(Iterator* data_iter) { - if (data_iter_.iter() != nullptr) SaveError(data_iter_.status()); - data_iter_.Set(data_iter); +void TwoLevelIterator::SetSecondLevelIterator(Iterator* iter) { + if (second_level_iter_.iter() != nullptr) { + SaveError(second_level_iter_.status()); + } + second_level_iter_.Set(iter); } void TwoLevelIterator::InitDataBlock() { - if (!index_iter_.Valid()) { - SetDataIterator(nullptr); + if (!first_level_iter_.Valid()) { + SetSecondLevelIterator(nullptr); } else { - Slice handle = index_iter_.value(); - if (data_iter_.iter() != nullptr + Slice handle = 
first_level_iter_.value(); + if (second_level_iter_.iter() != nullptr && handle.compare(data_block_handle_) == 0) { - // data_iter_ is already constructed with this iterator, so + // second_level_iter is already constructed with this iterator, so // no need to change anything } else { - Iterator* iter = - (*block_function_)(arg_, options_, soptions_, internal_comparator_, - handle, for_compaction_); + Iterator* iter = state_->NewSecondaryIterator(handle); data_block_handle_.assign(handle.data(), handle.size()); - SetDataIterator(iter); + SetSecondLevelIterator(iter); } } } } // namespace -Iterator* NewTwoLevelIterator(Iterator* index_iter, - BlockFunction block_function, void* arg, - const ReadOptions& options, - const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, - bool for_compaction) { - return new TwoLevelIterator(index_iter, block_function, arg, options, - soptions, internal_comparator, for_compaction); +Iterator* NewTwoLevelIterator(TwoLevelIteratorState* state, + Iterator* first_level_iter) { + return new TwoLevelIterator(state, first_level_iter); } } // namespace rocksdb diff --git a/table/two_level_iterator.h b/table/two_level_iterator.h index d313dcb18..b8083385b 100644 --- a/table/two_level_iterator.h +++ b/table/two_level_iterator.h @@ -10,12 +10,26 @@ #pragma once #include "rocksdb/iterator.h" #include "rocksdb/env.h" +#include "table/iterator_wrapper.h" namespace rocksdb { struct ReadOptions; class InternalKeyComparator; +struct TwoLevelIteratorState { + explicit TwoLevelIteratorState(bool check_prefix_may_match) + : check_prefix_may_match(check_prefix_may_match) {} + + virtual ~TwoLevelIteratorState() {} + virtual Iterator* NewSecondaryIterator(const Slice& handle) = 0; + virtual bool PrefixMayMatch(const Slice& internal_key) = 0; + + // If call PrefixMayMatch() + bool check_prefix_may_match; +}; + + // Return a new two level iterator. 
A two-level iterator contains an // index iterator whose values point to a sequence of blocks where // each block is itself a sequence of key,value pairs. The returned @@ -25,14 +39,7 @@ class InternalKeyComparator; // // Uses a supplied function to convert an index_iter value into // an iterator over the contents of the corresponding block. -extern Iterator* NewTwoLevelIterator( - Iterator* index_iter, - Iterator* (*block_function)( - void* arg, const ReadOptions& options, const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, - const Slice& index_value, bool for_compaction), - void* arg, const ReadOptions& options, const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, - bool for_compaction = false); +extern Iterator* NewTwoLevelIterator(TwoLevelIteratorState* state, + Iterator* first_level_iter); } // namespace rocksdb diff --git a/tools/db_stress.cc b/tools/db_stress.cc index c7837c38b..8b8523f89 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -547,19 +547,20 @@ class SharedState { public: static const uint32_t SENTINEL; - explicit SharedState(StressTest* stress_test) : - cv_(&mu_), - seed_(FLAGS_seed), - max_key_(FLAGS_max_key), - log2_keys_per_lock_(FLAGS_log2_keys_per_lock), - num_threads_(FLAGS_threads), - num_initialized_(0), - num_populated_(0), - vote_reopen_(0), - num_done_(0), - start_(false), - start_verify_(false), - stress_test_(stress_test) { + explicit SharedState(StressTest* stress_test) + : cv_(&mu_), + seed_(FLAGS_seed), + max_key_(FLAGS_max_key), + log2_keys_per_lock_(FLAGS_log2_keys_per_lock), + num_threads_(FLAGS_threads), + num_initialized_(0), + num_populated_(0), + vote_reopen_(0), + num_done_(0), + start_(false), + start_verify_(false), + stress_test_(stress_test), + verification_failure_(false) { if (FLAGS_test_batches_snapshots) { fprintf(stdout, "No lock creation because test_batches_snapshots set\n"); return; @@ -651,6 +652,10 @@ class SharedState { return start_verify_; } 
+ void SetVerificationFailure() { verification_failure_.store(true); } + + bool HasVerificationFailedYet() { return verification_failure_.load(); } + port::Mutex* GetMutexForKey(int cf, long key) { return &key_locks_[cf][key >> log2_keys_per_lock_]; } @@ -695,6 +700,7 @@ class SharedState { bool start_; bool start_verify_; StressTest* stress_test_; + std::atomic verification_failure_; std::vector> values_; std::vector> key_locks_; @@ -752,7 +758,7 @@ class StressTest { delete filter_policy_; } - void Run() { + bool Run() { PrintEnv(); Open(); SharedState shared(this); @@ -814,6 +820,12 @@ class StressTest { FLAGS_env->TimeToString((uint64_t) now/1000000).c_str()); } PrintStatistics(); + + if (shared.HasVerificationFailedYet()) { + printf("Verification failed :(\n"); + return false; + } + return true; } private: @@ -996,7 +1008,6 @@ class StressTest { prefixes[i].resize(FLAGS_prefix_size); prefix_slices[i] = Slice(prefixes[i]); readoptionscopy[i] = readoptions; - readoptionscopy[i].prefix_seek = true; readoptionscopy[i].snapshot = snapshot; iters[i] = db_->NewIterator(readoptionscopy[i], column_family); iters[i]->Seek(prefix_slices[i]); @@ -1062,7 +1073,6 @@ class StressTest { const Snapshot* snapshot = db_->GetSnapshot(); ReadOptions readoptionscopy = readoptions; readoptionscopy.snapshot = snapshot; - readoptionscopy.prefix_seek = FLAGS_prefix_size > 0; unique_ptr iter(db_->NewIterator(readoptionscopy, column_family)); iter->Seek(key); @@ -1101,7 +1111,10 @@ class StressTest { thread->stats.Start(); for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) { - if(i != 0 && (i % (FLAGS_ops_per_thread / (FLAGS_reopen + 1))) == 0) { + if (thread->shared->HasVerificationFailedYet()) { + break; + } + if (i != 0 && (i % (FLAGS_ops_per_thread / (FLAGS_reopen + 1))) == 0) { { thread->stats.FinishedSingleOp(); MutexLock l(thread->shared->GetMutex()); @@ -1183,7 +1196,6 @@ class StressTest { // prefix if (!FLAGS_test_batches_snapshots) { Slice prefix = Slice(key.data(), 
FLAGS_prefix_size); - read_opts.prefix_seek = true; Iterator* iter = db_->NewIterator(read_opts, column_family); int64_t count = 0; for (iter->Seek(prefix); @@ -1211,8 +1223,10 @@ class StressTest { std::string keystr2 = Key(rand_key); Slice k = keystr2; Status s = db_->Get(read_opts, column_family, k, &from_db); - VerifyValue(rand_column_family, rand_key, read_opts, - *(thread->shared), from_db, s, true); + if (VerifyValue(rand_column_family, rand_key, read_opts, + thread->shared, from_db, s, true) == false) { + break; + } } thread->shared->Put(rand_column_family, rand_key, value_base); if (FLAGS_use_merge) { @@ -1246,22 +1260,27 @@ class StressTest { void VerifyDb(ThreadState* thread) const { ReadOptions options(FLAGS_verify_checksum, true); - const SharedState& shared = *(thread->shared); - static const long max_key = shared.GetMaxKey(); - static const long keys_per_thread = max_key / shared.GetNumThreads(); + auto shared = thread->shared; + static const long max_key = shared->GetMaxKey(); + static const long keys_per_thread = max_key / shared->GetNumThreads(); long start = keys_per_thread * thread->tid; long end = start + keys_per_thread; - if (thread->tid == shared.GetNumThreads() - 1) { + if (thread->tid == shared->GetNumThreads() - 1) { end = max_key; } for (size_t cf = 0; cf < column_families_.size(); ++cf) { + if (thread->shared->HasVerificationFailedYet()) { + break; + } if (!thread->rand.OneIn(2)) { // Use iterator to verify this range - options.prefix_seek = FLAGS_prefix_size > 0; unique_ptr iter( db_->NewIterator(options, column_families_[cf])); iter->Seek(Key(start)); for (long i = start; i < end; i++) { + if (thread->shared->HasVerificationFailedYet()) { + break; + } // TODO(ljin): update "long" to uint64_t // Reseek when the prefix changes if (i % (static_cast(1) << 8 * (8 - FLAGS_prefix_size)) == @@ -1279,7 +1298,7 @@ class StressTest { from_db = iter->value().ToString(); iter->Next(); } else if (iter->key().compare(k) < 0) { - 
VerificationAbort("An out of range key was found", cf, i); + VerificationAbort(shared, "An out of range key was found", cf, i); } } else { // The iterator found no value for the key in question, so do not @@ -1294,6 +1313,9 @@ class StressTest { } else { // Use Get to verify this range for (long i = start; i < end; i++) { + if (thread->shared->HasVerificationFailedYet()) { + break; + } std::string from_db; std::string keystr = Key(i); Slice k = keystr; @@ -1307,38 +1329,48 @@ class StressTest { } } - void VerificationAbort(std::string msg, int cf, long key) const { - fprintf(stderr, "Verification failed for column family %d key %ld: %s\n", - cf, key, msg.c_str()); - exit(1); + void VerificationAbort(SharedState* shared, std::string msg, int cf, + long key) const { + printf("Verification failed for column family %d key %ld: %s\n", cf, key, + msg.c_str()); + shared->SetVerificationFailure(); } - void VerifyValue(int cf, long key, const ReadOptions& opts, - const SharedState& shared, const std::string& value_from_db, + bool VerifyValue(int cf, long key, const ReadOptions& opts, + SharedState* shared, const std::string& value_from_db, Status s, bool strict = false) const { + if (shared->HasVerificationFailedYet()) { + return false; + } // compare value_from_db with the value in the shared state char value[100]; - uint32_t value_base = shared.Get(cf, key); + uint32_t value_base = shared->Get(cf, key); if (value_base == SharedState::SENTINEL && !strict) { - return; + return true; } if (s.ok()) { if (value_base == SharedState::SENTINEL) { - VerificationAbort("Unexpected value found", cf, key); + VerificationAbort(shared, "Unexpected value found", cf, key); + return false; } size_t sz = GenerateValue(value_base, value, sizeof(value)); if (value_from_db.length() != sz) { - VerificationAbort("Length of value read is not equal", cf, key); + VerificationAbort(shared, "Length of value read is not equal", cf, key); + return false; } if (memcmp(value_from_db.data(), value, sz) != 
0) { - VerificationAbort("Contents of value read don't match", cf, key); + VerificationAbort(shared, "Contents of value read don't match", cf, + key); + return false; } } else { if (value_base != SharedState::SENTINEL) { - VerificationAbort("Value not found", cf, key); + VerificationAbort(shared, "Value not found: " + s.ToString(), cf, key); + return false; } } + return true; } static void PrintKeyValue(int cf, uint32_t key, const char* value, @@ -1693,6 +1725,9 @@ int main(int argc, char** argv) { } rocksdb::StressTest stress; - stress.Run(); - return 0; + if (stress.Run()) { + return 0; + } else { + return 1; + } } diff --git a/util/hash_linklist_rep.cc b/util/hash_linklist_rep.cc index 9e77afa3e..64aa2d9e8 100644 --- a/util/hash_linklist_rep.cc +++ b/util/hash_linklist_rep.cc @@ -73,9 +73,6 @@ class HashLinkListRep : public MemTableRep { virtual MemTableRep::Iterator* GetIterator(const Slice& slice) override; - virtual MemTableRep::Iterator* GetPrefixIterator(const Slice& prefix) - override; - virtual MemTableRep::Iterator* GetDynamicPrefixIterator() override; private: @@ -429,19 +426,14 @@ MemTableRep::Iterator* HashLinkListRep::GetIterator() { return new FullListIterator(list, new_arena); } -MemTableRep::Iterator* HashLinkListRep::GetPrefixIterator( - const Slice& prefix) { - auto bucket = GetBucket(prefix); +MemTableRep::Iterator* HashLinkListRep::GetIterator(const Slice& slice) { + auto bucket = GetBucket(transform_->Transform(slice)); if (bucket == nullptr) { return new EmptyIterator(); } return new Iterator(this, bucket); } -MemTableRep::Iterator* HashLinkListRep::GetIterator(const Slice& slice) { - return GetPrefixIterator(transform_->Transform(slice)); -} - MemTableRep::Iterator* HashLinkListRep::GetDynamicPrefixIterator() { return new DynamicIterator(*this); } diff --git a/util/hash_skiplist_rep.cc b/util/hash_skiplist_rep.cc index e27ec5949..21df9f62b 100644 --- a/util/hash_skiplist_rep.cc +++ b/util/hash_skiplist_rep.cc @@ -42,9 +42,6 @@ class 
HashSkipListRep : public MemTableRep { virtual MemTableRep::Iterator* GetIterator(const Slice& slice) override; - virtual MemTableRep::Iterator* GetPrefixIterator(const Slice& prefix) - override; - virtual MemTableRep::Iterator* GetDynamicPrefixIterator() override; private: @@ -307,18 +304,14 @@ MemTableRep::Iterator* HashSkipListRep::GetIterator() { return new Iterator(list, true, new_arena); } -MemTableRep::Iterator* HashSkipListRep::GetPrefixIterator(const Slice& prefix) { - auto bucket = GetBucket(prefix); +MemTableRep::Iterator* HashSkipListRep::GetIterator(const Slice& slice) { + auto bucket = GetBucket(transform_->Transform(slice)); if (bucket == nullptr) { return new EmptyIterator(); } return new Iterator(bucket, false); } -MemTableRep::Iterator* HashSkipListRep::GetIterator(const Slice& slice) { - return GetPrefixIterator(transform_->Transform(slice)); -} - MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator() { return new DynamicIterator(*this); } diff --git a/util/signal_test.cc b/util/signal_test.cc index d3446818d..f51fa548e 100644 --- a/util/signal_test.cc +++ b/util/signal_test.cc @@ -3,7 +3,7 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. // -#include "util/stack_trace.h" +#include "port/stack_trace.h" #include namespace { @@ -26,7 +26,7 @@ void f3() { } // namespace int main() { - rocksdb::InstallStackTraceHandler(); + rocksdb::port::InstallStackTraceHandler(); f3(); diff --git a/util/testharness.cc b/util/testharness.cc index 85716cdae..4208d2c46 100644 --- a/util/testharness.cc +++ b/util/testharness.cc @@ -8,11 +8,11 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "util/testharness.h" - #include #include #include #include +#include "port/stack_trace.h" namespace rocksdb { namespace test { @@ -39,6 +39,8 @@ bool RegisterTest(const char* base, const char* name, void (*func)()) { } int RunAllTests() { + port::InstallStackTraceHandler(); + const char* matcher = getenv("ROCKSDB_TESTS"); int num = 0; diff --git a/util/testharness.h b/util/testharness.h index f15917816..52c29848d 100644 --- a/util/testharness.h +++ b/util/testharness.h @@ -12,10 +12,10 @@ #include #include #include +#include "port/stack_trace.h" #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "util/random.h" -#include "util/stack_trace.h" namespace rocksdb { namespace test { @@ -59,7 +59,7 @@ class Tester { ~Tester() { if (!ok_) { fprintf(stderr, "%s:%d:%s\n", fname_, line_, ss_.str().c_str()); - PrintStack(2); + port::PrintStack(2); exit(1); } } diff --git a/util/thread_local.cc b/util/thread_local.cc index 1b4220b8f..bc8a4c7d2 100644 --- a/util/thread_local.cc +++ b/util/thread_local.cc @@ -14,20 +14,14 @@ namespace rocksdb { -std::unique_ptr ThreadLocalPtr::StaticMeta::inst_; port::Mutex ThreadLocalPtr::StaticMeta::mutex_; #if !defined(OS_MACOSX) __thread ThreadLocalPtr::ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr; #endif -ThreadLocalPtr::StaticMeta* ThreadLocalPtr::StaticMeta::Instance() { - if (UNLIKELY(inst_ == nullptr)) { - MutexLock l(&mutex_); - if (inst_ == nullptr) { - inst_.reset(new StaticMeta()); - } - } - return inst_.get(); +ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() { + static ThreadLocalPtr::StaticMeta inst; + return &inst; } void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) { @@ -216,34 +210,34 @@ void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id) { } ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler) - : id_(StaticMeta::Instance()->GetId()) { + : id_(Instance()->GetId()) { if (handler != nullptr) { - StaticMeta::Instance()->SetHandler(id_, handler); + Instance()->SetHandler(id_, 
handler); } } ThreadLocalPtr::~ThreadLocalPtr() { - StaticMeta::Instance()->ReclaimId(id_); + Instance()->ReclaimId(id_); } void* ThreadLocalPtr::Get() const { - return StaticMeta::Instance()->Get(id_); + return Instance()->Get(id_); } void ThreadLocalPtr::Reset(void* ptr) { - StaticMeta::Instance()->Reset(id_, ptr); + Instance()->Reset(id_, ptr); } void* ThreadLocalPtr::Swap(void* ptr) { - return StaticMeta::Instance()->Swap(id_, ptr); + return Instance()->Swap(id_, ptr); } bool ThreadLocalPtr::CompareAndSwap(void* ptr, void*& expected) { - return StaticMeta::Instance()->CompareAndSwap(id_, ptr, expected); + return Instance()->CompareAndSwap(id_, ptr, expected); } void ThreadLocalPtr::Scrape(autovector* ptrs, void* const replacement) { - StaticMeta::Instance()->Scrape(id_, ptrs, replacement); + Instance()->Scrape(id_, ptrs, replacement); } } // namespace rocksdb diff --git a/util/thread_local.h b/util/thread_local.h index a7728ed64..a037a9ceb 100644 --- a/util/thread_local.h +++ b/util/thread_local.h @@ -89,7 +89,7 @@ class ThreadLocalPtr { class StaticMeta { public: - static StaticMeta* Instance(); + StaticMeta(); // Return the next available Id uint32_t GetId(); @@ -117,8 +117,6 @@ class ThreadLocalPtr { void SetHandler(uint32_t id, UnrefHandler handler); private: - StaticMeta(); - // Get UnrefHandler for id with acquiring mutex // REQUIRES: mutex locked UnrefHandler GetHandler(uint32_t id); @@ -136,9 +134,6 @@ class ThreadLocalPtr { static ThreadData* GetThreadLocal(); - // Singleton instance - static std::unique_ptr inst_; - uint32_t next_instance_id_; // Used to recycle Ids in case ThreadLocalPtr is instantiated and destroyed // frequently. This also prevents it from blowing up the vector space. 
@@ -163,6 +158,8 @@ class ThreadLocalPtr { pthread_key_t pthread_key_; }; + static StaticMeta* Instance(); + const uint32_t id_; }; diff --git a/util/thread_local_test.cc b/util/thread_local_test.cc index d273947a8..70dfa956e 100644 --- a/util/thread_local_test.cc +++ b/util/thread_local_test.cc @@ -49,7 +49,7 @@ struct Params { class IDChecker : public ThreadLocalPtr { public: - static uint32_t PeekId() { return StaticMeta::Instance()->PeekId(); } + static uint32_t PeekId() { return Instance()->PeekId(); } }; } // anonymous namespace diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index 26ffcb456..ca1fb504a 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -87,7 +87,8 @@ void BackupableDBOptions::Dump(Logger* logger) const { // -------- BackupEngineImpl class --------- class BackupEngineImpl : public BackupEngine { public: - BackupEngineImpl(Env* db_env, const BackupableDBOptions& options); + BackupEngineImpl(Env* db_env, const BackupableDBOptions& options, + bool read_only = false); ~BackupEngineImpl(); Status CreateNewBackup(DB* db, bool flush_before_backup = false); Status PurgeOldBackups(uint32_t num_backups_to_keep); @@ -149,7 +150,7 @@ class BackupEngineImpl : public BackupEngine { Status AddFile(const FileInfo& file_info); - void Delete(); + void Delete(bool delete_meta = true); bool Empty() { return files_.empty(); @@ -258,6 +259,7 @@ class BackupEngineImpl : public BackupEngine { static const size_t kDefaultCopyFileBufferSize = 5 * 1024 * 1024LL; // 5MB size_t copy_file_buffer_size_; + bool read_only_; }; BackupEngine* BackupEngine::NewBackupEngine( @@ -266,27 +268,34 @@ BackupEngine* BackupEngine::NewBackupEngine( } BackupEngineImpl::BackupEngineImpl(Env* db_env, - const BackupableDBOptions& options) + const BackupableDBOptions& options, + bool read_only) : stop_backup_(false), options_(options), db_env_(db_env), backup_env_(options.backup_env != nullptr ? 
options.backup_env : db_env_), - copy_file_buffer_size_(kDefaultCopyFileBufferSize) { + copy_file_buffer_size_(kDefaultCopyFileBufferSize), + read_only_(read_only) { + if (read_only_) { + Log(options_.info_log, "Starting read_only backup engine"); + } options_.Dump(options_.info_log); - // create all the dirs we need - backup_env_->CreateDirIfMissing(GetAbsolutePath()); - backup_env_->NewDirectory(GetAbsolutePath(), &backup_directory_); - if (options_.share_table_files) { - backup_env_->CreateDirIfMissing(GetAbsolutePath(GetSharedFileRel())); - backup_env_->NewDirectory(GetAbsolutePath(GetSharedFileRel()), - &shared_directory_); + if (!read_only_) { + // create all the dirs we need + backup_env_->CreateDirIfMissing(GetAbsolutePath()); + backup_env_->NewDirectory(GetAbsolutePath(), &backup_directory_); + if (options_.share_table_files) { + backup_env_->CreateDirIfMissing(GetAbsolutePath(GetSharedFileRel())); + backup_env_->NewDirectory(GetAbsolutePath(GetSharedFileRel()), + &shared_directory_); + } + backup_env_->CreateDirIfMissing(GetAbsolutePath(GetPrivateDirRel())); + backup_env_->NewDirectory(GetAbsolutePath(GetPrivateDirRel()), + &private_directory_); + backup_env_->CreateDirIfMissing(GetBackupMetaDir()); + backup_env_->NewDirectory(GetBackupMetaDir(), &meta_directory_); } - backup_env_->CreateDirIfMissing(GetAbsolutePath(GetPrivateDirRel())); - backup_env_->NewDirectory(GetAbsolutePath(GetPrivateDirRel()), - &private_directory_); - backup_env_->CreateDirIfMissing(GetBackupMetaDir()); - backup_env_->NewDirectory(GetBackupMetaDir(), &meta_directory_); std::vector backup_meta_files; backup_env_->GetChildren(GetBackupMetaDir(), &backup_meta_files); @@ -295,8 +304,10 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env, BackupID backup_id = 0; sscanf(file.c_str(), "%u", &backup_id); if (backup_id == 0 || file != std::to_string(backup_id)) { - // invalid file name, delete that - backup_env_->DeleteFile(GetBackupMetaDir() + "/" + file); + if (!read_only_) { + // invalid 
file name, delete that + backup_env_->DeleteFile(GetBackupMetaDir() + "/" + file); + } continue; } assert(backups_.find(backup_id) == backups_.end()); @@ -306,6 +317,7 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env, } if (options_.destroy_old_data) { // Destory old data + assert(!read_only_); for (auto& backup : backups_) { backup.second.Delete(); obsolete_backups_.push_back(backup.first); @@ -319,9 +331,12 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env, for (auto& backup : backups_) { Status s = backup.second.LoadFromFile(options_.backup_dir); if (!s.ok()) { - Log(options_.info_log, "Backup %u corrupted - deleting -- %s", - backup.first, s.ToString().c_str()); - backup.second.Delete(); + Log(options_.info_log, "Backup %u corrupted -- %s", backup.first, + s.ToString().c_str()); + if (!read_only_) { + Log(options_.info_log, "-> Deleting backup %u", backup.first); + } + backup.second.Delete(!read_only_); obsolete_backups_.push_back(backup.first); } } @@ -331,6 +346,7 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env, } Status s = GetLatestBackupFileContents(&latest_backup_id_); + // If latest backup file is corrupted or non-existent // set latest backup as the biggest backup we have // or 0 if we have no backups @@ -349,16 +365,18 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env, itr = backups_.erase(itr); } - PutLatestBackupFileContents(latest_backup_id_); // Ignore errors - GarbageCollection(true); - Log(options_.info_log, - "Initialized BackupEngine, the latest backup is %u.", + if (!read_only_) { + PutLatestBackupFileContents(latest_backup_id_); // Ignore errors + GarbageCollection(true); + } + Log(options_.info_log, "Initialized BackupEngine, the latest backup is %u.", latest_backup_id_); } BackupEngineImpl::~BackupEngineImpl() { LogFlush(options_.info_log); } Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) { + assert(!read_only_); Status s; std::vector live_files; VectorLogPtr live_wal_files; @@ -499,6 +517,7 @@ Status 
BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) { } Status BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) { + assert(!read_only_); Log(options_.info_log, "Purging old backups, keeping %u", num_backups_to_keep); while (num_backups_to_keep < backups_.size()) { @@ -512,6 +531,7 @@ Status BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) { } Status BackupEngineImpl::DeleteBackup(BackupID backup_id) { + assert(!read_only_); Log(options_.info_log, "Deleting backup %u", backup_id); auto backup = backups_.find(backup_id); if (backup == backups_.end()) { @@ -662,6 +682,7 @@ Status BackupEngineImpl::GetLatestBackupFileContents(uint32_t* latest_backup) { // do something like 1. delete file, 2. write new file // We write to a tmp file and then atomically rename Status BackupEngineImpl::PutLatestBackupFileContents(uint32_t latest_backup) { + assert(!read_only_); Status s; unique_ptr file; EnvOptions env_options; @@ -871,6 +892,7 @@ void BackupEngineImpl::DeleteChildren(const std::string& dir, } void BackupEngineImpl::GarbageCollection(bool full_scan) { + assert(!read_only_); Log(options_.info_log, "Starting garbage collection"); std::vector to_delete; for (auto& itr : backuped_file_infos_) { @@ -973,7 +995,7 @@ Status BackupEngineImpl::BackupMeta::AddFile(const FileInfo& file_info) { return Status::OK(); } -void BackupEngineImpl::BackupMeta::Delete() { +void BackupEngineImpl::BackupMeta::Delete(bool delete_meta) { for (const auto& file : files_) { auto itr = file_infos_->find(file); assert(itr != file_infos_->end()); @@ -981,7 +1003,9 @@ void BackupEngineImpl::BackupMeta::Delete() { } files_.clear(); // delete meta file - env_->DeleteFile(meta_filename_); + if (delete_meta) { + env_->DeleteFile(meta_filename_); + } timestamp_ = 0; } @@ -1107,6 +1131,45 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) { return s; } +// -------- BackupEngineReadOnlyImpl --------- +class BackupEngineReadOnlyImpl : public 
BackupEngineReadOnly { + public: + BackupEngineReadOnlyImpl(Env* db_env, const BackupableDBOptions& options) { + backup_engine_ = new BackupEngineImpl(db_env, options, true); + } + virtual ~BackupEngineReadOnlyImpl() {} + + virtual void GetBackupInfo(std::vector* backup_info) { + backup_engine_->GetBackupInfo(backup_info); + } + + virtual Status RestoreDBFromBackup( + BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, + const RestoreOptions& restore_options = RestoreOptions()) { + return backup_engine_->RestoreDBFromBackup(backup_id, db_dir, wal_dir, + restore_options); + } + + virtual Status RestoreDBFromLatestBackup( + const std::string& db_dir, const std::string& wal_dir, + const RestoreOptions& restore_options = RestoreOptions()) { + return backup_engine_->RestoreDBFromLatestBackup(db_dir, wal_dir, + restore_options); + } + + private: + BackupEngineImpl* backup_engine_; +}; + +BackupEngineReadOnly* BackupEngineReadOnly::NewReadOnlyBackupEngine( + Env* db_env, const BackupableDBOptions& options) { + if (options.destroy_old_data) { + assert(false); + return nullptr; + } + return new BackupEngineReadOnlyImpl(db_env, options); +} + // --- BackupableDB methods -------- BackupableDB::BackupableDB(DB* db, const BackupableDBOptions& options) diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index f6ffd9487..563800556 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -178,6 +178,12 @@ class TestEnv : public EnvWrapper { return EnvWrapper::NewWritableFile(f, r, options); } + virtual Status DeleteFile(const std::string& fname) override { + ASSERT_GT(limit_delete_files_, 0); + limit_delete_files_--; + return EnvWrapper::DeleteFile(fname); + } + void AssertWrittenFiles(std::vector& should_have_written) { sort(should_have_written.begin(), should_have_written.end()); sort(written_files_.begin(), written_files_.end()); @@ -192,6 +198,8 @@ class 
TestEnv : public EnvWrapper { limit_written_files_ = limit; } + void SetLimitDeleteFiles(uint64_t limit) { limit_delete_files_ = limit; } + void SetDummySequentialFile(bool dummy_sequential_file) { dummy_sequential_file_ = dummy_sequential_file; } @@ -200,7 +208,8 @@ class TestEnv : public EnvWrapper { bool dummy_sequential_file_ = false; std::vector written_files_; uint64_t limit_written_files_ = 1000000; -}; // TestEnv + uint64_t limit_delete_files_ = 1000000; +}; // TestEnv class FileManager : public EnvWrapper { public: @@ -864,7 +873,38 @@ TEST(BackupableDBTest, RateLimiting) { } } -} // anon namespace +TEST(BackupableDBTest, ReadOnlyBackupEngine) { + DestroyDB(dbname_, Options()); + OpenBackupableDB(true); + FillDB(db_.get(), 0, 100); + ASSERT_OK(db_->CreateNewBackup(true)); + FillDB(db_.get(), 100, 200); + ASSERT_OK(db_->CreateNewBackup(true)); + CloseBackupableDB(); + DestroyDB(dbname_, Options()); + + backupable_options_->destroy_old_data = false; + test_backup_env_->ClearWrittenFiles(); + test_backup_env_->SetLimitDeleteFiles(0); + auto read_only_backup_engine = + BackupEngineReadOnly::NewReadOnlyBackupEngine(env_, *backupable_options_); + std::vector backup_info; + read_only_backup_engine->GetBackupInfo(&backup_info); + ASSERT_EQ(backup_info.size(), 2U); + + RestoreOptions restore_options(false); + ASSERT_OK(read_only_backup_engine->RestoreDBFromLatestBackup( + dbname_, dbname_, restore_options)); + delete read_only_backup_engine; + std::vector should_have_written; + test_backup_env_->AssertWrittenFiles(should_have_written); + + DB* db = OpenDB(); + AssertExists(db, 0, 200); + delete db; +} + +} // anon namespace } // namespace rocksdb