Merge branch 'master' into performance

Branch: main
Author: kailiu (11 years ago)
Commit: 551e9428ce
37 changed files (lines changed):

   1. INSTALL.md (4)
   2. README (4)
   3. build_tools/regression_build_test.sh (24)
   4. db/db_bench.cc (44)
   5. db/db_impl.cc (68)
   6. db/db_impl.h (14)
   7. db/db_iter.cc (10)
   8. db/db_statistics.cc (14)
   9. db/db_statistics.h (4)
  10. db/db_test.cc (372)
  11. db/memtable.cc (4)
  12. db/memtable.h (2)
  13. db/memtablelist.cc (7)
  14. db/memtablelist.h (3)
  15. db/perf_context_test.cc (4)
  16. db/prefix_test.cc (1)
  17. db/repair.cc (4)
  18. db/write_batch_test.cc (4)
  19. include/rocksdb/compaction_filter.h (10)
  20. include/rocksdb/db.h (7)
  21. include/rocksdb/memtablerep.h (32)
  22. include/rocksdb/options.h (30)
  23. include/rocksdb/statistics.h (7)
  24. include/utilities/stackable_db.h (4)
  25. table/block_based_table_builder.cc (7)
  26. table/block_based_table_reader.cc (10)
  27. table/block_based_table_reader.h (4)
  28. table/table_test.cc (44)
  29. tools/db_stress.cc (28)
  30. util/hash_skiplist_rep.cc (41)
  31. util/hash_skiplist_rep.h (45)
  32. util/ldb_cmd.cc (58)
  33. util/options.cc (9)
  34. util/stl_wrappers.h (19)
  35. util/transformrep.cc (426)
  36. utilities/ttl/db_ttl.cc (4)
  37. utilities/ttl/db_ttl.h (2)

INSTALL.md
@@ -19,13 +19,13 @@ libraries. You are on your own.
 * **Linux**
     * Upgrade your gcc to version at least 4.7 to get C++11 support.
-    * Install gflags. If you're on Ubuntu, here's a nice tutorial:
+    * Install gflags. First, try: `sudo apt-get install libgflags-dev`.
+      If this doesn't work and you're using Ubuntu, here's a nice tutorial:
       (http://askubuntu.com/questions/312173/installing-gflags-12-04)
     * Install snappy. This is usually as easy as:
       `sudo apt-get install libsnappy-dev`.
     * Install zlib. Try: `sudo apt-get install zlib1g-dev`.
     * Install bzip2: `sudo apt-get install libbz2-dev`.
-    * Install gflags: `sudo apt-get install libgflags-dev`.
 * **OS X**:
     * Install latest C++ compiler that supports C++ 11:
         * Update XCode: run `xcode-select --install` (or install it from XCode App's settting).

README
@@ -16,8 +16,8 @@ The core of this code has been derived from open-source leveldb.
 The code under this directory implements a system for maintaining a
 persistent key/value store.
 
-See doc/index.html for more explanation.
-See doc/impl.html for a brief overview of the implementation.
+See doc/index.html and github wiki (https://github.com/facebook/rocksdb/wiki)
+for more explanation.
 
 The public interface is in include/*. Callers should not include or
 rely on the details of any other header files in this package. Those

build_tools/regression_build_test.sh
@@ -65,7 +65,7 @@ OPT=-DNDEBUG make db_bench -j$(nproc)
     --sync=0 \
     --threads=8 > ${STAT_FILE}.overwrite

-# fill up the db for readrandom benchmark
+# fill up the db for readrandom benchmark (1GB total size)
 ./db_bench \
     --benchmarks=fillseq \
     --db=$DATA_DIR \
@@ -83,7 +83,7 @@ OPT=-DNDEBUG make db_bench -j$(nproc)
     --sync=0 \
     --threads=1 > /dev/null

-# measure readrandom
+# measure readrandom with 6GB block cache
 ./db_bench \
     --benchmarks=readrandom \
     --db=$DATA_DIR \
@@ -102,6 +102,25 @@ OPT=-DNDEBUG make db_bench -j$(nproc)
     --sync=0 \
     --threads=32 > ${STAT_FILE}.readrandom

+# measure readrandom with 300MB block cache
+./db_bench \
+    --benchmarks=readrandom \
+    --db=$DATA_DIR \
+    --use_existing_db=1 \
+    --bloom_bits=10 \
+    --num=$NUM \
+    --reads=$NUM \
+    --cache_size=314572800 \
+    --cache_numshardbits=8 \
+    --open_files=55000 \
+    --disable_seek_compaction=1 \
+    --statistics=1 \
+    --histogram=1 \
+    --disable_data_sync=1 \
+    --disable_wal=1 \
+    --sync=0 \
+    --threads=32 > ${STAT_FILE}.readrandomsmallblockcache
+
 # measure memtable performance -- none of the data gets flushed to disk
 ./db_bench \
     --benchmarks=fillrandom,readrandom, \
@@ -154,5 +173,6 @@ function send_benchmark_to_ods {
 send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite
 send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq
 send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom
+send_benchmark_to_ods readrandom readrandom_smallblockcache $STAT_FILE.readrandomsmallblockcache
 send_benchmark_to_ods fillrandom memtablefillrandom $STAT_FILE.memtablefillreadrandom
 send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadrandom

db/db_bench.cc
@@ -191,6 +191,10 @@ DEFINE_int32(universal_max_merge_width, 0, "The max number of files to compact"
 DEFINE_int32(universal_max_size_amplification_percent, 0,
              "The max size amplification for universal style compaction");

+DEFINE_int32(universal_compression_size_percent, -1,
+             "The percentage of the database to compress for universal "
+             "compaction. -1 means compress everything.");
+
 DEFINE_int64(cache_size, -1, "Number of bytes to use as a cache of uncompressed"
              "data. Negative means use default settings.");
@@ -325,6 +329,23 @@ DEFINE_string(compression_type, "snappy",
 static enum rocksdb::CompressionType FLAGS_compression_type_e =
     rocksdb::kSnappyCompression;

+DEFINE_int32(compression_level, -1,
+             "Compression level. For zlib this should be -1 for the "
+             "default level, or between 0 and 9.");
+
+static bool ValidateCompressionLevel(const char* flagname, int32_t value) {
+  if (value < -1 || value > 9) {
+    fprintf(stderr, "Invalid value for --%s: %d, must be between -1 and 9\n",
+            flagname, value);
+    return false;
+  }
+  return true;
+}
+
+static const bool FLAGS_compression_level_dummy =
+  google::RegisterFlagValidator(&FLAGS_compression_level,
+                                &ValidateCompressionLevel);
+
 DEFINE_int32(min_level_to_compress, -1, "If non-negative, compression starts"
              " from this level. Levels with number < min_level_to_compress are"
              " not compressed. Otherwise, apply compression_type to "
@@ -434,12 +455,11 @@ static bool ValidatePrefixSize(const char* flagname, int32_t value) {
   }
   return true;
 }
-DEFINE_int32(prefix_size, 0, "Control the prefix size for PrefixHashRep");
+DEFINE_int32(prefix_size, 0, "Control the prefix size for HashSkipList");

 enum RepFactory {
   kSkipList,
   kPrefixHash,
-  kUnsorted,
   kVectorRep
 };
@@ -449,8 +469,6 @@ enum RepFactory StringToRepFactory(const char* ctype) {
     return kSkipList;
   else if (!strcasecmp(ctype, "prefix_hash"))
     return kPrefixHash;
-  else if (!strcasecmp(ctype, "unsorted"))
-    return kUnsorted;
   else if (!strcasecmp(ctype, "vector"))
     return kVectorRep;
@@ -807,9 +825,6 @@ class Benchmark {
       case kSkipList:
         fprintf(stdout, "Memtablerep: skip_list\n");
         break;
-      case kUnsorted:
-        fprintf(stdout, "Memtablerep: unsorted\n");
-        break;
       case kVectorRep:
         fprintf(stdout, "Memtablerep: vector\n");
         break;
@@ -1334,14 +1349,8 @@ class Benchmark {
     }
     switch (FLAGS_rep_factory) {
       case kPrefixHash:
-        options.memtable_factory.reset(
-          new PrefixHashRepFactory(NewFixedPrefixTransform(FLAGS_prefix_size))
-        );
-        break;
-      case kUnsorted:
-        options.memtable_factory.reset(
-          new UnsortedRepFactory
-        );
+        options.memtable_factory.reset(NewHashSkipListRepFactory(
+            NewFixedPrefixTransform(FLAGS_prefix_size)));
         break;
       case kSkipList:
         // no need to do anything
@@ -1368,6 +1377,7 @@ class Benchmark {
     options.level0_slowdown_writes_trigger =
       FLAGS_level0_slowdown_writes_trigger;
     options.compression = FLAGS_compression_type_e;
+    options.compression_opts.level = FLAGS_compression_level;
     options.WAL_ttl_seconds = FLAGS_wal_ttl_seconds;
     options.WAL_size_limit_MB = FLAGS_wal_size_limit_MB;
     if (FLAGS_min_level_to_compress >= 0) {
@@ -1429,6 +1439,10 @@ class Benchmark {
       options.compaction_options_universal.max_size_amplification_percent =
         FLAGS_universal_max_size_amplification_percent;
     }
+    if (FLAGS_universal_compression_size_percent != -1) {
+      options.compaction_options_universal.compression_size_percent =
+        FLAGS_universal_compression_size_percent;
+    }

     Status s;
     if(FLAGS_readonly) {
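
The two new flags above feed straight into Options fields. A minimal sketch of the equivalent programmatic setup (the zlib level 6 and the 50 percent value are illustrative choices, not values from this commit):

```cpp
#include "rocksdb/options.h"

rocksdb::Options MakeCompressionTunedOptions() {
  rocksdb::Options options;
  options.compression = rocksdb::kZlibCompression;
  // Mirrors --compression_level: -1 asks zlib for its default level.
  options.compression_opts.level = 6;
  // Mirrors --universal_compression_size_percent: compress only part of the
  // database under universal compaction; -1 means compress everything.
  options.compaction_options_universal.compression_size_percent = 50;
  return options;
}
```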

db/db_impl.cc
@@ -51,6 +51,7 @@
 #include "util/auto_roll_logger.h"
 #include "util/build_version.h"
 #include "util/coding.h"
+#include "util/hash_skiplist_rep.h"
 #include "util/logging.h"
 #include "util/mutexlock.h"
 #include "util/perf_context_imp.h"
@@ -163,10 +164,10 @@ Options SanitizeOptions(const std::string& dbname,
     Log(result.info_log, "Compaction filter specified, ignore factory");
   }
   if (result.prefix_extractor) {
-    // If a prefix extractor has been supplied and a PrefixHashRepFactory is
+    // If a prefix extractor has been supplied and a HashSkipListRepFactory is
     // being used, make sure that the latter uses the former as its transform
     // function.
-    auto factory = dynamic_cast<PrefixHashRepFactory*>(
+    auto factory = dynamic_cast<HashSkipListRepFactory*>(
       result.memtable_factory.get());
     if (factory &&
         factory->GetTransform() != result.prefix_extractor) {
@@ -236,7 +237,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
       mutex_(options.use_adaptive_mutex),
       shutting_down_(nullptr),
       bg_cv_(&mutex_),
-      mem_rep_factory_(options_.memtable_factory),
+      mem_rep_factory_(options_.memtable_factory.get()),
       mem_(new MemTable(internal_comparator_, mem_rep_factory_,
                         NumberLevels(), options_)),
       logfile_number_(0),
@@ -516,6 +517,19 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state,
 // files in sst_delete_files and log_delete_files.
 // It is not necessary to hold the mutex when invoking this method.
 void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
+  // free pending memtables
+  for (auto m : state.memtables_to_free) {
+    delete m;
+  }
+
+  // check if there is anything to do
+  if (!state.all_files.size() &&
+      !state.sst_delete_files.size() &&
+      !state.log_delete_files.size()) {
+    return;
+  }
+
   // this checks if FindObsoleteFiles() was run before. If not, don't do
   // PurgeObsoleteFiles(). If FindObsoleteFiles() was run, we need to also
   // run PurgeObsoleteFiles(), even if disable_delete_obsolete_files_ is true
@@ -1170,7 +1184,7 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
   // Replace immutable memtable with the generated Table
   s = imm_.InstallMemtableFlushResults(
     mems, versions_.get(), s, &mutex_, options_.info_log.get(),
-    file_number, pending_outputs_);
+    file_number, pending_outputs_, &deletion_state.memtables_to_free);

   if (s.ok()) {
     if (madeProgress) {
@@ -1656,7 +1670,7 @@ Status DBImpl::BackgroundFlush(bool* madeProgress,
 void DBImpl::BackgroundCallFlush() {
   bool madeProgress = false;
-  DeletionState deletion_state;
+  DeletionState deletion_state(options_.max_write_buffer_number);
   assert(bg_flush_scheduled_);
   MutexLock l(&mutex_);
@@ -1702,7 +1716,7 @@ void DBImpl::TEST_PurgeObsoleteteWAL() {
 void DBImpl::BackgroundCallCompaction() {
   bool madeProgress = false;
-  DeletionState deletion_state;
+  DeletionState deletion_state(options_.max_write_buffer_number);

   MaybeDumpStats();
@@ -1732,6 +1746,7 @@ void DBImpl::BackgroundCallCompaction() {
   // FindObsoleteFiles(). This is because deletion_state does not catch
   // all created files if compaction failed.
   FindObsoleteFiles(deletion_state, !s.ok());
+
   // delete unnecessary files if any, this is done outside the mutex
   if (deletion_state.HaveSomethingToDelete()) {
     mutex_.Unlock();
@@ -2492,25 +2507,20 @@ struct IterState {
 static void CleanupIteratorState(void* arg1, void* arg2) {
   IterState* state = reinterpret_cast<IterState*>(arg1);
-  std::vector<MemTable*> to_delete;
-  to_delete.reserve(state->mem.size());
+  DBImpl::DeletionState deletion_state(state->db->GetOptions().
+                                       max_write_buffer_number);
   state->mu->Lock();
   for (unsigned int i = 0; i < state->mem.size(); i++) {
     MemTable* m = state->mem[i]->Unref();
     if (m != nullptr) {
-      to_delete.push_back(m);
+      deletion_state.memtables_to_free.push_back(m);
     }
   }
   state->version->Unref();
-  // delete only the sst obsolete files
-  DBImpl::DeletionState deletion_state;
   // fast path FindObsoleteFiles
   state->db->FindObsoleteFiles(deletion_state, false, true);
   state->mu->Unlock();
   state->db->PurgeObsoleteFiles(deletion_state);
-  // delete obsolete memtables outside the db-mutex
-  for (MemTable* m : to_delete) delete m;
   delete state;
 }
 } // namespace
@@ -2612,8 +2622,10 @@ Status DBImpl::GetImpl(const ReadOptions& options,
   BumpPerfTime(&perf_context.get_snapshot_time, &snapshot_timer);
   if (mem->Get(lkey, value, &s, merge_context, options_)) {
     // Done
+    RecordTick(options_.statistics.get(), MEMTABLE_HIT);
   } else if (imm.Get(lkey, value, &s, merge_context, options_)) {
     // Done
+    RecordTick(options_.statistics.get(), MEMTABLE_HIT);
   } else {
     StopWatchNano from_files_timer(env_, false);
     StartPerfTimer(&from_files_timer);
@@ -2622,6 +2634,7 @@ Status DBImpl::GetImpl(const ReadOptions& options,
                      options_, value_found);
     have_stat_update = true;
     BumpPerfTime(&perf_context.get_from_output_files_time, &from_files_timer);
+    RecordTick(options_.statistics.get(), MEMTABLE_MISS);
   }

   StopWatchNano post_process_timer(env_, false);
@@ -3514,6 +3527,33 @@ void DBImpl::GetLiveFilesMetaData(std::vector<LiveFileMetaData> *metadata) {
   return versions_->GetLiveFilesMetaData(metadata);
 }

+Status DBImpl::GetDbIdentity(std::string& identity) {
+  std::string idfilename = IdentityFileName(dbname_);
+  unique_ptr<SequentialFile> idfile;
+  const EnvOptions soptions;
+  Status s = env_->NewSequentialFile(idfilename, &idfile, soptions);
+  if (!s.ok()) {
+    return s;
+  }
+  uint64_t file_size;
+  s = env_->GetFileSize(idfilename, &file_size);
+  if (!s.ok()) {
+    return s;
+  }
+  char buffer[file_size];
+  Slice id;
+  s = idfile->Read(file_size, &id, buffer);
+  if (!s.ok()) {
+    return s;
+  }
+  identity.assign(id.ToString());
+  // If last character is '\n' remove it from identity
+  if (identity.size() > 0 && identity.back() == '\n') {
+    identity.pop_back();
+  }
+  return s;
+}
+
 // Default implementations of convenience methods that subclasses of DB
 // can call if they wish
 Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) {

db/db_impl.h
@@ -85,6 +85,8 @@ class DBImpl : public DB {
   virtual void GetLiveFilesMetaData(
     std::vector<LiveFileMetaData> *metadata);

+  virtual Status GetDbIdentity(std::string& identity);
+
   // Extra methods (for testing) that are not in the public DB interface

   // Compact any files in the named level that overlap [*begin, *end]
@@ -129,10 +131,12 @@ class DBImpl : public DB {
   struct DeletionState {
     inline bool HaveSomethingToDelete() const {
-      return all_files.size() ||
+      return memtables_to_free.size() ||
+        all_files.size() ||
         sst_delete_files.size() ||
         log_delete_files.size();
     }
+
     // a list of all files that we'll consider deleting
     // (every once in a while this is filled up with all files
     // in the DB directory)
@@ -147,14 +151,18 @@ class DBImpl : public DB {
     // a list of log files that we need to delete
     std::vector<uint64_t> log_delete_files;

+    // a list of memtables to be free
+    std::vector<MemTable *> memtables_to_free;
+
     // the current manifest_file_number, log_number and prev_log_number
     // that corresponds to the set of files in 'live'.
     uint64_t manifest_file_number, log_number, prev_log_number;

-    DeletionState() {
+    explicit DeletionState(const int num_memtables = 0) {
       manifest_file_number = 0;
       log_number = 0;
       prev_log_number = 0;
+      memtables_to_free.reserve(num_memtables);
     }
   };
@@ -309,7 +317,7 @@ class DBImpl : public DB {
   port::Mutex mutex_;
   port::AtomicPointer shutting_down_;
   port::CondVar bg_cv_;          // Signalled when background work finishes
-  std::shared_ptr<MemTableRepFactory> mem_rep_factory_;
+  MemTableRepFactory* mem_rep_factory_;
   MemTable* mem_;
   MemTableList imm_;             // Memtable that are not changing
   uint64_t logfile_number_;

db/db_iter.cc
@@ -61,7 +61,7 @@ class DBIter: public Iterator {
         const Comparator* cmp, Iterator* iter, SequenceNumber s)
       : dbname_(dbname),
         env_(env),
-        logger_(options.info_log),
+        logger_(options.info_log.get()),
         user_comparator_(cmp),
         user_merge_operator_(options.merge_operator.get()),
         iter_(iter),
@@ -123,7 +123,7 @@ class DBIter: public Iterator {
   const std::string* const dbname_;
   Env* const env_;
-  shared_ptr<Logger> logger_;
+  Logger* logger_;
   const Comparator* const user_comparator_;
   const MergeOperator* const user_merge_operator_;
   Iterator* const iter_;
@@ -302,7 +302,7 @@ void DBIter::MergeValuesNewToOld() {
       // ignore corruption if there is any.
       const Slice value = iter_->value();
       user_merge_operator_->FullMerge(ikey.user_key, &value, operands,
-                                      &saved_value_, logger_.get());
+                                      &saved_value_, logger_);
       // iter_ is positioned after put
       iter_->Next();
       return;
@@ -319,7 +319,7 @@ void DBIter::MergeValuesNewToOld() {
                                             Slice(operands[0]),
                                             Slice(operands[1]),
                                             &merge_result,
-                                            logger_.get())) {
+                                            logger_)) {
         operands.pop_front();
         swap(operands.front(), merge_result);
       } else {
@@ -336,7 +336,7 @@ void DBIter::MergeValuesNewToOld() {
     // feed null as the existing value to the merge operator, such that
     // client can differentiate this scenario and do things accordingly.
     user_merge_operator_->FullMerge(saved_key_, nullptr, operands,
-                                    &saved_value_, logger_.get());
+                                    &saved_value_, logger_);
   }

 void DBIter::Prev() {

db/db_statistics.cc (new file)
@@ -0,0 +1,14 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+
+#include "db/db_statistics.h"
+
+namespace rocksdb {
+
+std::shared_ptr<Statistics> CreateDBStatistics() {
+  return std::make_shared<DBStatistics>();
+}
+
+} // namespace rocksdb

db/db_statistics.h
@@ -58,8 +58,6 @@ class DBStatistics: public Statistics {
   std::vector<HistogramImpl> allHistograms_;
 };

-std::shared_ptr<Statistics> CreateDBStatistics() {
-  return std::make_shared<DBStatistics>();
-}
+std::shared_ptr<Statistics> CreateDBStatistics();

 } // namespace rocksdb

db/db_test.cc
@@ -245,7 +245,6 @@ class DBTest {
   enum OptionConfig {
     kDefault,
     kVectorRep,
-    kUnsortedRep,
     kMergePut,
     kFilter,
     kUncompressed,
@@ -256,7 +255,7 @@ class DBTest {
     kCompactOnFlush,
     kPerfOptions,
     kDeletesFilterFirst,
-    kPrefixHashRep,
+    kHashSkipList,
     kUniversalCompaction,
     kCompressedBlockCache,
     kEnd
@@ -340,9 +339,9 @@ class DBTest {
   Options CurrentOptions() {
     Options options;
     switch (option_config_) {
-      case kPrefixHashRep:
-        options.memtable_factory.reset(new
-          PrefixHashRepFactory(NewFixedPrefixTransform(1)));
+      case kHashSkipList:
+        options.memtable_factory.reset(
+          NewHashSkipListRepFactory(NewFixedPrefixTransform(1)));
         break;
       case kMergePut:
         options.merge_operator = MergeOperators::CreatePutOperator();
@@ -376,9 +375,6 @@ class DBTest {
       case kDeletesFilterFirst:
        options.filter_deletes = true;
         break;
-      case kUnsortedRep:
-        options.memtable_factory.reset(new UnsortedRepFactory);
-        break;
       case kVectorRep:
         options.memtable_factory.reset(new VectorRepFactory(100));
         break;
@@ -1776,31 +1772,23 @@ TEST(DBTest, ManifestRollOver) {
 TEST(DBTest, IdentityAcrossRestarts) {
   do {
-    std::string idfilename = IdentityFileName(dbname_);
-    unique_ptr<SequentialFile> idfile;
-    const EnvOptions soptions;
-    ASSERT_OK(env_->NewSequentialFile(idfilename, &idfile, soptions));
-    char buffer1[100];
-    Slice id1;
-    ASSERT_OK(idfile->Read(100, &id1, buffer1));
+    std::string id1;
+    ASSERT_OK(db_->GetDbIdentity(id1));

     Options options = CurrentOptions();
     Reopen(&options);
-    char buffer2[100];
-    Slice id2;
-    ASSERT_OK(env_->NewSequentialFile(idfilename, &idfile, soptions));
-    ASSERT_OK(idfile->Read(100, &id2, buffer2));
+    std::string id2;
+    ASSERT_OK(db_->GetDbIdentity(id2));
     // id1 should match id2 because identity was not regenerated
-    ASSERT_EQ(id1.ToString(), id2.ToString());
+    ASSERT_EQ(id1.compare(id2), 0);

+    std::string idfilename = IdentityFileName(dbname_);
     ASSERT_OK(env_->DeleteFile(idfilename));
     Reopen(&options);
-    char buffer3[100];
-    Slice id3;
-    ASSERT_OK(env_->NewSequentialFile(idfilename, &idfile, soptions));
-    ASSERT_OK(idfile->Read(100, &id3, buffer3));
-    // id1 should NOT match id2 because identity was regenerated
-    ASSERT_NE(id1.ToString(0), id3.ToString());
+    std::string id3;
+    ASSERT_OK(db_->GetDbIdentity(id3));
+    // id1 should NOT match id3 because identity was regenerated
+    ASSERT_NE(id1.compare(id3), 0);
   } while (ChangeCompactOptions());
 }
@@ -1856,94 +1844,6 @@ TEST(DBTest, CompactionsGenerateMultipleFiles) {
   }
 }

-// TODO(kailiu) disable the in non-linux platforms to temporarily solve
-// the unit test failure.
-#ifdef OS_LINUX
-TEST(DBTest, CompressedCache) {
-  int num_iter = 80;
-
-  // Run this test three iterations.
-  // Iteration 1: only a uncompressed block cache
-  // Iteration 2: only a compressed block cache
-  // Iteration 3: both block cache and compressed cache
-  for (int iter = 0; iter < 3; iter++) {
-    Options options = CurrentOptions();
-    options.write_buffer_size = 64*1024;        // small write buffer
-    options.statistics = rocksdb::CreateDBStatistics();
-
-    switch (iter) {
-      case 0:
-        // only uncompressed block cache
-        options.block_cache = NewLRUCache(8*1024);
-        options.block_cache_compressed = nullptr;
-        break;
-      case 1:
-        // no block cache, only compressed cache
-        options.no_block_cache = true;
-        options.block_cache = nullptr;
-        options.block_cache_compressed = NewLRUCache(8*1024);
-        break;
-      case 2:
-        // both compressed and uncompressed block cache
-        options.block_cache = NewLRUCache(1024);
-        options.block_cache_compressed = NewLRUCache(8*1024);
-        break;
-      default:
-        ASSERT_TRUE(false);
-    }
-    Reopen(&options);
-
-    Random rnd(301);
-
-    // Write 8MB (80 values, each 100K)
-    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
-    std::vector<std::string> values;
-    std::string str;
-    for (int i = 0; i < num_iter; i++) {
-      if (i % 4 == 0) {        // high compression ratio
-        str = RandomString(&rnd, 1000);
-      }
-      values.push_back(str);
-      ASSERT_OK(Put(Key(i), values[i]));
-    }
-
-    // flush all data from memtable so that reads are from block cache
-    dbfull()->Flush(FlushOptions());
-
-    for (int i = 0; i < num_iter; i++) {
-      ASSERT_EQ(Get(Key(i)), values[i]);
-    }
-
-    // check that we triggered the appropriate code paths in the cache
-    switch (iter) {
-      case 0:
-        // only uncompressed block cache
-        ASSERT_GT(options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS),
-                  0);
-        ASSERT_EQ(options.statistics.get()->getTickerCount
-                  (BLOCK_CACHE_COMPRESSED_MISS), 0);
-        break;
-      case 1:
-        // no block cache, only compressed cache
-        ASSERT_EQ(options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS),
-                  0);
-        ASSERT_GT(options.statistics.get()->getTickerCount
-                  (BLOCK_CACHE_COMPRESSED_MISS), 0);
-        break;
-      case 2:
-        // both compressed and uncompressed block cache
-        ASSERT_GT(options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS),
-                  0);
-        ASSERT_GT(options.statistics.get()->getTickerCount
-                  (BLOCK_CACHE_COMPRESSED_MISS), 0);
-        break;
-      default:
-        ASSERT_TRUE(false);
-    }
-  }
-}
-#endif
-
 TEST(DBTest, CompactionTrigger) {
   Options options = CurrentOptions();
   options.write_buffer_size = 100<<10; //100KB
@@ -2185,9 +2085,91 @@ TEST(DBTest, UniversalCompactionOptions) {
   }
 }

-// TODO(kailiu) disable the in non-linux platforms to temporarily solve
-// the unit test failure.
-#ifdef OS_LINUX
+#if defined(SNAPPY) && defined(ZLIB) && defined(BZIP2)
+TEST(DBTest, CompressedCache) {
+  int num_iter = 80;
+
+  // Run this test three iterations.
+  // Iteration 1: only a uncompressed block cache
+  // Iteration 2: only a compressed block cache
+  // Iteration 3: both block cache and compressed cache
+  for (int iter = 0; iter < 3; iter++) {
+    Options options = CurrentOptions();
+    options.write_buffer_size = 64*1024;        // small write buffer
+    options.statistics = rocksdb::CreateDBStatistics();
+
+    switch (iter) {
+      case 0:
+        // only uncompressed block cache
+        options.block_cache = NewLRUCache(8*1024);
+        options.block_cache_compressed = nullptr;
+        break;
+      case 1:
+        // no block cache, only compressed cache
+        options.no_block_cache = true;
+        options.block_cache = nullptr;
+        options.block_cache_compressed = NewLRUCache(8*1024);
+        break;
+      case 2:
+        // both compressed and uncompressed block cache
+        options.block_cache = NewLRUCache(1024);
+        options.block_cache_compressed = NewLRUCache(8*1024);
+        break;
+      default:
+        ASSERT_TRUE(false);
+    }
+    Reopen(&options);
+
+    Random rnd(301);
+
+    // Write 8MB (80 values, each 100K)
+    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
+    std::vector<std::string> values;
+    std::string str;
+    for (int i = 0; i < num_iter; i++) {
+      if (i % 4 == 0) {        // high compression ratio
+        str = RandomString(&rnd, 1000);
+      }
+      values.push_back(str);
+      ASSERT_OK(Put(Key(i), values[i]));
+    }
+
+    // flush all data from memtable so that reads are from block cache
+    dbfull()->Flush(FlushOptions());
+
+    for (int i = 0; i < num_iter; i++) {
+      ASSERT_EQ(Get(Key(i)), values[i]);
+    }
+
+    // check that we triggered the appropriate code paths in the cache
+    switch (iter) {
+      case 0:
+        // only uncompressed block cache
+        ASSERT_GT(options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS),
+                  0);
+        ASSERT_EQ(options.statistics.get()->getTickerCount
+                  (BLOCK_CACHE_COMPRESSED_MISS), 0);
+        break;
+      case 1:
+        // no block cache, only compressed cache
+        ASSERT_EQ(options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS),
+                  0);
+        ASSERT_GT(options.statistics.get()->getTickerCount
+                  (BLOCK_CACHE_COMPRESSED_MISS), 0);
+        break;
+      case 2:
+        // both compressed and uncompressed block cache
+        ASSERT_GT(options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS),
+                  0);
+        ASSERT_GT(options.statistics.get()->getTickerCount
+                  (BLOCK_CACHE_COMPRESSED_MISS), 0);
+        break;
+      default:
+        ASSERT_TRUE(false);
+    }
+  }
+}
+
 static std::string CompressibleString(Random* rnd, int len) {
   std::string r;
   test::CompressibleString(rnd, 0.8, len, &r);
@@ -4535,6 +4517,10 @@ class ModelDB: public DB {
     return Status::OK();
   }

+  virtual Status GetDbIdentity(std::string& identity) {
+    return Status::OK();
+  }
+
   virtual SequenceNumber GetLatestSequenceNumber() const {
     return 0;
   }
@@ -4647,7 +4633,7 @@ TEST(DBTest, Randomized) {
     // TODO(sanjay): Test Get() works
     int p = rnd.Uniform(100);
     int minimum = 0;
-    if (option_config_ == kPrefixHashRep) {
+    if (option_config_ == kHashSkipList) {
       minimum = 1;
     }
     if (p < 45) {                               // Put
@@ -4817,90 +4803,82 @@ void PrefixScanInit(DBTest *dbtest) {
 }

 TEST(DBTest, PrefixScan) {
-  for (int it = 0; it < 2; ++it) {
-    ReadOptions ro = ReadOptions();
-    int count;
-    Slice prefix;
-    Slice key;
-    char buf[100];
-    Iterator* iter;
-    snprintf(buf, sizeof(buf), "03______:");
-    prefix = Slice(buf, 8);
-    key = Slice(buf, 9);
-    auto prefix_extractor = NewFixedPrefixTransform(8);
-    // db configs
-    env_->count_random_reads_ = true;
-    Options options = CurrentOptions();
-    options.env = env_;
-    options.no_block_cache = true;
-    options.filter_policy = NewBloomFilterPolicy(10);
-    options.prefix_extractor = prefix_extractor;
-    options.whole_key_filtering = false;
-    options.disable_auto_compactions = true;
-    options.max_background_compactions = 2;
-    options.create_if_missing = true;
-    options.disable_seek_compaction = true;
-    if (it == 0) {
-      options.memtable_factory.reset(NewHashSkipListRepFactory(
-          prefix_extractor));
-    } else {
-      options.memtable_factory = std::make_shared<PrefixHashRepFactory>(
-          prefix_extractor);
-    }
+  ReadOptions ro = ReadOptions();
+  int count;
+  Slice prefix;
+  Slice key;
+  char buf[100];
+  Iterator* iter;
+  snprintf(buf, sizeof(buf), "03______:");
+  prefix = Slice(buf, 8);
+  key = Slice(buf, 9);
+  auto prefix_extractor = NewFixedPrefixTransform(8);
+  // db configs
+  env_->count_random_reads_ = true;
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.no_block_cache = true;
+  options.filter_policy = NewBloomFilterPolicy(10);
+  options.prefix_extractor = prefix_extractor;
+  options.whole_key_filtering = false;
+  options.disable_auto_compactions = true;
+  options.max_background_compactions = 2;
+  options.create_if_missing = true;
+  options.disable_seek_compaction = true;
+  options.memtable_factory.reset(NewHashSkipListRepFactory(prefix_extractor));

-    // prefix specified, with blooms: 2 RAND I/Os
-    // SeekToFirst
-    DestroyAndReopen(&options);
-    PrefixScanInit(this);
-    count = 0;
-    env_->random_read_counter_.Reset();
-    ro.prefix = &prefix;
-    iter = db_->NewIterator(ro);
-    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-      assert(iter->key().starts_with(prefix));
-      count++;
-    }
-    ASSERT_OK(iter->status());
-    delete iter;
-    ASSERT_EQ(count, 2);
-    ASSERT_EQ(env_->random_read_counter_.Read(), 2);
+  // prefix specified, with blooms: 2 RAND I/Os
+  // SeekToFirst
+  DestroyAndReopen(&options);
+  PrefixScanInit(this);
+  count = 0;
+  env_->random_read_counter_.Reset();
+  ro.prefix = &prefix;
+  iter = db_->NewIterator(ro);
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    assert(iter->key().starts_with(prefix));
+    count++;
+  }
+  ASSERT_OK(iter->status());
+  delete iter;
+  ASSERT_EQ(count, 2);
+  ASSERT_EQ(env_->random_read_counter_.Read(), 2);

-    // prefix specified, with blooms: 2 RAND I/Os
-    // Seek
-    DestroyAndReopen(&options);
-    PrefixScanInit(this);
-    count = 0;
-    env_->random_read_counter_.Reset();
-    ro.prefix = &prefix;
-    iter = db_->NewIterator(ro);
-    for (iter->Seek(key); iter->Valid(); iter->Next()) {
-      assert(iter->key().starts_with(prefix));
-      count++;
-    }
-    ASSERT_OK(iter->status());
-    delete iter;
-    ASSERT_EQ(count, 2);
-    ASSERT_EQ(env_->random_read_counter_.Read(), 2);
+  // prefix specified, with blooms: 2 RAND I/Os
+  // Seek
+  DestroyAndReopen(&options);
+  PrefixScanInit(this);
+  count = 0;
+  env_->random_read_counter_.Reset();
+  ro.prefix = &prefix;
+  iter = db_->NewIterator(ro);
+  for (iter->Seek(key); iter->Valid(); iter->Next()) {
+    assert(iter->key().starts_with(prefix));
+    count++;
+  }
+  ASSERT_OK(iter->status());
+  delete iter;
+  ASSERT_EQ(count, 2);
+  ASSERT_EQ(env_->random_read_counter_.Read(), 2);

-    // no prefix specified: 11 RAND I/Os
-    DestroyAndReopen(&options);
-    PrefixScanInit(this);
-    count = 0;
-    env_->random_read_counter_.Reset();
-    iter = db_->NewIterator(ReadOptions());
-    for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
-      if (! iter->key().starts_with(prefix)) {
-        break;
-      }
-      count++;
-    }
-    ASSERT_OK(iter->status());
-    delete iter;
-    ASSERT_EQ(count, 2);
-    ASSERT_EQ(env_->random_read_counter_.Read(), 11);
-    Close();
-    delete options.filter_policy;
-  }
+  // no prefix specified: 11 RAND I/Os
+  DestroyAndReopen(&options);
+  PrefixScanInit(this);
+  count = 0;
+  env_->random_read_counter_.Reset();
+  iter = db_->NewIterator(ReadOptions());
+  for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
+    if (! iter->key().starts_with(prefix)) {
+      break;
+    }
+    count++;
+  }
+  ASSERT_OK(iter->status());
+  delete iter;
+  ASSERT_EQ(count, 2);
+  ASSERT_EQ(env_->random_read_counter_.Read(), 11);
+  Close();
+  delete options.filter_policy;
 }

 std::string MakeKey(unsigned int num) {

db/memtable.cc
@@ -36,7 +36,7 @@ struct hash<rocksdb::Slice> {
 namespace rocksdb {

 MemTable::MemTable(const InternalKeyComparator& cmp,
-                   std::shared_ptr<MemTableRepFactory> table_factory,
+                   MemTableRepFactory* table_factory,
                    int numlevel,
                    const Options& options)
     : comparator_(cmp),
@@ -281,7 +281,7 @@ bool MemTable::Update(SequenceNumber seq, ValueType type,
   Slice memkey = lkey.memtable_key();
   std::shared_ptr<MemTableRep::Iterator> iter(
-    table_.get()->GetIterator(lkey.user_key()));
+    table_->GetIterator(lkey.user_key()));
   iter->Seek(key, memkey.data());

   if (iter->Valid()) {

db/memtable.h
@@ -36,7 +36,7 @@ class MemTable {
   // is zero and the caller must call Ref() at least once.
   explicit MemTable(
     const InternalKeyComparator& comparator,
-    std::shared_ptr<MemTableRepFactory> table_factory,
+    MemTableRepFactory* table_factory,
     int numlevel = 7,
     const Options& options = Options());

db/memtablelist.cc
@@ -80,7 +80,8 @@ Status MemTableList::InstallMemtableFlushResults(
                         VersionSet* vset, Status flushStatus,
                         port::Mutex* mu, Logger* info_log,
                         uint64_t file_number,
-                        std::set<uint64_t>& pending_outputs) {
+                        std::set<uint64_t>& pending_outputs,
+                        std::vector<MemTable*>* to_delete) {
   mu->AssertHeld();

   // If the flush was not successful, then just reset state.
@@ -151,7 +152,9 @@ Status MemTableList::InstallMemtableFlushResults(
       // executing compaction threads do not mistakenly assume that this
       // file is not live.
       pending_outputs.erase(m->file_number_);
-      m->Unref();
+      if (m->Unref() != nullptr) {
+        to_delete->push_back(m);
+      }
       size_--;
     } else {
       //commit failed. setup state so that we can flush again.
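
This hunk is where the commit's new Unref() contract shows up: instead of freeing itself when the reference count hits zero, the memtable is handed back so the caller can defer the delete (here into `to_delete`, drained later outside the mutex). A minimal sketch of the idiom; `ReleaseMemTable` is a hypothetical helper, not part of the commit:

```cpp
#include "db/memtable.h"  // internal header, for illustration only

// Hypothetical helper showing the new ownership rule: Unref() returns the
// MemTable* once the reference count drops to zero, and the caller deletes it.
static void ReleaseMemTable(rocksdb::MemTable* mem) {
  rocksdb::MemTable* to_delete = mem->Unref();
  if (to_delete != nullptr) {
    delete to_delete;  // compact form used in repair.cc: delete mem->Unref();
  }
}
```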

db/memtablelist.h
@@ -65,7 +65,8 @@ class MemTableList {
                                      VersionSet* vset, Status flushStatus,
                                      port::Mutex* mu, Logger* info_log,
                                      uint64_t file_number,
-                                     std::set<uint64_t>& pending_outputs);
+                                     std::set<uint64_t>& pending_outputs,
+                                     std::vector<MemTable*>* to_delete);

   // New memtables are inserted at the front of the list.
   // Takes ownership of the referenced held on *m by the caller of Add().

db/perf_context_test.cc
@@ -38,8 +38,8 @@ std::shared_ptr<DB> OpenDb() {
     if (FLAGS_use_set_based_memetable) {
       auto prefix_extractor = rocksdb::NewFixedPrefixTransform(0);
-      options.memtable_factory =
-        std::make_shared<rocksdb::PrefixHashRepFactory>(prefix_extractor);
+      options.memtable_factory.reset(
+        NewHashSkipListRepFactory(prefix_extractor));
     }

     Status s = DB::Open(options, kDbName, &db);

db/prefix_test.cc
@@ -11,7 +11,6 @@
 #include "util/testharness.h"

 DEFINE_bool(use_prefix_hash_memtable, true, "");
-DEFINE_bool(use_nolock_version, true, "");
 DEFINE_bool(trigger_deadlock, false,
             "issue delete in range scan to trigger PrefixHashMap deadlock");
 DEFINE_uint64(bucket_count, 100000, "number of buckets");

db/repair.cc
@@ -196,7 +196,7 @@ class Repairer {
     std::string scratch;
     Slice record;
     WriteBatch batch;
-    MemTable* mem = new MemTable(icmp_, options_.memtable_factory,
+    MemTable* mem = new MemTable(icmp_, options_.memtable_factory.get(),
                                  options_.num_levels);
     mem->Ref();
     int counter = 0;
@@ -227,7 +227,7 @@ class Repairer {
                            table_cache_, iter, &meta,
                            icmp_.user_comparator(), 0, 0, true);
     delete iter;
-    mem->Unref();
+    delete mem->Unref();
     mem = nullptr;
     if (status.ok()) {
       if (meta.file_size > 0) {

db/write_batch_test.cc
@@ -22,7 +22,7 @@ namespace rocksdb {
 static std::string PrintContents(WriteBatch* b) {
   InternalKeyComparator cmp(BytewiseComparator());
   auto factory = std::make_shared<SkipListFactory>();
-  MemTable* mem = new MemTable(cmp, factory);
+  MemTable* mem = new MemTable(cmp, factory.get());
   mem->Ref();
   std::string state;
   Options options;
@@ -69,7 +69,7 @@ static std::string PrintContents(WriteBatch* b) {
   } else if (count != WriteBatchInternal::Count(b)) {
     state.append("CountMismatch()");
   }
-  mem->Unref();
+  delete mem->Unref();
   return state;
 }

include/rocksdb/compaction_filter.h
@@ -40,6 +40,16 @@ class CompactionFilter {
   // When the value is to be preserved, the application has the option
   // to modify the existing_value and pass it back through new_value.
   // value_changed needs to be set to true in this case.
+  //
+  // If multithreaded compaction is being used *and* a single CompactionFilter
+  // instance was supplied via Options::compaction_filter, this method may be
+  // called from different threads concurrently. The application must ensure
+  // that the call is thread-safe.
+  //
+  // If the CompactionFilter was created by a factory, then it will only ever
+  // be used by a single thread that is doing the compaction run, and this
+  // call does not need to be thread-safe. However, multiple filters may be
+  // in existence and operating concurrently.
   virtual bool Filter(int level,
                       const Slice& key,
                       const Slice& existing_value,
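
A stateless filter satisfies the stricter of the two contracts above, so it can be installed either directly or through a factory. A minimal sketch; the trailing `new_value`/`value_changed` parameters and the `const` qualifier are assumed from the leveldb-style signature, since the hunk cuts off mid-declaration:

```cpp
#include <string>
#include "rocksdb/compaction_filter.h"

// Sketch: drops every record whose key starts with "deprecated:".
// No mutable state, so concurrent calls from multiple compaction threads
// are safe, as required when set via Options::compaction_filter.
class DropDeprecatedFilter : public rocksdb::CompactionFilter {
 public:
  virtual bool Filter(int level, const rocksdb::Slice& key,
                      const rocksdb::Slice& existing_value,
                      std::string* new_value, bool* value_changed) const {
    return key.starts_with("deprecated:");  // true means "remove this entry"
  }
  virtual const char* Name() const { return "DropDeprecatedFilter"; }
};
```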

include/rocksdb/db.h
@@ -273,7 +273,7 @@ class DB {
   // Sets iter to an iterator that is positioned at a write-batch containing
   // seq_number. If the sequence number is non existent, it returns an iterator
   // at the first available seq_no after the requested seq_no
-  // Returns Status::Ok if iterator is valid
+  // Returns Status::OK if iterator is valid
   // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
   // use this api, else the WAL files will get
   // cleared aggressively and the iterator might keep getting invalid before
@@ -292,6 +292,11 @@ class DB {
       std::vector<LiveFileMetaData> *metadata) {
   }

+  // Sets the globally unique ID created at database creation time by invoking
+  // Env::GenerateUniqueId(), in identity. Returns Status::OK if identity could
+  // be set properly
+  virtual Status GetDbIdentity(std::string& identity) = 0;
+
 private:
   // No copying allowed
   DB(const DB&);
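
Callers use the new method the same way the rewritten IdentityAcrossRestarts test does. A minimal sketch:

```cpp
#include <cstdio>
#include <string>
#include "rocksdb/db.h"

// Sketch: fetch the identity written at database creation time
// (db is an already-open rocksdb::DB*).
void PrintIdentity(rocksdb::DB* db) {
  std::string identity;
  rocksdb::Status s = db->GetDbIdentity(identity);
  if (s.ok()) {
    fprintf(stdout, "db identity: %s\n", identity.c_str());
  }
}
```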

include/rocksdb/memtablerep.h
@@ -17,21 +17,13 @@
 // The factory will be passed an Arena object when a new MemTableRep is
 // requested. The API for this object is in rocksdb/arena.h.
 //
-// Users can implement their own memtable representations. We include four
+// Users can implement their own memtable representations. We include three
 // types built in:
 //  - SkipListRep: This is the default; it is backed by a skip list.
-//  - TransformRep: This is backed by an custom hash map.
-// On construction, they are given a SliceTransform object. This
-// object is applied to the user key of stored items which indexes into the
-// hash map to yield a skiplist containing all records that share the same
-// user key under the transform function.
-//  - UnsortedRep: A subclass of TransformRep where the transform function is
-// the identity function. Optimized for point lookups.
-//  - PrefixHashRep: A subclass of TransformRep where the transform function is
-// a fixed-size prefix extractor. If you use PrefixHashRepFactory, the transform
-// must be identical to options.prefix_extractor, otherwise it will be discarded
-// and the default will be used. It is optimized for ranged scans over a
-// prefix.
+//  - HashSkipListRep: The memtable rep that is best used for keys that are
+//    structured like "prefix:suffix" where iteration withing a prefix is
+//    common and iteration across different prefixes is rare. It is backed by
+//    a hash map where each bucket is a skip list.
 //  - VectorRep: This is backed by an unordered std::vector. On iteration, the
 //    vector is sorted. It is intelligent about sorting; once the MarkReadOnly()
 //    has been called, the vector will only be sorted once. It is optimized for
@@ -186,16 +178,14 @@ public:
   }
 };

-// TransformReps are backed by an unordered map of buffers to buckets. When
-// looking up a key, the user key is extracted and a user-supplied transform
-// function (see rocksdb/slice_transform.h) is applied to get the key into the
-// unordered map. This allows the user to bin user keys based on arbitrary
-// criteria. Two example implementations are UnsortedRepFactory and
-// PrefixHashRepFactory.
+// HashSkipListRep is backed by hash map of buckets. Each bucket is a skip
+// list. All the keys with the same prefix will be in the same bucket.
+// The prefix is determined using user supplied SliceTransform. It has
+// to match prefix_extractor in options.prefix_extractor.
 //
 // Iteration over the entire collection is implemented by dumping all the keys
-// into an std::set. Thus, these data structures are best used when iteration
-// over the entire collection is rare.
+// into a separate skip list. Thus, these data structures are best used when
+// iteration over the entire collection is rare.
 //
 // Parameters:
 //   transform: The SliceTransform to bucket user keys on. TransformRepFactory
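
A minimal sketch of configuring the new rep, following the pattern used by the PrefixScan test in this commit; note that the transform passed to the factory must be the same object as `options.prefix_extractor`, per the SanitizeOptions check in db_impl.cc above (the 8-byte prefix length is an illustrative choice):

```cpp
#include "rocksdb/memtablerep.h"
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"

rocksdb::Options MakeHashSkipListOptions() {
  rocksdb::Options options;
  auto transform = rocksdb::NewFixedPrefixTransform(8);  // 8-byte prefixes
  options.prefix_extractor = transform;  // must match the factory's transform
  options.memtable_factory.reset(
      rocksdb::NewHashSkipListRepFactory(transform));
  return options;
}
```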

include/rocksdb/options.h
@@ -15,11 +15,9 @@
 #include <vector>
 #include <stdint.h>
-#include "rocksdb/memtablerep.h"
 #include "rocksdb/memtablerep.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/slice_transform.h"
-#include "rocksdb/slice_transform.h"
 #include "rocksdb/statistics.h"
 #include "rocksdb/table_properties.h"
 #include "rocksdb/universal_compaction.h"
@@ -95,16 +93,33 @@ struct Options {
   // Default: nullptr
   shared_ptr<MergeOperator> merge_operator;

-  // The client must provide compaction_filter_factory if it requires a new
-  // compaction filter to be used for different compaction processes
+  // A single CompactionFilter instance to call into during compaction.
   // Allows an application to modify/delete a key-value during background
   // compaction.
-  // Ideally, client should specify only one of filter or factory.
+  //
+  // If the client requires a new compaction filter to be used for different
+  // compaction runs, it can specify compaction_filter_factory instead of this
+  // option. The client should specify only one of the two.
   // compaction_filter takes precedence over compaction_filter_factory if
   // client specifies both.
+  //
+  // If multithreaded compaction is being used, the supplied CompactionFilter
+  // instance may be used from different threads concurrently and so should be
+  // thread-safe.
+  //
   // Default: nullptr
   const CompactionFilter* compaction_filter;

+  // This is a factory that provides compaction filter objects which allow
+  // an application to modify/delete a key-value during background compaction.
+  //
+  // A new filter will be created on each compaction run. If multithreaded
+  // compaction is being used, each created CompactionFilter will only be used
+  // from a single thread and so does not need to be thread-safe.
+  //
+  // Default: a factory that doesn't provide any object
+  std::shared_ptr<CompactionFilterFactory> compaction_filter_factory;
+
   // If true, the database will be created if it is missing.
   // Default: false
   bool create_if_missing;
@@ -602,11 +617,6 @@ struct Options {
   // Table and TableBuilder.
   std::shared_ptr<TableFactory> table_factory;

-  // This is a factory that provides compaction filter objects which allow
-  // an application to modify/delete a key-value during background compaction.
-  // Default: a factory that doesn't provide any object
-  std::shared_ptr<CompactionFilterFactory> compaction_filter_factory;
-
   // This option allows user to to collect their own interested statistics of
   // the tables.
   // Default: emtpy vector -- no user-defined statistics collection will be
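
After this move, the two mutually exclusive compaction hooks sit side by side near the top of Options. A minimal sketch of choosing between them, reusing the DropDeprecatedFilter sketched earlier (MyFilterFactory in the commented-out branch is hypothetical; only one of the two options should be set):

```cpp
rocksdb::Options MakeFilteredOptions() {
  rocksdb::Options options;

  // One shared instance: must tolerate concurrent calls when multithreaded
  // compaction is enabled.
  static DropDeprecatedFilter filter;  // from the earlier sketch
  options.compaction_filter = &filter;

  // Alternatively (not both): a factory that hands out a fresh filter per
  // compaction run. MyFilterFactory is a hypothetical implementation.
  // options.compaction_filter_factory = std::make_shared<MyFilterFactory>();

  return options;
}
```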

include/rocksdb/statistics.h
@@ -51,6 +51,11 @@ enum Tickers {
   // # of times bloom filter has avoided file reads.
   BLOOM_FILTER_USEFUL,

+  // # of memtable hits.
+  MEMTABLE_HIT,
+  // # of memtable misses.
+  MEMTABLE_MISS,
+
   /**
    * COMPACTION_KEY_DROP_* count the reasons for key drop during compaction
    * There are 3 reasons currently.
@@ -125,6 +130,8 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
   { BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss" },
   { BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit" },
   { BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful" },
+  { MEMTABLE_HIT, "rocksdb.memtable.hit" },
+  { MEMTABLE_MISS, "rocksdb.memtable.miss" },
   { COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new" },
   { COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete" },
   { COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user" },
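
A minimal sketch of reading the new tickers through the existing statistics interface (CreateDBStatistics and getTickerCount both appear elsewhere in this commit; it assumes options.statistics was set before the DB was opened):

```cpp
#include <cstdio>
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

void ReportMemtableHitRate(const rocksdb::Options& options) {
  // Assumes options.statistics = rocksdb::CreateDBStatistics() was done
  // before opening the DB, and some reads have been served since.
  auto hits = options.statistics->getTickerCount(rocksdb::MEMTABLE_HIT);
  auto misses = options.statistics->getTickerCount(rocksdb::MEMTABLE_MISS);
  fprintf(stdout, "memtable hits: %ld, misses: %ld\n",
          static_cast<long>(hits), static_cast<long>(misses));
}
```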

include/utilities/stackable_db.h
@@ -140,6 +140,10 @@ class StackableDB : public DB {
     return db_->DeleteFile(name);
   }

+  virtual Status GetDbIdentity(std::string& identity) {
+    return db_->GetDbIdentity(identity);
+  }
+
   virtual Status GetUpdatesSince(SequenceNumber seq_number,
                                  unique_ptr<TransactionLogIterator>* iter)
     override {

table/block_based_table_builder.cc
@@ -100,9 +100,10 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
     rep_->filter_block->StartBlock(0);
   }
   if (options.block_cache_compressed.get() != nullptr) {
-    BlockBasedTable::GenerateCachePrefix(options.block_cache_compressed, file,
-                                         &rep_->compressed_cache_key_prefix[0],
-                                         &rep_->compressed_cache_key_prefix_size);
+    BlockBasedTable::GenerateCachePrefix(
+        options.block_cache_compressed.get(), file,
+        &rep_->compressed_cache_key_prefix[0],
+        &rep_->compressed_cache_key_prefix_size);
   }
 }

table/block_based_table_reader.cc
@@ -97,18 +97,18 @@ void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep) {
   rep->cache_key_prefix_size = 0;
   rep->compressed_cache_key_prefix_size = 0;
   if (rep->options.block_cache != nullptr) {
-    GenerateCachePrefix(rep->options.block_cache, rep->file.get(),
+    GenerateCachePrefix(rep->options.block_cache.get(), rep->file.get(),
                         &rep->cache_key_prefix[0],
                         &rep->cache_key_prefix_size);
   }
   if (rep->options.block_cache_compressed != nullptr) {
-    GenerateCachePrefix(rep->options.block_cache_compressed, rep->file.get(),
-                        &rep->compressed_cache_key_prefix[0],
-                        &rep->compressed_cache_key_prefix_size);
+    GenerateCachePrefix(rep->options.block_cache_compressed.get(),
+                        rep->file.get(), &rep->compressed_cache_key_prefix[0],
+                        &rep->compressed_cache_key_prefix_size);
   }
 }

-void BlockBasedTable::GenerateCachePrefix(shared_ptr<Cache> cc,
+void BlockBasedTable::GenerateCachePrefix(Cache* cc,
     RandomAccessFile* file, char* buffer, size_t* size) {

   // generate an id from the file
@@ -122,7 +122,7 @@ void BlockBasedTable::GenerateCachePrefix(shared_ptr<Cache> cc,
   }
 }

-void BlockBasedTable::GenerateCachePrefix(shared_ptr<Cache> cc,
+void BlockBasedTable::GenerateCachePrefix(Cache* cc,
     WritableFile* file, char* buffer, size_t* size) {

   // generate an id from the file

@@ -167,9 +167,9 @@ class BlockBasedTable : public TableReader {
     rep_ = rep;
   }
   // Generate a cache key prefix from the file
-  static void GenerateCachePrefix(shared_ptr<Cache> cc,
+  static void GenerateCachePrefix(Cache* cc,
     RandomAccessFile* file, char* buffer, size_t* size);
-  static void GenerateCachePrefix(shared_ptr<Cache> cc,
+  static void GenerateCachePrefix(Cache* cc,
     WritableFile* file, char* buffer, size_t* size);
   // The longest prefix of the cache key used to identify blocks.
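
The GenerateCachePrefix overloads now borrow the cache as a raw pointer instead of taking a shared_ptr by value, presumably to avoid copying the shared_ptr (and its atomic reference-count bump) on every call; ownership stays with the Options member and callers pass `.get()`. A generic sketch of that convention; `Cache`, `UseCache`, and `Caller` here are illustrative stand-ins, not RocksDB API:

```cpp
#include <memory>

struct Cache { /* stand-in for rocksdb::Cache */ };

// Borrows the cache for the duration of the call; the caller keeps ownership.
void UseCache(Cache* cache) {
  (void)cache;  // ... read or insert blocks ...
}

void Caller(const std::shared_ptr<Cache>& block_cache) {
  if (block_cache != nullptr) {
    UseCache(block_cache.get());  // no refcount traffic on the hot path
  }
}
```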

@@ -370,15 +370,15 @@ class MemTableConstructor: public Constructor {
       : Constructor(cmp),
         internal_comparator_(cmp),
         table_factory_(new SkipListFactory) {
-    memtable_ = new MemTable(internal_comparator_, table_factory_);
+    memtable_ = new MemTable(internal_comparator_, table_factory_.get());
     memtable_->Ref();
   }
   ~MemTableConstructor() {
-    memtable_->Unref();
+    delete memtable_->Unref();
   }
   virtual Status FinishImpl(const Options& options, const KVMap& data) {
-    memtable_->Unref();
-    memtable_ = new MemTable(internal_comparator_, table_factory_);
+    delete memtable_->Unref();
+    memtable_ = new MemTable(internal_comparator_, table_factory_.get());
     memtable_->Ref();
     int seq = 1;
     for (KVMap::const_iterator it = data.begin();
@@ -930,19 +930,19 @@ TEST(TableTest, NumBlockStat) {
 class BlockCacheProperties {
  public:
-  explicit BlockCacheProperties(std::shared_ptr<Statistics> statistics) {
+  explicit BlockCacheProperties(Statistics* statistics) {
     block_cache_miss =
-        statistics.get()->getTickerCount(BLOCK_CACHE_MISS);
+        statistics->getTickerCount(BLOCK_CACHE_MISS);
     block_cache_hit =
-        statistics.get()->getTickerCount(BLOCK_CACHE_HIT);
+        statistics->getTickerCount(BLOCK_CACHE_HIT);
     index_block_cache_miss =
-        statistics.get()->getTickerCount(BLOCK_CACHE_INDEX_MISS);
+        statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
     index_block_cache_hit =
-        statistics.get()->getTickerCount(BLOCK_CACHE_INDEX_HIT);
+        statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
     data_block_cache_miss =
-        statistics.get()->getTickerCount(BLOCK_CACHE_DATA_MISS);
+        statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
     data_block_cache_hit =
-        statistics.get()->getTickerCount(BLOCK_CACHE_DATA_HIT);
+        statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
   }
   // Check if the fetched props matches the expected ones.
@@ -993,7 +993,7 @@ TEST(TableTest, BlockCacheTest) {
   // At first, no block will be accessed.
   {
-    BlockCacheProperties props(options.statistics);
+    BlockCacheProperties props(options.statistics.get());
     // index will be added to block cache.
     props.AssertEqual(
         1,  // index block miss
@@ -1006,7 +1006,7 @@ TEST(TableTest, BlockCacheTest) {
   // Only index block will be accessed
   {
     iter.reset(c.NewIterator());
-    BlockCacheProperties props(options.statistics);
+    BlockCacheProperties props(options.statistics.get());
     // NOTE: to help better highlight the "detla" of each ticker, I use
     // <last_value> + <added_value> to indicate the increment of changed
     // value; other numbers remain the same.
@@ -1021,7 +1021,7 @@ TEST(TableTest, BlockCacheTest) {
   // Only data block will be accessed
   {
     iter->SeekToFirst();
-    BlockCacheProperties props(options.statistics);
+    BlockCacheProperties props(options.statistics.get());
     props.AssertEqual(
         1,
         1,
@@ -1034,7 +1034,7 @@ TEST(TableTest, BlockCacheTest) {
   {
     iter.reset(c.NewIterator());
     iter->SeekToFirst();
-    BlockCacheProperties props(options.statistics);
+    BlockCacheProperties props(options.statistics.get());
     props.AssertEqual(
         1,
         1 + 1,  // index block hit
@@ -1047,14 +1047,14 @@ TEST(TableTest, BlockCacheTest) {
   // -- PART 2: Open without block cache
   options.block_cache.reset();
-  options.statistics = CreateDBStatistics();  // reset the props
+  options.statistics = CreateDBStatistics();  // reset the stats
   c.Reopen(options);
   {
     iter.reset(c.NewIterator());
     iter->SeekToFirst();
     ASSERT_EQ("key", iter->key().ToString());
-    BlockCacheProperties props(options.statistics);
+    BlockCacheProperties props(options.statistics.get());
     // Nothing is affected at all
     props.AssertEqual(0, 0, 0, 0);
   }
@@ -1065,7 +1065,7 @@ TEST(TableTest, BlockCacheTest) {
   options.block_cache = NewLRUCache(1);
   c.Reopen(options);
   {
-    BlockCacheProperties props(options.statistics);
+    BlockCacheProperties props(options.statistics.get());
     props.AssertEqual(
         1,  // index block miss
         0,
@@ -1080,7 +1080,7 @@ TEST(TableTest, BlockCacheTest) {
     // It first cache index block then data block. But since the cache size
     // is only 1, index block will be purged after data block is inserted.
     iter.reset(c.NewIterator());
-    BlockCacheProperties props(options.statistics);
+    BlockCacheProperties props(options.statistics.get());
     props.AssertEqual(
         1 + 1,  // index block miss
         0,
@@ -1093,7 +1093,7 @@ TEST(TableTest, BlockCacheTest) {
     // SeekToFirst() accesses data block. With similar reason, we expect data
    // block's cache miss.
     iter->SeekToFirst();
-    BlockCacheProperties props(options.statistics);
+    BlockCacheProperties props(options.statistics.get());
     props.AssertEqual(
         2,
         0,
@@ -1268,7 +1268,7 @@ class MemTableTest { };
 TEST(MemTableTest, Simple) {
   InternalKeyComparator cmp(BytewiseComparator());
   auto table_factory = std::make_shared<SkipListFactory>();
-  MemTable* memtable = new MemTable(cmp, table_factory);
+  MemTable* memtable = new MemTable(cmp, table_factory.get());
   memtable->Ref();
   WriteBatch batch;
   Options options;
@@ -1289,7 +1289,7 @@ TEST(MemTableTest, Simple) {
   }
   delete iter;
-  memtable->Unref();
+  delete memtable->Unref();
 }
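
These tests now write `delete memtable->Unref();`, which implies MemTable::Unref() was changed to hand back the pointer once the last reference is dropped (and, presumably, nullptr otherwise, since deleting nullptr is a no-op). A minimal sketch of that idiom under that assumption; this is an illustration, not the MemTable code itself:

```cpp
#include <cassert>

class RefCounted {
 public:
  void Ref() { ++refs_; }

  // Returns 'this' when the last reference is dropped so the caller can
  // delete it; returns nullptr while other references remain.
  RefCounted* Unref() {
    --refs_;
    assert(refs_ >= 0);
    return refs_ == 0 ? this : nullptr;
  }

 private:
  int refs_ = 0;
};

int main() {
  RefCounted* obj = new RefCounted;
  obj->Ref();
  delete obj->Unref();  // refcount hits zero here, so this frees obj
  return 0;
}
```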

@@ -305,8 +305,7 @@ DEFINE_bool(filter_deletes, false, "On true, deletes use KeyMayExist to drop"
 enum RepFactory {
   kSkipList,
-  kPrefixHash,
-  kUnsorted,
+  kHashSkipList,
   kVectorRep
 };
 enum RepFactory StringToRepFactory(const char* ctype) {
@@ -315,9 +314,7 @@ enum RepFactory StringToRepFactory(const char* ctype) {
   if (!strcasecmp(ctype, "skip_list"))
     return kSkipList;
   else if (!strcasecmp(ctype, "prefix_hash"))
-    return kPrefixHash;
-  else if (!strcasecmp(ctype, "unsorted"))
-    return kUnsorted;
+    return kHashSkipList;
   else if (!strcasecmp(ctype, "vector"))
     return kVectorRep;
@@ -335,7 +332,7 @@ static bool ValidatePrefixSize(const char* flagname, int32_t value) {
   }
   return true;
 }
-DEFINE_int32(prefix_size, 0, "Control the prefix size for PrefixHashRep");
+DEFINE_int32(prefix_size, 0, "Control the prefix size for HashSkipListRep");
 static const bool FLAGS_prefix_size_dummy =
     google::RegisterFlagValidator(&FLAGS_prefix_size, &ValidatePrefixSize);
@@ -1338,12 +1335,9 @@ class StressTest {
       case kSkipList:
         memtablerep = "skip_list";
         break;
-      case kPrefixHash:
+      case kHashSkipList:
         memtablerep = "prefix_hash";
         break;
-      case kUnsorted:
-        memtablerep = "unsorted";
-        break;
       case kVectorRep:
         memtablerep = "vector";
         break;
@@ -1393,21 +1387,15 @@ class StressTest {
         FLAGS_delete_obsolete_files_period_micros;
     options.max_manifest_file_size = 1024;
     options.filter_deletes = FLAGS_filter_deletes;
-    if ((FLAGS_prefix_size == 0) == (FLAGS_rep_factory == kPrefixHash)) {
+    if ((FLAGS_prefix_size == 0) == (FLAGS_rep_factory == kHashSkipList)) {
       fprintf(stderr,
             "prefix_size should be non-zero iff memtablerep == prefix_hash\n");
       exit(1);
     }
     switch (FLAGS_rep_factory) {
-      case kPrefixHash:
-        options.memtable_factory.reset(
-          new PrefixHashRepFactory(NewFixedPrefixTransform(FLAGS_prefix_size))
-        );
-        break;
-      case kUnsorted:
-        options.memtable_factory.reset(
-          new UnsortedRepFactory()
-        );
+      case kHashSkipList:
+        options.memtable_factory.reset(NewHashSkipListRepFactory(
+            NewFixedPrefixTransform(FLAGS_prefix_size)));
         break;
       case kSkipList:
         // no need to do anything

@@ -4,6 +4,8 @@
 // of patent rights can be found in the PATENTS file in the same directory.
 //

+#include "util/hash_skiplist_rep.h"
+
 #include "rocksdb/memtablerep.h"
 #include "rocksdb/arena.h"
 #include "rocksdb/slice.h"
@@ -309,39 +311,12 @@
 } // anon namespace

-class HashSkipListRepFactory : public MemTableRepFactory {
- public:
-  explicit HashSkipListRepFactory(
-      const SliceTransform* transform,
-      size_t bucket_count,
-      int32_t skiplist_height,
-      int32_t skiplist_branching_factor)
-    : transform_(transform),
-      bucket_count_(bucket_count),
-      skiplist_height_(skiplist_height),
-      skiplist_branching_factor_(skiplist_branching_factor) { }
-
-  virtual ~HashSkipListRepFactory() { delete transform_; }
-
-  virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
-      MemTableRep::KeyComparator& compare, Arena* arena) override {
-    return std::make_shared<HashSkipListRep>(compare, arena, transform_,
-                                             bucket_count_, skiplist_height_,
-                                             skiplist_branching_factor_);
-  }
-
-  virtual const char* Name() const override {
-    return "HashSkipListRepFactory";
-  }
-
-  const SliceTransform* GetTransform() { return transform_; }
-
- private:
-  const SliceTransform* transform_;
-  const size_t bucket_count_;
-  const int32_t skiplist_height_;
-  const int32_t skiplist_branching_factor_;
-};
+std::shared_ptr<MemTableRep> HashSkipListRepFactory::CreateMemTableRep(
+    MemTableRep::KeyComparator& compare, Arena* arena) {
+  return std::make_shared<HashSkipListRep>(compare, arena, transform_,
+                                           bucket_count_, skiplist_height_,
+                                           skiplist_branching_factor_);
+}

 MemTableRepFactory* NewHashSkipListRepFactory(
     const SliceTransform* transform, size_t bucket_count,

@@ -0,0 +1,45 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include "rocksdb/slice_transform.h"
+#include "rocksdb/memtablerep.h"
+
+namespace rocksdb {
+
+class HashSkipListRepFactory : public MemTableRepFactory {
+ public:
+  explicit HashSkipListRepFactory(
+      const SliceTransform* transform,
+      size_t bucket_count,
+      int32_t skiplist_height,
+      int32_t skiplist_branching_factor)
+    : transform_(transform),
+      bucket_count_(bucket_count),
+      skiplist_height_(skiplist_height),
+      skiplist_branching_factor_(skiplist_branching_factor) { }
+
+  virtual ~HashSkipListRepFactory() { delete transform_; }
+
+  virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
+      MemTableRep::KeyComparator& compare, Arena* arena) override;
+
+  virtual const char* Name() const override {
+    return "HashSkipListRepFactory";
+  }
+
+  const SliceTransform* GetTransform() { return transform_; }
+
+ private:
+  const SliceTransform* transform_;
+  const size_t bucket_count_;
+  const int32_t skiplist_height_;
+  const int32_t skiplist_branching_factor_;
+};
+
+}
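
To use the new rep from application code, the factory goes into Options together with a fixed-prefix transform, mirroring what db_stress wires up above for --memtablerep=prefix_hash. A sketch; the 4-byte prefix length is arbitrary, and the remaining factory parameters take their declared defaults (the single-argument NewHashSkipListRepFactory call in db_stress relies on those defaults existing):

```cpp
#include "rocksdb/memtablerep.h"
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"

rocksdb::Options HashSkipListOptions() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // The factory takes ownership of the transform and deletes it in its
  // destructor (see ~HashSkipListRepFactory above).
  options.memtable_factory.reset(rocksdb::NewHashSkipListRepFactory(
      rocksdb::NewFixedPrefixTransform(4)));
  return options;
}
```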

@@ -1226,25 +1226,41 @@ void ChangeCompactionStyleCommand::DoCommand() {
 class InMemoryHandler : public WriteBatch::Handler {
  public:
+  InMemoryHandler(stringstream& row, bool print_values) : Handler(),row_(row) {
+    print_values_ = print_values;
+  }
+
+  void commonPutMerge(const Slice& key, const Slice& value) {
+    string k = LDBCommand::StringToHex(key.ToString());
+    if (print_values_) {
+      string v = LDBCommand::StringToHex(value.ToString());
+      row_ << k << " : ";
+      row_ << v << " ";
+    } else {
+      row_ << k << " ";
+    }
+  }
+
   virtual void Put(const Slice& key, const Slice& value) {
-    putMap_[key.ToString()] = value.ToString();
+    row_ << "PUT : ";
+    commonPutMerge(key, value);
   }
+
+  virtual void Merge(const Slice& key, const Slice& value) {
+    row_ << "MERGE : ";
+    commonPutMerge(key, value);
+  }
+
   virtual void Delete(const Slice& key) {
-    deleteList_.push_back(key.ToString(true));
+    row_ <<",DELETE : ";
+    row_ << LDBCommand::StringToHex(key.ToString()) << " ";
   }
+
   virtual ~InMemoryHandler() { };
-  map<string, string> PutMap() {
-    return putMap_;
-  }
-  vector<string> DeleteList() {
-    return deleteList_;
-  }
+
  private:
-  map<string, string> putMap_;
-  vector<string> deleteList_;
+  stringstream & row_;
+  bool print_values_;
 };

 const string WALDumperCommand::ARG_WAL_FILE = "walfile";
@@ -1322,26 +1338,8 @@ void WALDumperCommand::DoCommand() {
       row<<WriteBatchInternal::Count(&batch)<<",";
       row<<WriteBatchInternal::ByteSize(&batch)<<",";
       row<<reader.LastRecordOffset()<<",";
-      InMemoryHandler handler;
+      InMemoryHandler handler(row, print_values_);
       batch.Iterate(&handler);
-      row << "PUT : ";
-      if (print_values_) {
-        for (auto& kv : handler.PutMap()) {
-          string k = StringToHex(kv.first);
-          string v = StringToHex(kv.second);
-          row << k << " : ";
-          row << v << " ";
-        }
-      }
-      else {
-        for(auto& kv : handler.PutMap()) {
-          row << StringToHex(kv.first) << " ";
-        }
-      }
-      row<<",DELETE : ";
-      for(string& s : handler.DeleteList()) {
-        row << StringToHex(s) << " ";
-      }
       row<<"\n";
     }
     cout<<row.str();
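
The refactor replaces the collect-then-print maps with a streaming WriteBatch::Handler that formats each operation as batch.Iterate() replays it, which also preserves the original operation order instead of separating puts from deletes. A self-contained sketch of the same pattern; PrintingHandler is illustrative, not part of ldb:

```cpp
#include <iostream>
#include <sstream>
#include "rocksdb/write_batch.h"

// Streams one fragment per operation, in the order the batch recorded them,
// instead of buffering puts and deletes in separate containers.
class PrintingHandler : public rocksdb::WriteBatch::Handler {
 public:
  explicit PrintingHandler(std::stringstream& out) : out_(out) {}
  virtual void Put(const rocksdb::Slice& key, const rocksdb::Slice& value) {
    out_ << "PUT " << key.ToString(true) << ":" << value.ToString(true) << " ";
  }
  virtual void Merge(const rocksdb::Slice& key, const rocksdb::Slice& value) {
    out_ << "MERGE " << key.ToString(true) << ":" << value.ToString(true)
         << " ";
  }
  virtual void Delete(const rocksdb::Slice& key) {
    out_ << "DELETE " << key.ToString(true) << " ";
  }

 private:
  std::stringstream& out_;
};

int main() {
  rocksdb::WriteBatch batch;
  batch.Put("k1", "v1");
  batch.Delete("k2");
  std::stringstream out;
  PrintingHandler handler(out);
  batch.Iterate(&handler);  // replays the batch through the handler
  std::cout << out.str() << "\n";
  return 0;
}
```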

@@ -25,6 +25,9 @@ Options::Options()
     : comparator(BytewiseComparator()),
       merge_operator(nullptr),
      compaction_filter(nullptr),
+      compaction_filter_factory(
+          std::shared_ptr<CompactionFilterFactory>(
+              new DefaultCompactionFilterFactory())),
       create_if_missing(false),
       error_if_exists(false),
       paranoid_checks(false),
@@ -97,9 +100,6 @@ Options::Options()
       memtable_factory(std::shared_ptr<SkipListFactory>(new SkipListFactory)),
       table_factory(
           std::shared_ptr<TableFactory>(new BlockBasedTableFactory())),
-      compaction_filter_factory(
-          std::shared_ptr<CompactionFilterFactory>(
-              new DefaultCompactionFilterFactory())),
       inplace_update_support(false),
       inplace_update_num_locks(10000) {
   assert(memtable_factory.get() != nullptr);
@@ -278,6 +278,9 @@ Options::Dump(Logger* log) const
     Log(log,"Options.compaction_options_universal."
             "max_size_amplification_percent: %u",
         compaction_options_universal.max_size_amplification_percent);
+    Log(log,
+        "Options.compaction_options_universal.compression_size_percent: %u",
+        compaction_options_universal.compression_size_percent);
     std::string collector_names;
     for (auto collector : table_properties_collectors) {
       collector_names.append(collector->Name());
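
The compaction_filter_factory initializer moves up the list, most plausibly to restore member-declaration order (an assumption about intent, not stated in the commit): C++ initializes members in the order they are declared, not the order they are listed, so a mismatched list is misleading and trips -Wreorder. A tiny illustration of the rule:

```cpp
// Members initialize in declaration order (first_, then second_) no matter
// how the constructor's initializer list is written; keeping the list in
// that order makes the actual sequence obvious and silences -Wreorder.
struct Widget {
  int first_;
  int second_;
  Widget() : first_(1), second_(first_ + 1) {}  // well-defined: 1, then 2
};

int main() {
  Widget w;
  return w.second_ == 2 ? 0 : 1;
}
```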

@@ -28,24 +28,5 @@ namespace stl_wrappers {
     }
   };

-  struct Hash {
-    inline size_t operator()(const char* buf) const {
-      Slice internal_key = GetLengthPrefixedSlice(buf);
-      Slice value =
-        GetLengthPrefixedSlice(internal_key.data() + internal_key.size());
-      unsigned int hval = MurmurHash(internal_key.data(), internal_key.size(),
-                                     0);
-      hval = MurmurHash(value.data(), value.size(), hval);
-      return hval;
-    }
-  };
-
-  struct KeyEqual : private Base {
-    explicit KeyEqual(const MemTableRep::KeyComparator& compare)
-      : Base(compare) { }
-    inline bool operator()(const char* a, const char* b) const {
-      return this->compare_(a, b) == 0;
-    }
-  };
 }
 }

@@ -1,426 +0,0 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#include <unordered_map>
#include <set>
#include <vector>
#include <algorithm>
#include <iostream>
#include "rocksdb/memtablerep.h"
#include "rocksdb/arena.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "db/memtable.h"
#include "port/port.h"
#include "util/mutexlock.h"
#include "util/murmurhash.h"
#include "util/stl_wrappers.h"
namespace std {
template <>
struct hash<rocksdb::Slice> {
size_t operator()(const rocksdb::Slice& slice) const {
return MurmurHash(slice.data(), slice.size(), 0);
}
};
}
namespace rocksdb {
namespace {
using namespace stl_wrappers;
class TransformRep : public MemTableRep {
public:
TransformRep(const KeyComparator& compare, Arena* arena,
const SliceTransform* transform, size_t bucket_size,
size_t num_locks);
virtual void Insert(const char* key) override;
virtual bool Contains(const char* key) const override;
virtual size_t ApproximateMemoryUsage() override;
virtual ~TransformRep() { }
virtual std::shared_ptr<MemTableRep::Iterator> GetIterator() override;
virtual std::shared_ptr<MemTableRep::Iterator> GetIterator(
const Slice& slice) override;
virtual std::shared_ptr<MemTableRep::Iterator> GetDynamicPrefixIterator()
override {
return std::make_shared<DynamicPrefixIterator>(*this);
}
std::shared_ptr<MemTableRep::Iterator> GetTransformIterator(
const Slice& transformed);
private:
friend class DynamicPrefixIterator;
typedef std::set<const char*, Compare> Bucket;
typedef std::unordered_map<Slice, std::shared_ptr<Bucket>> BucketMap;
// Maps slices (which are transformed user keys) to buckets of keys sharing
// the same transform.
BucketMap buckets_;
// rwlock_ protects access to the buckets_ data structure itself. Each bucket
// has its own read-write lock as well.
mutable port::RWMutex rwlock_;
// Keep track of approximately how much memory is being used.
size_t memory_usage_ = 0;
// The user-supplied transform whose domain is the user keys.
const SliceTransform* transform_;
// Get a bucket from buckets_. If the bucket hasn't been initialized yet,
// initialize it before returning. Must be externally synchronized.
std::shared_ptr<Bucket>& GetBucket(const Slice& transformed);
port::RWMutex* GetLock(const Slice& transformed) const;
mutable std::vector<port::RWMutex> locks_;
const KeyComparator& compare_;
class Iterator : public MemTableRep::Iterator {
public:
explicit Iterator(std::shared_ptr<Bucket> items);
virtual ~Iterator() { };
// Returns true iff the iterator is positioned at a valid node.
virtual bool Valid() const;
// Returns the key at the current position.
// REQUIRES: Valid()
virtual const char* key() const;
// Advances to the next position.
// REQUIRES: Valid()
virtual void Next();
// Advances to the previous position.
// REQUIRES: Valid()
virtual void Prev();
// Advance to the first entry with a key >= target
virtual void Seek(const Slice& user_key, const char* memtable_key);
// Position at the first entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
virtual void SeekToFirst();
// Position at the last entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
virtual void SeekToLast();
private:
std::shared_ptr<Bucket> items_;
Bucket::const_iterator cit_;
std::string tmp_; // For passing to EncodeKey
};
class EmptyIterator : public MemTableRep::Iterator {
// This is used when there wasn't a bucket. It is cheaper than
// instantiating an empty bucket over which to iterate.
public:
virtual bool Valid() const {
return false;
}
virtual const char* key() const {
assert(false);
return nullptr;
}
virtual void Next() { }
virtual void Prev() { }
virtual void Seek(const Slice& user_key, const char* memtable_key) { }
virtual void SeekToFirst() { }
virtual void SeekToLast() { }
static std::shared_ptr<EmptyIterator> GetInstance();
private:
static std::shared_ptr<EmptyIterator> instance;
EmptyIterator() { }
};
class TransformIterator : public Iterator {
public:
explicit TransformIterator(std::shared_ptr<Bucket> items,
port::RWMutex* rwlock);
virtual ~TransformIterator() { }
private:
const ReadLock l_;
};
class DynamicPrefixIterator : public MemTableRep::Iterator {
private:
// the underlying memtable rep
const TransformRep& memtable_rep_;
// the result of a prefix seek
std::unique_ptr<MemTableRep::Iterator> bucket_iterator_;
public:
explicit DynamicPrefixIterator(const TransformRep& memtable_rep)
: memtable_rep_(memtable_rep) {}
virtual ~DynamicPrefixIterator() { };
// Returns true iff the iterator is positioned at a valid node.
virtual bool Valid() const {
return bucket_iterator_ && bucket_iterator_->Valid();
}
// Returns the key at the current position.
// REQUIRES: Valid()
virtual const char* key() const {
assert(Valid());
return bucket_iterator_->key();
}
// Advances to the next position.
// REQUIRES: Valid()
virtual void Next() {
assert(Valid());
bucket_iterator_->Next();
}
// Advances to the previous position.
// REQUIRES: Valid()
virtual void Prev() {
assert(Valid());
bucket_iterator_->Prev();
}
// Advance to the first entry with a key >= target within the
// same bucket as target
virtual void Seek(const Slice& user_key, const char* memtable_key) {
Slice prefix = memtable_rep_.transform_->Transform(user_key);
ReadLock l(&memtable_rep_.rwlock_);
auto bucket = memtable_rep_.buckets_.find(prefix);
if (bucket == memtable_rep_.buckets_.end()) {
bucket_iterator_.reset(nullptr);
} else {
bucket_iterator_.reset(
new TransformIterator(bucket->second, memtable_rep_.GetLock(prefix)));
bucket_iterator_->Seek(user_key, memtable_key);
}
}
// Position at the first entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
virtual void SeekToFirst() {
// Prefix iterator does not support total order.
// We simply set the iterator to invalid state
bucket_iterator_.reset(nullptr);
}
// Position at the last entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
virtual void SeekToLast() {
// Prefix iterator does not support total order.
// We simply set the iterator to invalid state
bucket_iterator_.reset(nullptr);
}
};
};
class PrefixHashRep : public TransformRep {
public:
PrefixHashRep(const KeyComparator& compare, Arena* arena,
const SliceTransform* transform, size_t bucket_size,
size_t num_locks)
: TransformRep(compare, arena, transform,
bucket_size, num_locks) { }
virtual std::shared_ptr<MemTableRep::Iterator> GetPrefixIterator(
const Slice& prefix) override;
};
std::shared_ptr<TransformRep::Bucket>& TransformRep::GetBucket(
const Slice& transformed) {
WriteLock l(&rwlock_);
auto& bucket = buckets_[transformed];
if (!bucket) {
bucket.reset(
new decltype(buckets_)::mapped_type::element_type(Compare(compare_)));
// To memory_usage_ we add the size of the std::set and the size of the
// std::pair (decltype(buckets_)::value_type) which includes the
// Slice and the std::shared_ptr
memory_usage_ += sizeof(*bucket) +
sizeof(decltype(buckets_)::value_type);
}
return bucket;
}
port::RWMutex* TransformRep::GetLock(const Slice& transformed) const {
return &locks_[std::hash<Slice>()(transformed) % locks_.size()];
}
TransformRep::TransformRep(const KeyComparator& compare, Arena* arena,
const SliceTransform* transform, size_t bucket_size,
size_t num_locks)
: buckets_(bucket_size),
transform_(transform),
locks_(num_locks),
compare_(compare) { }
void TransformRep::Insert(const char* key) {
assert(!Contains(key));
auto transformed = transform_->Transform(UserKey(key));
auto& bucket = GetBucket(transformed);
WriteLock bl(GetLock(transformed));
bucket->insert(key);
memory_usage_ += sizeof(key);
}
bool TransformRep::Contains(const char* key) const {
ReadLock l(&rwlock_);
auto transformed = transform_->Transform(UserKey(key));
auto bucket = buckets_.find(transformed);
if (bucket == buckets_.end()) {
return false;
}
ReadLock bl(GetLock(transformed));
return bucket->second->count(key) != 0;
}
size_t TransformRep::ApproximateMemoryUsage() {
return memory_usage_;
}
std::shared_ptr<TransformRep::EmptyIterator>
TransformRep::EmptyIterator::GetInstance() {
if (!instance) {
instance.reset(new TransformRep::EmptyIterator);
}
return instance;
}
TransformRep::Iterator::Iterator(std::shared_ptr<Bucket> items)
: items_(items),
cit_(items_->begin()) { }
// Returns true iff the iterator is positioned at a valid node.
bool TransformRep::Iterator::Valid() const {
return cit_ != items_->end();
}
// Returns the key at the current position.
// REQUIRES: Valid()
const char* TransformRep::Iterator::key() const {
assert(Valid());
return *cit_;
}
// Advances to the next position.
// REQUIRES: Valid()
void TransformRep::Iterator::Next() {
assert(Valid());
if (cit_ == items_->end()) {
return;
}
++cit_;
}
// Advances to the previous position.
// REQUIRES: Valid()
void TransformRep::Iterator::Prev() {
assert(Valid());
if (cit_ == items_->begin()) {
// If you try to go back from the first element, the iterator should be
// invalidated. So we set it to past-the-end. This means that you can
// treat the container circularly.
cit_ = items_->end();
} else {
--cit_;
}
}
// Advance to the first entry with a key >= target
void TransformRep::Iterator::Seek(const Slice& user_key,
const char* memtable_key) {
const char* encoded_key =
(memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, user_key);
cit_ = items_->lower_bound(encoded_key);
}
// Position at the first entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
void TransformRep::Iterator::SeekToFirst() {
cit_ = items_->begin();
}
void TransformRep::Iterator::SeekToLast() {
cit_ = items_->end();
if (items_->size() != 0) {
--cit_;
}
}
TransformRep::TransformIterator::TransformIterator(
std::shared_ptr<Bucket> items, port::RWMutex* rwlock)
: Iterator(items), l_(rwlock) { }
std::shared_ptr<MemTableRep::Iterator> TransformRep::GetIterator() {
auto items = std::make_shared<Bucket>(Compare(compare_));
// Hold read locks on all locks
ReadLock l(&rwlock_);
std::for_each(locks_.begin(), locks_.end(), [] (port::RWMutex& lock) {
lock.ReadLock();
});
for (auto& bucket : buckets_) {
items->insert(bucket.second->begin(), bucket.second->end());
}
std::for_each(locks_.begin(), locks_.end(), [] (port::RWMutex& lock) {
lock.Unlock();
});
return std::make_shared<Iterator>(std::move(items));
}
std::shared_ptr<MemTableRep::Iterator> TransformRep::GetTransformIterator(
const Slice& transformed) {
ReadLock l(&rwlock_);
auto bucket = buckets_.find(transformed);
if (bucket == buckets_.end()) {
return EmptyIterator::GetInstance();
}
return std::make_shared<TransformIterator>(bucket->second,
GetLock(transformed));
}
std::shared_ptr<MemTableRep::Iterator> TransformRep::GetIterator(
const Slice& slice) {
auto transformed = transform_->Transform(slice);
return GetTransformIterator(transformed);
}
std::shared_ptr<TransformRep::EmptyIterator>
TransformRep::EmptyIterator::instance;
} // anon namespace
std::shared_ptr<MemTableRep> TransformRepFactory::CreateMemTableRep(
MemTableRep::KeyComparator& compare, Arena* arena) {
return std::make_shared<TransformRep>(compare, arena, transform_,
bucket_count_, num_locks_);
}
std::shared_ptr<MemTableRep> PrefixHashRepFactory::CreateMemTableRep(
MemTableRep::KeyComparator& compare, Arena* arena) {
return std::make_shared<PrefixHashRep>(compare, arena, transform_,
bucket_count_, num_locks_);
}
std::shared_ptr<MemTableRep::Iterator> PrefixHashRep::GetPrefixIterator(
const Slice& prefix) {
return TransformRep::GetTransformIterator(prefix);
}
} // namespace rocksdb
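
For orientation, the deleted TransformRep grouped keys into per-prefix buckets guarded by striped locks, each bucket an ordered std::set; that layout is why its prefix iterator can Seek within one bucket but must invalidate itself on SeekToFirst()/SeekToLast(). A toy sketch of the same bucket idea, with simplified types; this is not the removed class:

```cpp
#include <iostream>
#include <set>
#include <string>
#include <unordered_map>

// Keys are grouped by a fixed-length prefix; each bucket keeps its own keys
// sorted, so ordering only exists *within* a prefix, never across buckets.
class ToyPrefixRep {
 public:
  explicit ToyPrefixRep(size_t prefix_len) : prefix_len_(prefix_len) {}

  void Insert(const std::string& key) {
    buckets_[key.substr(0, prefix_len_)].insert(key);
  }

  // Ordered iteration is only well-defined for a single prefix.
  void DumpPrefix(const std::string& prefix) const {
    auto it = buckets_.find(prefix);
    if (it == buckets_.end()) return;  // analogous to the EmptyIterator case
    for (const auto& key : it->second) std::cout << key << "\n";
  }

 private:
  size_t prefix_len_;
  std::unordered_map<std::string, std::set<std::string>> buckets_;
};

int main() {
  ToyPrefixRep rep(2);
  rep.Insert("abX");
  rep.Insert("abA");
  rep.Insert("cdZ");
  rep.DumpPrefix("ab");  // prints abA then abX; the "cd" bucket is untouched
  return 0;
}
```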

@@ -291,6 +291,10 @@ Status DBWithTTL::DeleteFile(std::string name) {
   return db_->DeleteFile(name);
 }

+Status DBWithTTL::GetDbIdentity(std::string& identity) {
+  return db_->GetDbIdentity(identity);
+}
+
 Status DBWithTTL::GetUpdatesSince(
     SequenceNumber seq_number,
     unique_ptr<TransactionLogIterator>* iter) {

@@ -84,6 +84,8 @@ class DBWithTTL : public StackableDB {
   virtual Status DeleteFile(std::string name);

+  virtual Status GetDbIdentity(std::string& identity);
+
   virtual SequenceNumber GetLatestSequenceNumber() const;

   virtual Status GetUpdatesSince(SequenceNumber seq_number,
