diff --git a/INSTALL.md b/INSTALL.md index 7db22da57..ed1cbfba1 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -25,9 +25,13 @@ libraries. You are on your own. `sudo apt-get install libsnappy-dev`. * Install zlib. Try: `sudo apt-get install zlib1g-dev`. * Install bzip2: `sudo apt-get install libbz2-dev`. + * Install gflags: `sudo apt-get install libgflags-dev`. * **OS X**: - * Update your xcode to the latest version to get the compiler with - C++ 11 support. + * Install the latest C++ compiler that supports C++ 11: + * Update XCode: run `xcode-select --install` (or install it from the XCode App's settings). + * Install via [homebrew](http://brew.sh/). + * If you're a first-time developer on MacOS, you still need to run `xcode-select --install` in your command line. + * Run `brew tap homebrew/dupes; brew install gcc47 --use-llvm` to install gcc 4.7 (or higher). * Install zlib, bzip2 and snappy libraries for compression. * Install gflags. We have included a script `build_tools/mac-install-gflags.sh`, which should automatically install it. diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 5b3d58c8c..59e2e4619 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -174,6 +174,12 @@ if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then # Also don't need any compilation tests if compiling on fbcode true else + # do fPIC on 64 bit in non-fbcode environment + case "$TARGET_OS" in + x86_64) + PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS -fPIC" + esac + # If -std=c++0x works, use . Otherwise use port_posix.h.
$CXX $CFLAGS -std=c++0x -x c++ - -o /dev/null 2>/dev/null < diff --git a/build_tools/fbcode.gcc471.sh b/build_tools/fbcode.gcc471.sh index 015c512ab..e8a0cdeaa 100644 --- a/build_tools/fbcode.gcc471.sh +++ b/build_tools/fbcode.gcc471.sh @@ -51,7 +51,7 @@ CXX="$TOOLCHAIN_EXECUTABLES/gcc/gcc-4.7.1-glibc-2.14.1/bin/g++ $JINCLUDE $SNAPPY AR=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ar RANLIB=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ranlib -CFLAGS="-B$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/gold -m64 -mtune=generic -fPIC" +CFLAGS="-B$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/gold -m64 -mtune=generic" CFLAGS+=" -I $TOOLCHAIN_LIB_BASE/jemalloc/$TOOL_JEMALLOC/include -DHAVE_JEMALLOC" CFLAGS+=" $LIBGCC_INCLUDE $GLIBC_INCLUDE" CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_ATOMIC_PRESENT" diff --git a/build_tools/fbcode.gcc481.sh b/build_tools/fbcode.gcc481.sh index 6d8b9c766..7ca337cf2 100644 --- a/build_tools/fbcode.gcc481.sh +++ b/build_tools/fbcode.gcc481.sh @@ -59,7 +59,7 @@ CXX="$TOOLCHAIN_EXECUTABLES/gcc/gcc-4.8.1/cc6c9dc/bin/g++ $JINCLUDE $SNAPPY_INCL AR=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ar RANLIB=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ranlib -CFLAGS="-B$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/gold -m64 -mtune=generic -fPIC" +CFLAGS="-B$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/gold -m64 -mtune=generic" CFLAGS+=" -nostdlib $LIBGCC_INCLUDE $GLIBC_INCLUDE" CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_ATOMIC_PRESENT" CFLAGS+=" -DSNAPPY -DGFLAGS -DZLIB -DBZIP2" diff --git a/build_tools/regression_build_test.sh b/build_tools/regression_build_test.sh index 25a96d655..6ede47466 100755 --- a/build_tools/regression_build_test.sh +++ b/build_tools/regression_build_test.sh @@ -22,25 +22,15 @@ function cleanup { rm -f $STAT_FILE.fillseq rm -f $STAT_FILE.readrandom rm -f $STAT_FILE.overwrite + rm -f 
$STAT_FILE.memtablefillreadrandom } trap cleanup EXIT -function send_to_ods { - key="$1" - value="$2" - - if [ -z "$value" ];then - echo >&2 "ERROR: Key $key doesn't have a value." - return - fi - curl -s "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value" \ - --connect-timeout 60 -} - make clean -make db_bench -j$(nproc) +OPT=-DNDEBUG make db_bench -j$(nproc) +# measure fillseq + fill up the DB for overwrite benchmark ./db_bench \ --benchmarks=fillseq \ --db=$DATA_DIR \ @@ -57,6 +47,7 @@ make db_bench -j$(nproc) --disable_wal=1 \ --sync=0 > ${STAT_FILE}.fillseq +# measure overwrite performance ./db_bench \ --benchmarks=overwrite \ --db=$DATA_DIR \ @@ -74,27 +65,94 @@ make db_bench -j$(nproc) --sync=0 \ --threads=8 > ${STAT_FILE}.overwrite +# fill up the db for readrandom benchmark +./db_bench \ + --benchmarks=fillseq \ + --db=$DATA_DIR \ + --use_existing_db=0 \ + --bloom_bits=10 \ + --num=$NUM \ + --writes=$NUM \ + --cache_size=6442450944 \ + --cache_numshardbits=6 \ + --open_files=55000 \ + --statistics=1 \ + --histogram=1 \ + --disable_data_sync=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=1 > /dev/null + +# measure readrandom ./db_bench \ --benchmarks=readrandom \ --db=$DATA_DIR \ --use_existing_db=1 \ --bloom_bits=10 \ --num=$NUM \ - --reads=$((NUM / 100)) \ + --reads=$NUM \ --cache_size=6442450944 \ - --cache_numshardbits=6 \ + --cache_numshardbits=8 \ + --open_files=55000 \ + --disable_seek_compaction=1 \ + --statistics=1 \ + --histogram=1 \ + --disable_data_sync=1 \ + --disable_wal=1 \ + --sync=0 \ + --threads=32 > ${STAT_FILE}.readrandom + +# measure memtable performance -- none of the data gets flushed to disk +./db_bench \ + --benchmarks=fillrandom,readrandom, \ + --db=$DATA_DIR \ + --use_existing_db=0 \ + --num=$((NUM / 10)) \ + --reads=$NUM \ + --cache_size=6442450944 \ + --cache_numshardbits=8 \ + --write_buffer_size=1000000000 \ --open_files=55000 \ + --disable_seek_compaction=1 \ --statistics=1 \ 
--histogram=1 \ --disable_data_sync=1 \ --disable_wal=1 \ --sync=0 \ - --threads=128 > ${STAT_FILE}.readrandom + --value_size=10 \ + --threads=32 > ${STAT_FILE}.memtablefillreadrandom -OVERWRITE_OPS=$(awk '/overwrite/ {print $5}' $STAT_FILE.overwrite) -FILLSEQ_OPS=$(awk '/fillseq/ {print $5}' $STAT_FILE.fillseq) -READRANDOM_OPS=$(awk '/readrandom/ {print $5}' $STAT_FILE.readrandom) +# send data to ods +function send_to_ods { + key="$1" + value="$2" + + if [ -z "$value" ];then + echo >&2 "ERROR: Key $key doesn't have a value." + return + fi + curl -s "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value" \ + --connect-timeout 60 +} + +function send_benchmark_to_ods { + bench="$1" + bench_key="$2" + file="$3" + + QPS=$(grep $bench $file | awk '{print $5}') + P50_MICROS=$(grep $bench $file -A 4 | tail -n1 | awk '{print $3}' ) + P75_MICROS=$(grep $bench $file -A 4 | tail -n1 | awk '{print $5}' ) + P99_MICROS=$(grep $bench $file -A 4 | tail -n1 | awk '{print $7}' ) + + send_to_ods rocksdb.build.$bench_key.qps $QPS + send_to_ods rocksdb.build.$bench_key.p50_micros $P50_MICROS + send_to_ods rocksdb.build.$bench_key.p75_micros $P75_MICROS + send_to_ods rocksdb.build.$bench_key.p99_micros $P99_MICROS +} -send_to_ods rocksdb.build.overwrite.qps $OVERWRITE_OPS -send_to_ods rocksdb.build.fillseq.qps $FILLSEQ_OPS -send_to_ods rocksdb.build.readrandom.qps $READRANDOM_OPS +send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite +send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq +send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom +send_benchmark_to_ods fillrandom memtablefillrandom $STAT_FILE.memtablefillreadrandom +send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadrandom diff --git a/db/builder.cc b/db/builder.cc index b3bf894ef..ad1334a15 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -112,6 +112,7 @@ Status BuildTable(const std::string& dbname, if (this_ikey.type == 
kTypeMerge) { // Handle merge-type keys using the MergeHelper + // TODO: pass statistics to MergeUntil merge.MergeUntil(iter, 0 /* don't worry about snapshot */); iterator_at_next = true; if (merge.IsSuccess()) { @@ -188,10 +189,10 @@ Status BuildTable(const std::string& dbname, // Finish and check for file errors if (s.ok() && !options.disableDataSync) { if (options.use_fsync) { - StopWatch sw(env, options.statistics, TABLE_SYNC_MICROS); + StopWatch sw(env, options.statistics.get(), TABLE_SYNC_MICROS); s = file->Fsync(); } else { - StopWatch sw(env, options.statistics, TABLE_SYNC_MICROS); + StopWatch sw(env, options.statistics.get(), TABLE_SYNC_MICROS); s = file->Sync(); } } diff --git a/db/db_impl.cc b/db/db_impl.cc index a4e28b032..436b0c9d7 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -300,6 +300,9 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) } DBImpl::~DBImpl() { + std::vector to_delete; + to_delete.reserve(options_.max_write_buffer_number); + // Wait for background work to finish if (flush_on_destroy_ && mem_->GetFirstSequenceNumber() != 0) { FlushMemTable(FlushOptions()); @@ -317,8 +320,14 @@ DBImpl::~DBImpl() { env_->UnlockFile(db_lock_); } - if (mem_ != nullptr) mem_->Unref(); - imm_.UnrefAll(); + if (mem_ != nullptr) { + delete mem_->Unref(); + } + + imm_.UnrefAll(&to_delete); + for (MemTable* m: to_delete) { + delete m; + } LogFlush(options_.info_log); } @@ -404,7 +413,7 @@ const Status DBImpl::CreateArchivalDirectory() { } void DBImpl::PrintStatistics() { - auto dbstats = options_.statistics; + auto dbstats = options_.statistics.get(); if (dbstats) { Log(options_.info_log, "STATISTCS:\n %s", @@ -860,7 +869,7 @@ Status DBImpl::Recover(VersionEdit* edit, MemTable* external_table, if (versions_->LastSequence() < max_sequence) { versions_->SetLastSequence(max_sequence); } - SetTickerCount(options_.statistics, SEQUENCE_NUMBER, + SetTickerCount(options_.statistics.get(), SEQUENCE_NUMBER, versions_->LastSequence()); } } @@ -954,7 
+963,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, // file-systems cause the DB::Open() to fail. break; } - mem->Unref(); + delete mem->Unref(); mem = nullptr; } } @@ -965,7 +974,9 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, // file-systems cause the DB::Open() to fail. } - if (mem != nullptr && !external_table) mem->Unref(); + if (mem != nullptr && !external_table) { + delete mem->Unref(); + } return status; } @@ -1297,7 +1308,7 @@ SequenceNumber DBImpl::GetLatestSequenceNumber() const { Status DBImpl::GetUpdatesSince(SequenceNumber seq, unique_ptr* iter) { - RecordTick(options_.statistics, GET_UPDATES_SINCE_CALLS); + RecordTick(options_.statistics.get(), GET_UPDATES_SINCE_CALLS); if (seq > versions_->LastSequence()) { return Status::IOError("Requested sequence not yet written in the db"); } @@ -1971,10 +1982,12 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, // Finish and check for file errors if (s.ok() && !options_.disableDataSync) { if (options_.use_fsync) { - StopWatch sw(env_, options_.statistics, COMPACTION_OUTFILE_SYNC_MICROS); + StopWatch sw(env_, options_.statistics.get(), + COMPACTION_OUTFILE_SYNC_MICROS); s = compact->outfile->Fsync(); } else { - StopWatch sw(env_, options_.statistics, COMPACTION_OUTFILE_SYNC_MICROS); + StopWatch sw(env_, options_.statistics.get(), + COMPACTION_OUTFILE_SYNC_MICROS); s = compact->outfile->Sync(); } } @@ -2212,7 +2225,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, ParseInternalKey(key, &ikey); // no value associated with delete value.clear(); - RecordTick(options_.statistics, COMPACTION_KEY_DROP_USER); + RecordTick(options_.statistics.get(), COMPACTION_KEY_DROP_USER); } else if (value_changed) { value = compaction_filter_value; } @@ -2238,7 +2251,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, // TODO: why not > ? 
assert(last_sequence_for_key >= ikey.sequence); drop = true; // (A) - RecordTick(options_.statistics, COMPACTION_KEY_DROP_NEWER_ENTRY); + RecordTick(options_.statistics.get(), COMPACTION_KEY_DROP_NEWER_ENTRY); } else if (ikey.type == kTypeDeletion && ikey.sequence <= earliest_snapshot && compact->compaction->IsBaseLevelForKey(ikey.user_key)) { @@ -2250,7 +2263,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, // few iterations of this loop (by rule (A) above). // Therefore this deletion marker is obsolete and can be dropped. drop = true; - RecordTick(options_.statistics, COMPACTION_KEY_DROP_OBSOLETE); + RecordTick(options_.statistics.get(), COMPACTION_KEY_DROP_OBSOLETE); } else if (ikey.type == kTypeMerge) { // We know the merge type entry is not hidden, otherwise we would // have hit (A) @@ -2259,7 +2272,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, // logic could also be nicely re-used for memtable flush purge // optimization in BuildTable. merge.MergeUntil(input.get(), prev_snapshot, bottommost_level, - options_.statistics); + options_.statistics.get()); current_entry_is_merging = true; if (merge.IsSuccess()) { // Successfully found Put/Delete/(end-of-key-range) while merging @@ -2412,8 +2425,8 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, CompactionStats stats; stats.micros = env_->NowMicros() - start_micros - imm_micros; - if (options_.statistics) { - options_.statistics->measureTime(COMPACTION_TIME, stats.micros); + if (options_.statistics.get()) { + options_.statistics.get()->measureTime(COMPACTION_TIME, stats.micros); } stats.files_in_leveln = compact->compaction->num_input_files(0); stats.files_in_levelnp1 = compact->compaction->num_input_files(1); @@ -2478,9 +2491,14 @@ struct IterState { static void CleanupIteratorState(void* arg1, void* arg2) { IterState* state = reinterpret_cast(arg1); + std::vector to_delete; + to_delete.reserve(state->mem.size()); state->mu->Lock(); for (unsigned int i = 0; i < 
state->mem.size(); i++) { - state->mem[i]->Unref(); + MemTable* m = state->mem[i]->Unref(); + if (m != nullptr) { + to_delete.push_back(m); + } } state->version->Unref(); // delete only the sst obsolete files @@ -2489,6 +2507,9 @@ static void CleanupIteratorState(void* arg1, void* arg2) { state->db->FindObsoleteFiles(deletion_state, false, true); state->mu->Unlock(); state->db->PurgeObsoleteFiles(deletion_state); + + // delete obsolete memtables outside the db-mutex + for (MemTable* m : to_delete) delete m; delete state; } } // namespace @@ -2554,10 +2575,12 @@ Status DBImpl::GetImpl(const ReadOptions& options, bool* value_found) { Status s; - StopWatch sw(env_, options_.statistics, DB_GET); + StopWatch sw(env_, options_.statistics.get(), DB_GET); StopWatchNano snapshot_timer(env_, false); StartPerfTimer(&snapshot_timer); SequenceNumber snapshot; + std::vector to_delete; + to_delete.reserve(options_.max_write_buffer_number); mutex_.Lock(); if (options.snapshot != nullptr) { snapshot = reinterpret_cast(options.snapshot)->number_; @@ -2608,28 +2631,35 @@ Status DBImpl::GetImpl(const ReadOptions& options, have_stat_update && current->UpdateStats(stats)) { MaybeScheduleFlushOrCompaction(); } - mem->Unref(); - imm.UnrefAll(); + MemTable* m = mem->Unref(); + imm.UnrefAll(&to_delete); current->Unref(); mutex_.Unlock(); + // free up all obsolete memtables outside the mutex + delete m; + for (MemTable* v: to_delete) delete v; + LogFlush(options_.info_log); // Note, tickers are atomic now - no lock protection needed any more. 
- RecordTick(options_.statistics, NUMBER_KEYS_READ); - RecordTick(options_.statistics, BYTES_READ, value->size()); - BumpPerfTime(&perf_context.get_post_process_time, &post_process_timer); + RecordTick(options_.statistics.get(), NUMBER_KEYS_READ); + RecordTick(options_.statistics.get(), BYTES_READ, value->size()); + BumpPerfTime(&perf_context.get_post_process_time, &post_process_timer); return s; } std::vector DBImpl::MultiGet(const ReadOptions& options, const std::vector& keys, std::vector* values) { - - StopWatch sw(env_, options_.statistics, DB_MULTIGET); + StopWatch sw(env_, options_.statistics.get(), DB_MULTIGET); StopWatchNano snapshot_timer(env_, false); StartPerfTimer(&snapshot_timer); + SequenceNumber snapshot; + std::vector to_delete; + to_delete.reserve(options_.max_write_buffer_number); + mutex_.Lock(); if (options.snapshot != nullptr) { snapshot = reinterpret_cast(options.snapshot)->number_; @@ -2694,15 +2724,20 @@ std::vector DBImpl::MultiGet(const ReadOptions& options, have_stat_update && current->UpdateStats(stats)) { MaybeScheduleFlushOrCompaction(); } - mem->Unref(); - imm.UnrefAll(); + MemTable* m = mem->Unref(); + imm.UnrefAll(&to_delete); current->Unref(); mutex_.Unlock(); + // free up all obsolete memtables outside the mutex + delete m; + for (MemTable* v: to_delete) delete v; + LogFlush(options_.info_log); - RecordTick(options_.statistics, NUMBER_MULTIGET_CALLS); - RecordTick(options_.statistics, NUMBER_MULTIGET_KEYS_READ, numKeys); - RecordTick(options_.statistics, NUMBER_MULTIGET_BYTES_READ, bytesRead); + + RecordTick(options_.statistics.get(), NUMBER_MULTIGET_CALLS); + RecordTick(options_.statistics.get(), NUMBER_MULTIGET_KEYS_READ, numKeys); + RecordTick(options_.statistics.get(), NUMBER_MULTIGET_BYTES_READ, bytesRead); BumpPerfTime(&perf_context.get_post_process_time, &post_process_timer); return statList; @@ -2780,7 +2815,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { w.disableWAL = options.disableWAL; 
w.done = false; - StopWatch sw(env_, options_.statistics, DB_WRITE); + StopWatch sw(env_, options_.statistics.get(), DB_WRITE); MutexLock l(&mutex_); writers_.push_back(&w); while (!w.done && &w != writers_.front()) { @@ -2813,8 +2848,9 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { int my_batch_count = WriteBatchInternal::Count(updates); last_sequence += my_batch_count; // Record statistics - RecordTick(options_.statistics, NUMBER_KEYS_WRITTEN, my_batch_count); - RecordTick(options_.statistics, + RecordTick(options_.statistics.get(), + NUMBER_KEYS_WRITTEN, my_batch_count); + RecordTick(options_.statistics.get(), BYTES_WRITTEN, WriteBatchInternal::ByteSize(updates)); if (options.disableWAL) { @@ -2829,10 +2865,10 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { status = log_->AddRecord(WriteBatchInternal::Contents(updates)); if (status.ok() && options.sync) { if (options_.use_fsync) { - StopWatch(env_, options_.statistics, WAL_FILE_SYNC_MICROS); + StopWatch(env_, options_.statistics.get(), WAL_FILE_SYNC_MICROS); status = log_->file()->Fsync(); } else { - StopWatch(env_, options_.statistics, WAL_FILE_SYNC_MICROS); + StopWatch(env_, options_.statistics.get(), WAL_FILE_SYNC_MICROS); status = log_->file()->Sync(); } } @@ -2851,7 +2887,8 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { // have succeeded in memtable but Status reports error for all writes. 
throw std::runtime_error("In memory WriteBatch corruption!"); } - SetTickerCount(options_.statistics, SEQUENCE_NUMBER, last_sequence); + SetTickerCount(options_.statistics.get(), + SEQUENCE_NUMBER, last_sequence); } StartPerfTimer(&pre_post_process_timer); LogFlush(options_.info_log); @@ -3003,7 +3040,7 @@ Status DBImpl::MakeRoomForWrite(bool force) { mutex_.Unlock(); uint64_t delayed; { - StopWatch sw(env_, options_.statistics, STALL_L0_SLOWDOWN_COUNT); + StopWatch sw(env_, options_.statistics.get(), STALL_L0_SLOWDOWN_COUNT); env_->SleepForMicroseconds( SlowdownAmount(versions_->NumLevelFiles(0), options_.level0_slowdown_writes_trigger, @@ -3011,7 +3048,7 @@ Status DBImpl::MakeRoomForWrite(bool force) { ); delayed = sw.ElapsedMicros(); } - RecordTick(options_.statistics, STALL_L0_SLOWDOWN_MICROS, delayed); + RecordTick(options_.statistics.get(), STALL_L0_SLOWDOWN_MICROS, delayed); stall_level0_slowdown_ += delayed; stall_level0_slowdown_count_++; allow_delay = false; // Do not delay a single write more than once @@ -3031,12 +3068,13 @@ Status DBImpl::MakeRoomForWrite(bool force) { Log(options_.info_log, "wait for memtable compaction...\n"); uint64_t stall; { - StopWatch sw(env_, options_.statistics, + StopWatch sw(env_, options_.statistics.get(), STALL_MEMTABLE_COMPACTION_COUNT); bg_cv_.Wait(); stall = sw.ElapsedMicros(); } - RecordTick(options_.statistics, STALL_MEMTABLE_COMPACTION_MICROS, stall); + RecordTick(options_.statistics.get(), + STALL_MEMTABLE_COMPACTION_MICROS, stall); stall_memtable_compaction_ += stall; stall_memtable_compaction_count_++; } else if (versions_->NumLevelFiles(0) >= @@ -3046,11 +3084,12 @@ Status DBImpl::MakeRoomForWrite(bool force) { Log(options_.info_log, "wait for fewer level0 files...\n"); uint64_t stall; { - StopWatch sw(env_, options_.statistics, STALL_L0_NUM_FILES_COUNT); + StopWatch sw(env_, options_.statistics.get(), + STALL_L0_NUM_FILES_COUNT); bg_cv_.Wait(); stall = sw.ElapsedMicros(); } - RecordTick(options_.statistics, 
STALL_L0_NUM_FILES_MICROS, stall); + RecordTick(options_.statistics.get(), STALL_L0_NUM_FILES_MICROS, stall); stall_level0_num_files_ += stall; stall_level0_num_files_count_++; } else if ( @@ -3062,7 +3101,8 @@ Status DBImpl::MakeRoomForWrite(bool force) { mutex_.Unlock(); uint64_t delayed; { - StopWatch sw(env_, options_.statistics, HARD_RATE_LIMIT_DELAY_COUNT); + StopWatch sw(env_, options_.statistics.get(), + HARD_RATE_LIMIT_DELAY_COUNT); env_->SleepForMicroseconds(1000); delayed = sw.ElapsedMicros(); } @@ -3071,7 +3111,8 @@ Status DBImpl::MakeRoomForWrite(bool force) { // Make sure the following value doesn't round to zero. uint64_t rate_limit = std::max((delayed / 1000), (uint64_t) 1); rate_limit_delay_millis += rate_limit; - RecordTick(options_.statistics, RATE_LIMIT_DELAY_MILLIS, rate_limit); + RecordTick(options_.statistics.get(), + RATE_LIMIT_DELAY_MILLIS, rate_limit); if (options_.rate_limit_delay_max_milliseconds > 0 && rate_limit_delay_millis >= (unsigned)options_.rate_limit_delay_max_milliseconds) { @@ -3086,7 +3127,8 @@ Status DBImpl::MakeRoomForWrite(bool force) { // TODO: add statistics mutex_.Unlock(); { - StopWatch sw(env_, options_.statistics, SOFT_RATE_LIMIT_DELAY_COUNT); + StopWatch sw(env_, options_.statistics.get(), + SOFT_RATE_LIMIT_DELAY_COUNT); env_->SleepForMicroseconds(SlowdownAmount( score, options_.soft_rate_limit, @@ -3096,27 +3138,40 @@ Status DBImpl::MakeRoomForWrite(bool force) { } allow_soft_rate_limit_delay = false; mutex_.Lock(); + } else { - // Attempt to switch to a new memtable and trigger compaction of old - DelayLoggingAndReset(); + unique_ptr lfile; + MemTable* memtmp = nullptr; + + // Attempt to switch to a new memtable and trigger compaction of old. + // Do this without holding the dbmutex lock. 
assert(versions_->PrevLogNumber() == 0); uint64_t new_log_number = versions_->NewFileNumber(); - unique_ptr lfile; - EnvOptions soptions(storage_options_); - soptions.use_mmap_writes = false; - s = env_->NewWritableFile( + mutex_.Unlock(); + { + EnvOptions soptions(storage_options_); + soptions.use_mmap_writes = false; + DelayLoggingAndReset(); + s = env_->NewWritableFile( LogFileName(options_.wal_dir, new_log_number), &lfile, soptions ); + if (s.ok()) { + // Our final size should be less than write_buffer_size + // (compression, etc) but err on the side of caution. + lfile->SetPreallocationBlockSize(1.1 * options_.write_buffer_size); + memtmp = new MemTable( + internal_comparator_, mem_rep_factory_, NumberLevels(), options_); + } + } + mutex_.Lock(); if (!s.ok()) { // Avoid chewing through file number space in a tight loop. versions_->ReuseFileNumber(new_log_number); + assert (!memtmp); break; } - // Our final size should be less than write_buffer_size - // (compression, etc) but err on the side of caution. 
- lfile->SetPreallocationBlockSize(1.1 * options_.write_buffer_size); logfile_number_ = new_log_number; log_.reset(new log::Writer(std::move(lfile))); mem_->SetNextLogNumber(logfile_number_); @@ -3124,8 +3179,7 @@ Status DBImpl::MakeRoomForWrite(bool force) { if (force) { imm_.FlushRequested(); } - mem_ = new MemTable( - internal_comparator_, mem_rep_factory_, NumberLevels(), options_); + mem_ = memtmp; mem_->Ref(); Log(options_.info_log, "New memtable created with log file: #%lu\n", @@ -3138,6 +3192,14 @@ Status DBImpl::MakeRoomForWrite(bool force) { return s; } +Env* DBImpl::GetEnv() const { + return env_; +} + +const Options& DBImpl::GetOptions() const { + return options_; +} + bool DBImpl::GetProperty(const Slice& property, std::string* value) { value->clear(); diff --git a/db/db_impl.h b/db/db_impl.h index dc4c20a51..8a57b92f5 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -67,6 +67,8 @@ class DBImpl : public DB { virtual int NumberLevels(); virtual int MaxMemCompactionLevel(); virtual int Level0StopWriteTrigger(); + virtual Env* GetEnv() const; + virtual const Options& GetOptions() const; virtual Status Flush(const FlushOptions& options); virtual Status DisableFileDeletions(); virtual Status EnableFileDeletions(); diff --git a/db/db_iter.cc b/db/db_iter.cc index 9187313f2..0ee421005 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -69,7 +69,7 @@ class DBIter: public Iterator { direction_(kForward), valid_(false), current_entry_is_merged_(false), - statistics_(options.statistics) { + statistics_(options.statistics.get()) { RecordTick(statistics_, NO_ITERATORS, 1); max_skip_ = options.max_sequential_skip_in_iterations; } @@ -136,7 +136,7 @@ class DBIter: public Iterator { Direction direction_; bool valid_; bool current_entry_is_merged_; - std::shared_ptr statistics_; + Statistics* statistics_; uint64_t max_skip_; // No copying allowed diff --git a/db/db_test.cc b/db/db_test.cc index ed7425521..c9139282f 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ 
-4503,6 +4503,14 @@ class ModelDB: public DB { return -1; } + virtual Env* GetEnv() const { + return nullptr; + } + + virtual const Options& GetOptions() const { + return options_; + } + virtual Status Flush(const rocksdb::FlushOptions& options) { Status ret; return ret; diff --git a/db/memtable.cc b/db/memtable.cc index dce0c382f..082d468d7 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -17,9 +17,10 @@ #include "rocksdb/iterator.h" #include "rocksdb/merge_operator.h" #include "util/coding.h" -#include "util/mutexlock.h" #include "util/murmurhash.h" +#include "util/mutexlock.h" #include "util/perf_context_imp.h" +#include "util/statistics_imp.h" #include "util/stop_watch.h" namespace std { @@ -208,7 +209,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, assert(merge_operator); if (!merge_operator->FullMerge(key.user_key(), &v, *operands, value, logger.get())) { - RecordTick(options.statistics, NUMBER_MERGE_FAILURES); + RecordTick(options.statistics.get(), NUMBER_MERGE_FAILURES); *s = Status::Corruption("Error: Could not perform merge."); } } else { @@ -226,7 +227,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, *s = Status::OK(); if (!merge_operator->FullMerge(key.user_key(), nullptr, *operands, value, logger.get())) { - RecordTick(options.statistics, NUMBER_MERGE_FAILURES); + RecordTick(options.statistics.get(), NUMBER_MERGE_FAILURES); *s = Status::Corruption("Error: Could not perform merge."); } } else { diff --git a/db/memtable.h b/db/memtable.h index 9efb16431..751de3186 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -39,16 +39,20 @@ class MemTable { int numlevel = 7, const Options& options = Options()); + ~MemTable(); + // Increase reference count. void Ref() { ++refs_; } - // Drop reference count. Delete if no more references exist. - void Unref() { + // Drop reference count. 
+ // If the refcount goes to zero return this memtable, otherwise return null + MemTable* Unref() { --refs_; assert(refs_ >= 0); if (refs_ <= 0) { - delete this; + return this; } + return nullptr; } // Returns an estimate of the number of bytes of data in use by this @@ -129,7 +133,6 @@ class MemTable { void MarkImmutable() { table_->MarkReadOnly(); } private: - ~MemTable(); // Private since only Unref() should be used to delete it friend class MemTableIterator; friend class MemTableBackwardIterator; friend class MemTableList; diff --git a/db/memtablelist.cc b/db/memtablelist.cc index 3f2a88592..4453d1721 100644 --- a/db/memtablelist.cc +++ b/db/memtablelist.cc @@ -28,10 +28,15 @@ void MemTableList::RefAll() { } } -// Drop reference count on all underling memtables -void MemTableList::UnrefAll() { +// Drop reference count on all underling memtables. If the +// refcount of an underlying memtable drops to zero, then +// return it in to_delete vector. +void MemTableList::UnrefAll(std::vector* to_delete) { for (auto &memtable : memlist_) { - memtable->Unref(); + MemTable* m = memtable->Unref(); + if (m != nullptr) { + to_delete->push_back(m); + } } } diff --git a/db/memtablelist.h b/db/memtablelist.h index 20ea9ecda..ef10526c9 100644 --- a/db/memtablelist.h +++ b/db/memtablelist.h @@ -44,8 +44,10 @@ class MemTableList { // Increase reference count on all underling memtables void RefAll(); - // Drop reference count on all underling memtables - void UnrefAll(); + // Drop reference count on all underling memtables. If the refcount + // on an underlying memtable drops to zero, then return it in + // to_delete vector. 
+ void UnrefAll(std::vector* to_delete); // Returns the total number of memtables in the list int size(); diff --git a/db/merge_helper.cc b/db/merge_helper.cc index 9d757a5e6..a7e2df0a3 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -8,6 +8,7 @@ #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/merge_operator.h" +#include "util/statistics_imp.h" #include #include @@ -20,7 +21,7 @@ namespace rocksdb { // operands_ stores the list of merge operands encountered while merging. // keys_[i] corresponds to operands_[i] for each i. void MergeHelper::MergeUntil(Iterator* iter, SequenceNumber stop_before, - bool at_bottom, shared_ptr stats) { + bool at_bottom, Statistics* stats) { // Get a copy of the internal key, before it's invalidated by iter->Next() // Also maintain the list of merge operands seen. keys_.clear(); diff --git a/db/merge_helper.h b/db/merge_helper.h index 34e2edd94..6fe9bfb23 100644 --- a/db/merge_helper.h +++ b/db/merge_helper.h @@ -8,7 +8,6 @@ #include "db/dbformat.h" #include "rocksdb/slice.h" -#include "rocksdb/statistics.h" #include #include @@ -18,6 +17,7 @@ class Comparator; class Iterator; class Logger; class MergeOperator; +class Statistics; class MergeHelper { public: @@ -46,7 +46,7 @@ class MergeHelper { // at_bottom: (IN) true if the iterator covers the bottem level, which means // we could reach the start of the history of this user key. 
void MergeUntil(Iterator* iter, SequenceNumber stop_before = 0, - bool at_bottom = false, shared_ptr stats=nullptr); + bool at_bottom = false, Statistics* stats = nullptr); // Query the merge result // These are valid until the next MergeUntil call diff --git a/db/table_cache.cc b/db/table_cache.cc index a1f466b5a..e18c20c99 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -65,12 +65,12 @@ Status TableCache::FindTable(const EnvOptions& toptions, unique_ptr file; unique_ptr table_reader; s = env_->NewRandomAccessFile(fname, &file, toptions); - RecordTick(options_->statistics, NO_FILE_OPENS); + RecordTick(options_->statistics.get(), NO_FILE_OPENS); if (s.ok()) { if (options_->advise_random_on_open) { file->Hint(RandomAccessFile::RANDOM); } - StopWatch sw(env_, options_->statistics, TABLE_OPEN_IO_MICROS); + StopWatch sw(env_, options_->statistics.get(), TABLE_OPEN_IO_MICROS); s = options_->table_factory->GetTableReader(*options_, toptions, std::move(file), file_size, &table_reader); @@ -78,7 +78,7 @@ Status TableCache::FindTable(const EnvOptions& toptions, if (!s.ok()) { assert(table_reader == nullptr); - RecordTick(options_->statistics, NO_FILE_ERRORS); + RecordTick(options_->statistics.get(), NO_FILE_ERRORS); // We do not cache error results so that if the error is transient, // or somebody repairs the file, we recover automatically. 
} else { diff --git a/db/table_properties_collector.cc b/db/table_properties_collector.cc index d0cd1520d..3654663c1 100644 --- a/db/table_properties_collector.cc +++ b/db/table_properties_collector.cc @@ -74,7 +74,7 @@ std::string TableProperties::ToString( ); AppendProperty( result, - "(estimated) table size=", + "(estimated) table size", data_size + index_size + filter_size, prop_delim, kv_delim diff --git a/db/version_set.cc b/db/version_set.cc index 349abfbaa..3d4f84484 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -290,7 +290,7 @@ struct Saver { std::deque* merge_operands; // the merge operations encountered Logger* logger; bool didIO; // did we do any disk io? - shared_ptr statistics; + Statistics* statistics; }; } @@ -439,7 +439,7 @@ void Version::Get(const ReadOptions& options, saver.merge_operands = operands; saver.logger = logger.get(); saver.didIO = false; - saver.statistics = db_options.statistics; + saver.statistics = db_options.statistics.get(); stats->seek_file = nullptr; stats->seek_file_level = -1; @@ -458,7 +458,9 @@ void Version::Get(const ReadOptions& options, // Get the list of files to search in this level FileMetaData* const* files = &files_[level][0]; important_files.clear(); - important_files.reserve(num_files); + if (level == 0) { + important_files.reserve(num_files); + } // Some files may overlap each other. 
We find // all files that overlap user_key and process them in order from @@ -566,7 +568,7 @@ void Version::Get(const ReadOptions& options, value, logger.get())) { *status = Status::OK(); } else { - RecordTick(db_options.statistics, NUMBER_MERGE_FAILURES); + RecordTick(db_options.statistics.get(), NUMBER_MERGE_FAILURES); *status = Status::Corruption("could not perform end-of-key merge for ", user_key); } @@ -1296,10 +1298,12 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu, } if (s.ok()) { if (options_->use_fsync) { - StopWatch sw(env_, options_->statistics, MANIFEST_FILE_SYNC_MICROS); + StopWatch sw(env_, options_->statistics.get(), + MANIFEST_FILE_SYNC_MICROS); s = descriptor_log_->file()->Fsync(); } else { - StopWatch sw(env_, options_->statistics, MANIFEST_FILE_SYNC_MICROS); + StopWatch sw(env_, options_->statistics.get(), + MANIFEST_FILE_SYNC_MICROS); s = descriptor_log_->file()->Sync(); } } diff --git a/db/write_batch.cc b/db/write_batch.cc index 134cfb63c..c04930bbf 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -20,15 +20,14 @@ // data: uint8[len] #include "rocksdb/write_batch.h" - #include "rocksdb/options.h" -#include "rocksdb/statistics.h" #include "db/dbformat.h" #include "db/db_impl.h" #include "db/memtable.h" #include "db/snapshot.h" #include "db/write_batch_internal.h" #include "util/coding.h" +#include "util/statistics_imp.h" #include namespace rocksdb { @@ -197,7 +196,7 @@ class MemTableInserter : public WriteBatch::Handler { virtual void Put(const Slice& key, const Slice& value) { if (options_->inplace_update_support && mem_->Update(sequence_, kTypeValue, key, value)) { - RecordTick(options_->statistics, NUMBER_KEYS_UPDATED); + RecordTick(options_->statistics.get(), NUMBER_KEYS_UPDATED); } else { mem_->Add(sequence_, kTypeValue, key, value); } @@ -215,7 +214,7 @@ class MemTableInserter : public WriteBatch::Handler { ropts.snapshot = &read_from_snapshot; std::string value; if (!db_->KeyMayExist(ropts, key, &value)) { 
- RecordTick(options_->statistics, NUMBER_FILTERED_DELETES); + RecordTick(options_->statistics.get(), NUMBER_FILTERED_DELETES); return; } } diff --git a/doc/index.html b/doc/index.html index 8c0c9de5a..84c4d132a 100644 --- a/doc/index.html +++ b/doc/index.html @@ -80,7 +80,7 @@ Such problems can be avoided by using the WriteBatch class to atomically apply a set of updates:

-  #include "leveldb/write_batch.h"
+  #include "rocksdb/write_batch.h"
   ...
   std::string value;
   rocksdb::Status s = db->Get(rocksdb::ReadOptions(), key1, &value);
diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h
index 260202091..73f9ac4da 100644
--- a/include/rocksdb/db.h
+++ b/include/rocksdb/db.h
@@ -228,6 +228,12 @@ class DB {
   // Number of files in level-0 that would stop writes.
   virtual int Level0StopWriteTrigger() = 0;
 
+  // Get Env object from the DB
+  virtual Env* GetEnv() const = 0;
+
+  // Get DB Options that we use
+  virtual const Options& GetOptions() const = 0;
+
   // Flush all mem-table data.
   virtual Status Flush(const FlushOptions& options) = 0;
 
diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h
index 7d6a53ff8..102a4be58 100644
--- a/include/rocksdb/statistics.h
+++ b/include/rocksdb/statistics.h
@@ -276,27 +276,6 @@ class Statistics {
 // Create a concrete DBStatistics object
 std::shared_ptr CreateDBStatistics();
 
-// Ease of Use functions
-inline void RecordTick(std::shared_ptr statistics,
-                       Tickers ticker,
-                       uint64_t count = 1) {
-  assert(HistogramsNameMap.size() == HISTOGRAM_ENUM_MAX);
-  assert(TickersNameMap.size() == TICKER_ENUM_MAX);
-  if (statistics) {
-    statistics->recordTick(ticker, count);
-  }
-}
-
-inline void SetTickerCount(std::shared_ptr statistics,
-                           Tickers ticker,
-                           uint64_t count) {
-  assert(HistogramsNameMap.size() == HISTOGRAM_ENUM_MAX);
-  assert(TickersNameMap.size() == TICKER_ENUM_MAX);
-  if (statistics) {
-    statistics->setTickerCount(ticker, count);
-  }
-}
-
 }  // namespace rocksdb
 
 #endif  // STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_
diff --git a/include/utilities/stackable_db.h b/include/utilities/stackable_db.h
index f15a22e12..dc26ed852 100644
--- a/include/utilities/stackable_db.h
+++ b/include/utilities/stackable_db.h
@@ -10,152 +10,144 @@ namespace rocksdb {
 // This class contains APIs to stack rocksdb wrappers.Eg. Stack TTL over base d
 class StackableDB : public DB {
  public:
-  explicit StackableDB(StackableDB* sdb) : sdb_(sdb) {}
+  // StackableDB takes ownership of db and deletes it in its destructor.
+  explicit StackableDB(DB* db) : db_(db) {}
 
-  // Returns the DB object that is the lowermost component in the stack of DBs
-  virtual DB* GetRawDB() {
-    return sdb_->GetRawDB();
+  ~StackableDB() {
+    delete db_;
   }
 
-  // convert a DB to StackableDB
-  // TODO: This function does not work yet. Passing nullptr to StackableDB in
-  //       NewStackableDB's constructor will cause segfault on object's usage
-  static StackableDB* DBToStackableDB(DB* db) {
-    class NewStackableDB : public StackableDB {
-     public:
-      NewStackableDB(DB* db)
-        : StackableDB(nullptr),
-          db_(db) {}
-
-      DB* GetRawDB() {
-        return db_;
-      }
-
-     private:
-      DB* db_;
-    };
-    return new NewStackableDB(db);
+  virtual DB* GetBaseDB() {
+    return db_;
   }
 
   virtual Status Put(const WriteOptions& options,
                      const Slice& key,
                      const Slice& val) override {
-    return sdb_->Put(options, key, val);
+    return db_->Put(options, key, val);
   }
 
   virtual Status Get(const ReadOptions& options,
                      const Slice& key,
                      std::string* value) override {
-    return sdb_->Get(options, key, value);
+    return db_->Get(options, key, value);
   }
 
   virtual std::vector MultiGet(const ReadOptions& options,
                                        const std::vector& keys,
                                        std::vector* values)
     override {
-      return sdb_->MultiGet(options, keys, values);
+      return db_->MultiGet(options, keys, values);
   }
 
   virtual bool KeyMayExist(const ReadOptions& options,
                            const Slice& key,
                            std::string* value,
                            bool* value_found = nullptr) override {
-    return sdb_->KeyMayExist(options, key, value, value_found);
+    return db_->KeyMayExist(options, key, value, value_found);
   }
 
   virtual Status Delete(const WriteOptions& wopts, const Slice& key) override {
-    return sdb_->Delete(wopts, key);
+    return db_->Delete(wopts, key);
   }
 
   virtual Status Merge(const WriteOptions& options,
                        const Slice& key,
                        const Slice& value) override {
-    return sdb_->Merge(options, key, value);
+    return db_->Merge(options, key, value);
   }
 
 
   virtual Status Write(const WriteOptions& opts, WriteBatch* updates)
     override {
-      return sdb_->Write(opts, updates);
+      return db_->Write(opts, updates);
   }
 
   virtual Iterator* NewIterator(const ReadOptions& opts) override {
-    return sdb_->NewIterator(opts);
+    return db_->NewIterator(opts);
   }
 
   virtual const Snapshot* GetSnapshot() override {
-    return sdb_->GetSnapshot();
+    return db_->GetSnapshot();
   }
 
   virtual void ReleaseSnapshot(const Snapshot* snapshot) override {
-    return sdb_->ReleaseSnapshot(snapshot);
+    return db_->ReleaseSnapshot(snapshot);
   }
 
   virtual bool GetProperty(const Slice& property, std::string* value)
     override {
-      return sdb_->GetProperty(property, value);
+      return db_->GetProperty(property, value);
   }
 
   virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes)
     override {
-      return sdb_->GetApproximateSizes(r, n, sizes);
+      return db_->GetApproximateSizes(r, n, sizes);
   }
 
   virtual void CompactRange(const Slice* begin, const Slice* end,
                             bool reduce_level = false,
                             int target_level = -1) override {
-    return sdb_->CompactRange(begin, end, reduce_level, target_level);
+    return db_->CompactRange(begin, end, reduce_level, target_level);
   }
 
   virtual int NumberLevels() override {
-    return sdb_->NumberLevels();
+    return db_->NumberLevels();
   }
 
   virtual int MaxMemCompactionLevel() override {
-    return sdb_->MaxMemCompactionLevel();
+    return db_->MaxMemCompactionLevel();
   }
 
   virtual int Level0StopWriteTrigger() override {
-    return sdb_->Level0StopWriteTrigger();
+    return db_->Level0StopWriteTrigger();
+  }
+
+  virtual Env* GetEnv() const override {
+    return db_->GetEnv();
+  }
+
+  virtual const Options& GetOptions() const override {
+    return db_->GetOptions();
   }
 
   virtual Status Flush(const FlushOptions& fopts) override {
-    return sdb_->Flush(fopts);
+    return db_->Flush(fopts);
   }
 
   virtual Status DisableFileDeletions() override {
-    return sdb_->DisableFileDeletions();
+    return db_->DisableFileDeletions();
   }
 
   virtual Status EnableFileDeletions() override {
-    return sdb_->EnableFileDeletions();
+    return db_->EnableFileDeletions();
   }
 
   virtual Status GetLiveFiles(std::vector& vec, uint64_t* mfs,
                               bool flush_memtable = true) override {
-      return sdb_->GetLiveFiles(vec, mfs, flush_memtable);
+      return db_->GetLiveFiles(vec, mfs, flush_memtable);
   }
 
   virtual SequenceNumber GetLatestSequenceNumber() const override {
-    return sdb_->GetLatestSequenceNumber();
+    return db_->GetLatestSequenceNumber();
   }
 
   virtual Status GetSortedWalFiles(VectorLogPtr& files) override {
-    return sdb_->GetSortedWalFiles(files);
+    return db_->GetSortedWalFiles(files);
   }
 
   virtual Status DeleteFile(std::string name) override {
-    return sdb_->DeleteFile(name);
+    return db_->DeleteFile(name);
   }
 
   virtual Status GetUpdatesSince(SequenceNumber seq_number,
                                  unique_ptr* iter)
     override {
-      return sdb_->GetUpdatesSince(seq_number, iter);
+      return db_->GetUpdatesSince(seq_number, iter);
   }
 
  protected:
-  StackableDB* sdb_;
+  DB* db_;
 };
 
 } //  namespace rocksdb
diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc
index a5bf216dc..f846b1ffd 100644
--- a/table/block_based_table_builder.cc
+++ b/table/block_based_table_builder.cc
@@ -272,7 +272,8 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
                                            CompressionType type,
                                            BlockHandle* handle) {
   Rep* r = rep_;
-  StopWatch sw(r->options.env, r->options.statistics, WRITE_RAW_BLOCK_MICROS);
+  StopWatch sw(r->options.env, r->options.statistics.get(),
+               WRITE_RAW_BLOCK_MICROS);
   handle->set_offset(r->offset);
   handle->set_size(block_contents.size());
   r->status = r->file->Append(block_contents);
@@ -530,8 +531,8 @@ Status BlockBasedTableBuilder::Finish() {
     Log(
         r->options.info_log,
         "Table was constructed:\n"
-        "  basic properties: %s\n"
-        "  user collected properties: %s",
+        "  [basic properties]: %s\n"
+        "  [user collected properties]: %s",
         r->props.ToString().c_str(),
         user_collected.c_str()
     );
diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc
index 43734ea71..836f6edf6 100644
--- a/table/block_based_table_factory.cc
+++ b/table/block_based_table_factory.cc
@@ -29,7 +29,8 @@ Status BlockBasedTableFactory::GetTableReader(
 TableBuilder* BlockBasedTableFactory::GetTableBuilder(
     const Options& options, WritableFile* file,
     CompressionType compression_type) const {
-  auto flush_block_policy_factory = flush_block_policy_factory_.get();
+  auto flush_block_policy_factory = 
+    table_options_.flush_block_policy_factory.get();
 
   // if flush block policy factory is not set, we'll create the default one
   // from the options.
@@ -54,7 +55,8 @@ TableBuilder* BlockBasedTableFactory::GetTableBuilder(
   // options.
   // We can safely delete flush_block_policy_factory since it will only be used
   // during the construction of `BlockBasedTableBuilder`.
-  if (flush_block_policy_factory != flush_block_policy_factory_.get()) {
+  if (flush_block_policy_factory != 
+      table_options_.flush_block_policy_factory.get()) {
     delete flush_block_policy_factory;
   }
 
diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h
index d6ead29a0..ee525816f 100644
--- a/table/block_based_table_factory.h
+++ b/table/block_based_table_factory.h
@@ -31,14 +31,18 @@ class BlockBasedTableBuilder;
 
 class BlockBasedTableFactory: public TableFactory {
 public:
-  // @flush_block_policy_factory creates the instances of flush block policy.
-  // which provides a configurable way to determine when to flush a block in
-  // the block based tables.  If not set, table builder will use the default
-  // block flush policy, which cut blocks by block size (please refer to
-  // `FlushBlockBySizePolicy`).
-  BlockBasedTableFactory(
-      FlushBlockPolicyFactory* flush_block_policy_factory = nullptr) :
-      flush_block_policy_factory_(flush_block_policy_factory) {
+  struct TableOptions {
+    // @flush_block_policy_factory creates the instances of flush block policy,
+    // which provides a configurable way to determine when to flush a block in
+    // the block based tables.  If not set, table builder will use the default
+    // block flush policy, which cuts blocks by block size (please refer to
+    // `FlushBlockBySizePolicy`).
+    std::shared_ptr flush_block_policy_factory;
+  };
+
+  BlockBasedTableFactory() : BlockBasedTableFactory(TableOptions()) { }
+  BlockBasedTableFactory(const TableOptions& table_options): 
+      table_options_(table_options) { 
   }
 
   ~BlockBasedTableFactory() {
@@ -58,7 +62,8 @@ public:
                                     override;
 
  private:
-  std::unique_ptr flush_block_policy_factory_;
+  TableOptions table_options_;
 };
 
+
 }  // namespace rocksdb
diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc
index 5a2690103..095c2999c 100644
--- a/table/block_based_table_reader.cc
+++ b/table/block_based_table_reader.cc
@@ -200,7 +200,7 @@ Cache::Handle* GetFromBlockCache(
     const Slice& key,
     Tickers block_cache_miss_ticker,
     Tickers block_cache_hit_ticker,
-    std::shared_ptr statistics) {
+    Statistics* statistics) {
   auto cache_handle = block_cache->Lookup(key);
   if (cache_handle != nullptr) {
     BumpPerfCount(&perf_context.block_cache_hit_count);
@@ -515,7 +515,7 @@ Status BlockBasedTable::GetBlock(
     CachableEntry* entry) {
   bool no_io = options.read_tier == kBlockCacheTier;
   Cache* block_cache = table->rep_->options.block_cache.get();
-  auto statistics = table->rep_->options.statistics;
+  Statistics* statistics = table->rep_->options.statistics.get();
   Status s;
 
   if (block_cache != nullptr) {
@@ -532,7 +532,7 @@ Status BlockBasedTable::GetBlock(
         key,
         block_cache_miss_ticker,
         block_cache_hit_ticker,
-        table->rep_->options.statistics
+        statistics
     );
 
     if (entry->cache_handle != nullptr) {
@@ -593,7 +593,7 @@ Iterator* BlockBasedTable::BlockReader(void* arg,
   Cache* block_cache = table->rep_->options.block_cache.get();
   Cache* block_cache_compressed = table->rep_->options.
                                     block_cache_compressed.get();
-  std::shared_ptr statistics = table->rep_->options.statistics;
+  Statistics* statistics = table->rep_->options.statistics.get();
   Block* block = nullptr;
   Block* cblock = nullptr;
   Cache::Handle* cache_handle = nullptr;
@@ -791,12 +791,13 @@ BlockBasedTable::GetFilter(bool no_io) const {
       cache_key
   );
 
+  Statistics* statistics = rep_->options.statistics.get();
   auto cache_handle = GetFromBlockCache(
     block_cache,
     key,
     BLOCK_CACHE_FILTER_MISS,
     BLOCK_CACHE_FILTER_HIT,
-    rep_->options.statistics
+    statistics
   );
 
   FilterBlockReader* filter = nullptr;
@@ -824,7 +825,7 @@ BlockBasedTable::GetFilter(bool no_io) const {
 
         cache_handle = block_cache->Insert(
           key, filter, filter_size, &DeleteCachedFilter);
-        RecordTick(rep_->options.statistics, BLOCK_CACHE_ADD);
+        RecordTick(statistics, BLOCK_CACHE_ADD);
       }
     }
   }
@@ -945,9 +946,10 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
     filter_entry.Release(rep_->options.block_cache.get());
   }
 
-  RecordTick(rep_->options.statistics, BLOOM_FILTER_PREFIX_CHECKED);
+  Statistics* statistics = rep_->options.statistics.get();
+  RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED);
   if (!may_match) {
-    RecordTick(rep_->options.statistics, BLOOM_FILTER_PREFIX_USEFUL);
+    RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
   }
 
   return may_match;
@@ -997,7 +999,7 @@ Status BlockBasedTable::Get(
       // Not found
       // TODO: think about interaction with Merge. If a user key cannot
       // cross one data block, we should be fine.
-      RecordTick(rep_->options.statistics, BLOOM_FILTER_USEFUL);
+      RecordTick(rep_->options.statistics.get(), BLOOM_FILTER_USEFUL);
       break;
     } else {
       bool didIO = false;
diff --git a/util/env_posix.cc b/util/env_posix.cc
index 1643d2927..28901be3f 100644
--- a/util/env_posix.cc
+++ b/util/env_posix.cc
@@ -1402,6 +1402,15 @@ class PosixEnv : public Env {
         fprintf(stdout,
                 "Created bg thread 0x%lx\n",
                 (unsigned long)t);
+
+        // Set the thread name to aid debugging
+#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) && (__GLIBC_PREREQ(2, 12))
+        char name_buf[16];
+        snprintf(name_buf, sizeof name_buf, "rocksdb:bg%zu", bgthreads_.size());
+        name_buf[sizeof name_buf - 1] = '\0';
+        pthread_setname_np(t, name_buf);
+#endif
+
         bgthreads_.push_back(t);
       }
 
diff --git a/util/statistics_imp.h b/util/statistics_imp.h
new file mode 100644
index 000000000..0dc8884c1
--- /dev/null
+++ b/util/statistics_imp.h
@@ -0,0 +1,32 @@
+//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+//
+#pragma once
+#include "rocksdb/statistics.h"
+
+namespace rocksdb {
+
+// Null-safe wrappers around Statistics::recordTick()/setTickerCount().
+inline void RecordTick(Statistics* statistics,
+                       Tickers ticker,
+                       uint64_t count = 1) {
+  assert(HistogramsNameMap.size() == HISTOGRAM_ENUM_MAX);
+  assert(TickersNameMap.size() == TICKER_ENUM_MAX);
+  if (statistics) {  // no-op when statistics is null
+    statistics->recordTick(ticker, count);
+  }
+}
+
+inline void SetTickerCount(Statistics* statistics,
+                           Tickers ticker,
+                           uint64_t count) {
+  assert(HistogramsNameMap.size() == HISTOGRAM_ENUM_MAX);
+  assert(TickersNameMap.size() == TICKER_ENUM_MAX);
+  if (statistics) {  // no-op when statistics is null
+    statistics->setTickerCount(ticker, count);
+  }
+}
+
+}  // namespace rocksdb
diff --git a/util/stop_watch.h b/util/stop_watch.h
index f251b6bc1..e36bcb7ec 100644
--- a/util/stop_watch.h
+++ b/util/stop_watch.h
@@ -5,16 +5,16 @@
 //
 #pragma once
 #include "rocksdb/env.h"
-#include "rocksdb/statistics.h"
+#include "util/statistics_imp.h"
 
 namespace rocksdb {
 // Auto-scoped.
 // Records the statistic into the corresponding histogram.
 class StopWatch {
  public:
-  StopWatch(
+  explicit StopWatch(
     Env * const env,
-    std::shared_ptr statistics = nullptr,
+    Statistics* statistics = nullptr,
     const Histograms histogram_name = DB_GET) :
       env_(env),
       start_time_(env->NowMicros()),
@@ -36,7 +36,7 @@ class StopWatch {
  private:
   Env* const env_;
   const uint64_t start_time_;
-  std::shared_ptr statistics_;
+  Statistics* statistics_;
   const Histograms histogram_name_;
 
 };
@@ -44,7 +44,7 @@ class StopWatch {
 // a nano second precision stopwatch
 class StopWatchNano {
  public:
-  StopWatchNano(Env* const env, bool auto_start = false)
+  explicit StopWatchNano(Env* const env, bool auto_start = false)
       : env_(env), start_(0) {
     if (auto_start) {
       Start();
diff --git a/utilities/ttl/db_ttl.cc b/utilities/ttl/db_ttl.cc
index a019102d9..abe7408a6 100644
--- a/utilities/ttl/db_ttl.cc
+++ b/utilities/ttl/db_ttl.cc
@@ -254,6 +254,14 @@ int DBWithTTL::Level0StopWriteTrigger() {
   return db_->Level0StopWriteTrigger();
 }
 
+Env* DBWithTTL::GetEnv() const {
+  return db_->GetEnv();
+}
+
+const Options& DBWithTTL::GetOptions() const {
+  return db_->GetOptions();
+}
+
 Status DBWithTTL::Flush(const FlushOptions& fopts) {
   return db_->Flush(fopts);
 }
diff --git a/utilities/ttl/db_ttl.h b/utilities/ttl/db_ttl.h
index ffee0ccf2..d09bae966 100644
--- a/utilities/ttl/db_ttl.h
+++ b/utilities/ttl/db_ttl.h
@@ -67,6 +67,10 @@ class DBWithTTL : public StackableDB {
 
   virtual int Level0StopWriteTrigger();
 
+  virtual Env* GetEnv() const;
+
+  virtual const Options& GetOptions() const;
+
   virtual Status Flush(const FlushOptions& fopts);
 
   virtual Status DisableFileDeletions();
@@ -88,7 +92,7 @@ class DBWithTTL : public StackableDB {
   // Simulate a db crash, no elegant closing of database.
   void TEST_Destroy_DBWithTtl();
 
-  virtual DB* GetRawDB() {
+  virtual DB* GetBaseDB() {
     return db_;
   }