Merge branch 'master' into perf

main
Kai Liu 11 years ago
commit 1966b63137
  1. 8
      INSTALL.md
  2. 6
      build_tools/build_detect_platform
  3. 2
      build_tools/fbcode.gcc471.sh
  4. 2
      build_tools/fbcode.gcc481.sh
  5. 102
      build_tools/regression_build_test.sh
  6. 5
      db/builder.cc
  7. 172
      db/db_impl.cc
  8. 2
      db/db_impl.h
  9. 4
      db/db_iter.cc
  10. 8
      db/db_test.cc
  11. 7
      db/memtable.cc
  12. 11
      db/memtable.h
  13. 11
      db/memtablelist.cc
  14. 6
      db/memtablelist.h
  15. 3
      db/merge_helper.cc
  16. 4
      db/merge_helper.h
  17. 6
      db/table_cache.cc
  18. 2
      db/table_properties_collector.cc
  19. 16
      db/version_set.cc
  20. 7
      db/write_batch.cc
  21. 2
      doc/index.html
  22. 6
      include/rocksdb/db.h
  23. 21
      include/rocksdb/statistics.h
  24. 86
      include/utilities/stackable_db.h
  25. 7
      table/block_based_table_builder.cc
  26. 6
      table/block_based_table_factory.cc
  27. 23
      table/block_based_table_factory.h
  28. 20
      table/block_based_table_reader.cc
  29. 9
      util/env_posix.cc
  30. 32
      util/statistics_imp.h
  31. 10
      util/stop_watch.h
  32. 8
      utilities/ttl/db_ttl.cc
  33. 6
      utilities/ttl/db_ttl.h

@ -25,9 +25,13 @@ libraries. You are on your own.
`sudo apt-get install libsnappy-dev`.
* Install zlib. Try: `sudo apt-get install zlib1g-dev`.
* Install bzip2: `sudo apt-get install libbz2-dev`.
* Install gflags: `sudo apt-get install libgflags-dev`.
* **OS X**:
* Update your xcode to the latest version to get the compiler with
C++ 11 support.
* Install latest C++ compiler that supports C++ 11:
* Update Xcode: run `xcode-select --install` (or install it from the Xcode app's settings).
* Install via [homebrew](http://brew.sh/).
* If you're a first-time developer on OS X, you still need to run `xcode-select --install` from your command line.
* run `brew tap homebrew/dupes; brew install gcc47 --use-llvm` to install gcc 4.7 (or higher).
* Install zlib, bzip2 and snappy libraries for compression.
* Install gflags. We have included a script
`build_tools/mac-install-gflags.sh`, which should automatically install it.

@ -174,6 +174,12 @@ if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then
# Also don't need any compilation tests if compiling on fbcode
true
else
# do fPIC on 64 bit in non-fbcode environment
case "$TARGET_OS" in
x86_64)
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS -fPIC"
esac
# If -std=c++0x works, use <atomic>. Otherwise use port_posix.h.
$CXX $CFLAGS -std=c++0x -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <atomic>

@ -51,7 +51,7 @@ CXX="$TOOLCHAIN_EXECUTABLES/gcc/gcc-4.7.1-glibc-2.14.1/bin/g++ $JINCLUDE $SNAPPY
AR=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ar
RANLIB=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ranlib
CFLAGS="-B$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/gold -m64 -mtune=generic -fPIC"
CFLAGS="-B$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/gold -m64 -mtune=generic"
CFLAGS+=" -I $TOOLCHAIN_LIB_BASE/jemalloc/$TOOL_JEMALLOC/include -DHAVE_JEMALLOC"
CFLAGS+=" $LIBGCC_INCLUDE $GLIBC_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_ATOMIC_PRESENT"

@ -59,7 +59,7 @@ CXX="$TOOLCHAIN_EXECUTABLES/gcc/gcc-4.8.1/cc6c9dc/bin/g++ $JINCLUDE $SNAPPY_INCL
AR=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ar
RANLIB=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ranlib
CFLAGS="-B$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/gold -m64 -mtune=generic -fPIC"
CFLAGS="-B$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/gold -m64 -mtune=generic"
CFLAGS+=" -nostdlib $LIBGCC_INCLUDE $GLIBC_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_ATOMIC_PRESENT"
CFLAGS+=" -DSNAPPY -DGFLAGS -DZLIB -DBZIP2"

@ -22,25 +22,15 @@ function cleanup {
rm -f $STAT_FILE.fillseq
rm -f $STAT_FILE.readrandom
rm -f $STAT_FILE.overwrite
rm -f $STAT_FILE.memtablefillreadrandom
}
trap cleanup EXIT
# Publish one key/value sample to the rocksdb_build ODS entity via curl.
#   $1 - ODS key
#   $2 - value to record; when empty, an error is written to stderr and
#        nothing is sent
function send_to_ods {
  key="$1"
  value="$2"

  # Guard clause: a sample without a value is reported and dropped.
  [ -n "$value" ] || {
    echo >&2 "ERROR: Key $key doesn't have a value."
    return
  }

  curl -s --connect-timeout 60 \
    "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value"
}
make clean
make db_bench -j$(nproc)
OPT=-DNDEBUG make db_bench -j$(nproc)
# measure fillseq + fill up the DB for overwrite benchmark
./db_bench \
--benchmarks=fillseq \
--db=$DATA_DIR \
@ -57,6 +47,7 @@ make db_bench -j$(nproc)
--disable_wal=1 \
--sync=0 > ${STAT_FILE}.fillseq
# measure overwrite performance
./db_bench \
--benchmarks=overwrite \
--db=$DATA_DIR \
@ -74,27 +65,94 @@ make db_bench -j$(nproc)
--sync=0 \
--threads=8 > ${STAT_FILE}.overwrite
# fill up the db for readrandom benchmark
./db_bench \
--benchmarks=fillseq \
--db=$DATA_DIR \
--use_existing_db=0 \
--bloom_bits=10 \
--num=$NUM \
--writes=$NUM \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=1 > /dev/null
# measure readrandom
./db_bench \
--benchmarks=readrandom \
--db=$DATA_DIR \
--use_existing_db=1 \
--bloom_bits=10 \
--num=$NUM \
--reads=$((NUM / 100)) \
--reads=$NUM \
--cache_size=6442450944 \
--cache_numshardbits=6 \
--cache_numshardbits=8 \
--open_files=55000 \
--disable_seek_compaction=1 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=32 > ${STAT_FILE}.readrandom
# measure memtable performance -- none of the data gets flushed to disk
./db_bench \
--benchmarks=fillrandom,readrandom, \
--db=$DATA_DIR \
--use_existing_db=0 \
--num=$((NUM / 10)) \
--reads=$NUM \
--cache_size=6442450944 \
--cache_numshardbits=8 \
--write_buffer_size=1000000000 \
--open_files=55000 \
--disable_seek_compaction=1 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=128 > ${STAT_FILE}.readrandom
--value_size=10 \
--threads=32 > ${STAT_FILE}.memtablefillreadrandom
OVERWRITE_OPS=$(awk '/overwrite/ {print $5}' $STAT_FILE.overwrite)
FILLSEQ_OPS=$(awk '/fillseq/ {print $5}' $STAT_FILE.fillseq)
READRANDOM_OPS=$(awk '/readrandom/ {print $5}' $STAT_FILE.readrandom)
# send data to ods
# Publish one key/value sample to the rocksdb_build ODS entity via curl.
#   $1 - ODS key
#   $2 - value to record; when empty, an error is written to stderr and
#        nothing is sent
function send_to_ods {
  key="$1"
  value="$2"

  # Guard clause: a sample without a value is reported and dropped.
  [ -n "$value" ] || {
    echo >&2 "ERROR: Key $key doesn't have a value."
    return
  }

  curl -s --connect-timeout 60 \
    "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value"
}
# Pull the throughput and latency percentiles for one benchmark out of a
# db_bench output file and publish them through send_to_ods.
#   $1 - benchmark name to search for in the output file
#   $2 - key component; metrics are sent as rocksdb.build.<key>.<metric>
#   $3 - path of the db_bench output file
function send_benchmark_to_ods {
  bench="$1"
  bench_key="$2"
  file="$3"

  # QPS is field 5 of the first matching line that has one. Stopping at the
  # first hit keeps the value a single token even if several lines match.
  QPS=$(grep -e "$bench" "$file" | awk '$5 != "" {print $5; exit}')

  # The last line of the 4-line trailing context is parsed for percentiles.
  # Scan the file once and reuse the line for all three fields.
  percentiles=$(grep -A 4 -e "$bench" "$file" | tail -n1)
  P50_MICROS=$(echo "$percentiles" | awk '{print $3}')
  P75_MICROS=$(echo "$percentiles" | awk '{print $5}')
  P99_MICROS=$(echo "$percentiles" | awk '{print $7}')

  # Quote every argument so an empty metric reaches send_to_ods as an empty
  # value (and is reported there) instead of silently shifting arguments.
  send_to_ods "rocksdb.build.$bench_key.qps" "$QPS"
  send_to_ods "rocksdb.build.$bench_key.p50_micros" "$P50_MICROS"
  send_to_ods "rocksdb.build.$bench_key.p75_micros" "$P75_MICROS"
  send_to_ods "rocksdb.build.$bench_key.p99_micros" "$P99_MICROS"
}
send_to_ods rocksdb.build.overwrite.qps $OVERWRITE_OPS
send_to_ods rocksdb.build.fillseq.qps $FILLSEQ_OPS
send_to_ods rocksdb.build.readrandom.qps $READRANDOM_OPS
send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite
send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq
send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom
send_benchmark_to_ods fillrandom memtablefillrandom $STAT_FILE.memtablefillreadrandom
send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadrandom

@ -112,6 +112,7 @@ Status BuildTable(const std::string& dbname,
if (this_ikey.type == kTypeMerge) {
// Handle merge-type keys using the MergeHelper
// TODO: pass statistics to MergeUntil
merge.MergeUntil(iter, 0 /* don't worry about snapshot */);
iterator_at_next = true;
if (merge.IsSuccess()) {
@ -188,10 +189,10 @@ Status BuildTable(const std::string& dbname,
// Finish and check for file errors
if (s.ok() && !options.disableDataSync) {
if (options.use_fsync) {
StopWatch sw(env, options.statistics, TABLE_SYNC_MICROS);
StopWatch sw(env, options.statistics.get(), TABLE_SYNC_MICROS);
s = file->Fsync();
} else {
StopWatch sw(env, options.statistics, TABLE_SYNC_MICROS);
StopWatch sw(env, options.statistics.get(), TABLE_SYNC_MICROS);
s = file->Sync();
}
}

@ -300,6 +300,9 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
}
DBImpl::~DBImpl() {
std::vector<MemTable*> to_delete;
to_delete.reserve(options_.max_write_buffer_number);
// Wait for background work to finish
if (flush_on_destroy_ && mem_->GetFirstSequenceNumber() != 0) {
FlushMemTable(FlushOptions());
@ -317,8 +320,14 @@ DBImpl::~DBImpl() {
env_->UnlockFile(db_lock_);
}
if (mem_ != nullptr) mem_->Unref();
imm_.UnrefAll();
if (mem_ != nullptr) {
delete mem_->Unref();
}
imm_.UnrefAll(&to_delete);
for (MemTable* m: to_delete) {
delete m;
}
LogFlush(options_.info_log);
}
@ -404,7 +413,7 @@ const Status DBImpl::CreateArchivalDirectory() {
}
void DBImpl::PrintStatistics() {
auto dbstats = options_.statistics;
auto dbstats = options_.statistics.get();
if (dbstats) {
Log(options_.info_log,
"STATISTCS:\n %s",
@ -860,7 +869,7 @@ Status DBImpl::Recover(VersionEdit* edit, MemTable* external_table,
if (versions_->LastSequence() < max_sequence) {
versions_->SetLastSequence(max_sequence);
}
SetTickerCount(options_.statistics, SEQUENCE_NUMBER,
SetTickerCount(options_.statistics.get(), SEQUENCE_NUMBER,
versions_->LastSequence());
}
}
@ -954,7 +963,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
// file-systems cause the DB::Open() to fail.
break;
}
mem->Unref();
delete mem->Unref();
mem = nullptr;
}
}
@ -965,7 +974,9 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
// file-systems cause the DB::Open() to fail.
}
if (mem != nullptr && !external_table) mem->Unref();
if (mem != nullptr && !external_table) {
delete mem->Unref();
}
return status;
}
@ -1297,7 +1308,7 @@ SequenceNumber DBImpl::GetLatestSequenceNumber() const {
Status DBImpl::GetUpdatesSince(SequenceNumber seq,
unique_ptr<TransactionLogIterator>* iter) {
RecordTick(options_.statistics, GET_UPDATES_SINCE_CALLS);
RecordTick(options_.statistics.get(), GET_UPDATES_SINCE_CALLS);
if (seq > versions_->LastSequence()) {
return Status::IOError("Requested sequence not yet written in the db");
}
@ -1971,10 +1982,12 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
// Finish and check for file errors
if (s.ok() && !options_.disableDataSync) {
if (options_.use_fsync) {
StopWatch sw(env_, options_.statistics, COMPACTION_OUTFILE_SYNC_MICROS);
StopWatch sw(env_, options_.statistics.get(),
COMPACTION_OUTFILE_SYNC_MICROS);
s = compact->outfile->Fsync();
} else {
StopWatch sw(env_, options_.statistics, COMPACTION_OUTFILE_SYNC_MICROS);
StopWatch sw(env_, options_.statistics.get(),
COMPACTION_OUTFILE_SYNC_MICROS);
s = compact->outfile->Sync();
}
}
@ -2212,7 +2225,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
ParseInternalKey(key, &ikey);
// no value associated with delete
value.clear();
RecordTick(options_.statistics, COMPACTION_KEY_DROP_USER);
RecordTick(options_.statistics.get(), COMPACTION_KEY_DROP_USER);
} else if (value_changed) {
value = compaction_filter_value;
}
@ -2238,7 +2251,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
// TODO: why not > ?
assert(last_sequence_for_key >= ikey.sequence);
drop = true; // (A)
RecordTick(options_.statistics, COMPACTION_KEY_DROP_NEWER_ENTRY);
RecordTick(options_.statistics.get(), COMPACTION_KEY_DROP_NEWER_ENTRY);
} else if (ikey.type == kTypeDeletion &&
ikey.sequence <= earliest_snapshot &&
compact->compaction->IsBaseLevelForKey(ikey.user_key)) {
@ -2250,7 +2263,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
// few iterations of this loop (by rule (A) above).
// Therefore this deletion marker is obsolete and can be dropped.
drop = true;
RecordTick(options_.statistics, COMPACTION_KEY_DROP_OBSOLETE);
RecordTick(options_.statistics.get(), COMPACTION_KEY_DROP_OBSOLETE);
} else if (ikey.type == kTypeMerge) {
// We know the merge type entry is not hidden, otherwise we would
// have hit (A)
@ -2259,7 +2272,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
// logic could also be nicely re-used for memtable flush purge
// optimization in BuildTable.
merge.MergeUntil(input.get(), prev_snapshot, bottommost_level,
options_.statistics);
options_.statistics.get());
current_entry_is_merging = true;
if (merge.IsSuccess()) {
// Successfully found Put/Delete/(end-of-key-range) while merging
@ -2412,8 +2425,8 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
CompactionStats stats;
stats.micros = env_->NowMicros() - start_micros - imm_micros;
if (options_.statistics) {
options_.statistics->measureTime(COMPACTION_TIME, stats.micros);
if (options_.statistics.get()) {
options_.statistics.get()->measureTime(COMPACTION_TIME, stats.micros);
}
stats.files_in_leveln = compact->compaction->num_input_files(0);
stats.files_in_levelnp1 = compact->compaction->num_input_files(1);
@ -2478,9 +2491,14 @@ struct IterState {
static void CleanupIteratorState(void* arg1, void* arg2) {
IterState* state = reinterpret_cast<IterState*>(arg1);
std::vector<MemTable*> to_delete;
to_delete.reserve(state->mem.size());
state->mu->Lock();
for (unsigned int i = 0; i < state->mem.size(); i++) {
state->mem[i]->Unref();
MemTable* m = state->mem[i]->Unref();
if (m != nullptr) {
to_delete.push_back(m);
}
}
state->version->Unref();
// delete only the sst obsolete files
@ -2489,6 +2507,9 @@ static void CleanupIteratorState(void* arg1, void* arg2) {
state->db->FindObsoleteFiles(deletion_state, false, true);
state->mu->Unlock();
state->db->PurgeObsoleteFiles(deletion_state);
// delete obsolete memtables outside the db-mutex
for (MemTable* m : to_delete) delete m;
delete state;
}
} // namespace
@ -2554,10 +2575,12 @@ Status DBImpl::GetImpl(const ReadOptions& options,
bool* value_found) {
Status s;
StopWatch sw(env_, options_.statistics, DB_GET);
StopWatch sw(env_, options_.statistics.get(), DB_GET);
StopWatchNano snapshot_timer(env_, false);
StartPerfTimer(&snapshot_timer);
SequenceNumber snapshot;
std::vector<MemTable*> to_delete;
to_delete.reserve(options_.max_write_buffer_number);
mutex_.Lock();
if (options.snapshot != nullptr) {
snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_;
@ -2608,28 +2631,35 @@ Status DBImpl::GetImpl(const ReadOptions& options,
have_stat_update && current->UpdateStats(stats)) {
MaybeScheduleFlushOrCompaction();
}
mem->Unref();
imm.UnrefAll();
MemTable* m = mem->Unref();
imm.UnrefAll(&to_delete);
current->Unref();
mutex_.Unlock();
// free up all obsolete memtables outside the mutex
delete m;
for (MemTable* v: to_delete) delete v;
LogFlush(options_.info_log);
// Note, tickers are atomic now - no lock protection needed any more.
RecordTick(options_.statistics, NUMBER_KEYS_READ);
RecordTick(options_.statistics, BYTES_READ, value->size());
BumpPerfTime(&perf_context.get_post_process_time, &post_process_timer);
RecordTick(options_.statistics.get(), NUMBER_KEYS_READ);
RecordTick(options_.statistics.get(), BYTES_READ, value->size());
BumpPerfTime(&perf_context.get_post_process_time, &post_process_timer);
return s;
}
std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
const std::vector<Slice>& keys,
std::vector<std::string>* values) {
StopWatch sw(env_, options_.statistics, DB_MULTIGET);
StopWatch sw(env_, options_.statistics.get(), DB_MULTIGET);
StopWatchNano snapshot_timer(env_, false);
StartPerfTimer(&snapshot_timer);
SequenceNumber snapshot;
std::vector<MemTable*> to_delete;
to_delete.reserve(options_.max_write_buffer_number);
mutex_.Lock();
if (options.snapshot != nullptr) {
snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_;
@ -2694,15 +2724,20 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
have_stat_update && current->UpdateStats(stats)) {
MaybeScheduleFlushOrCompaction();
}
mem->Unref();
imm.UnrefAll();
MemTable* m = mem->Unref();
imm.UnrefAll(&to_delete);
current->Unref();
mutex_.Unlock();
// free up all obsolete memtables outside the mutex
delete m;
for (MemTable* v: to_delete) delete v;
LogFlush(options_.info_log);
RecordTick(options_.statistics, NUMBER_MULTIGET_CALLS);
RecordTick(options_.statistics, NUMBER_MULTIGET_KEYS_READ, numKeys);
RecordTick(options_.statistics, NUMBER_MULTIGET_BYTES_READ, bytesRead);
RecordTick(options_.statistics.get(), NUMBER_MULTIGET_CALLS);
RecordTick(options_.statistics.get(), NUMBER_MULTIGET_KEYS_READ, numKeys);
RecordTick(options_.statistics.get(), NUMBER_MULTIGET_BYTES_READ, bytesRead);
BumpPerfTime(&perf_context.get_post_process_time, &post_process_timer);
return statList;
@ -2780,7 +2815,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
w.disableWAL = options.disableWAL;
w.done = false;
StopWatch sw(env_, options_.statistics, DB_WRITE);
StopWatch sw(env_, options_.statistics.get(), DB_WRITE);
MutexLock l(&mutex_);
writers_.push_back(&w);
while (!w.done && &w != writers_.front()) {
@ -2813,8 +2848,9 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
int my_batch_count = WriteBatchInternal::Count(updates);
last_sequence += my_batch_count;
// Record statistics
RecordTick(options_.statistics, NUMBER_KEYS_WRITTEN, my_batch_count);
RecordTick(options_.statistics,
RecordTick(options_.statistics.get(),
NUMBER_KEYS_WRITTEN, my_batch_count);
RecordTick(options_.statistics.get(),
BYTES_WRITTEN,
WriteBatchInternal::ByteSize(updates));
if (options.disableWAL) {
@ -2829,10 +2865,10 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
status = log_->AddRecord(WriteBatchInternal::Contents(updates));
if (status.ok() && options.sync) {
if (options_.use_fsync) {
StopWatch(env_, options_.statistics, WAL_FILE_SYNC_MICROS);
StopWatch(env_, options_.statistics.get(), WAL_FILE_SYNC_MICROS);
status = log_->file()->Fsync();
} else {
StopWatch(env_, options_.statistics, WAL_FILE_SYNC_MICROS);
StopWatch(env_, options_.statistics.get(), WAL_FILE_SYNC_MICROS);
status = log_->file()->Sync();
}
}
@ -2851,7 +2887,8 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
// have succeeded in memtable but Status reports error for all writes.
throw std::runtime_error("In memory WriteBatch corruption!");
}
SetTickerCount(options_.statistics, SEQUENCE_NUMBER, last_sequence);
SetTickerCount(options_.statistics.get(),
SEQUENCE_NUMBER, last_sequence);
}
StartPerfTimer(&pre_post_process_timer);
LogFlush(options_.info_log);
@ -3003,7 +3040,7 @@ Status DBImpl::MakeRoomForWrite(bool force) {
mutex_.Unlock();
uint64_t delayed;
{
StopWatch sw(env_, options_.statistics, STALL_L0_SLOWDOWN_COUNT);
StopWatch sw(env_, options_.statistics.get(), STALL_L0_SLOWDOWN_COUNT);
env_->SleepForMicroseconds(
SlowdownAmount(versions_->NumLevelFiles(0),
options_.level0_slowdown_writes_trigger,
@ -3011,7 +3048,7 @@ Status DBImpl::MakeRoomForWrite(bool force) {
);
delayed = sw.ElapsedMicros();
}
RecordTick(options_.statistics, STALL_L0_SLOWDOWN_MICROS, delayed);
RecordTick(options_.statistics.get(), STALL_L0_SLOWDOWN_MICROS, delayed);
stall_level0_slowdown_ += delayed;
stall_level0_slowdown_count_++;
allow_delay = false; // Do not delay a single write more than once
@ -3031,12 +3068,13 @@ Status DBImpl::MakeRoomForWrite(bool force) {
Log(options_.info_log, "wait for memtable compaction...\n");
uint64_t stall;
{
StopWatch sw(env_, options_.statistics,
StopWatch sw(env_, options_.statistics.get(),
STALL_MEMTABLE_COMPACTION_COUNT);
bg_cv_.Wait();
stall = sw.ElapsedMicros();
}
RecordTick(options_.statistics, STALL_MEMTABLE_COMPACTION_MICROS, stall);
RecordTick(options_.statistics.get(),
STALL_MEMTABLE_COMPACTION_MICROS, stall);
stall_memtable_compaction_ += stall;
stall_memtable_compaction_count_++;
} else if (versions_->NumLevelFiles(0) >=
@ -3046,11 +3084,12 @@ Status DBImpl::MakeRoomForWrite(bool force) {
Log(options_.info_log, "wait for fewer level0 files...\n");
uint64_t stall;
{
StopWatch sw(env_, options_.statistics, STALL_L0_NUM_FILES_COUNT);
StopWatch sw(env_, options_.statistics.get(),
STALL_L0_NUM_FILES_COUNT);
bg_cv_.Wait();
stall = sw.ElapsedMicros();
}
RecordTick(options_.statistics, STALL_L0_NUM_FILES_MICROS, stall);
RecordTick(options_.statistics.get(), STALL_L0_NUM_FILES_MICROS, stall);
stall_level0_num_files_ += stall;
stall_level0_num_files_count_++;
} else if (
@ -3062,7 +3101,8 @@ Status DBImpl::MakeRoomForWrite(bool force) {
mutex_.Unlock();
uint64_t delayed;
{
StopWatch sw(env_, options_.statistics, HARD_RATE_LIMIT_DELAY_COUNT);
StopWatch sw(env_, options_.statistics.get(),
HARD_RATE_LIMIT_DELAY_COUNT);
env_->SleepForMicroseconds(1000);
delayed = sw.ElapsedMicros();
}
@ -3071,7 +3111,8 @@ Status DBImpl::MakeRoomForWrite(bool force) {
// Make sure the following value doesn't round to zero.
uint64_t rate_limit = std::max((delayed / 1000), (uint64_t) 1);
rate_limit_delay_millis += rate_limit;
RecordTick(options_.statistics, RATE_LIMIT_DELAY_MILLIS, rate_limit);
RecordTick(options_.statistics.get(),
RATE_LIMIT_DELAY_MILLIS, rate_limit);
if (options_.rate_limit_delay_max_milliseconds > 0 &&
rate_limit_delay_millis >=
(unsigned)options_.rate_limit_delay_max_milliseconds) {
@ -3086,7 +3127,8 @@ Status DBImpl::MakeRoomForWrite(bool force) {
// TODO: add statistics
mutex_.Unlock();
{
StopWatch sw(env_, options_.statistics, SOFT_RATE_LIMIT_DELAY_COUNT);
StopWatch sw(env_, options_.statistics.get(),
SOFT_RATE_LIMIT_DELAY_COUNT);
env_->SleepForMicroseconds(SlowdownAmount(
score,
options_.soft_rate_limit,
@ -3096,27 +3138,40 @@ Status DBImpl::MakeRoomForWrite(bool force) {
}
allow_soft_rate_limit_delay = false;
mutex_.Lock();
} else {
// Attempt to switch to a new memtable and trigger compaction of old
DelayLoggingAndReset();
unique_ptr<WritableFile> lfile;
MemTable* memtmp = nullptr;
// Attempt to switch to a new memtable and trigger compaction of old.
// Do this without holding the dbmutex lock.
assert(versions_->PrevLogNumber() == 0);
uint64_t new_log_number = versions_->NewFileNumber();
unique_ptr<WritableFile> lfile;
EnvOptions soptions(storage_options_);
soptions.use_mmap_writes = false;
s = env_->NewWritableFile(
mutex_.Unlock();
{
EnvOptions soptions(storage_options_);
soptions.use_mmap_writes = false;
DelayLoggingAndReset();
s = env_->NewWritableFile(
LogFileName(options_.wal_dir, new_log_number),
&lfile,
soptions
);
if (s.ok()) {
// Our final size should be less than write_buffer_size
// (compression, etc) but err on the side of caution.
lfile->SetPreallocationBlockSize(1.1 * options_.write_buffer_size);
memtmp = new MemTable(
internal_comparator_, mem_rep_factory_, NumberLevels(), options_);
}
}
mutex_.Lock();
if (!s.ok()) {
// Avoid chewing through file number space in a tight loop.
versions_->ReuseFileNumber(new_log_number);
assert (!memtmp);
break;
}
// Our final size should be less than write_buffer_size
// (compression, etc) but err on the side of caution.
lfile->SetPreallocationBlockSize(1.1 * options_.write_buffer_size);
logfile_number_ = new_log_number;
log_.reset(new log::Writer(std::move(lfile)));
mem_->SetNextLogNumber(logfile_number_);
@ -3124,8 +3179,7 @@ Status DBImpl::MakeRoomForWrite(bool force) {
if (force) {
imm_.FlushRequested();
}
mem_ = new MemTable(
internal_comparator_, mem_rep_factory_, NumberLevels(), options_);
mem_ = memtmp;
mem_->Ref();
Log(options_.info_log,
"New memtable created with log file: #%lu\n",
@ -3138,6 +3192,14 @@ Status DBImpl::MakeRoomForWrite(bool force) {
return s;
}
// Accessor: hands back the Env pointer stored in env_.
Env* DBImpl::GetEnv() const { return env_; }
// Accessor: exposes the options_ member by const reference.
const Options& DBImpl::GetOptions() const { return options_; }
bool DBImpl::GetProperty(const Slice& property, std::string* value) {
value->clear();

@ -67,6 +67,8 @@ class DBImpl : public DB {
virtual int NumberLevels();
virtual int MaxMemCompactionLevel();
virtual int Level0StopWriteTrigger();
virtual Env* GetEnv() const;
virtual const Options& GetOptions() const;
virtual Status Flush(const FlushOptions& options);
virtual Status DisableFileDeletions();
virtual Status EnableFileDeletions();

@ -69,7 +69,7 @@ class DBIter: public Iterator {
direction_(kForward),
valid_(false),
current_entry_is_merged_(false),
statistics_(options.statistics) {
statistics_(options.statistics.get()) {
RecordTick(statistics_, NO_ITERATORS, 1);
max_skip_ = options.max_sequential_skip_in_iterations;
}
@ -136,7 +136,7 @@ class DBIter: public Iterator {
Direction direction_;
bool valid_;
bool current_entry_is_merged_;
std::shared_ptr<Statistics> statistics_;
Statistics* statistics_;
uint64_t max_skip_;
// No copying allowed

@ -4503,6 +4503,14 @@ class ModelDB: public DB {
return -1;
}
// This override always returns nullptr.
virtual Env* GetEnv() const { return nullptr; }
// Exposes the options_ member by const reference.
virtual const Options& GetOptions() const { return options_; }
virtual Status Flush(const rocksdb::FlushOptions& options) {
Status ret;
return ret;

@ -17,9 +17,10 @@
#include "rocksdb/iterator.h"
#include "rocksdb/merge_operator.h"
#include "util/coding.h"
#include "util/mutexlock.h"
#include "util/murmurhash.h"
#include "util/mutexlock.h"
#include "util/perf_context_imp.h"
#include "util/statistics_imp.h"
#include "util/stop_watch.h"
namespace std {
@ -208,7 +209,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
assert(merge_operator);
if (!merge_operator->FullMerge(key.user_key(), &v, *operands,
value, logger.get())) {
RecordTick(options.statistics, NUMBER_MERGE_FAILURES);
RecordTick(options.statistics.get(), NUMBER_MERGE_FAILURES);
*s = Status::Corruption("Error: Could not perform merge.");
}
} else {
@ -226,7 +227,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
*s = Status::OK();
if (!merge_operator->FullMerge(key.user_key(), nullptr, *operands,
value, logger.get())) {
RecordTick(options.statistics, NUMBER_MERGE_FAILURES);
RecordTick(options.statistics.get(), NUMBER_MERGE_FAILURES);
*s = Status::Corruption("Error: Could not perform merge.");
}
} else {

@ -39,16 +39,20 @@ class MemTable {
int numlevel = 7,
const Options& options = Options());
~MemTable();
// Increase reference count.
void Ref() { ++refs_; }
// Drop reference count. Delete if no more references exist.
void Unref() {
// Drop reference count.
// If the refcount goes to zero return this memtable, otherwise return null
MemTable* Unref() {
--refs_;
assert(refs_ >= 0);
if (refs_ <= 0) {
delete this;
return this;
}
return nullptr;
}
// Returns an estimate of the number of bytes of data in use by this
@ -129,7 +133,6 @@ class MemTable {
void MarkImmutable() { table_->MarkReadOnly(); }
private:
~MemTable(); // Private since only Unref() should be used to delete it
friend class MemTableIterator;
friend class MemTableBackwardIterator;
friend class MemTableList;

@ -28,10 +28,15 @@ void MemTableList::RefAll() {
}
}
// Drop reference count on all underlying memtables
void MemTableList::UnrefAll() {
// Drop reference count on all underlying memtables. If the
// refcount of an underlying memtable drops to zero, then
// return it in the to_delete vector.
void MemTableList::UnrefAll(std::vector<MemTable*>* to_delete) {
for (auto &memtable : memlist_) {
memtable->Unref();
MemTable* m = memtable->Unref();
if (m != nullptr) {
to_delete->push_back(m);
}
}
}

@ -44,8 +44,10 @@ class MemTableList {
// Increase reference count on all underlying memtables
void RefAll();
// Drop reference count on all underlying memtables
void UnrefAll();
// Drop reference count on all underlying memtables. If the refcount
// on an underlying memtable drops to zero, then return it in the
// to_delete vector.
void UnrefAll(std::vector<MemTable*>* to_delete);
// Returns the total number of memtables in the list
int size();

@ -8,6 +8,7 @@
#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/merge_operator.h"
#include "util/statistics_imp.h"
#include <string>
#include <stdio.h>
@ -20,7 +21,7 @@ namespace rocksdb {
// operands_ stores the list of merge operands encountered while merging.
// keys_[i] corresponds to operands_[i] for each i.
void MergeHelper::MergeUntil(Iterator* iter, SequenceNumber stop_before,
bool at_bottom, shared_ptr<Statistics> stats) {
bool at_bottom, Statistics* stats) {
// Get a copy of the internal key, before it's invalidated by iter->Next()
// Also maintain the list of merge operands seen.
keys_.clear();

@ -8,7 +8,6 @@
#include "db/dbformat.h"
#include "rocksdb/slice.h"
#include "rocksdb/statistics.h"
#include <string>
#include <deque>
@ -18,6 +17,7 @@ class Comparator;
class Iterator;
class Logger;
class MergeOperator;
class Statistics;
class MergeHelper {
public:
@ -46,7 +46,7 @@ class MergeHelper {
// at_bottom: (IN) true if the iterator covers the bottom level, which means
// we could reach the start of the history of this user key.
void MergeUntil(Iterator* iter, SequenceNumber stop_before = 0,
bool at_bottom = false, shared_ptr<Statistics> stats=nullptr);
bool at_bottom = false, Statistics* stats = nullptr);
// Query the merge result
// These are valid until the next MergeUntil call

@ -65,12 +65,12 @@ Status TableCache::FindTable(const EnvOptions& toptions,
unique_ptr<RandomAccessFile> file;
unique_ptr<TableReader> table_reader;
s = env_->NewRandomAccessFile(fname, &file, toptions);
RecordTick(options_->statistics, NO_FILE_OPENS);
RecordTick(options_->statistics.get(), NO_FILE_OPENS);
if (s.ok()) {
if (options_->advise_random_on_open) {
file->Hint(RandomAccessFile::RANDOM);
}
StopWatch sw(env_, options_->statistics, TABLE_OPEN_IO_MICROS);
StopWatch sw(env_, options_->statistics.get(), TABLE_OPEN_IO_MICROS);
s = options_->table_factory->GetTableReader(*options_, toptions,
std::move(file), file_size,
&table_reader);
@ -78,7 +78,7 @@ Status TableCache::FindTable(const EnvOptions& toptions,
if (!s.ok()) {
assert(table_reader == nullptr);
RecordTick(options_->statistics, NO_FILE_ERRORS);
RecordTick(options_->statistics.get(), NO_FILE_ERRORS);
// We do not cache error results so that if the error is transient,
// or somebody repairs the file, we recover automatically.
} else {

@ -74,7 +74,7 @@ std::string TableProperties::ToString(
);
AppendProperty(
result,
"(estimated) table size=",
"(estimated) table size",
data_size + index_size + filter_size,
prop_delim,
kv_delim

@ -290,7 +290,7 @@ struct Saver {
std::deque<std::string>* merge_operands; // the merge operations encountered
Logger* logger;
bool didIO; // did we do any disk io?
shared_ptr<Statistics> statistics;
Statistics* statistics;
};
}
@ -439,7 +439,7 @@ void Version::Get(const ReadOptions& options,
saver.merge_operands = operands;
saver.logger = logger.get();
saver.didIO = false;
saver.statistics = db_options.statistics;
saver.statistics = db_options.statistics.get();
stats->seek_file = nullptr;
stats->seek_file_level = -1;
@ -458,7 +458,9 @@ void Version::Get(const ReadOptions& options,
// Get the list of files to search in this level
FileMetaData* const* files = &files_[level][0];
important_files.clear();
important_files.reserve(num_files);
if (level == 0) {
important_files.reserve(num_files);
}
// Some files may overlap each other. We find
// all files that overlap user_key and process them in order from
@ -566,7 +568,7 @@ void Version::Get(const ReadOptions& options,
value, logger.get())) {
*status = Status::OK();
} else {
RecordTick(db_options.statistics, NUMBER_MERGE_FAILURES);
RecordTick(db_options.statistics.get(), NUMBER_MERGE_FAILURES);
*status = Status::Corruption("could not perform end-of-key merge for ",
user_key);
}
@ -1296,10 +1298,12 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu,
}
if (s.ok()) {
if (options_->use_fsync) {
StopWatch sw(env_, options_->statistics, MANIFEST_FILE_SYNC_MICROS);
StopWatch sw(env_, options_->statistics.get(),
MANIFEST_FILE_SYNC_MICROS);
s = descriptor_log_->file()->Fsync();
} else {
StopWatch sw(env_, options_->statistics, MANIFEST_FILE_SYNC_MICROS);
StopWatch sw(env_, options_->statistics.get(),
MANIFEST_FILE_SYNC_MICROS);
s = descriptor_log_->file()->Sync();
}
}

@ -20,15 +20,14 @@
// data: uint8[len]
#include "rocksdb/write_batch.h"
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"
#include "db/dbformat.h"
#include "db/db_impl.h"
#include "db/memtable.h"
#include "db/snapshot.h"
#include "db/write_batch_internal.h"
#include "util/coding.h"
#include "util/statistics_imp.h"
#include <stdexcept>
namespace rocksdb {
@ -197,7 +196,7 @@ class MemTableInserter : public WriteBatch::Handler {
virtual void Put(const Slice& key, const Slice& value) {
if (options_->inplace_update_support
&& mem_->Update(sequence_, kTypeValue, key, value)) {
RecordTick(options_->statistics, NUMBER_KEYS_UPDATED);
RecordTick(options_->statistics.get(), NUMBER_KEYS_UPDATED);
} else {
mem_->Add(sequence_, kTypeValue, key, value);
}
@ -215,7 +214,7 @@ class MemTableInserter : public WriteBatch::Handler {
ropts.snapshot = &read_from_snapshot;
std::string value;
if (!db_->KeyMayExist(ropts, key, &value)) {
RecordTick(options_->statistics, NUMBER_FILTERED_DELETES);
RecordTick(options_->statistics.get(), NUMBER_FILTERED_DELETES);
return;
}
}

@ -80,7 +80,7 @@ Such problems can be avoided by using the <code>WriteBatch</code> class to
atomically apply a set of updates:
<p>
<pre>
#include "leveldb/write_batch.h"
#include "rocksdb/write_batch.h"
...
std::string value;
rocksdb::Status s = db-&gt;Get(rocksdb::ReadOptions(), key1, &amp;value);

@ -228,6 +228,12 @@ class DB {
// Number of files in level-0 that would stop writes.
virtual int Level0StopWriteTrigger() = 0;
// Get Env object from the DB
virtual Env* GetEnv() const = 0;
// Get DB Options that we use
virtual const Options& GetOptions() const = 0;
// Flush all mem-table data.
virtual Status Flush(const FlushOptions& options) = 0;

@ -276,27 +276,6 @@ class Statistics {
// Create a concrete DBStatistics object
std::shared_ptr<Statistics> CreateDBStatistics();
// Ease of Use functions
inline void RecordTick(std::shared_ptr<Statistics> statistics,
Tickers ticker,
uint64_t count = 1) {
assert(HistogramsNameMap.size() == HISTOGRAM_ENUM_MAX);
assert(TickersNameMap.size() == TICKER_ENUM_MAX);
if (statistics) {
statistics->recordTick(ticker, count);
}
}
inline void SetTickerCount(std::shared_ptr<Statistics> statistics,
Tickers ticker,
uint64_t count) {
assert(HistogramsNameMap.size() == HISTOGRAM_ENUM_MAX);
assert(TickersNameMap.size() == TICKER_ENUM_MAX);
if (statistics) {
statistics->setTickerCount(ticker, count);
}
}
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_

@ -10,152 +10,144 @@ namespace rocksdb {
// This class contains APIs to stack rocksdb wrappers, e.g. stack TTL over base DB
class StackableDB : public DB {
public:
explicit StackableDB(StackableDB* sdb) : sdb_(sdb) {}
// StackableDB is the owner of db now!
explicit StackableDB(DB* db) : db_(db) {}
// Returns the DB object that is the lowermost component in the stack of DBs
virtual DB* GetRawDB() {
return sdb_->GetRawDB();
~StackableDB() {
delete db_;
}
// convert a DB to StackableDB
// TODO: This function does not work yet. Passing nullptr to StackableDB in
// NewStackableDB's constructor will cause segfault on object's usage
static StackableDB* DBToStackableDB(DB* db) {
class NewStackableDB : public StackableDB {
public:
NewStackableDB(DB* db)
: StackableDB(nullptr),
db_(db) {}
DB* GetRawDB() {
return db_;
}
private:
DB* db_;
};
return new NewStackableDB(db);
virtual DB* GetBaseDB() {
return db_;
}
virtual Status Put(const WriteOptions& options,
const Slice& key,
const Slice& val) override {
return sdb_->Put(options, key, val);
return db_->Put(options, key, val);
}
virtual Status Get(const ReadOptions& options,
const Slice& key,
std::string* value) override {
return sdb_->Get(options, key, value);
return db_->Get(options, key, value);
}
virtual std::vector<Status> MultiGet(const ReadOptions& options,
const std::vector<Slice>& keys,
std::vector<std::string>* values)
override {
return sdb_->MultiGet(options, keys, values);
return db_->MultiGet(options, keys, values);
}
virtual bool KeyMayExist(const ReadOptions& options,
const Slice& key,
std::string* value,
bool* value_found = nullptr) override {
return sdb_->KeyMayExist(options, key, value, value_found);
return db_->KeyMayExist(options, key, value, value_found);
}
virtual Status Delete(const WriteOptions& wopts, const Slice& key) override {
return sdb_->Delete(wopts, key);
return db_->Delete(wopts, key);
}
virtual Status Merge(const WriteOptions& options,
const Slice& key,
const Slice& value) override {
return sdb_->Merge(options, key, value);
return db_->Merge(options, key, value);
}
virtual Status Write(const WriteOptions& opts, WriteBatch* updates)
override {
return sdb_->Write(opts, updates);
return db_->Write(opts, updates);
}
virtual Iterator* NewIterator(const ReadOptions& opts) override {
return sdb_->NewIterator(opts);
return db_->NewIterator(opts);
}
virtual const Snapshot* GetSnapshot() override {
return sdb_->GetSnapshot();
return db_->GetSnapshot();
}
virtual void ReleaseSnapshot(const Snapshot* snapshot) override {
return sdb_->ReleaseSnapshot(snapshot);
return db_->ReleaseSnapshot(snapshot);
}
virtual bool GetProperty(const Slice& property, std::string* value)
override {
return sdb_->GetProperty(property, value);
return db_->GetProperty(property, value);
}
virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes)
override {
return sdb_->GetApproximateSizes(r, n, sizes);
return db_->GetApproximateSizes(r, n, sizes);
}
virtual void CompactRange(const Slice* begin, const Slice* end,
bool reduce_level = false,
int target_level = -1) override {
return sdb_->CompactRange(begin, end, reduce_level, target_level);
return db_->CompactRange(begin, end, reduce_level, target_level);
}
virtual int NumberLevels() override {
return sdb_->NumberLevels();
return db_->NumberLevels();
}
virtual int MaxMemCompactionLevel() override {
return sdb_->MaxMemCompactionLevel();
return db_->MaxMemCompactionLevel();
}
virtual int Level0StopWriteTrigger() override {
return sdb_->Level0StopWriteTrigger();
return db_->Level0StopWriteTrigger();
}
virtual Env* GetEnv() const override {
return db_->GetEnv();
}
virtual const Options& GetOptions() const override {
return db_->GetOptions();
}
virtual Status Flush(const FlushOptions& fopts) override {
return sdb_->Flush(fopts);
return db_->Flush(fopts);
}
virtual Status DisableFileDeletions() override {
return sdb_->DisableFileDeletions();
return db_->DisableFileDeletions();
}
virtual Status EnableFileDeletions() override {
return sdb_->EnableFileDeletions();
return db_->EnableFileDeletions();
}
virtual Status GetLiveFiles(std::vector<std::string>& vec, uint64_t* mfs,
bool flush_memtable = true) override {
return sdb_->GetLiveFiles(vec, mfs, flush_memtable);
return db_->GetLiveFiles(vec, mfs, flush_memtable);
}
virtual SequenceNumber GetLatestSequenceNumber() const override {
return sdb_->GetLatestSequenceNumber();
return db_->GetLatestSequenceNumber();
}
virtual Status GetSortedWalFiles(VectorLogPtr& files) override {
return sdb_->GetSortedWalFiles(files);
return db_->GetSortedWalFiles(files);
}
virtual Status DeleteFile(std::string name) override {
return sdb_->DeleteFile(name);
return db_->DeleteFile(name);
}
virtual Status GetUpdatesSince(SequenceNumber seq_number,
unique_ptr<TransactionLogIterator>* iter)
override {
return sdb_->GetUpdatesSince(seq_number, iter);
return db_->GetUpdatesSince(seq_number, iter);
}
protected:
StackableDB* sdb_;
DB* db_;
};
} // namespace rocksdb

@ -272,7 +272,8 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
CompressionType type,
BlockHandle* handle) {
Rep* r = rep_;
StopWatch sw(r->options.env, r->options.statistics, WRITE_RAW_BLOCK_MICROS);
StopWatch sw(r->options.env, r->options.statistics.get(),
WRITE_RAW_BLOCK_MICROS);
handle->set_offset(r->offset);
handle->set_size(block_contents.size());
r->status = r->file->Append(block_contents);
@ -530,8 +531,8 @@ Status BlockBasedTableBuilder::Finish() {
Log(
r->options.info_log,
"Table was constructed:\n"
" basic properties: %s\n"
" user collected properties: %s",
" [basic properties]: %s\n"
" [user collected properties]: %s",
r->props.ToString().c_str(),
user_collected.c_str()
);

@ -29,7 +29,8 @@ Status BlockBasedTableFactory::GetTableReader(
TableBuilder* BlockBasedTableFactory::GetTableBuilder(
const Options& options, WritableFile* file,
CompressionType compression_type) const {
auto flush_block_policy_factory = flush_block_policy_factory_.get();
auto flush_block_policy_factory =
table_options_.flush_block_policy_factory.get();
// if flush block policy factory is not set, we'll create the default one
// from the options.
@ -54,7 +55,8 @@ TableBuilder* BlockBasedTableFactory::GetTableBuilder(
// options.
// We can safely delete flush_block_policy_factory since it will only be used
// during the construction of `BlockBasedTableBuilder`.
if (flush_block_policy_factory != flush_block_policy_factory_.get()) {
if (flush_block_policy_factory !=
table_options_.flush_block_policy_factory.get()) {
delete flush_block_policy_factory;
}

@ -31,14 +31,18 @@ class BlockBasedTableBuilder;
class BlockBasedTableFactory: public TableFactory {
public:
// @flush_block_policy_factory creates the flush block policy instances,
// which provide a configurable way to determine when to flush a block in
// the block based tables. If not set, the table builder will use the default
// block flush policy, which cuts blocks by block size (please refer to
// `FlushBlockBySizePolicy`).
BlockBasedTableFactory(
FlushBlockPolicyFactory* flush_block_policy_factory = nullptr) :
flush_block_policy_factory_(flush_block_policy_factory) {
// Options bundle accepted by the BlockBasedTableFactory constructor.
struct TableOptions {
// @flush_block_policy_factory creates the flush block policy instances,
// which provide a configurable way to determine when to flush a block in
// the block based tables. If not set, the table builder will use the default
// block flush policy, which cuts blocks by block size (please refer to
// `FlushBlockBySizePolicy`).
std::shared_ptr<FlushBlockPolicyFactory> flush_block_policy_factory;
};
BlockBasedTableFactory() : BlockBasedTableFactory(TableOptions()) { }
BlockBasedTableFactory(const TableOptions& table_options):
table_options_(table_options) {
}
~BlockBasedTableFactory() {
@ -58,7 +62,8 @@ public:
override;
private:
std::unique_ptr<FlushBlockPolicyFactory> flush_block_policy_factory_;
TableOptions table_options_;
};
} // namespace rocksdb

@ -200,7 +200,7 @@ Cache::Handle* GetFromBlockCache(
const Slice& key,
Tickers block_cache_miss_ticker,
Tickers block_cache_hit_ticker,
std::shared_ptr<Statistics> statistics) {
Statistics* statistics) {
auto cache_handle = block_cache->Lookup(key);
if (cache_handle != nullptr) {
BumpPerfCount(&perf_context.block_cache_hit_count);
@ -515,7 +515,7 @@ Status BlockBasedTable::GetBlock(
CachableEntry<Block>* entry) {
bool no_io = options.read_tier == kBlockCacheTier;
Cache* block_cache = table->rep_->options.block_cache.get();
auto statistics = table->rep_->options.statistics;
Statistics* statistics = table->rep_->options.statistics.get();
Status s;
if (block_cache != nullptr) {
@ -532,7 +532,7 @@ Status BlockBasedTable::GetBlock(
key,
block_cache_miss_ticker,
block_cache_hit_ticker,
table->rep_->options.statistics
statistics
);
if (entry->cache_handle != nullptr) {
@ -593,7 +593,7 @@ Iterator* BlockBasedTable::BlockReader(void* arg,
Cache* block_cache = table->rep_->options.block_cache.get();
Cache* block_cache_compressed = table->rep_->options.
block_cache_compressed.get();
std::shared_ptr<Statistics> statistics = table->rep_->options.statistics;
Statistics* statistics = table->rep_->options.statistics.get();
Block* block = nullptr;
Block* cblock = nullptr;
Cache::Handle* cache_handle = nullptr;
@ -791,12 +791,13 @@ BlockBasedTable::GetFilter(bool no_io) const {
cache_key
);
Statistics* statistics = rep_->options.statistics.get();
auto cache_handle = GetFromBlockCache(
block_cache,
key,
BLOCK_CACHE_FILTER_MISS,
BLOCK_CACHE_FILTER_HIT,
rep_->options.statistics
statistics
);
FilterBlockReader* filter = nullptr;
@ -824,7 +825,7 @@ BlockBasedTable::GetFilter(bool no_io) const {
cache_handle = block_cache->Insert(
key, filter, filter_size, &DeleteCachedFilter);
RecordTick(rep_->options.statistics, BLOCK_CACHE_ADD);
RecordTick(statistics, BLOCK_CACHE_ADD);
}
}
}
@ -945,9 +946,10 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
filter_entry.Release(rep_->options.block_cache.get());
}
RecordTick(rep_->options.statistics, BLOOM_FILTER_PREFIX_CHECKED);
Statistics* statistics = rep_->options.statistics.get();
RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED);
if (!may_match) {
RecordTick(rep_->options.statistics, BLOOM_FILTER_PREFIX_USEFUL);
RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
}
return may_match;
@ -997,7 +999,7 @@ Status BlockBasedTable::Get(
// Not found
// TODO: think about interaction with Merge. If a user key cannot
// cross one data block, we should be fine.
RecordTick(rep_->options.statistics, BLOOM_FILTER_USEFUL);
RecordTick(rep_->options.statistics.get(), BLOOM_FILTER_USEFUL);
break;
} else {
bool didIO = false;

@ -1402,6 +1402,15 @@ class PosixEnv : public Env {
fprintf(stdout,
"Created bg thread 0x%lx\n",
(unsigned long)t);
// Set the thread name to aid debugging
#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) && (__GLIBC_PREREQ(2, 12))
char name_buf[16];
snprintf(name_buf, sizeof name_buf, "rocksdb:bg%zu", bgthreads_.size());
name_buf[sizeof name_buf - 1] = '\0';
pthread_setname_np(t, name_buf);
#endif
bgthreads_.push_back(t);
}

@ -0,0 +1,32 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#pragma once
#include "rocksdb/statistics.h"
namespace rocksdb {
// Utility functions
// Adds `count` (default 1) to `ticker` on `statistics`.
// A null `statistics` pointer is accepted and makes the call a no-op.
inline void RecordTick(Statistics* statistics,
                       Tickers ticker,
                       uint64_t count = 1) {
  // Sanity-check that the name maps cover every enum value.
  assert(HistogramsNameMap.size() == HISTOGRAM_ENUM_MAX);
  assert(TickersNameMap.size() == TICKER_ENUM_MAX);
  if (statistics == nullptr) {
    return;
  }
  statistics->recordTick(ticker, count);
}
// Sets `ticker` on `statistics` to `count`.
// A null `statistics` pointer is accepted and makes the call a no-op.
inline void SetTickerCount(Statistics* statistics,
                           Tickers ticker,
                           uint64_t count) {
  // Sanity-check that the name maps cover every enum value.
  assert(HistogramsNameMap.size() == HISTOGRAM_ENUM_MAX);
  assert(TickersNameMap.size() == TICKER_ENUM_MAX);
  if (statistics == nullptr) {
    return;
  }
  statistics->setTickerCount(ticker, count);
}
}

@ -5,16 +5,16 @@
//
#pragma once
#include "rocksdb/env.h"
#include "rocksdb/statistics.h"
#include "util/statistics_imp.h"
namespace rocksdb {
// Auto-scoped.
// Records the statistic into the corresponding histogram.
class StopWatch {
public:
StopWatch(
explicit StopWatch(
Env * const env,
std::shared_ptr<Statistics> statistics = nullptr,
Statistics* statistics = nullptr,
const Histograms histogram_name = DB_GET) :
env_(env),
start_time_(env->NowMicros()),
@ -36,7 +36,7 @@ class StopWatch {
private:
Env* const env_;
const uint64_t start_time_;
std::shared_ptr<Statistics> statistics_;
Statistics* statistics_;
const Histograms histogram_name_;
};
@ -44,7 +44,7 @@ class StopWatch {
// a nanosecond-precision stopwatch
class StopWatchNano {
public:
StopWatchNano(Env* const env, bool auto_start = false)
explicit StopWatchNano(Env* const env, bool auto_start = false)
: env_(env), start_(0) {
if (auto_start) {
Start();

@ -254,6 +254,14 @@ int DBWithTTL::Level0StopWriteTrigger() {
return db_->Level0StopWriteTrigger();
}
// Returns the Env reported by the wrapped db_; pure delegation.
Env* DBWithTTL::GetEnv() const {
return db_->GetEnv();
}
// Returns the Options reported by the wrapped db_; pure delegation.
const Options& DBWithTTL::GetOptions() const {
return db_->GetOptions();
}
// Forwards the flush request, with the caller's FlushOptions, to the wrapped
// db_ and returns its Status unchanged.
Status DBWithTTL::Flush(const FlushOptions& fopts) {
return db_->Flush(fopts);
}

@ -67,6 +67,10 @@ class DBWithTTL : public StackableDB {
virtual int Level0StopWriteTrigger();
virtual Env* GetEnv() const;
virtual const Options& GetOptions() const;
virtual Status Flush(const FlushOptions& fopts);
virtual Status DisableFileDeletions();
@ -88,7 +92,7 @@ class DBWithTTL : public StackableDB {
// Simulate a db crash, no elegant closing of database.
void TEST_Destroy_DBWithTtl();
virtual DB* GetRawDB() {
virtual DB* GetBaseDB() {
return db_;
}

Loading…
Cancel
Save