Merge branch 'master' of github.com:facebook/rocksdb

Gabriela Jacques da Silva 9 years ago
commit 94be872eab
26 changed files:

  1. .travis.yml (2 changes)
  2. Makefile (2 changes)
  3. db/auto_roll_logger.cc (12 changes)
  4. db/column_family.cc (79 changes)
  5. db/column_family_test.cc (145 changes)
  6. db/db_compaction_test.cc (117 changes)
  7. db/db_impl.cc (39 changes)
  8. db/db_impl.h (4 changes)
  9. db/db_test.cc (55 changes)
  10. db/db_test_util.cc (8 changes)
  11. db/db_test_util.h (7 changes)
  12. db/db_universal_compaction_test.cc (15 changes)
  13. db/perf_context_test.cc (13 changes)
  14. db/version_set.cc (30 changes)
  15. db/version_set.h (2 changes)
  16. db/write_batch_test.cc (114 changes)
  17. db/write_controller.cc (12 changes)
  18. db/write_controller.h (18 changes)
  19. include/rocksdb/options.h (14 changes)
  20. include/rocksdb/perf_context.h (2 changes)
  21. include/rocksdb/statistics.h (7 changes)
  22. port/win/env_win.cc (170 changes)
  23. util/options.cc (5 changes)
  24. util/options_helper.h (5 changes)
  25. util/options_test.cc (45 changes)
  26. util/perf_context.cc (64 changes)

@@ -34,7 +34,7 @@ before_script:
 # as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment.
 script:
   - if [[ "${TRAVIS_OS_NAME}" == 'linux' ]]; then OPT=-DTRAVIS CLANG_FORMAT_DIFF=/tmp/clang-format-diff.py make format || true; fi
-  - OPT=-DTRAVIS V=1 make -j4 check && OPT=-DTRAVIS V=1 make clean jclean rocksdbjava jtest && make clean && OPT="-DTRAVIS -DROCKSDB_LITE" V=1 make -j4 check
+  - OPT=-DTRAVIS V=1 make -j4 check && OPT=-DTRAVIS V=1 make clean jclean rocksdbjava jtest && make clean && OPT="-DTRAVIS -DROCKSDB_LITE" V=1 make -j4 static_lib
 notifications:
   email:

@@ -700,7 +700,7 @@ clean:
 tags:
 	ctags * -R
-	cscope -b `find . -name '*.cc'` `find . -name '*.h'`
+	cscope -b `find . -name '*.cc'` `find . -name '*.h'` `find . -name '*.c'`
 format:
 	build_tools/format-diff.sh

@@ -32,8 +32,16 @@ Status AutoRollLogger::ResetLogger() {
}

void AutoRollLogger::RollLogFile() {
-  std::string old_fname = OldInfoLogFileName(
-      dbname_, env_->NowMicros(), db_absolute_path_, db_log_dir_);
+  // This function is called when the log is rotating. Two rotations can
+  // happen quickly (NowMicros() returns the same value). To avoid
+  // overwriting the previous log file, we increment the timestamp by one
+  // microsecond and try again.
+  uint64_t now = env_->NowMicros();
+  std::string old_fname;
+  do {
+    old_fname = OldInfoLogFileName(
+        dbname_, now, db_absolute_path_, db_log_dir_);
+    now++;
+  } while (env_->FileExists(old_fname).ok());
  env_->RenameFile(log_fname_, old_fname);
}
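The do/while above exists because two rotations can occur within the same NowMicros() tick. A standalone sketch of the same collision-avoidance idea (the function name, the "LOG.old." prefix, and the in-memory set standing in for Env::FileExists() are illustrative, not RocksDB APIs):

#include <cstdint>
#include <string>
#include <unordered_set>

std::string PickArchivedLogName(
    uint64_t now_micros, const std::unordered_set<std::string>& existing) {
  std::string candidate;
  do {
    // Same trick as the diff: advance one microsecond per retry so the
    // candidate name is guaranteed to become unique eventually.
    candidate = "LOG.old." + std::to_string(now_micros);
    now_micros++;
  } while (existing.count(candidate) != 0);
  return candidate;
}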

@@ -239,6 +239,17 @@ ColumnFamilyOptions SanitizeOptions(const DBOptions& db_options,
        result.level0_slowdown_writes_trigger,
        result.level0_file_num_compaction_trigger);
  }
+  if (result.soft_pending_compaction_bytes_limit == 0) {
+    result.soft_pending_compaction_bytes_limit =
+        result.hard_pending_compaction_bytes_limit;
+  } else if (result.hard_pending_compaction_bytes_limit > 0 &&
+             result.soft_pending_compaction_bytes_limit >
+                 result.hard_pending_compaction_bytes_limit) {
+    result.soft_pending_compaction_bytes_limit =
+        result.hard_pending_compaction_bytes_limit;
+  }
  if (result.level_compaction_dynamic_level_bytes) {
    if (result.compaction_style != kCompactionStyleLevel ||
        db_options.db_paths.size() > 1U) {
@@ -513,6 +524,21 @@ std::unique_ptr<WriteControllerToken> SetupDelay(
  }
  return write_controller->GetDelayToken(write_rate);
}

+int GetL0ThresholdSpeedupCompaction(int level0_file_num_compaction_trigger,
+                                    int level0_slowdown_writes_trigger) {
+  // SanitizeOptions() ensures it.
+  assert(level0_file_num_compaction_trigger <= level0_slowdown_writes_trigger);
+  // 1/4 of the way between the L0 compaction trigger threshold and the
+  // slowdown condition, or twice the compaction trigger, whichever is
+  // smaller.
+  return std::min(level0_file_num_compaction_trigger * 2,
+                  level0_file_num_compaction_trigger +
+                      (level0_slowdown_writes_trigger -
+                       level0_file_num_compaction_trigger) /
+                          4);
+}
}  // namespace

void ColumnFamilyData::RecalculateWriteStallConditions(
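As a worked instance of this formula (the option values 4 and 36 are illustrative, mirroring the tests below, not from the function itself):

#include <algorithm>
#include <cassert>

int main() {
  int trigger = 4;    // level0_file_num_compaction_trigger
  int slowdown = 36;  // level0_slowdown_writes_trigger
  // 1/4 of the way to the slowdown trigger, capped at twice the trigger.
  int threshold = std::min(trigger * 2, trigger + (slowdown - trigger) / 4);
  assert(threshold == 8);  // min(8, 12) == 8: the doubling cap wins here
  return 0;
}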
@@ -531,21 +557,6 @@ void ColumnFamilyData::RecalculateWriteStallConditions(
        "(waiting for flush), max_write_buffer_number is set to %d",
        name_.c_str(), imm()->NumNotFlushed(),
        mutable_cf_options.max_write_buffer_number);
-  } else if (mutable_cf_options.max_write_buffer_number > 3 &&
-             imm()->NumNotFlushed() >=
-                 mutable_cf_options.max_write_buffer_number - 1) {
-    write_controller_token_ =
-        SetupDelay(ioptions_.delayed_write_rate, write_controller,
-                   compaction_needed_bytes, prev_compaction_needed_bytes_,
-                   mutable_cf_options.disable_auto_compactions);
-    internal_stats_->AddCFStats(InternalStats::MEMTABLE_SLOWDOWN, 1);
-    Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
-        "[%s] Stalling writes because we have %d immutable memtables "
-        "(waiting for flush), max_write_buffer_number is set to %d "
-        "rate %" PRIu64,
-        name_.c_str(), imm()->NumNotFlushed(),
-        mutable_cf_options.max_write_buffer_number,
-        write_controller->delayed_write_rate());
  } else if (vstorage->l0_delay_trigger_count() >=
             mutable_cf_options.level0_stop_writes_trigger) {
    write_controller_token_ = write_controller->GetStopToken();
@@ -567,6 +578,21 @@ void ColumnFamilyData::RecalculateWriteStallConditions(
        "[%s] Stopping writes because of estimated pending compaction "
        "bytes %" PRIu64,
        name_.c_str(), compaction_needed_bytes);
+  } else if (mutable_cf_options.max_write_buffer_number > 3 &&
+             imm()->NumNotFlushed() >=
+                 mutable_cf_options.max_write_buffer_number - 1) {
+    write_controller_token_ =
+        SetupDelay(ioptions_.delayed_write_rate, write_controller,
+                   compaction_needed_bytes, prev_compaction_needed_bytes_,
+                   mutable_cf_options.disable_auto_compactions);
+    internal_stats_->AddCFStats(InternalStats::MEMTABLE_SLOWDOWN, 1);
+    Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
+        "[%s] Stalling writes because we have %d immutable memtables "
+        "(waiting for flush), max_write_buffer_number is set to %d "
+        "rate %" PRIu64,
+        name_.c_str(), imm()->NumNotFlushed(),
+        mutable_cf_options.max_write_buffer_number,
+        write_controller->delayed_write_rate());
  } else if (mutable_cf_options.level0_slowdown_writes_trigger >= 0 &&
             vstorage->l0_delay_trigger_count() >=
                 mutable_cf_options.level0_slowdown_writes_trigger) {
@@ -598,6 +624,29 @@ void ColumnFamilyData::RecalculateWriteStallConditions(
        "bytes %" PRIu64 " rate %" PRIu64,
        name_.c_str(), vstorage->estimated_compaction_needed_bytes(),
        write_controller->delayed_write_rate());
+  } else if (vstorage->l0_delay_trigger_count() >=
+             GetL0ThresholdSpeedupCompaction(
+                 mutable_cf_options.level0_file_num_compaction_trigger,
+                 mutable_cf_options.level0_slowdown_writes_trigger)) {
+    write_controller_token_ = write_controller->GetCompactionPressureToken();
+    Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
+        "[%s] Increasing compaction threads because we have %d level-0 "
+        "files",
+        name_.c_str(), vstorage->l0_delay_trigger_count());
+  } else if (vstorage->estimated_compaction_needed_bytes() >=
+             mutable_cf_options.soft_pending_compaction_bytes_limit / 4) {
+    // Increase compaction threads if the bytes needed for compaction exceed
+    // 1/4 of the threshold for slowing down.
+    // If the soft pending compaction byte limit is not set, always speed up
+    // compaction.
+    write_controller_token_ = write_controller->GetCompactionPressureToken();
+    if (mutable_cf_options.soft_pending_compaction_bytes_limit > 0) {
+      Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
+          "[%s] Increasing compaction threads because of estimated pending "
+          "compaction bytes %" PRIu64,
+          name_.c_str(), vstorage->estimated_compaction_needed_bytes());
+    }
  } else {
    write_controller_token_.reset();
  }

@@ -65,6 +65,7 @@ class ColumnFamilyTest : public testing::Test {
  ~ColumnFamilyTest() {
    Close();
+    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
    Destroy();
    delete env_;
  }
@@ -2047,7 +2048,6 @@ TEST_F(ColumnFamilyTest, FlushAndDropRaceCondition) {
  Close();
  Destroy();
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}

#ifndef ROCKSDB_LITE
@@ -2125,7 +2125,6 @@ TEST_F(ColumnFamilyTest, CreateAndDropRace) {
  drop_cf_thread.join();
  Close();
  Destroy();
-  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  for (auto* comparator : comparators) {
    if (comparator) {
      delete comparator;
@@ -2137,6 +2136,9 @@ TEST_F(ColumnFamilyTest, CreateAndDropRace) {
TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
  const uint64_t kBaseRate = 810000u;
  db_options_.delayed_write_rate = kBaseRate;
+  db_options_.base_background_compactions = 2;
+  db_options_.max_background_compactions = 6;

  Open({"default"});
  ColumnFamilyData* cfd =
      static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
@@ -2162,6 +2164,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
  ASSERT_TRUE(!dbfull()->TEST_write_controler().IsStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate, dbfull()->TEST_write_controler().delayed_write_rate());
+  ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());

  vstorage->TEST_set_estimated_compaction_needed_bytes(400);
  cfd->RecalculateWriteStallConditions(mutable_cf_options);
@@ -2169,6 +2172,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.2,
            dbfull()->TEST_write_controler().delayed_write_rate());
+  ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());

  vstorage->TEST_set_estimated_compaction_needed_bytes(500);
  cfd->RecalculateWriteStallConditions(mutable_cf_options);
@@ -2224,6 +2228,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
  cfd->RecalculateWriteStallConditions(mutable_cf_options);
  ASSERT_TRUE(dbfull()->TEST_write_controler().IsStopped());
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
+  ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());

  vstorage->TEST_set_estimated_compaction_needed_bytes(3001);
  cfd->RecalculateWriteStallConditions(mutable_cf_options);
@@ -2248,6 +2253,7 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.2,
            dbfull()->TEST_write_controler().delayed_write_rate());
+  ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());

  vstorage->set_l0_delay_trigger_count(101);
  cfd->RecalculateWriteStallConditions(mutable_cf_options);
@@ -2320,6 +2326,73 @@ TEST_F(ColumnFamilyTest, WriteStallSingleColumnFamily) {
            dbfull()->TEST_write_controler().delayed_write_rate());
}

TEST_F(ColumnFamilyTest, CompactionSpeedupSingleColumnFamily) {
db_options_.base_background_compactions = 2;
db_options_.max_background_compactions = 6;
Open({"default"});
ColumnFamilyData* cfd =
static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
VersionStorageInfo* vstorage = cfd->current()->storage_info();
MutableCFOptions mutable_cf_options(
Options(db_options_, column_family_options_),
ImmutableCFOptions(Options(db_options_, column_family_options_)));
// Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8
mutable_cf_options.level0_file_num_compaction_trigger = 4;
mutable_cf_options.level0_slowdown_writes_trigger = 36;
mutable_cf_options.level0_stop_writes_trigger = 50;
// Speedup threshold = 200 / 4 = 50
mutable_cf_options.soft_pending_compaction_bytes_limit = 200;
mutable_cf_options.hard_pending_compaction_bytes_limit = 2000;
vstorage->TEST_set_estimated_compaction_needed_bytes(40);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
vstorage->TEST_set_estimated_compaction_needed_bytes(50);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage->TEST_set_estimated_compaction_needed_bytes(300);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage->TEST_set_estimated_compaction_needed_bytes(45);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
vstorage->set_l0_delay_trigger_count(7);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
vstorage->set_l0_delay_trigger_count(9);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage->set_l0_delay_trigger_count(6);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
// Speed up threshold = min(4 * 2, 4 + (12 - 4)/4) = 6
mutable_cf_options.level0_file_num_compaction_trigger = 4;
mutable_cf_options.level0_slowdown_writes_trigger = 16;
mutable_cf_options.level0_stop_writes_trigger = 30;
vstorage->set_l0_delay_trigger_count(5);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
vstorage->set_l0_delay_trigger_count(7);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage->set_l0_delay_trigger_count(3);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
}
TEST_F(ColumnFamilyTest, WriteStallTwoColumnFamilies) {
  const uint64_t kBaseRate = 810000u;
  db_options_.delayed_write_rate = kBaseRate;

@@ -2401,6 +2474,74 @@ TEST_F(ColumnFamilyTest, WriteStallTwoColumnFamilies) {
  ASSERT_EQ(kBaseRate / 1.2,
            dbfull()->TEST_write_controler().delayed_write_rate());
}

TEST_F(ColumnFamilyTest, CompactionSpeedupTwoColumnFamilies) {
db_options_.base_background_compactions = 2;
db_options_.max_background_compactions = 6;
column_family_options_.soft_pending_compaction_bytes_limit = 200;
column_family_options_.hard_pending_compaction_bytes_limit = 2000;
Open();
CreateColumnFamilies({"one"});
ColumnFamilyData* cfd =
static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
VersionStorageInfo* vstorage = cfd->current()->storage_info();
ColumnFamilyData* cfd1 =
static_cast<ColumnFamilyHandleImpl*>(handles_[1])->cfd();
VersionStorageInfo* vstorage1 = cfd1->current()->storage_info();
MutableCFOptions mutable_cf_options(
Options(db_options_, column_family_options_),
ImmutableCFOptions(Options(db_options_, column_family_options_)));
// Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8
mutable_cf_options.level0_file_num_compaction_trigger = 4;
mutable_cf_options.level0_slowdown_writes_trigger = 36;
mutable_cf_options.level0_stop_writes_trigger = 30;
// Speedup threshold = 200 / 4 = 50
mutable_cf_options.soft_pending_compaction_bytes_limit = 200;
mutable_cf_options.hard_pending_compaction_bytes_limit = 2000;
MutableCFOptions mutable_cf_options1 = mutable_cf_options;
mutable_cf_options1.level0_slowdown_writes_trigger = 16;
vstorage->TEST_set_estimated_compaction_needed_bytes(40);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
vstorage->TEST_set_estimated_compaction_needed_bytes(60);
cfd1->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage1->TEST_set_estimated_compaction_needed_bytes(30);
cfd1->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage1->TEST_set_estimated_compaction_needed_bytes(70);
cfd1->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage->TEST_set_estimated_compaction_needed_bytes(20);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage1->TEST_set_estimated_compaction_needed_bytes(3);
cfd1->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
vstorage->set_l0_delay_trigger_count(9);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage1->set_l0_delay_trigger_count(2);
cfd1->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(6, dbfull()->BGCompactionsAllowed());
vstorage->set_l0_delay_trigger_count(0);
cfd->RecalculateWriteStallConditions(mutable_cf_options);
ASSERT_EQ(2, dbfull()->BGCompactionsAllowed());
}
}  // namespace rocksdb

int main(int argc, char** argv) {

@@ -533,6 +533,104 @@ TEST_P(DBCompactionTestWithParam, CompactionTrigger) {
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 1);
}

TEST_F(DBCompactionTest, BGCompactionsAllowed) {
// Create several column families. Trigger compactions in all of them and
// verify that the number of scheduled compactions stays within the allowed
// limit.
const int kNumKeysPerFile = 100;
Options options;
options.write_buffer_size = 110 << 10; // 110KB
options.arena_block_size = 4 << 10;
options.num_levels = 3;
// Should speed up compaction when there are 4 files.
options.level0_file_num_compaction_trigger = 2;
options.level0_slowdown_writes_trigger = 20;
options.soft_pending_compaction_bytes_limit = 1 << 30; // Infinitely large
options.base_background_compactions = 1;
options.max_background_compactions = 3;
options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile));
options = CurrentOptions(options);
// Block all threads in thread pool.
const size_t kTotalTasks = 4;
env_->SetBackgroundThreads(4, Env::LOW);
test::SleepingBackgroundTask sleeping_tasks[kTotalTasks];
for (size_t i = 0; i < kTotalTasks; i++) {
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
&sleeping_tasks[i], Env::Priority::LOW);
sleeping_tasks[i].WaitUntilSleeping();
}
CreateAndReopenWithCF({"one", "two", "three"}, options);
Random rnd(301);
for (int cf = 0; cf < 4; cf++) {
for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
for (int i = 0; i < kNumKeysPerFile; i++) {
ASSERT_OK(Put(cf, Key(i), ""));
}
// put extra key to trigger flush
ASSERT_OK(Put(cf, "", ""));
dbfull()->TEST_WaitForFlushMemTable(handles_[cf]);
ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1);
}
}
// Now all column families qualify for compaction, but only one should be
// scheduled, because no column family hits the speedup condition.
ASSERT_EQ(1, env_->GetThreadPoolQueueLen(Env::Priority::LOW));
// Create two more files for one column family, which triggers the speedup
// condition; three compactions will be scheduled.
for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
for (int i = 0; i < kNumKeysPerFile; i++) {
ASSERT_OK(Put(2, Key(i), ""));
}
// put extra key to trigger flush
ASSERT_OK(Put(2, "", ""));
dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1,
NumTableFilesAtLevel(0, 2));
}
ASSERT_EQ(3, env_->GetThreadPoolQueueLen(Env::Priority::LOW));
// Unblock all threads to unblock all compactions.
for (size_t i = 0; i < kTotalTasks; i++) {
sleeping_tasks[i].WakeUp();
sleeping_tasks[i].WaitUntilDone();
}
dbfull()->TEST_WaitForCompact();
// Verify that the number of compactions allowed comes back to 1.
for (size_t i = 0; i < kTotalTasks; i++) {
sleeping_tasks[i].Reset();
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
&sleeping_tasks[i], Env::Priority::LOW);
sleeping_tasks[i].WaitUntilSleeping();
}
for (int cf = 0; cf < 4; cf++) {
for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
for (int i = 0; i < kNumKeysPerFile; i++) {
ASSERT_OK(Put(cf, Key(i), ""));
}
// put extra key to trigger flush
ASSERT_OK(Put(cf, "", ""));
dbfull()->TEST_WaitForFlushMemTable(handles_[cf]);
ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1);
}
}
// Now all column families qualify for compaction, but only one should be
// scheduled, because no column family hits the speedup condition.
ASSERT_EQ(1, env_->GetThreadPoolQueueLen(Env::Priority::LOW));
for (size_t i = 0; i < kTotalTasks; i++) {
sleeping_tasks[i].WakeUp();
sleeping_tasks[i].WaitUntilDone();
}
}
TEST_P(DBCompactionTestWithParam, CompactionsGenerateMultipleFiles) {
  Options options;
  options.write_buffer_size = 100000000;  // Large write buffer

@@ -2198,6 +2296,25 @@ TEST_P(DBCompactionTestWithParam, CompressLevelCompaction) {
  Destroy(options);
}

TEST_F(DBCompactionTest, SanitizeCompactionOptionsTest) {
Options options = CurrentOptions();
options.max_background_compactions = 5;
options.soft_pending_compaction_bytes_limit = 0;
options.hard_pending_compaction_bytes_limit = 100;
options.create_if_missing = true;
DestroyAndReopen(options);
ASSERT_EQ(5, db_->GetOptions().base_background_compactions);
ASSERT_EQ(100, db_->GetOptions().soft_pending_compaction_bytes_limit);
options.base_background_compactions = 4;
options.max_background_compactions = 3;
options.soft_pending_compaction_bytes_limit = 200;
options.hard_pending_compaction_bytes_limit = 150;
DestroyAndReopen(options);
ASSERT_EQ(3, db_->GetOptions().base_background_compactions);
ASSERT_EQ(150, db_->GetOptions().soft_pending_compaction_bytes_limit);
}
// This tests for a bug that could cause two level0 compactions running
// concurrently
// TODO(aekmekji): Make sure that the reason this fails when run with

@@ -146,6 +146,12 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
      result.info_log = nullptr;
    }
  }
+  if (result.base_background_compactions == -1) {
+    result.base_background_compactions = result.max_background_compactions;
+  }
+  if (result.base_background_compactions > result.max_background_compactions) {
+    result.base_background_compactions = result.max_background_compactions;
+  }
  result.env->IncBackgroundThreadsIfNeeded(src.max_background_compactions,
                                           Env::Priority::LOW);
  result.env->IncBackgroundThreadsIfNeeded(src.max_background_flushes,
@@ -2448,12 +2454,14 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
    env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH, this);
  }

+  auto bg_compactions_allowed = BGCompactionsAllowed();
+
  // special case -- if max_background_flushes == 0, then schedule flush on a
  // compaction thread
  if (db_options_.max_background_flushes == 0) {
    while (unscheduled_flushes_ > 0 &&
           bg_flush_scheduled_ + bg_compaction_scheduled_ <
-               db_options_.max_background_compactions) {
+               bg_compactions_allowed) {
      unscheduled_flushes_--;
      bg_flush_scheduled_++;
      env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::LOW, this);
@@ -2466,7 +2474,7 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
    return;
  }

-  while (bg_compaction_scheduled_ < db_options_.max_background_compactions &&
+  while (bg_compaction_scheduled_ < bg_compactions_allowed &&
         unscheduled_compactions_ > 0) {
    CompactionArg* ca = new CompactionArg;
    ca->db = this;
@@ -2478,6 +2486,14 @@
  }
}

+int DBImpl::BGCompactionsAllowed() const {
+  if (write_controller_.NeedSpeedupCompaction()) {
+    return db_options_.max_background_compactions;
+  } else {
+    return db_options_.base_background_compactions;
+  }
+}
+
void DBImpl::AddToCompactionQueue(ColumnFamilyData* cfd) {
  assert(!cfd->pending_compaction());
  cfd->Ref();
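Taken together, the hunks above implement a two-tier admission rule: compactions are admitted against base_background_compactions in the steady state, and against max_background_compactions while the write controller reports pressure. A toy model of that rule (standalone illustration; the values 2 and 6 mirror the tests, not a recommendation):

#include <cassert>

struct ToyScheduler {
  int base_allowed = 2;   // base_background_compactions
  int max_allowed = 6;    // max_background_compactions
  bool pressure = false;  // WriteController::NeedSpeedupCompaction()
  int scheduled = 0;

  int Allowed() const { return pressure ? max_allowed : base_allowed; }
  bool TrySchedule() {
    if (scheduled >= Allowed()) return false;
    ++scheduled;
    return true;
  }
};

int main() {
  ToyScheduler s;
  assert(s.TrySchedule() && s.TrySchedule());  // two slots in steady state
  assert(!s.TrySchedule());                    // the third is rejected
  s.pressure = true;                           // e.g. L0 files build up
  assert(s.TrySchedule());                     // now admitted up to the max
  return 0;
}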
@@ -2590,10 +2606,10 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
    LogToBuffer(
        log_buffer,
        "Calling FlushMemTableToOutputFile with column "
-        "family [%s], flush slots available %d, compaction slots available %d",
-        cfd->GetName().c_str(),
-        db_options_.max_background_flushes - bg_flush_scheduled_,
-        db_options_.max_background_compactions - bg_compaction_scheduled_);
+        "family [%s], flush slots available %d, compaction slots allowed %d, "
+        "compaction slots scheduled %d",
+        cfd->GetName().c_str(), db_options_.max_background_flushes,
+        bg_flush_scheduled_, BGCompactionsAllowed() - bg_compaction_scheduled_);
    status = FlushMemTableToOutputFile(cfd, mutable_cf_options, made_progress,
                                       job_context, log_buffer);
    if (cfd->Unref()) {
@@ -3311,6 +3327,7 @@ Status DBImpl::GetImpl(const ReadOptions& read_options,
    RecordTick(stats_, NUMBER_KEYS_READ);
    RecordTick(stats_, BYTES_READ, value->size());
+    MeasureTime(stats_, BYTES_PER_READ, value->size());
  }
  return s;
}
@@ -3421,6 +3438,7 @@ std::vector<Status> DBImpl::MultiGet(
  RecordTick(stats_, NUMBER_MULTIGET_CALLS);
  RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys);
  RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read);
+  MeasureTime(stats_, BYTES_PER_MULTIGET, bytes_read);
  PERF_TIMER_STOP(get_post_process_time);

  return stat_list;
@@ -4119,7 +4137,9 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
    if (write_thread_.CompleteParallelWorker(&w)) {
      // we're responsible for early exit
-      auto last_sequence = w.parallel_group->last_writer->sequence;
+      auto last_sequence =
+          w.parallel_group->last_writer->sequence +
+          WriteBatchInternal::Count(w.parallel_group->last_writer->batch) - 1;
      SetTickerCount(stats_, SEQUENCE_NUMBER, last_sequence);
      versions_->SetLastSequence(last_sequence);
      write_thread_.EarlyExitParallelGroup(&w);
@@ -4305,6 +4325,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
    // Record statistics
    RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
    RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
+    MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
    PERF_TIMER_STOP(write_pre_and_post_process_time);

    if (write_options.disableWAL) {
@@ -4418,7 +4439,9 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
            this, true /*dont_filter_deletes*/,
            true /*concurrent_memtable_writes*/);
-        assert(last_writer->sequence == last_sequence);
+        assert(last_writer->sequence +
+                   WriteBatchInternal::Count(last_writer->batch) - 1 ==
+               last_sequence);
        // CompleteParallelWorker returns true if this thread should
        // handle exit, false means somebody else did
        exit_completed_early = !write_thread_.CompleteParallelWorker(&w);
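Both changes account for multi-entry batches: a batch occupies a contiguous run of sequence numbers, so the group's last sequence is the last writer's starting sequence plus its entry count minus one. A quick arithmetic check (numbers illustrative):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t last_writer_sequence = 10;  // first sequence of the last batch
  uint64_t batch_count = 3;            // WriteBatchInternal::Count(batch)
  // The entries consume sequences 10, 11, 12, so the group ends at 12.
  uint64_t last_sequence = last_writer_sequence + batch_count - 1;
  assert(last_sequence == 12);
  return 0;
}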

@@ -347,6 +347,10 @@ class DBImpl : public DB {
#endif  // NDEBUG

+  // Returns the maximum number of background compactions allowed to be
+  // scheduled, based on the current compaction pressure status.
+  int BGCompactionsAllowed() const;
+
  // Returns the list of live files in 'live' and the list
  // of all files in the filesystem in 'candidate_files'.
  // If force == false and the last call was less than

@@ -606,6 +606,61 @@ TEST_F(DBTest, EmptyFlush) {
                         kSkipUniversalCompaction | kSkipMergePut));
}

// Disabled because not all platforms can run it: it requires more than 9GB
// of memory, with a single allocation of more than 3GB.
TEST_F(DBTest, DISABLED_VeryLargeValue) {
const size_t kValueSize = 3221225472u; // 3GB value
const size_t kKeySize = 8388608u; // 8MB key
std::string raw(kValueSize, 'v');
std::string key1(kKeySize, 'c');
std::string key2(kKeySize, 'd');
Options options;
options.env = env_;
options.write_buffer_size = 100000; // Small write buffer
options.paranoid_checks = true;
options = CurrentOptions(options);
DestroyAndReopen(options);
ASSERT_OK(Put("boo", "v1"));
ASSERT_OK(Put("foo", "v1"));
ASSERT_OK(Put(key1, raw));
raw[0] = 'w';
ASSERT_OK(Put(key2, raw));
dbfull()->TEST_WaitForFlushMemTable();
ASSERT_EQ(1, NumTableFilesAtLevel(0));
std::string value;
Status s = db_->Get(ReadOptions(), key1, &value);
ASSERT_OK(s);
ASSERT_EQ(kValueSize, value.size());
ASSERT_EQ('v', value[0]);
s = db_->Get(ReadOptions(), key2, &value);
ASSERT_OK(s);
ASSERT_EQ(kValueSize, value.size());
ASSERT_EQ('w', value[0]);
// Compact all files.
Flush();
db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
// Check DB is not in read-only state.
ASSERT_OK(Put("boo", "v1"));
s = db_->Get(ReadOptions(), key1, &value);
ASSERT_OK(s);
ASSERT_EQ(kValueSize, value.size());
ASSERT_EQ('v', value[0]);
s = db_->Get(ReadOptions(), key2, &value);
ASSERT_OK(s);
ASSERT_EQ(kValueSize, value.size());
ASSERT_EQ('w', value[0]);
}
TEST_F(DBTest, GetFromImmutableLayer) {
  do {
    Options options;

@@ -85,7 +85,8 @@ bool DBTestBase::ShouldSkipOptions(int option_config, int skip_mask) {
        option_config == kHashCuckoo || option_config == kUniversalCompaction ||
        option_config == kUniversalCompactionMultiLevel ||
        option_config == kUniversalSubcompactions ||
-        option_config == kFIFOCompaction) {
+        option_config == kFIFOCompaction ||
+        option_config == kConcurrentSkipList) {
      return true;
    }
#endif
@@ -361,6 +362,11 @@ Options DBTestBase::CurrentOptions(
      options.max_subcompactions = 4;
      break;
    }
+    case kConcurrentSkipList: {
+      options.allow_concurrent_memtable_write = true;
+      options.enable_write_thread_adaptive_yield = true;
+      break;
+    }
    default:
      break;

@@ -525,9 +525,10 @@ class DBTestBase : public testing::Test {
    kOptimizeFiltersForHits = 27,
    kRowCache = 28,
    kRecycleLogFiles = 29,
-    kLevelSubcompactions = 30,
-    kUniversalSubcompactions = 31,
-    kEnd = 30
+    kEnd = 30,
+    kConcurrentSkipList = 30,
+    kLevelSubcompactions = 31,
+    kUniversalSubcompactions = 32,
  };

  int option_config_;

@@ -187,14 +187,16 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) {
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

-  Put("", "");
  for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
    Put(Key(num * 10), "val");
+    if (num) {
+      dbfull()->TEST_WaitForFlushMemTable();
+    }
    Put(Key(30 + num * 10), "val");
    Put(Key(60 + num * 10), "val");
-    dbfull()->TEST_WaitForFlushMemTable();
  }
+  Put("", "");
+  dbfull()->TEST_WaitForFlushMemTable();

  // Query set of non existing keys
  for (int i = 5; i < 90; i += 10) {
@@ -205,6 +207,13 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) {
  ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
  auto prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL);

+  // Make sure the bloom filter is used for all but the last L0 file when
+  // looking up a non-existent key that's in the range of all L0 files.
+  ASSERT_EQ(Get(Key(35)), "NOT_FOUND");
+  ASSERT_EQ(prev_counter + NumTableFilesAtLevel(0) - 1,
+            TestGetTickerCount(options, BLOOM_FILTER_USEFUL));
+  prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL);
+
  // Unblock compaction and wait for it to happen.
  sleeping_task_low.WakeUp();
  dbfull()->TEST_WaitForCompact();

@@ -589,6 +589,19 @@ TEST_F(PerfContextTest, FalseDBMutexWait) {
    }
  }
}

TEST_F(PerfContextTest, ToString) {
perf_context.Reset();
perf_context.block_read_count = 12345;
std::string zero_included = perf_context.ToString();
ASSERT_NE(std::string::npos, zero_included.find("= 0"));
ASSERT_NE(std::string::npos, zero_included.find("= 12345"));
std::string zero_excluded = perf_context.ToString(true);
ASSERT_EQ(std::string::npos, zero_excluded.find("= 0"));
ASSERT_NE(std::string::npos, zero_excluded.find("= 12345"));
}
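The new exclude_zero_counters flag keeps ToString() output compact when most counters are idle. A minimal usage sketch (SetPerfLevel and kEnableCount come from the existing public perf API, not from this diff; the workflow shown is an assumption):

#include <iostream>
#include "rocksdb/perf_context.h"
#include "rocksdb/perf_level.h"

void DumpNonZeroPerfCounters() {
  rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableCount);
  rocksdb::perf_context.Reset();
  // ... issue some DB reads or writes here ...
  std::cout << rocksdb::perf_context.ToString(/*exclude_zero_counters=*/true)
            << std::endl;
}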
}

int main(int argc, char** argv) {

@@ -84,15 +84,11 @@ int FindFileInRange(const InternalKeyComparator& icmp,
// are MergeInProgress).
class FilePicker {
 public:
-  FilePicker(
-      std::vector<FileMetaData*>* files,
-      const Slice& user_key,
-      const Slice& ikey,
-      autovector<LevelFilesBrief>* file_levels,
-      unsigned int num_levels,
-      FileIndexer* file_indexer,
-      const Comparator* user_comparator,
-      const InternalKeyComparator* internal_comparator)
+  FilePicker(std::vector<FileMetaData*>* files, const Slice& user_key,
+             const Slice& ikey, autovector<LevelFilesBrief>* file_levels,
+             unsigned int num_levels, FileIndexer* file_indexer,
+             const Comparator* user_comparator,
+             const InternalKeyComparator* internal_comparator)
      : num_levels_(num_levels),
        curr_level_(-1),
        hit_file_level_(-1),
@@ -102,6 +98,7 @@ class FilePicker {
        files_(files),
#endif
        level_files_brief_(file_levels),
+        is_hit_file_last_in_level_(false),
        user_key_(user_key),
        ikey_(ikey),
        file_indexer_(file_indexer),
@@ -126,6 +123,8 @@
        // Loops over all files in current level.
        FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_];
        hit_file_level_ = curr_level_;
+        is_hit_file_last_in_level_ =
+            curr_index_in_curr_level_ == curr_file_level_->num_files - 1;
        int cmp_largest = -1;

        // Do key range filtering of files or/and fractional cascading if:
@@ -209,6 +208,10 @@
  // for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts
  unsigned int GetHitFileLevel() { return hit_file_level_; }

+  // Returns true if the most recent "hit file" (i.e., one returned by
+  // GetNextFile()) is at the last index in its level.
+  bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; }
+
 private:
  unsigned int num_levels_;
  unsigned int curr_level_;
@@ -220,6 +223,7 @@
#endif
  autovector<LevelFilesBrief>* level_files_brief_;
  bool search_ended_;
+  bool is_hit_file_last_in_level_;
  LevelFilesBrief* curr_file_level_;
  unsigned int curr_index_in_curr_level_;
  unsigned int start_index_in_curr_level_;
@@ -903,7 +907,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
      *status = table_cache_->Get(
          read_options, *internal_comparator(), f->fd, ikey, &get_context,
          cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
-          IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel())));
+          IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
+                          fp.IsHitFileLastInLevel()));
      // TODO: examine the behavior for corrupted key
      if (!status->ok()) {
        return;
@@ -960,10 +965,11 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
  }
}

-bool Version::IsFilterSkipped(int level) {
+bool Version::IsFilterSkipped(int level, bool is_file_last_in_level) {
  // Reaching the bottom level implies misses at all upper levels, so we'll
  // skip checking the filters when we predict a hit.
-  return cfd_->ioptions()->optimize_filters_for_hits && level > 0 &&
+  return cfd_->ioptions()->optimize_filters_for_hits &&
+         (level > 0 || is_file_last_in_level) &&
         level == storage_info_.num_non_empty_levels() - 1;
}

@@ -530,7 +530,7 @@ class Version {
  // checked during read operations. In certain cases (trivial move or preload),
  // the filter block may already be cached, but we still do not access it such
  // that it eventually expires from the cache.
-  bool IsFilterSkipped(int level);
+  bool IsFilterSkipped(int level, bool is_file_last_in_level = false);

  // The helper function of UpdateAccumulatedStats, which may fill the missing
  // fields of file_meta from its associated TableProperties.

@@ -308,6 +308,120 @@ TEST_F(WriteBatchTest, Blob) {
            handler.seen);
}

// Disabled: the test requires more than 30GB of memory to run, with a single
// memory allocation of more than 30GB. Not every platform can run it, and it
// also takes a long time.
TEST_F(WriteBatchTest, DISABLED_ManyUpdates) {
// Insert a large number of small key/value pairs, pushing the total batch
// size past 30GB.
static const size_t kKeyValueSize = 4u;
static const uint32_t kNumUpdates = 3 << 30;
std::string raw(kKeyValueSize, 'A');
WriteBatch batch(kNumUpdates * (4 + kKeyValueSize * 2) + 1024u);
char c = 'A';
for (uint32_t i = 0; i < kNumUpdates; i++) {
if (c > 'Z') {
c = 'A';
}
raw[0] = c;
raw[raw.length() - 1] = c;
c++;
batch.Put(raw, raw);
}
ASSERT_EQ(kNumUpdates, batch.Count());
struct NoopHandler : public WriteBatch::Handler {
uint32_t num_seen = 0;
char expected_char = 'A';
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
const Slice& value) override {
EXPECT_EQ(kKeyValueSize, key.size());
EXPECT_EQ(kKeyValueSize, value.size());
EXPECT_EQ(expected_char, key[0]);
EXPECT_EQ(expected_char, value[0]);
EXPECT_EQ(expected_char, key[kKeyValueSize - 1]);
EXPECT_EQ(expected_char, value[kKeyValueSize - 1]);
expected_char++;
if (expected_char > 'Z') {
expected_char = 'A';
}
++num_seen;
return Status::OK();
}
virtual Status DeleteCF(uint32_t column_family_id,
const Slice& key) override {
EXPECT_TRUE(false);
return Status::OK();
}
virtual Status SingleDeleteCF(uint32_t column_family_id,
const Slice& key) override {
EXPECT_TRUE(false);
return Status::OK();
}
virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
const Slice& value) override {
EXPECT_TRUE(false);
return Status::OK();
}
virtual void LogData(const Slice& blob) override { EXPECT_TRUE(false); }
virtual bool Continue() override { return num_seen < kNumUpdates; }
} handler;
batch.Iterate(&handler);
ASSERT_EQ(kNumUpdates, handler.num_seen);
}
// Disabled: the test requires more than 18GB of memory to run, with a single
// memory allocation of more than 12GB. Not every platform can run it.
TEST_F(WriteBatchTest, DISABLED_LargeKeyValue) {
// Insert two key/value pairs of 3GB each, pushing the total batch size to 12GB.
static const size_t kKeyValueSize = 3221225472u;
std::string raw(kKeyValueSize, 'A');
WriteBatch batch(12884901888u + 1024u);
for (char i = 0; i < 2; i++) {
raw[0] = 'A' + i;
raw[raw.length() - 1] = 'A' - i;
batch.Put(raw, raw);
}
ASSERT_EQ(2, batch.Count());
struct NoopHandler : public WriteBatch::Handler {
int num_seen = 0;
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
const Slice& value) override {
EXPECT_EQ(kKeyValueSize, key.size());
EXPECT_EQ(kKeyValueSize, value.size());
EXPECT_EQ('A' + num_seen, key[0]);
EXPECT_EQ('A' + num_seen, value[0]);
EXPECT_EQ('A' - num_seen, key[kKeyValueSize - 1]);
EXPECT_EQ('A' - num_seen, value[kKeyValueSize - 1]);
++num_seen;
return Status::OK();
}
virtual Status DeleteCF(uint32_t column_family_id,
const Slice& key) override {
EXPECT_TRUE(false);
return Status::OK();
}
virtual Status SingleDeleteCF(uint32_t column_family_id,
const Slice& key) override {
EXPECT_TRUE(false);
return Status::OK();
}
virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
const Slice& value) override {
EXPECT_TRUE(false);
return Status::OK();
}
virtual void LogData(const Slice& blob) override { EXPECT_TRUE(false); }
virtual bool Continue() override { return num_seen < 2; }
} handler;
batch.Iterate(&handler);
ASSERT_EQ(2, handler.num_seen);
}
TEST_F(WriteBatchTest, Continue) {
  WriteBatch batch;

@@ -26,6 +26,13 @@ std::unique_ptr<WriteControllerToken> WriteController::GetDelayToken(
  return std::unique_ptr<WriteControllerToken>(new DelayWriteToken(this));
}

std::unique_ptr<WriteControllerToken>
WriteController::GetCompactionPressureToken() {
++total_compaction_pressure_;
return std::unique_ptr<WriteControllerToken>(
new CompactionPressureToken(this));
}
bool WriteController::IsStopped() const { return total_stopped_ > 0; }
// This is inside DB mutex, so we can't sleep and need to minimize
// frequency to get time.

@@ -106,4 +113,9 @@ DelayWriteToken::~DelayWriteToken() {
  assert(controller_->total_delayed_ >= 0);
}

CompactionPressureToken::~CompactionPressureToken() {
controller_->total_compaction_pressure_--;
assert(controller_->total_compaction_pressure_ >= 0);
}
}  // namespace rocksdb

@@ -23,6 +23,7 @@ class WriteController {
  explicit WriteController(uint64_t _delayed_write_rate = 1024u * 1024u * 32u)
      : total_stopped_(0),
        total_delayed_(0),
+        total_compaction_pressure_(0),
        bytes_left_(0),
        last_refill_time_(0) {
    set_delayed_write_rate(_delayed_write_rate);
@@ -38,10 +39,16 @@ class WriteController {
  // which returns number of microseconds to sleep.
  std::unique_ptr<WriteControllerToken> GetDelayToken(
      uint64_t delayed_write_rate);
+  // When an actor (column family) requests a moderate token, the number of
+  // compaction threads will be increased.
+  std::unique_ptr<WriteControllerToken> GetCompactionPressureToken();

-  // these two methods query the state of the WriteController
+  // these three methods query the state of the WriteController
  bool IsStopped() const;
  bool NeedsDelay() const { return total_delayed_ > 0; }
+  bool NeedSpeedupCompaction() const {
+    return IsStopped() || NeedsDelay() || total_compaction_pressure_ > 0;
+  }

  // return how many microseconds the caller needs to sleep after the call
  // num_bytes: how many bytes to put into the DB.
  // Prerequisite: DB mutex held.
@@ -59,9 +66,11 @@ class WriteController {
  friend class WriteControllerToken;
  friend class StopWriteToken;
  friend class DelayWriteToken;
+  friend class CompactionPressureToken;

  int total_stopped_;
  int total_delayed_;
+  int total_compaction_pressure_;
  uint64_t bytes_left_;
  uint64_t last_refill_time_;
  uint64_t delayed_write_rate_;
@@ -96,4 +105,11 @@ class DelayWriteToken : public WriteControllerToken {
  virtual ~DelayWriteToken();
};

+class CompactionPressureToken : public WriteControllerToken {
+ public:
+  explicit CompactionPressureToken(WriteController* controller)
+      : WriteControllerToken(controller) {}
+  virtual ~CompactionPressureToken();
+};
+
}  // namespace rocksdb
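The token is RAII: holding it keeps total_compaction_pressure_ non-zero, and releasing the unique_ptr decrements it, so NeedSpeedupCompaction() flips back once every holder is gone. A minimal sketch of the pattern (standalone toy class, not the RocksDB one):

#include <cassert>
#include <memory>

class ToyPressureToken {
 public:
  explicit ToyPressureToken(int* counter) : counter_(counter) { ++*counter_; }
  ~ToyPressureToken() { --*counter_; }

 private:
  int* counter_;
};

int main() {
  int pressure = 0;
  {
    auto token = std::make_unique<ToyPressureToken>(&pressure);
    assert(pressure > 0);  // NeedSpeedupCompaction() would now return true
  }
  assert(pressure == 0);   // token destroyed: back to the base thread count
  return 0;
}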

@@ -933,8 +933,19 @@ struct DBOptions {
  // regardless of this setting
  uint64_t delete_obsolete_files_period_micros;

+  // Suggested number of concurrent background compaction jobs, submitted to
+  // the default LOW priority thread pool.
+  //
+  // Default: max_background_compactions
+  int base_background_compactions;
+
  // Maximum number of concurrent background compaction jobs, submitted to
  // the default LOW priority thread pool.
+  // We first try to schedule compactions based on
+  // `base_background_compactions`. If the compactions cannot catch up, we
+  // will increase the number of compaction threads up to
+  // `max_background_compactions`.
+  //
  // If you're increasing this, also consider increasing number of threads in
  // LOW priority thread pool. For more information, see
  // Env::SetBackgroundThreads
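A minimal configuration sketch under assumed values (the 2/6 split and the database path are illustrative, not from this commit):

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.base_background_compactions = 2;  // steady-state compaction slots
  options.max_background_compactions = 6;   // ceiling under write pressure
  // Size the LOW-priority pool to match the ceiling.
  options.env->SetBackgroundThreads(6, rocksdb::Env::LOW);

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/testdb", &db);
  if (s.ok()) {
    delete db;
  }
  return 0;
}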
@@ -1110,6 +1121,9 @@ struct DBOptions {
  // This option is currently honored only on Windows
  //
  // Default: 1 Mb
+  //
+  // Special value: 0 - means do not maintain a per-instance buffer; allocate
+  // a per-request buffer instead and avoid locking.
  size_t random_access_max_buffer_size;

  // This is the maximum buffer size that is used by WritableFileWriter.

@@ -21,7 +21,7 @@ struct PerfContext {
  void Reset();  // reset all performance counters to zero

-  std::string ToString() const;
+  std::string ToString(bool exclude_zero_counters = false) const;

  uint64_t user_key_comparison_count;  // total number of user key comparisons
  uint64_t block_cache_hit_count;      // total number of block cache hits

@@ -280,6 +280,10 @@ enum Histograms : uint32_t {
  SST_READ_MICROS,
  // The number of subcompactions actually scheduled during a compaction
  NUM_SUBCOMPACTIONS_SCHEDULED,
+  // Value size distribution in each operation
+  BYTES_PER_READ,
+  BYTES_PER_WRITE,
+  BYTES_PER_MULTIGET,
  HISTOGRAM_ENUM_MAX,  // TODO(ldemailly): enforce HistogramsNameMap match
};
@@ -307,6 +311,9 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
    {WRITE_STALL, "rocksdb.db.write.stall"},
    {SST_READ_MICROS, "rocksdb.sst.read.micros"},
    {NUM_SUBCOMPACTIONS_SCHEDULED, "rocksdb.num.subcompactions.scheduled"},
+    {BYTES_PER_READ, "rocksdb.bytes.per.read"},
+    {BYTES_PER_WRITE, "rocksdb.bytes.per.write"},
+    {BYTES_PER_MULTIGET, "rocksdb.bytes.per.multiget"},
};

struct HistogramData {
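Reading the new histograms back requires statistics to be enabled on the DB. A hedged sketch using the existing statistics API (CreateDBStatistics() and histogramData() predate this diff):

#include <iostream>
#include <memory>
#include "rocksdb/statistics.h"

void PrintBytesPerWrite(const std::shared_ptr<rocksdb::Statistics>& stats) {
  // Assumes the DB was opened with options.statistics = CreateDBStatistics().
  rocksdb::HistogramData hist;
  stats->histogramData(rocksdb::BYTES_PER_WRITE, &hist);
  std::cout << "bytes/write median: " << hist.median
            << ", p99: " << hist.percentile99 << std::endl;
}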

@@ -766,6 +766,18 @@ class WinRandomAccessFile : public RandomAccessFile {
    return read;
  }

+  void CalculateReadParameters(uint64_t offset, size_t bytes_requested,
+                               size_t& actual_bytes_toread,
+                               uint64_t& first_page_start) const {
+    const size_t alignment = buffer_.Alignment();
+
+    first_page_start = TruncateToPageBoundary(alignment, offset);
+    const uint64_t last_page_start =
+        TruncateToPageBoundary(alignment, offset + bytes_requested - 1);
+    actual_bytes_toread = (last_page_start - first_page_start) + alignment;
+  }
+
 public:
  WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
                      const EnvOptions& options)
@ -797,66 +809,87 @@ class WinRandomAccessFile : public RandomAccessFile {
virtual Status Read(uint64_t offset, size_t n, Slice* result, virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override { char* scratch) const override {
Status s; Status s;
SSIZE_T r = -1; SSIZE_T r = -1;
size_t left = n; size_t left = n;
char* dest = scratch; char* dest = scratch;
if (n == 0) {
*result = Slice(scratch, 0);
return s;
}
// When in unbuffered mode we need to do the following changes: // When in unbuffered mode we need to do the following changes:
// - use our own aligned buffer // - use our own aligned buffer
// - always read at the offset of that is a multiple of alignment // - always read at the offset of that is a multiple of alignment
if (!use_os_buffer_) { if (!use_os_buffer_) {
std::unique_lock<std::mutex> lock(buffer_mut_);
// Let's see if at least some of the requested data is already uint64_t first_page_start = 0;
// in the buffer size_t actual_bytes_toread = 0;
if (offset >= buffered_start_ && size_t bytes_requested = left;
offset < (buffered_start_ + buffer_.CurrentSize())) {
size_t buffer_offset = offset - buffered_start_;
r = buffer_.Read(dest, buffer_offset, left);
assert(r >= 0);
left -= size_t(r); if (!read_ahead_ && random_access_max_buffer_size_ == 0) {
offset += r; CalculateReadParameters(offset, bytes_requested, actual_bytes_toread,
dest += r; first_page_start);
}
// Still some left or none was buffered assert(actual_bytes_toread > 0);
if (left > 0) {
// Figure out the start/end offset for reading and amount to read r = ReadIntoOneShotBuffer(offset, first_page_start,
const size_t alignment = buffer_.Alignment(); actual_bytes_toread, left, dest);
const size_t first_page_start = } else {
TruncateToPageBoundary(alignment, offset);
std::unique_lock<std::mutex> lock(buffer_mut_);
// Let's see if at least some of the requested data is already
// in the buffer
if (offset >= buffered_start_ &&
offset < (buffered_start_ + buffer_.CurrentSize())) {
size_t buffer_offset = offset - buffered_start_;
r = buffer_.Read(dest, buffer_offset, left);
assert(r >= 0);
size_t bytes_requested = left; left -= size_t(r);
if (read_ahead_ && bytes_requested < compaction_readahead_size_) { offset += r;
bytes_requested = compaction_readahead_size_; dest += r;
} }
const size_t last_page_start = // Still some left or none was buffered
TruncateToPageBoundary(alignment, offset + bytes_requested - 1); if (left > 0) {
const size_t actual_bytes_toread = // Figure out the start/end offset for reading and amount to read
(last_page_start - first_page_start) + alignment; bytes_requested = left;
if (buffer_.Capacity() < actual_bytes_toread) { if (read_ahead_ && bytes_requested < compaction_readahead_size_) {
// If we are in read-ahead mode or the requested size bytes_requested = compaction_readahead_size_;
// exceeds max buffer size then use one-shot }
// big buffer otherwise reallocate main buffer
if (read_ahead_ || CalculateReadParameters(offset, bytes_requested, actual_bytes_toread,
first_page_start);
assert(actual_bytes_toread > 0);
if (buffer_.Capacity() < actual_bytes_toread) {
// If we are in read-ahead mode or the requested size
// exceeds max buffer size then use one-shot
// big buffer otherwise reallocate main buffer
if (read_ahead_ ||
(actual_bytes_toread > random_access_max_buffer_size_)) { (actual_bytes_toread > random_access_max_buffer_size_)) {
// Unlock the mutex since we are not using instance buffer // Unlock the mutex since we are not using instance buffer
lock.unlock(); lock.unlock();
r = ReadIntoOneShotBuffer(offset, first_page_start, r = ReadIntoOneShotBuffer(offset, first_page_start,
actual_bytes_toread, left, dest); actual_bytes_toread, left, dest);
} else { }
buffer_.AllocateNewBuffer(actual_bytes_toread); else {
buffer_.AllocateNewBuffer(actual_bytes_toread);
r = ReadIntoInstanceBuffer(offset, first_page_start,
actual_bytes_toread, left, dest);
}
}
else {
buffer_.Clear();
r = ReadIntoInstanceBuffer(offset, first_page_start, r = ReadIntoInstanceBuffer(offset, first_page_start,
actual_bytes_toread, left, dest); actual_bytes_toread, left, dest);
} }
} else {
buffer_.Clear();
r = ReadIntoInstanceBuffer(offset, first_page_start,
actual_bytes_toread, left, dest);
} }
} }
} else { } else {
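
For readers tracing the refactor: the inline page math that the old code carried, and that the new code delegates to CalculateReadParameters, is the round-to-page-boundary arithmetic below. The helper's body is not part of this hunk, so this is a sketch of what it presumably computes, not the actual implementation.

// Sketch only: names mirror the hunk above, but CalculateReadParameters'
// real body lives outside this diff, so treat this as an illustration.
#include <cstddef>
#include <cstdint>

inline uint64_t TruncateToPageBoundary(size_t page_size, uint64_t s) {
  return s - (s % page_size);  // round down to a page multiple
}

void CalculateReadParameters(size_t alignment, uint64_t offset,
                             size_t bytes_requested,
                             size_t& actual_bytes_toread,
                             uint64_t& first_page_start) {
  // First page touched by the request.
  first_page_start = TruncateToPageBoundary(alignment, offset);
  // Last page touched by the request.
  const uint64_t last_page_start =
      TruncateToPageBoundary(alignment, offset + bytes_requested - 1);
  // Read whole pages covering [offset, offset + bytes_requested).
  actual_bytes_toread =
      static_cast<size_t>(last_page_start - first_page_start) + alignment;
}

Reading whole pages starting at first_page_start is what unbuffered I/O requires on Windows: with FILE_FLAG_NO_BUFFERING, both the file offset and the transfer size must be multiples of the volume alignment.
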
@ -1105,6 +1138,8 @@ void WinthreadCall(const char* label, std::error_code result) {
   }
 }
 
+typedef VOID(WINAPI * FnGetSystemTimePreciseAsFileTime)(LPFILETIME);
+
 class WinEnv : public Env {
  public:
   WinEnv();
@ -1643,25 +1678,29 @@ class WinEnv : public Env {
   }
 
   virtual uint64_t NowMicros() override {
-    // all std::chrono clocks on windows proved to return
-    // values that may repeat that is not good enough for some uses.
-    const int64_t c_UnixEpochStartTicks = 116444736000000000i64;
-    const int64_t c_FtToMicroSec = 10;
-
-    // This interface needs to return system time and not
-    // just any microseconds because it is often used as an argument
-    // to TimedWait() on condition variable
-    FILETIME ftSystemTime;
-    GetSystemTimePreciseAsFileTime(&ftSystemTime);
-
-    LARGE_INTEGER li;
-    li.LowPart = ftSystemTime.dwLowDateTime;
-    li.HighPart = ftSystemTime.dwHighDateTime;
-    // Subtract unix epoch start
-    li.QuadPart -= c_UnixEpochStartTicks;
-    // Convert to microsecs
-    li.QuadPart /= c_FtToMicroSec;
-    return li.QuadPart;
+    if (GetSystemTimePreciseAsFileTime_ != NULL) {
+      // all std::chrono clocks on windows proved to return
+      // values that may repeat that is not good enough for some uses.
+      const int64_t c_UnixEpochStartTicks = 116444736000000000i64;
+      const int64_t c_FtToMicroSec = 10;
+
+      // This interface needs to return system time and not
+      // just any microseconds because it is often used as an argument
+      // to TimedWait() on condition variable
+      FILETIME ftSystemTime;
+      GetSystemTimePreciseAsFileTime_(&ftSystemTime);
+
+      LARGE_INTEGER li;
+      li.LowPart = ftSystemTime.dwLowDateTime;
+      li.HighPart = ftSystemTime.dwHighDateTime;
+      // Subtract unix epoch start
+      li.QuadPart -= c_UnixEpochStartTicks;
+      // Convert to microsecs
+      li.QuadPart /= c_FtToMicroSec;
+      return li.QuadPart;
+    }
+    using namespace std::chrono;
+    return duration_cast<microseconds>(system_clock::now().time_since_epoch()).count();
   }
 
   virtual uint64_t NowNanos() override {
@ -2071,6 +2110,7 @@ class WinEnv : public Env {
   std::vector<ThreadPool> thread_pools_;
   mutable std::mutex mu_;
   std::vector<std::thread> threads_to_join_;
+  FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_;
 };
 
 WinEnv::WinEnv()
@ -2079,7 +2119,15 @@ WinEnv::WinEnv()
       page_size_(4 * 1012),
       allocation_granularity_(page_size_),
       perf_counter_frequency_(0),
-      thread_pools_(Priority::TOTAL) {
+      thread_pools_(Priority::TOTAL),
+      GetSystemTimePreciseAsFileTime_(NULL) {
+
+  HMODULE module = GetModuleHandle("kernel32.dll");
+  if (module != NULL) {
+    GetSystemTimePreciseAsFileTime_ = (FnGetSystemTimePreciseAsFileTime)GetProcAddress(
+      module, "GetSystemTimePreciseAsFileTime");
+  }
+
   SYSTEM_INFO sinfo;
   GetSystemInfo(&sinfo);
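
The constructor and NowMicros changes together form the usual run-time feature probe: GetSystemTimePreciseAsFileTime exists only on Windows 8 / Server 2012 and later, so it is resolved through GetProcAddress instead of being linked directly, with std::chrono as the fallback when the pointer stays null. A self-contained sketch of the same pattern (a hypothetical free function, not the member shown above):

#include <windows.h>

#include <chrono>
#include <cstdint>

typedef VOID(WINAPI* FnGetSystemTimePreciseAsFileTime)(LPFILETIME);

uint64_t NowMicrosPortable() {
  // Resolve once; the symbol is absent before Windows 8 / Server 2012.
  static const FnGetSystemTimePreciseAsFileTime precise_time_fn = [] {
    HMODULE module = GetModuleHandle("kernel32.dll");
    return module != NULL
               ? reinterpret_cast<FnGetSystemTimePreciseAsFileTime>(
                     GetProcAddress(module, "GetSystemTimePreciseAsFileTime"))
               : static_cast<FnGetSystemTimePreciseAsFileTime>(NULL);
  }();
  if (precise_time_fn != NULL) {
    // FILETIME is 100ns ticks since 1601-01-01; shift to the Unix epoch
    // and scale down to microseconds.
    const int64_t c_UnixEpochStartTicks = 116444736000000000i64;
    const int64_t c_FtToMicroSec = 10;
    FILETIME ft;
    precise_time_fn(&ft);
    LARGE_INTEGER li;
    li.LowPart = ft.dwLowDateTime;
    li.HighPart = ft.dwHighDateTime;
    li.QuadPart -= c_UnixEpochStartTicks;
    li.QuadPart /= c_FtToMicroSec;
    return li.QuadPart;
  }
  // Older Windows: fall back to std::chrono, accepting coarser ticks.
  using namespace std::chrono;
  return duration_cast<microseconds>(
             system_clock::now().time_since_epoch()).count();
}
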

@ -229,6 +229,7 @@ DBOptions::DBOptions()
       db_log_dir(""),
       wal_dir(""),
       delete_obsolete_files_period_micros(6ULL * 60 * 60 * 1000000),
+      base_background_compactions(-1),
       max_background_compactions(1),
       max_subcompactions(1),
       max_background_flushes(1),
@ -295,6 +296,7 @@ DBOptions::DBOptions(const Options& options)
wal_dir(options.wal_dir), wal_dir(options.wal_dir),
delete_obsolete_files_period_micros( delete_obsolete_files_period_micros(
options.delete_obsolete_files_period_micros), options.delete_obsolete_files_period_micros),
base_background_compactions(options.base_background_compactions),
max_background_compactions(options.max_background_compactions), max_background_compactions(options.max_background_compactions),
max_subcompactions(options.max_subcompactions), max_subcompactions(options.max_subcompactions),
max_background_flushes(options.max_background_flushes), max_background_flushes(options.max_background_flushes),
@ -383,6 +385,8 @@ void DBOptions::Dump(Logger* log) const {
table_cache_numshardbits); table_cache_numshardbits);
Header(log, " Options.delete_obsolete_files_period_micros: %" PRIu64, Header(log, " Options.delete_obsolete_files_period_micros: %" PRIu64,
delete_obsolete_files_period_micros); delete_obsolete_files_period_micros);
Header(log, " Options.base_background_compactions: %d",
base_background_compactions);
Header(log, " Options.max_background_compactions: %d", Header(log, " Options.max_background_compactions: %d",
max_background_compactions); max_background_compactions);
Header(log, " Options.max_subcompactions: %" PRIu32, Header(log, " Options.max_subcompactions: %" PRIu32,
@ -652,6 +656,7 @@ Options::PrepareForBulkLoad()
   // to L1. This is helpful so that all files that are
   // input to the manual compaction are all at L0.
   max_background_compactions = 2;
+  base_background_compactions = 2;
 
   // The compaction would create large files in L1.
   target_file_size_base = 256 * 1024 * 1024;
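
Judging from this diff, the default of -1 means the option is unset and is sanitized to max_background_compactions, so the pair lets a DB idle with a small compaction pool and scale up only when compaction falls behind. A hedged usage sketch (the path and the concrete values are illustrative, not prescribed anywhere in this change):

#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Steady state: keep a single background compaction thread busy.
  options.base_background_compactions = 1;
  // Behind on compaction: allow the scheduler to fan out to four.
  options.max_background_compactions = 4;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/base_bg_demo", &db);
  assert(s.ok());
  delete db;
  return 0;
}
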

@ -208,7 +208,7 @@ static std::unordered_map<std::string, OptionTypeInfo> db_options_type_info = {
{offsetof(struct DBOptions, random_access_max_buffer_size), {offsetof(struct DBOptions, random_access_max_buffer_size),
OptionType::kSizeT, OptionVerificationType::kNormal}}, OptionType::kSizeT, OptionVerificationType::kNormal}},
{"writable_file_max_buffer_size", {"writable_file_max_buffer_size",
{offsetof(struct DBOptions, writable_file_max_buffer_size), {offsetof(struct DBOptions, writable_file_max_buffer_size),
OptionType::kSizeT, OptionVerificationType::kNormal}}, OptionType::kSizeT, OptionVerificationType::kNormal}},
{"use_adaptive_mutex", {"use_adaptive_mutex",
{offsetof(struct DBOptions, use_adaptive_mutex), OptionType::kBoolean, {offsetof(struct DBOptions, use_adaptive_mutex), OptionType::kBoolean,
@ -219,6 +219,9 @@ static std::unordered_map<std::string, OptionTypeInfo> db_options_type_info = {
{"max_background_compactions", {"max_background_compactions",
{offsetof(struct DBOptions, max_background_compactions), OptionType::kInt, {offsetof(struct DBOptions, max_background_compactions), OptionType::kInt,
OptionVerificationType::kNormal}}, OptionVerificationType::kNormal}},
{"base_background_compactions",
{offsetof(struct DBOptions, base_background_compactions), OptionType::kInt,
OptionVerificationType::kNormal}},
{"max_background_flushes", {"max_background_flushes",
{offsetof(struct DBOptions, max_background_flushes), OptionType::kInt, {offsetof(struct DBOptions, max_background_flushes), OptionType::kInt,
OptionVerificationType::kNormal}}, OptionVerificationType::kNormal}},
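
With the map entry in place, the option becomes reachable from the string-based option loaders. A small sketch, assuming the GetDBOptionsFromString signature from rocksdb/convenience.h is unchanged:

#include <cassert>
#include <string>

#include "rocksdb/convenience.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::DBOptions base;
  rocksdb::DBOptions loaded;
  // Parse a semicolon-separated option string on top of the defaults.
  rocksdb::Status s = rocksdb::GetDBOptionsFromString(
      base, "base_background_compactions=2;max_background_compactions=8;",
      &loaded);
  assert(s.ok());
  assert(loaded.base_background_compactions == 2);
  assert(loaded.max_background_compactions == 8);
  return 0;
}
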

@ -1669,50 +1669,7 @@ TEST_F(OptionsParserTest, DBOptionsAllFieldsSettable) {
       "table_cache_numshardbits=28;"
       "max_open_files=72;"
       "max_file_opening_threads=35;"
-      "max_background_compactions=33;"
-      "use_fsync=true;"
-      "use_adaptive_mutex=false;"
-      "max_total_wal_size=4295005604;"
-      "compaction_readahead_size=0;"
-      "new_table_reader_for_compaction_inputs=false;"
-      "keep_log_file_num=4890;"
-      "skip_stats_update_on_db_open=false;"
-      "max_manifest_file_size=4295009941;"
-      "db_log_dir=path/to/db_log_dir;"
-      "skip_log_error_on_recovery=true;"
-      "writable_file_max_buffer_size=1048576;"
-      "paranoid_checks=true;"
-      "is_fd_close_on_exec=false;"
-      "bytes_per_sync=4295013613;"
-      "enable_thread_tracking=false;"
-      "disable_data_sync=false;"
-      "recycle_log_file_num=0;"
-      "disableDataSync=false;"
-      "create_missing_column_families=true;"
-      "log_file_time_to_roll=3097;"
-      "max_background_flushes=35;"
-      "create_if_missing=false;"
-      "error_if_exists=true;"
-      "allow_os_buffer=false;"
-      "delayed_write_rate=4294976214;"
-      "manifest_preallocation_size=1222;"
-      "allow_mmap_writes=false;"
-      "stats_dump_period_sec=70127;"
-      "allow_fallocate=true;"
-      "allow_mmap_reads=false;"
-      "max_log_file_size=4607;"
-      "random_access_max_buffer_size=1048576;"
-      "advise_random_on_open=true;"
-      "wal_bytes_per_sync=4295048118;"
-      "delete_obsolete_files_period_micros=4294967758;"
-      "WAL_ttl_seconds=4295008036;"
-      "WAL_size_limit_MB=4295036161;"
-      "wal_dir=path/to/wal_dir;"
-      "db_write_buffer_size=2587;"
-      "max_subcompactions=64330;"
-      "table_cache_numshardbits=28;"
-      "max_open_files=72;"
-      "max_file_opening_threads=35;"
+      "base_background_compactions=3;"
       "max_background_compactions=33;"
       "use_fsync=true;"
       "use_adaptive_mutex=false;"

@ -61,32 +61,54 @@ void PerfContext::Reset() {
#endif #endif
} }
#define OUTPUT(counter) #counter << " = " << counter << ", " #define OUTPUT(counter) \
if (!exclude_zero_counters || (counter > 0)) { \
ss << #counter << " = " << counter << ", "; \
}
std::string PerfContext::ToString() const { std::string PerfContext::ToString(bool exclude_zero_counters) const {
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) #if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
return ""; return "";
#else #else
std::ostringstream ss; std::ostringstream ss;
ss << OUTPUT(user_key_comparison_count) << OUTPUT(block_cache_hit_count) OUTPUT(user_key_comparison_count);
<< OUTPUT(block_read_count) << OUTPUT(block_read_byte) OUTPUT(block_cache_hit_count);
<< OUTPUT(block_read_time) << OUTPUT(block_checksum_time) OUTPUT(block_read_count);
<< OUTPUT(block_decompress_time) << OUTPUT(internal_key_skipped_count) OUTPUT(block_read_byte);
<< OUTPUT(internal_delete_skipped_count) << OUTPUT(write_wal_time) OUTPUT(block_read_time);
<< OUTPUT(get_snapshot_time) << OUTPUT(get_from_memtable_time) OUTPUT(block_checksum_time);
<< OUTPUT(get_from_memtable_count) << OUTPUT(get_post_process_time) OUTPUT(block_decompress_time);
<< OUTPUT(get_from_output_files_time) << OUTPUT(seek_on_memtable_time) OUTPUT(internal_key_skipped_count);
<< OUTPUT(seek_on_memtable_count) << OUTPUT(seek_child_seek_time) OUTPUT(internal_delete_skipped_count);
<< OUTPUT(seek_child_seek_count) << OUTPUT(seek_min_heap_time) OUTPUT(write_wal_time);
<< OUTPUT(seek_internal_seek_time) << OUTPUT(find_next_user_entry_time) OUTPUT(get_snapshot_time);
<< OUTPUT(write_pre_and_post_process_time) << OUTPUT(write_memtable_time) OUTPUT(get_from_memtable_time);
<< OUTPUT(db_mutex_lock_nanos) << OUTPUT(db_condition_wait_nanos) OUTPUT(get_from_memtable_count);
<< OUTPUT(merge_operator_time_nanos) << OUTPUT(write_delay_time) OUTPUT(get_post_process_time);
<< OUTPUT(read_index_block_nanos) << OUTPUT(read_filter_block_nanos) OUTPUT(get_from_output_files_time);
<< OUTPUT(new_table_block_iter_nanos) << OUTPUT(new_table_iterator_nanos) OUTPUT(seek_on_memtable_time);
<< OUTPUT(block_seek_nanos) << OUTPUT(find_table_nanos) OUTPUT(seek_on_memtable_count);
<< OUTPUT(bloom_memtable_hit_count) << OUTPUT(bloom_memtable_miss_count) OUTPUT(seek_child_seek_time);
<< OUTPUT(bloom_sst_hit_count) << OUTPUT(bloom_sst_miss_count); OUTPUT(seek_child_seek_count);
OUTPUT(seek_min_heap_time);
OUTPUT(seek_internal_seek_time);
OUTPUT(find_next_user_entry_time);
OUTPUT(write_pre_and_post_process_time);
OUTPUT(write_memtable_time);
OUTPUT(db_mutex_lock_nanos);
OUTPUT(db_condition_wait_nanos);
OUTPUT(merge_operator_time_nanos);
OUTPUT(write_delay_time);
OUTPUT(read_index_block_nanos);
OUTPUT(read_filter_block_nanos);
OUTPUT(new_table_block_iter_nanos);
OUTPUT(new_table_iterator_nanos);
OUTPUT(block_seek_nanos);
OUTPUT(find_table_nanos);
OUTPUT(bloom_memtable_hit_count);
OUTPUT(bloom_memtable_miss_count);
OUTPUT(bloom_sst_hit_count);
OUTPUT(bloom_sst_miss_count);
return ss.str(); return ss.str();
#endif #endif
} }
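
Call sites opt into the filtering by passing true; existing ToString() callers keep compiling only if the header declares the parameter with a default, which is not shown in this hunk and is assumed here. A sketch of typical use, assuming this era's thread-local rocksdb::perf_context global:

#include <iostream>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/perf_level.h"

// Prints only the perf counters that a single Get() actually touched.
void ProfiledGet(rocksdb::DB* db, const std::string& key) {
  rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTime);
  rocksdb::perf_context.Reset();

  std::string value;
  db->Get(rocksdb::ReadOptions(), key, &value);  // status checking elided

  // exclude_zero_counters = true: zero-valued counters are skipped,
  // keeping the output focused on what this operation actually did.
  std::cout << rocksdb::perf_context.ToString(true) << std::endl;
  rocksdb::SetPerfLevel(rocksdb::PerfLevel::kDisable);
}
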
