Skip bottommost level compaction if possible

Summary:
This is https://reviews.facebook.net/D39999 but after introducing an option to force compaction the bottom most level

Changes in this patch
- Introduce force_bottommost_level_compaction to CompactRangeOptions that force compacting bottommost level during compaction
- Skip bottommost level compaction if we dont have a compaction filter and force_bottommost_level_compaction options is not set

Although tests pass on my machine but I suspect that there maybe some tests that I am not aware of that  should use force_bottommost_level_compaction to pass in a deterministic way

Test Plan:
make check
adding new tests

Reviewers: igor, sdong, yhchiang

Reviewed By: yhchiang

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D40059
main
Islam AbdelRahman 10 years ago
parent 4b8bb62f0a
commit 4eabbdb7ec
  1. 1
      HISTORY.md
  2. 12
      db/compaction_job_stats_test.cc
  3. 14
      db/db_impl.cc
  4. 120
      db/db_test.cc
  5. 4
      include/rocksdb/options.h

@ -17,6 +17,7 @@
* When options.soft_rate_limit or options.level0_slowdown_writes_trigger is triggered, the way to slow down writes is changed to: write rate to DB is limited to to options.delayed_write_rate.
* DB::GetApproximateSizes() adds a parameter to allow the estimation to include data in mem table, with default to be not to include. It is now only supported in skip list mem table.
* DB::CompactRange() now accept CompactRangeOptions instead of multiple paramters. CompactRangeOptions is defined in include/rocksdb/options.h.
* Add force_bottommost_level_compaction option to CompactRangeOptions, which prevent compaction from skipping compacting bottommost level.
## 3.11.0 (5/19/2015)
### New Features

@ -668,17 +668,9 @@ TEST_F(CompactionJobStatsTest, CompactionJobStatsTest) {
1, num_keys_per_L0_file * 2,
compression_ratio,
num_keys_per_L0_file));
// In the second sub-compaction, we expect L1 compaction.
stats_checker->AddExpectedStats(
NewManualCompactionJobStats(
smallest_key, largest_key,
4, 4, num_keys_per_L0_file * 8,
kKeySize, kValueSize,
1, num_keys_per_L0_file * 8,
compression_ratio, 0));
ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 2U);
ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 1U);
Compact(1, smallest_key, largest_key);
ASSERT_EQ("0,1", FilesPerLevel(1));
ASSERT_EQ("0,4", FilesPerLevel(1));
options.compression = GetAnyCompression();
if (options.compression == kNoCompression) {
break;

@ -1372,8 +1372,18 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options,
// level 0 can never be the bottommost level (i.e. if all files are in
// level 0, we will compact to level 1)
if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
cfd->ioptions()->compaction_style == kCompactionStyleFIFO ||
(level == max_level_with_files && level > 0)) {
cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
output_level = level;
} else if (level == max_level_with_files && level > 0) {
if (options.force_bottommost_level_compaction == false &&
cfd->ioptions()->compaction_filter == nullptr &&
cfd->ioptions()->compaction_filter_factory == nullptr &&
cfd->ioptions()->compaction_filter_factory_v2 == nullptr) {
// If we are not forced to compact the bottommost level and there is
// no compaction filter we can skip the compaction of
// the bottommost level
continue;
}
output_level = level;
} else {
output_level = level + 1;

@ -3964,6 +3964,64 @@ TEST_F(DBTest, TrivialMoveTargetLevel) {
}
}
TEST_F(DBTest, TrivialMoveToLastLevelWithFiles) {
int32_t trivial_move = 0;
int32_t non_trivial_move = 0;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:TrivialMove",
[&](void* arg) { trivial_move++; });
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:NonTrivial",
[&](void* arg) { non_trivial_move++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
Options options;
options.write_buffer_size = 100000000;
options = CurrentOptions(options);
DestroyAndReopen(options);
int32_t value_size = 10 * 1024; // 10 KB
Random rnd(301);
std::vector<std::string> values;
// File with keys [ 0 => 99 ]
for (int i = 0; i < 100; i++) {
values.push_back(RandomString(&rnd, value_size));
ASSERT_OK(Put(Key(i), values[i]));
}
ASSERT_OK(Flush());
ASSERT_EQ("1", FilesPerLevel(0));
// Compaction will do L0=>L1 (trivial move) then move L1 files to L3
CompactRangeOptions compact_options;
compact_options.change_level = true;
compact_options.target_level = 3;
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
ASSERT_EQ(trivial_move, 1);
ASSERT_EQ(non_trivial_move, 0);
// File with keys [ 100 => 199 ]
for (int i = 100; i < 200; i++) {
values.push_back(RandomString(&rnd, value_size));
ASSERT_OK(Put(Key(i), values[i]));
}
ASSERT_OK(Flush());
ASSERT_EQ("1,0,0,1", FilesPerLevel(0));
// Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves)
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
ASSERT_EQ("0,0,0,2", FilesPerLevel(0));
ASSERT_EQ(trivial_move, 4);
ASSERT_EQ(non_trivial_move, 0);
for (int i = 0; i < 200; i++) {
ASSERT_EQ(Get(Key(i)), values[i]);
}
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
TEST_F(DBTest, CompactionTrigger) {
Options options;
options.write_buffer_size = 100<<10; //100KB
@ -5431,6 +5489,7 @@ TEST_F(DBTest, ConvertCompactionStyle) {
CompactRangeOptions compact_options;
compact_options.change_level = true;
compact_options.target_level = 0;
compact_options.force_bottommost_level_compaction = true;
dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr);
// Only 1 file in L0
@ -13739,6 +13798,67 @@ TEST_F(DBTest, SoftLimit) {
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
TEST_F(DBTest, ForceBottommostLevelCompaction) {
int32_t trivial_move = 0;
int32_t non_trivial_move = 0;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:TrivialMove",
[&](void* arg) { trivial_move++; });
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:NonTrivial",
[&](void* arg) { non_trivial_move++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
Options options;
options.write_buffer_size = 100000000;
options = CurrentOptions(options);
DestroyAndReopen(options);
int32_t value_size = 10 * 1024; // 10 KB
Random rnd(301);
std::vector<std::string> values;
// File with keys [ 0 => 99 ]
for (int i = 0; i < 100; i++) {
values.push_back(RandomString(&rnd, value_size));
ASSERT_OK(Put(Key(i), values[i]));
}
ASSERT_OK(Flush());
ASSERT_EQ("1", FilesPerLevel(0));
// Compaction will do L0=>L1 (trivial move) then move L1 files to L3
CompactRangeOptions compact_options;
compact_options.change_level = true;
compact_options.target_level = 3;
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
ASSERT_EQ(trivial_move, 1);
ASSERT_EQ(non_trivial_move, 0);
// File with keys [ 100 => 199 ]
for (int i = 100; i < 200; i++) {
values.push_back(RandomString(&rnd, value_size));
ASSERT_OK(Put(Key(i), values[i]));
}
ASSERT_OK(Flush());
ASSERT_EQ("1,0,0,1", FilesPerLevel(0));
// Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves)
// then compacte the bottommost level L3=>L3 (non trivial move)
compact_options = CompactRangeOptions();
compact_options.force_bottommost_level_compaction = true;
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
ASSERT_EQ(trivial_move, 4);
ASSERT_EQ(non_trivial_move, 1);
for (int i = 0; i < 200; i++) {
ASSERT_EQ(Get(Key(i)), values[i]);
}
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
} // namespace rocksdb
int main(int argc, char** argv) {

@ -1251,6 +1251,10 @@ struct CompactRangeOptions {
// Compaction outputs will be placed in options.db_paths[target_path_id].
// Behavior is undefined if target_path_id is out of range.
uint32_t target_path_id = 0;
// By default compaction will try to skip compacting bottommost level if
// possible, setting this flag to true will force compaction to compact
// the bottomost level.
bool force_bottommost_level_compaction = false;
};
} // namespace rocksdb

Loading…
Cancel
Save