A new compaction picking priority that reduces write amplification for random updates.

Summary: Introduce a compaction picking priority that picks the files containing the oldest rows to compact first. This mode slightly improves write amplification for random-update workloads.

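For readers skimming the summary, a minimal sketch of how the new priority is selected once this change lands; the option field and enum value come from the diff below, while the database path and the rest of the setup are purely illustrative.

#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Prefer files whose key range has waited longest since it was last
  // compacted to the next level; per the summary, this slightly reduces
  // write amplification when updates are spread randomly over the key space.
  options.compaction_pri = rocksdb::kOldestSmallestSeqFirst;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/compaction_pri_demo", &db);
  assert(s.ok());
  delete db;
  return 0;
}
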
Test Plan: Add a unit test and run it in valgrind too.

Reviewers: yhchiang, anthony, IslamAbdelRahman, rven, kradhakrishnan, MarkCallaghan, igor

Reviewed By: igor

Subscribers: leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D51459
Branch: main
Author: sdong (9 years ago)
Parent: de6958b2e2
Commit: d6e1035a1f
6 changed files:
* HISTORY.md (1 line changed)
* db/db_compaction_test.cc (47 lines changed)
* db/version_set.cc (10 lines changed)
* include/rocksdb/options.h (11 lines changed)
* util/mutable_cf_options.h (2 lines changed)
* util/options.cc (2 lines changed)

HISTORY.md
@@ -9,6 +9,7 @@
 ### Public API Changes
 * When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families.
+* Change names in CompactionPri and add a new one.
 ## 4.2.0 (11/9/2015)
 ### New Features

db/db_compaction_test.cc
@@ -1868,6 +1868,53 @@ TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) {
 INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam,
                         ::testing::Values(1, 4));
+class CompactionPriTest : public DBTestBase,
+                          public testing::WithParamInterface<uint32_t> {
+ public:
+  CompactionPriTest() : DBTestBase("/compaction_pri_test") {
+    compaction_pri_ = GetParam();
+  }
+  // Required if inheriting from testing::WithParamInterface<>
+  static void SetUpTestCase() {}
+  static void TearDownTestCase() {}
+  uint32_t compaction_pri_;
+};
+TEST_P(CompactionPriTest, Test) {
+  Options options;
+  options.write_buffer_size = 16 * 1024;
+  options.compaction_pri = static_cast<CompactionPri>(compaction_pri_);
+  options.hard_pending_compaction_bytes_limit = 256 * 1024;
+  options.max_bytes_for_level_base = 64 * 1024;
+  options.max_bytes_for_level_multiplier = 4;
+  options.compression = kNoCompression;
+  options = CurrentOptions(options);
+  DestroyAndReopen(options);
+  Random rnd(301);
+  const int kNKeys = 5000;
+  int keys[kNKeys];
+  for (int i = 0; i < kNKeys; i++) {
+    keys[i] = i;
+  }
+  std::random_shuffle(std::begin(keys), std::end(keys));
+  for (int i = 0; i < kNKeys; i++) {
+    ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 102)));
+  }
+  dbfull()->TEST_WaitForCompact();
+  for (int i = 0; i < kNKeys; i++) {
+    ASSERT_NE("NOT_FOUND", Get(Key(i)));
+  }
+}
+INSTANTIATE_TEST_CASE_P(CompactionPriTest, CompactionPriTest,
+                        ::testing::Values(0, 1, 2));
 #endif // !defined(ROCKSDB_LITE)
 } // namespace rocksdb

db/version_set.cc
@@ -1386,16 +1386,22 @@ void VersionStorageInfo::UpdateFilesByCompactionPri(
       num = temp.size();
     }
     switch (mutable_cf_options.compaction_pri) {
-      case kCompactionPriByCompensatedSize:
+      case kByCompensatedSize:
         std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
                           CompareCompensatedSizeDescending);
         break;
-      case kCompactionPriByLargestSeq:
+      case kOldestLargestSeqFirst:
         std::sort(temp.begin(), temp.end(),
                   [this](const Fsize& f1, const Fsize& f2) -> bool {
                     return f1.file->largest_seqno < f2.file->largest_seqno;
                   });
        break;
+      case kOldestSmallestSeqFirst:
+        std::sort(temp.begin(), temp.end(),
+                  [this](const Fsize& f1, const Fsize& f2) -> bool {
+                    return f1.file->smallest_seqno < f2.file->smallest_seqno;
+                  });
+        break;
       default:
         assert(false);
     }

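The new kOldestSmallestSeqFirst branch above orders candidate files by their smallest sequence number, i.e. by how long each file's key range has gone without being pushed down a level. Below is a self-contained sketch of just that ordering; the FileMeta struct and the literal sequence numbers are made up for illustration, while the real code appears to sort Fsize entries that wrap FileMetaData pointers.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for RocksDB's file metadata: only the fields needed
// to show the ordering used by kOldestSmallestSeqFirst.
struct FileMeta {
  uint64_t number;
  uint64_t smallest_seqno;  // oldest entry still in the file
  uint64_t largest_seqno;   // newest entry in the file
};

int main() {
  std::vector<FileMeta> files = {
      {10, 500, 900}, {11, 120, 850}, {12, 300, 320}};

  // kOldestSmallestSeqFirst: pick first the file whose oldest data is oldest
  // (smallest smallest_seqno), i.e. the range that has waited longest since
  // it was last compacted to the next level.
  std::sort(files.begin(), files.end(),
            [](const FileMeta& a, const FileMeta& b) {
              return a.smallest_seqno < b.smallest_seqno;
            });

  for (const auto& f : files) {
    std::printf("file %llu smallest_seqno=%llu\n",
                static_cast<unsigned long long>(f.number),
                static_cast<unsigned long long>(f.smallest_seqno));
  }
  return 0;
}
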
include/rocksdb/options.h
@@ -82,9 +82,14 @@ enum CompactionStyle : char {
 enum CompactionPri : char {
   // Slightly Priotize larger files by size compensated by #deletes
-  kCompactionPriByCompensatedSize = 0x0,
-  // First compact files whose data is oldest.
-  kCompactionPriByLargestSeq = 0x1,
+  kByCompensatedSize = 0x0,
+  // First compact files whose data's latest update time is oldest.
+  // Try this if you only update some hot keys in small ranges.
+  kOldestLargestSeqFirst = 0x1,
+  // First compact files whose range hasn't been compacted to the next level
+  // for the longest. If your updates are random across the key space,
+  // write amplification is slightly better with this option.
+  kOldestSmallestSeqFirst = 0x2,
 };
 enum class WALRecoveryMode : char {

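To make the enum comments above concrete, here is a hypothetical helper (not part of RocksDB) that restates that guidance as code; the enum values are the ones introduced in this diff, while the workload flags and the helper itself are invented for illustration.

#include <cstdio>

#include "rocksdb/options.h"

// Hypothetical helper: map a rough workload description onto the
// CompactionPri values added/renamed above.
rocksdb::CompactionPri PickCompactionPri(bool updates_random_across_keyspace,
                                         bool updates_hit_small_hot_ranges) {
  if (updates_random_across_keyspace) {
    // Slightly better write amplification for uniformly random updates.
    return rocksdb::kOldestSmallestSeqFirst;
  }
  if (updates_hit_small_hot_ranges) {
    // Compact files whose newest data is oldest; suits workloads that keep
    // rewriting a few small hot key ranges.
    return rocksdb::kOldestLargestSeqFirst;
  }
  // Default: prioritize larger files, compensated for deletions.
  return rocksdb::kByCompensatedSize;
}

int main() {
  rocksdb::Options options;
  options.compaction_pri = PickCompactionPri(
      /*updates_random_across_keyspace=*/true,
      /*updates_hit_small_hot_ranges=*/false);
  std::printf("compaction_pri = %d\n",
              static_cast<int>(options.compaction_pri));
  return 0;
}
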
util/mutable_cf_options.h
@@ -67,7 +67,7 @@ struct MutableCFOptions {
         level0_file_num_compaction_trigger(0),
         level0_slowdown_writes_trigger(0),
         level0_stop_writes_trigger(0),
-        compaction_pri(kCompactionPriByCompensatedSize),
+        compaction_pri(kByCompensatedSize),
         max_grandparent_overlap_factor(0),
         expanded_compaction_factor(0),
         source_compaction_factor(0),

util/options.cc
@@ -110,7 +110,7 @@ ColumnFamilyOptions::ColumnFamilyOptions()
       disable_auto_compactions(false),
       purge_redundant_kvs_while_flush(true),
       compaction_style(kCompactionStyleLevel),
-      compaction_pri(kCompactionPriByCompensatedSize),
+      compaction_pri(kByCompensatedSize),
       verify_checksums_in_compaction(true),
       filter_deletes(false),
       max_sequential_skip_in_iterations(8),
