A new compaction picking priority that optimizes for write amplification for random updates.

Summary: Introduce a compaction picking priority that first compacts the files containing the oldest rows. This mode slightly improves write amplification for random-update workloads.

Test Plan: Add a unit test and run it in valgrind too.

Reviewers: yhchiang, anthony, IslamAbdelRahman, rven, kradhakrishnan, MarkCallaghan, igor

Reviewed By: igor

Subscribers: leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D51459
main
sdong 9 years ago
parent de6958b2e2
commit d6e1035a1f
  1. 1
      HISTORY.md
  2. 47
      db/db_compaction_test.cc
  3. 10
      db/version_set.cc
  4. 11
      include/rocksdb/options.h
  5. 2
      util/mutable_cf_options.h
  6. 2
      util/options.cc

@ -9,6 +9,7 @@
### Public API Changes ### Public API Changes
* When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families. * When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families.
* Change names in CompactionPri and add a new one.
## 4.2.0 (11/9/2015) ## 4.2.0 (11/9/2015)
### New Features ### New Features

@ -1868,6 +1868,53 @@ TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) {
INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam, INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam,
::testing::Values(1, 4)); ::testing::Values(1, 4));
// Value-parameterized fixture: the uint32_t test parameter is stored in
// compaction_pri_ so that test bodies can turn it into a CompactionPri value.
class CompactionPriTest : public DBTestBase,
                          public testing::WithParamInterface<uint32_t> {
 public:
  // Opens the test database under "/compaction_pri_test" and captures the
  // parameter of the current instantiation.
  CompactionPriTest()
      : DBTestBase("/compaction_pri_test"), compaction_pri_(GetParam()) {}

  // Required if inheriting from testing::WithParamInterface<>
  static void SetUpTestCase() {}
  static void TearDownTestCase() {}

  // Raw parameter value for this test instance.
  uint32_t compaction_pri_;
};
// Smoke test for the compaction priority selected by the test parameter:
// writes kNKeys distinct keys in a shuffled order, waits for compactions to
// finish, and then checks that every key can still be read back.
TEST_P(CompactionPriTest, Test) {
  Options options;
  // Small buffer and level sizes -- presumably chosen so that this modest
  // data set is enough to trigger flushes and compactions.
  options.write_buffer_size = 16 * 1024;
  options.compaction_pri = static_cast<CompactionPri>(compaction_pri_);
  options.hard_pending_compaction_bytes_limit = 256 * 1024;
  options.max_bytes_for_level_base = 64 * 1024;
  options.max_bytes_for_level_multiplier = 4;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  Random rnd(301);
  const int kNKeys = 5000;
  int keys[kNKeys];
  for (int i = 0; i < kNKeys; i++) {
    keys[i] = i;
  }
  // Fisher-Yates shuffle driven by the seeded generator above.
  // std::random_shuffle is deprecated since C++14, removed in C++17, and its
  // random source is implementation-defined; using `rnd` keeps the insertion
  // order reproducible from the seed.
  for (int i = kNKeys - 1; i > 0; i--) {
    const int j = static_cast<int>(rnd.Uniform(i + 1));  // j in [0, i]
    std::swap(keys[i], keys[j]);
  }

  for (int i = 0; i < kNKeys; i++) {
    ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 102)));
  }

  dbfull()->TEST_WaitForCompact();

  // Every key written above must still be readable after compaction.
  for (int i = 0; i < kNKeys; i++) {
    ASSERT_NE("NOT_FOUND", Get(Key(i)));
  }
}
// Run CompactionPriTest once per parameter value. The test casts the value to
// CompactionPri, so 0, 1 and 2 select kByCompensatedSize (0x0),
// kOldestLargestSeqFirst (0x1) and kOldestSmallestSeqFirst (0x2).
INSTANTIATE_TEST_CASE_P(CompactionPriTest, CompactionPriTest,
::testing::Values(0, 1, 2));
#endif // !defined(ROCKSDB_LITE) #endif // !defined(ROCKSDB_LITE)
} // namespace rocksdb } // namespace rocksdb

@ -1386,16 +1386,22 @@ void VersionStorageInfo::UpdateFilesByCompactionPri(
num = temp.size(); num = temp.size();
} }
switch (mutable_cf_options.compaction_pri) { switch (mutable_cf_options.compaction_pri) {
case kCompactionPriByCompensatedSize: case kByCompensatedSize:
std::partial_sort(temp.begin(), temp.begin() + num, temp.end(), std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
CompareCompensatedSizeDescending); CompareCompensatedSizeDescending);
break; break;
case kCompactionPriByLargestSeq: case kOldestLargestSeqFirst:
std::sort(temp.begin(), temp.end(), std::sort(temp.begin(), temp.end(),
[this](const Fsize& f1, const Fsize& f2) -> bool { [this](const Fsize& f1, const Fsize& f2) -> bool {
return f1.file->largest_seqno < f2.file->largest_seqno; return f1.file->largest_seqno < f2.file->largest_seqno;
}); });
break; break;
case kOldestSmallestSeqFirst:
std::sort(temp.begin(), temp.end(),
[this](const Fsize& f1, const Fsize& f2) -> bool {
return f1.file->smallest_seqno < f2.file->smallest_seqno;
});
break;
default: default:
assert(false); assert(false);
} }

@ -82,9 +82,14 @@ enum CompactionStyle : char {
enum CompactionPri : char { enum CompactionPri : char {
// Slightly Priotize larger files by size compensated by #deletes // Slightly Priotize larger files by size compensated by #deletes
kCompactionPriByCompensatedSize = 0x0, kByCompensatedSize = 0x0,
// First compact files whose data is oldest. // First compact files whose data's latest update time is oldest.
kCompactionPriByLargestSeq = 0x1, // Try this if you only update some hot keys in small ranges.
kOldestLargestSeqFirst = 0x1,
// First compact files whose range hasn't been compacted to the next level
// for the longest. If your updates are random across the key space,
// write amplification is slightly better with this option.
kOldestSmallestSeqFirst = 0x2,
}; };
enum class WALRecoveryMode : char { enum class WALRecoveryMode : char {

@ -67,7 +67,7 @@ struct MutableCFOptions {
level0_file_num_compaction_trigger(0), level0_file_num_compaction_trigger(0),
level0_slowdown_writes_trigger(0), level0_slowdown_writes_trigger(0),
level0_stop_writes_trigger(0), level0_stop_writes_trigger(0),
compaction_pri(kCompactionPriByCompensatedSize), compaction_pri(kByCompensatedSize),
max_grandparent_overlap_factor(0), max_grandparent_overlap_factor(0),
expanded_compaction_factor(0), expanded_compaction_factor(0),
source_compaction_factor(0), source_compaction_factor(0),

@ -110,7 +110,7 @@ ColumnFamilyOptions::ColumnFamilyOptions()
disable_auto_compactions(false), disable_auto_compactions(false),
purge_redundant_kvs_while_flush(true), purge_redundant_kvs_while_flush(true),
compaction_style(kCompactionStyleLevel), compaction_style(kCompactionStyleLevel),
compaction_pri(kCompactionPriByCompensatedSize), compaction_pri(kByCompensatedSize),
verify_checksums_in_compaction(true), verify_checksums_in_compaction(true),
filter_deletes(false), filter_deletes(false),
max_sequential_skip_in_iterations(8), max_sequential_skip_in_iterations(8),

Loading…
Cancel
Save