Use `sstableKeyCompare()` for compaction output boundary check (#10763)

Summary:
This makes the compaction output boundary check consistent with the compaction picker, which uses `sstableKeyCompare()` to pick overlapping files. For example, without this change, a compaction may cut L1 files like:
```
 L1: [2-21]  [22-30]
 L2: [1-10] [21-30]
```
Because "21" on L1 is smaller than "21" on L2. But for compaction, these 2 files are overlapped.
`sstableKeyCompare()` also take range delete into consideration which may cut file for the same key.
It also makes the `max_compaction_bytes` calculation more accurate for cases like above, the overlapped bytes was under estimated. Also make sure the 2 keys won't be splitted to 2 files because of reaching `max_compaction_bytes`.
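
For illustration, here is a minimal, self-contained sketch of why the comparator choice matters for the boundary check. This is not the RocksDB implementation: the real `sstableKeyCompare()` works on `InternalKey` boundaries and also handles range-tombstone sentinels, while `FakeInternalKey`, `InternalCompare()`, and `UserKeyCompare()` below are hypothetical stand-ins that only model the seqno-vs-user-key ordering difference described above.
```cpp
#include <cassert>
#include <cstdint>
#include <string>

// Hypothetical, simplified stand-ins for RocksDB types, for illustration only.
struct FakeInternalKey {
  std::string user_key;
  uint64_t seqno;  // for the same user key, a higher seqno sorts first
};

// Internal-key style comparison: same user key, higher seqno compares smaller.
int InternalCompare(const FakeInternalKey& a, const FakeInternalKey& b) {
  if (a.user_key != b.user_key) return a.user_key < b.user_key ? -1 : 1;
  if (a.seqno == b.seqno) return 0;
  return a.seqno > b.seqno ? -1 : 1;
}

// User-key style comparison (roughly what a sstableKeyCompare()-based boundary
// check gives): two versions of the same user key compare equal, so they are
// treated as overlapping.
int UserKeyCompare(const FakeInternalKey& a, const FakeInternalKey& b) {
  if (a.user_key != b.user_key) return a.user_key < b.user_key ? -1 : 1;
  return 0;
}

int main() {
  // L1 output key "21" with a high seqno vs. the L2 grandparent file whose
  // smallest key is "21" with a low seqno.
  FakeInternalKey l1_key{"21", /*seqno=*/100};
  FakeInternalKey l2_smallest{"21", /*seqno=*/5};

  // Internal comparator: l1_key < l2_smallest, so the old boundary check
  // believed "21" had not reached the next grandparent file and could cut
  // L1 into [2-21] [22-30].
  assert(InternalCompare(l1_key, l2_smallest) < 0);

  // User-key based check: the keys compare equal, i.e. the files overlap, so
  // the cut should happen before "21" instead, giving [2-20] [21-30].
  assert(UserKeyCompare(l1_key, l2_smallest) == 0);
  return 0;
}
```
Under the old internal-key check, the L1 output containing "21" looked strictly smaller than the L2 file starting at "21", so no cut happened before "21"; under a user-key based check like `sstableKeyCompare()`, the keys compare equal, the overlap is detected, and the cut lands on the grandparent boundary.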

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10763

Reviewed By: cbi42

Differential Revision: D39971904

Pulled By: cbi42

fbshipit-source-id: bcc309e9c3dc61a8f50667a6f633e6132c0154a8
Branch: main
Author: Jay Zhuang, committed 2 years ago by Facebook GitHub Bot
Parent: d6d8c007ff
Commit: 23fa5b7789
Changed files:
1. db/compaction/compaction_job_test.cc (156 lines changed)
2. db/compaction/compaction_outputs.cc (92 lines changed)
3. db/compaction/compaction_outputs.h (5 lines changed)
4. db/db_range_del_test.cc (51 lines changed)

db/compaction/compaction_job_test.cc:
```diff
@@ -1748,8 +1748,8 @@ class CompactionJobDynamicFileSizeTest
 };

 TEST_P(CompactionJobDynamicFileSizeTest, CutForMaxCompactionBytes) {
-  // dynamic_file_size option should has no impact on cutting for max compaction
-  // bytes.
+  // dynamic_file_size option should have no impact on cutting for max
+  // compaction bytes.
   bool enable_dyanmic_file_size = GetParam();
   cf_options_.level_compaction_dynamic_file_size = enable_dyanmic_file_size;
```
```diff
@@ -1800,12 +1800,21 @@ TEST_P(CompactionJobDynamicFileSizeTest, CutForMaxCompactionBytes) {
                                   {KeyStr("o", 2U, kTypeValue), "val"}});
   AddMockFile(file5, 2);

+  // The expected output should be:
+  //  L1: [c, h, j] [n]
+  //  L2: [b ... e] [h ... k] [l ... o]
+  // It's better to have "j" in the first file, because anyway it's overlapping
+  // with the second file on L2.
+  // (Note: before this PR, it was cut at "h" because it's using the internal
+  // comparator which think L1 "h" with seqno 3 is smaller than L2 "h" with
+  // seqno 1, but actually they're overlapped with the compaction picker).
   auto expected_file1 =
       mock::MakeMockFile({{KeyStr("c", 5U, kTypeValue), "val2"},
-                          {KeyStr("h", 3U, kTypeValue), "val"}});
+                          {KeyStr("h", 3U, kTypeValue), "val"},
+                          {KeyStr("j", 4U, kTypeValue), "val"}});
   auto expected_file2 =
-      mock::MakeMockFile({{KeyStr("j", 4U, kTypeValue), "val"},
-                          {KeyStr("n", 6U, kTypeValue), "val3"}});
+      mock::MakeMockFile({{KeyStr("n", 6U, kTypeValue), "val3"}});

   SetLastSequence(6U);
```
```diff
@@ -1974,6 +1983,143 @@ TEST_P(CompactionJobDynamicFileSizeTest, CutToAlignGrandparentBoundary) {
   }
 }

+TEST_P(CompactionJobDynamicFileSizeTest, CutToAlignGrandparentBoundarySameKey) {
+  bool enable_dyanmic_file_size = GetParam();
+  cf_options_.level_compaction_dynamic_file_size = enable_dyanmic_file_size;
+  NewDB();
+
+  // MockTable has 1 byte per entry by default and each file is 10 bytes.
+  // When the file size is smaller than 100, it won't cut file earlier to align
+  // with its grandparent boundary.
+  const size_t kKeyValueSize = 10000;
+  mock_table_factory_->SetKeyValueSize(kKeyValueSize);
+
+  mutable_cf_options_.target_file_size_base = 10 * kKeyValueSize;
+
+  mock::KVVector file1;
+  for (int i = 0; i < 7; i++) {
+    file1.emplace_back(KeyStr("a", 100 - i, kTypeValue),
+                       "val" + std::to_string(100 - i));
+  }
+  file1.emplace_back(KeyStr("b", 90, kTypeValue), "valb");
+  AddMockFile(file1);
+
+  auto file2 = mock::MakeMockFile({{KeyStr("a", 93U, kTypeValue), "val93"},
+                                   {KeyStr("b", 90U, kTypeValue), "valb"}});
+  AddMockFile(file2, 1);
+
+  auto file3 = mock::MakeMockFile({{KeyStr("a", 89U, kTypeValue), "val"},
+                                   {KeyStr("a", 88U, kTypeValue), "val"}});
+  AddMockFile(file3, 2);
+
+  auto file4 = mock::MakeMockFile({{KeyStr("a", 87U, kTypeValue), "val"},
+                                   {KeyStr("a", 86U, kTypeValue), "val"}});
+  AddMockFile(file4, 2);
+
+  auto file5 = mock::MakeMockFile({{KeyStr("b", 85U, kTypeValue), "val"},
+                                   {KeyStr("b", 84U, kTypeValue), "val"}});
+  AddMockFile(file5, 2);
+
+  mock::KVVector expected_file1;
+  mock::KVVector expected_file_disable_dynamic_file_size;
+
+  for (int i = 0; i < 8; i++) {
+    expected_file1.emplace_back(KeyStr("a", 100 - i, kTypeValue),
+                                "val" + std::to_string(100 - i));
+    expected_file_disable_dynamic_file_size.emplace_back(
+        KeyStr("a", 100 - i, kTypeValue), "val" + std::to_string(100 - i));
+  }
+
+  // make sure `b` is cut in a separated file (so internally it's not using
+  // internal comparator, which will think the "b:90" (seqno 90) here is smaller
+  // than "b:85" on L2.)
+  auto expected_file2 =
+      mock::MakeMockFile({{KeyStr("b", 90U, kTypeValue), "valb"}});
+
+  expected_file_disable_dynamic_file_size.emplace_back(
+      KeyStr("b", 90U, kTypeValue), "valb");
+
+  SetLastSequence(122U);
+  const std::vector<int> input_levels = {0, 1};
+  auto lvl0_files = cfd_->current()->storage_info()->LevelFiles(0);
+  auto lvl1_files = cfd_->current()->storage_info()->LevelFiles(1);
+
+  // Just keep all the history
+  std::vector<SequenceNumber> snapshots;
+  for (int i = 80; i <= 100; i++) {
+    snapshots.emplace_back(i);
+  }
+
+  if (enable_dyanmic_file_size) {
+    RunCompaction({lvl0_files, lvl1_files}, input_levels,
+                  {expected_file1, expected_file2}, snapshots);
+  } else {
+    RunCompaction({lvl0_files, lvl1_files}, input_levels,
+                  {expected_file_disable_dynamic_file_size}, snapshots);
+  }
+}
+
+TEST_P(CompactionJobDynamicFileSizeTest, CutForMaxCompactionBytesSameKey) {
+  // dynamic_file_size option should have no impact on cutting for max
+  // compaction bytes.
+  bool enable_dyanmic_file_size = GetParam();
+  cf_options_.level_compaction_dynamic_file_size = enable_dyanmic_file_size;
+  NewDB();
+  mutable_cf_options_.target_file_size_base = 80;
+  mutable_cf_options_.max_compaction_bytes = 20;
+
+  auto file1 = mock::MakeMockFile({{KeyStr("a", 104U, kTypeValue), "val1"},
+                                   {KeyStr("b", 103U, kTypeValue), "val"}});
+  AddMockFile(file1);
+
+  auto file2 = mock::MakeMockFile({{KeyStr("a", 102U, kTypeValue), "val2"},
+                                   {KeyStr("c", 101U, kTypeValue), "val"}});
+  AddMockFile(file2, 1);
+
+  for (int i = 0; i < 10; i++) {
+    auto file =
+        mock::MakeMockFile({{KeyStr("a", 100 - (i * 2), kTypeValue), "val"},
+                            {KeyStr("a", 99 - (i * 2), kTypeValue), "val"}});
+    AddMockFile(file, 2);
+  }
+
+  for (int i = 0; i < 10; i++) {
+    auto file =
+        mock::MakeMockFile({{KeyStr("b", 80 - (i * 2), kTypeValue), "val"},
+                            {KeyStr("b", 79 - (i * 2), kTypeValue), "val"}});
+    AddMockFile(file, 2);
+  }
+  auto file5 = mock::MakeMockFile({{KeyStr("c", 60U, kTypeValue), "valc"},
+                                   {KeyStr("c", 59U, kTypeValue), "valc"}});
+
+  // "a" has 10 overlapped grandparent files (each size 10), which is far
+  // exceeded the `max_compaction_bytes`, but make sure 2 "a" are not separated,
+  // as splitting them won't help reducing the compaction size.
+  // also make sure "b" and "c" are cut separately.
+  mock::KVVector expected_file1 =
+      mock::MakeMockFile({{KeyStr("a", 104U, kTypeValue), "val1"},
+                          {KeyStr("a", 102U, kTypeValue), "val2"}});
+  mock::KVVector expected_file2 =
+      mock::MakeMockFile({{KeyStr("b", 103U, kTypeValue), "val"}});
+  mock::KVVector expected_file3 =
+      mock::MakeMockFile({{KeyStr("c", 101U, kTypeValue), "val"}});
+
+  SetLastSequence(122U);
+  const std::vector<int> input_levels = {0, 1};
+  auto lvl0_files = cfd_->current()->storage_info()->LevelFiles(0);
+  auto lvl1_files = cfd_->current()->storage_info()->LevelFiles(1);
+
+  // Just keep all the history
+  std::vector<SequenceNumber> snapshots;
+  for (int i = 80; i <= 105; i++) {
+    snapshots.emplace_back(i);
+  }
+
+  RunCompaction({lvl0_files, lvl1_files}, input_levels,
+                {expected_file1, expected_file2, expected_file3}, snapshots);
+}
+
 INSTANTIATE_TEST_CASE_P(CompactionJobDynamicFileSizeTest,
                         CompactionJobDynamicFileSizeTest, testing::Bool());
```

db/compaction/compaction_outputs.cc:
```diff
@@ -84,25 +84,20 @@ size_t CompactionOutputs::UpdateGrandparentBoundaryInfo(
   if (grandparents.empty()) {
     return curr_key_boundary_switched_num;
   }
+  assert(!internal_key.empty());
+  InternalKey ikey;
+  ikey.DecodeFrom(internal_key);
+  assert(ikey.Valid());

-  // TODO: here it uses the internal comparator but Compaction picker uses user
-  // comparator to get a clean cut with `GetOverlappingInputs()`, which means
-  // this function may cut files still be overlapped in compaction, for example
-  // current function can generate L1 files like:
-  //  L1: [2-21]  [22-30]
-  //  L2: [1-10]  [21-30]
-  // Because L1 `21` has higher seq_number, which is smaller than `21` on L2,
-  // it cuts in the first file. But for compaction picker L1 [2-21] file
-  // overlaps with both files on L2. Ideally it should cut to
-  //  L1: [2-20]  [21-30]
-  const InternalKeyComparator* icmp =
-      &compaction_->column_family_data()->internal_comparator();
+  const Comparator* ucmp = compaction_->column_family_data()->user_comparator();

+  // Move the grandparent_index_ to the file containing the current user_key.
+  // If there are multiple files containing the same user_key, make sure the
+  // index points to the last file containing the key.
   while (grandparent_index_ < grandparents.size()) {
     if (being_grandparent_gap_) {
-      if (icmp->Compare(internal_key,
-                        grandparents[grandparent_index_]->smallest.Encode()) <
-          0) {
+      if (sstableKeyCompare(ucmp, ikey,
+                            grandparents[grandparent_index_]->smallest) < 0) {
         break;
       }
       if (seen_key_) {
```
```diff
@@ -113,9 +108,16 @@ size_t CompactionOutputs::UpdateGrandparentBoundaryInfo(
       }
       being_grandparent_gap_ = false;
     } else {
-      if (icmp->Compare(internal_key,
-                        grandparents[grandparent_index_]->largest.Encode()) <=
-          0) {
+      int cmp_result = sstableKeyCompare(
+          ucmp, ikey, grandparents[grandparent_index_]->largest);
+      // If it's same key, make sure grandparent_index_ is pointing to the last
+      // one.
+      if (cmp_result < 0 ||
+          (cmp_result == 0 &&
+           (grandparent_index_ == grandparents.size() - 1 ||
+            sstableKeyCompare(ucmp, ikey,
+                              grandparents[grandparent_index_ + 1]->smallest) <
+                0))) {
         break;
       }
       if (seen_key_) {
```
```diff
@@ -132,13 +134,55 @@ size_t CompactionOutputs::UpdateGrandparentBoundaryInfo(
   if (!seen_key_ && !being_grandparent_gap_) {
     assert(grandparent_overlapped_bytes_ == 0);
     grandparent_overlapped_bytes_ =
-        grandparents[grandparent_index_]->fd.GetFileSize();
+        GetCurrentKeyGrandparentOverlappedBytes(internal_key);
   }

   seen_key_ = true;
   return curr_key_boundary_switched_num;
 }

+uint64_t CompactionOutputs::GetCurrentKeyGrandparentOverlappedBytes(
+    const Slice& internal_key) const {
+  // no overlap with any grandparent file
+  if (being_grandparent_gap_) {
+    return 0;
+  }
+  uint64_t overlapped_bytes = 0;
+
+  const std::vector<FileMetaData*>& grandparents = compaction_->grandparents();
+  const Comparator* ucmp = compaction_->column_family_data()->user_comparator();
+  InternalKey ikey;
+  ikey.DecodeFrom(internal_key);
+#ifndef NDEBUG
+  // make sure the grandparent_index_ is pointing to the last files containing
+  // the current key.
+  int cmp_result =
+      sstableKeyCompare(ucmp, ikey, grandparents[grandparent_index_]->largest);
+  assert(
+      cmp_result < 0 ||
+      (cmp_result == 0 &&
+       (grandparent_index_ == grandparents.size() - 1 ||
+        sstableKeyCompare(
+            ucmp, ikey, grandparents[grandparent_index_ + 1]->smallest) < 0)));
+  assert(sstableKeyCompare(ucmp, ikey,
+                           grandparents[grandparent_index_]->smallest) >= 0);
+#endif
+  overlapped_bytes += grandparents[grandparent_index_]->fd.GetFileSize();
+
+  // go backwards to find all overlapped files, one key can overlap multiple
+  // files. In the following example, if the current output key is `c`, and one
+  // compaction file was cut before `c`, current `c` can overlap with 3 files:
+  //  [a b]               [c...
+  // [b, b] [c, c] [c, c] [c, d]
+  for (int64_t i = static_cast<int64_t>(grandparent_index_) - 1;
+       i >= 0 && sstableKeyCompare(ucmp, ikey, grandparents[i]->largest) == 0;
+       i--) {
+    overlapped_bytes += grandparents[i]->fd.GetFileSize();
+  }
+
+  return overlapped_bytes;
+}
+
 bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {
   assert(c_iter.Valid());
```
```diff
@@ -238,7 +282,7 @@ bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {
   if (compaction_->immutable_options()->compaction_style ==
           kCompactionStyleLevel &&
       compaction_->immutable_options()->level_compaction_dynamic_file_size &&
-      current_output_file_size_ >
+      current_output_file_size_ >=
           ((compaction_->target_output_file_size() + 99) / 100) *
               (50 + std::min(grandparent_boundary_switched_num_ * 5,
                              size_t{40}))) {
```
```diff
@@ -297,13 +341,9 @@ Status CompactionOutputs::AddToOutput(
       return s;
     }

     // reset grandparent information
-    const std::vector<FileMetaData*>& grandparents =
-        compaction_->grandparents();
-    grandparent_overlapped_bytes_ =
-        being_grandparent_gap_
-            ? 0
-            : grandparents[grandparent_index_]->fd.GetFileSize();
     grandparent_boundary_switched_num_ = 0;
+    grandparent_overlapped_bytes_ =
+        GetCurrentKeyGrandparentOverlappedBytes(key);
   }

   // Open output file if necessary
```

db/compaction/compaction_outputs.h:
```diff
@@ -227,6 +227,11 @@ class CompactionOutputs {
   // It returns how many boundaries it crosses by including current key.
   size_t UpdateGrandparentBoundaryInfo(const Slice& internal_key);

+  // helper function to get the overlapped grandparent files size, it's only
+  // used for calculating the first key's overlap.
+  uint64_t GetCurrentKeyGrandparentOverlappedBytes(
+      const Slice& internal_key) const;
+
   // Add current key from compaction_iterator to the output file. If needed
   // close and open new compaction output with the functions provided.
   Status AddToOutput(const CompactionIterator& c_iter,
```

db/db_range_del_test.cc:
```diff
@@ -1726,17 +1726,16 @@ TEST_F(DBRangeDelTest, OverlappedKeys) {
   ASSERT_OK(db_->Flush(FlushOptions()));
   ASSERT_EQ(1, NumTableFilesAtLevel(0));

-  // The key range is broken up into 4 SSTs to avoid a future big compaction
+  // The key range is broken up into three SSTs to avoid a future big compaction
   // with the grandparent
   ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                         true /* disallow_trivial_move */));
-  ASSERT_EQ(4, NumTableFilesAtLevel(1));
+  ASSERT_EQ(3, NumTableFilesAtLevel(1));

   ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
                                         true /* disallow_trivial_move */));
-  // L1->L2 compaction outputs to 2 files because there are 2 separated
-  // compactions: [0-4] and [5-9]
-  ASSERT_EQ(2, NumTableFilesAtLevel(2));
+  // L1->L2 compaction size is limited to max_compaction_bytes
+  ASSERT_EQ(3, NumTableFilesAtLevel(2));
   ASSERT_EQ(0, NumTableFilesAtLevel(1));
 }
```
```diff
@@ -1985,6 +1984,40 @@ TEST_F(DBRangeDelTest, TombstoneFromCurrentLevel) {
   delete iter;
 }

+class TombstoneTestSstPartitioner : public SstPartitioner {
+ public:
+  const char* Name() const override { return "SingleKeySstPartitioner"; }
+
+  PartitionerResult ShouldPartition(
+      const PartitionerRequest& request) override {
+    if (cmp->Compare(*request.current_user_key, DBTestBase::Key(5)) == 0) {
+      return kRequired;
+    } else {
+      return kNotRequired;
+    }
+  }
+
+  bool CanDoTrivialMove(const Slice& /*smallest_user_key*/,
+                        const Slice& /*largest_user_key*/) override {
+    return false;
+  }
+
+  const Comparator* cmp = BytewiseComparator();
+};
+
+class TombstoneTestSstPartitionerFactory : public SstPartitionerFactory {
+ public:
+  static const char* kClassName() {
+    return "TombstoneTestSstPartitionerFactory";
+  }
+  const char* Name() const override { return kClassName(); }
+
+  std::unique_ptr<SstPartitioner> CreatePartitioner(
+      const SstPartitioner::Context& /* context */) const override {
+    return std::unique_ptr<SstPartitioner>(new TombstoneTestSstPartitioner());
+  }
+};
+
 TEST_F(DBRangeDelTest, TombstoneAcrossFileBoundary) {
   // Verify that a range tombstone across file boundary covers keys from older
   // levels. Test set up:
```
```diff
@@ -1998,11 +2031,17 @@ TEST_F(DBRangeDelTest, TombstoneAcrossFileBoundary) {
   options.target_file_size_base = 2 * 1024;
   options.max_compaction_bytes = 2 * 1024;

+  // Make sure L1 files are split before "5"
+  auto factory = std::make_shared<TombstoneTestSstPartitionerFactory>();
+  options.sst_partitioner_factory = factory;
+
   DestroyAndReopen(options);

   Random rnd(301);
   // L2
-  ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(1 << 10)));
+  // the file should be smaller than max_compaction_bytes, otherwise the file
+  // will be cut before 7.
+  ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(1 << 9)));
   ASSERT_OK(db_->Flush(FlushOptions()));
   MoveFilesToLevel(2);
   ASSERT_EQ(1, NumTableFilesAtLevel(2));
```
