Replace the output split key with its pointer in subcompaction (#10316)

Summary:
The earlier implementation of cutting the output files with a compact cursor under Round-Robin priority uses `Valid()` to determine if the `output_split_key` is valid in `ShouldStopBefore`. This contributes to excessive CPU computation, as pointed out by [this issue](https://github.com/facebook/rocksdb/issues/10315). In this PR, we change the type of `output_split_key` to be `InternalKey*` and set it to `nullptr` if it is not going to be used in `ShouldStopBefore`, so that the `Valid()` condition check can be avoided by checking that pointer against `nullptr` instead.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10316

Reviewed By: ajkr

Differential Revision: D37661492

Pulled By: littlepig2013

fbshipit-source-id: 66ff1105f3378e5573d3a126fdaff9bb23b5498f
main
zczhu 2 years ago committed by Facebook GitHub Bot
parent e6c5e0ab9a
commit 8debfe2b21
  1. 13
      db/compaction/compaction.cc
  2. 4
      db/compaction/compaction.h
  3. 39
      db/compaction/compaction_job.cc

@ -285,13 +285,13 @@ Compaction::Compaction(
// Every compaction regardless of any compaction reason may respect the // Every compaction regardless of any compaction reason may respect the
// existing compact cursor in the output level to split output files // existing compact cursor in the output level to split output files
InternalKey temp_split_key = InternalKey(); output_split_key_ = nullptr;
if (immutable_options_.compaction_style == kCompactionStyleLevel && if (immutable_options_.compaction_style == kCompactionStyleLevel &&
immutable_options_.compaction_pri == kRoundRobin) { immutable_options_.compaction_pri == kRoundRobin) {
const InternalKey cursor = const InternalKey* cursor =
input_vstorage_->GetCompactCursors()[output_level_]; &input_vstorage_->GetCompactCursors()[output_level_];
if (cursor.Valid()) { if (cursor->size() != 0) {
const Slice& cursor_user_key = ExtractUserKey(cursor.Encode()); const Slice& cursor_user_key = ExtractUserKey(cursor->Encode());
auto ucmp = vstorage->InternalComparator()->user_comparator(); auto ucmp = vstorage->InternalComparator()->user_comparator();
// May split output files according to the cursor if it in the user-key // May split output files according to the cursor if it in the user-key
// range // range
@ -299,11 +299,10 @@ Compaction::Compaction(
0 && 0 &&
ucmp->CompareWithoutTimestamp(cursor_user_key, largest_user_key_) <= ucmp->CompareWithoutTimestamp(cursor_user_key, largest_user_key_) <=
0) { 0) {
temp_split_key = cursor; output_split_key_ = cursor;
} }
} }
} }
output_split_key_ = temp_split_key;
} }
Compaction::~Compaction() { Compaction::~Compaction() {

@ -182,7 +182,7 @@ class Compaction {
// split the output files according to the existing cursor in the output // split the output files according to the existing cursor in the output
// level under round-robin compaction policy. Empty indicates no required // level under round-robin compaction policy. Empty indicates no required
// splitting key // splitting key
const InternalKey GetOutputSplitKey() const { return output_split_key_; } const InternalKey* GetOutputSplitKey() const { return output_split_key_; }
// If true, then the compaction can be done by simply deleting input files. // If true, then the compaction can be done by simply deleting input files.
bool deletion_compaction() const { return deletion_compaction_; } bool deletion_compaction() const { return deletion_compaction_; }
@ -387,7 +387,7 @@ class Compaction {
// If true, then the compaction can be done by simply deleting input files. // If true, then the compaction can be done by simply deleting input files.
const bool deletion_compaction_; const bool deletion_compaction_;
// should it split the output file using the compact cursor? // should it split the output file using the compact cursor?
InternalKey output_split_key_; const InternalKey* output_split_key_;
// L0 files in LSM-tree might be overlapping. But the compaction picking // L0 files in LSM-tree might be overlapping. But the compaction picking
// logic might pick a subset of the files that aren't overlapping. if // logic might pick a subset of the files that aren't overlapping. if

@ -203,6 +203,9 @@ struct CompactionJob::SubcompactionState {
// A flag determines if this subcompaction has been split by the cursor // A flag determines if this subcompaction has been split by the cursor
bool is_split = false; bool is_split = false;
// We also maintain the output split key for each subcompaction to avoid
// repetitive comparison in ShouldStopBefore()
const InternalKey* local_output_split_key = nullptr;
SubcompactionState(Compaction* c, Slice* _start, Slice* _end, uint64_t size, SubcompactionState(Compaction* c, Slice* _start, Slice* _end, uint64_t size,
uint32_t _sub_job_id) uint32_t _sub_job_id)
@ -212,6 +215,21 @@ struct CompactionJob::SubcompactionState {
approx_size(size), approx_size(size),
sub_job_id(_sub_job_id) { sub_job_id(_sub_job_id) {
assert(compaction != nullptr); assert(compaction != nullptr);
const InternalKeyComparator* icmp =
&compaction->column_family_data()->internal_comparator();
const InternalKey* output_split_key = compaction->GetOutputSplitKey();
// Invalid output_split_key indicates that we do not need to split
if (output_split_key != nullptr) {
// We may only split the output when the cursor is in the range. Split
if ((end == nullptr || icmp->user_comparator()->Compare(
ExtractUserKey(output_split_key->Encode()),
ExtractUserKey(*end)) < 0) &&
(start == nullptr || icmp->user_comparator()->Compare(
ExtractUserKey(output_split_key->Encode()),
ExtractUserKey(*start)) > 0)) {
local_output_split_key = output_split_key;
}
}
} }
// Adds the key and value to the builder // Adds the key and value to the builder
@ -237,21 +255,12 @@ struct CompactionJob::SubcompactionState {
&compaction->column_family_data()->internal_comparator(); &compaction->column_family_data()->internal_comparator();
const std::vector<FileMetaData*>& grandparents = compaction->grandparents(); const std::vector<FileMetaData*>& grandparents = compaction->grandparents();
const InternalKey output_split_key = compaction->GetOutputSplitKey(); // Invalid local_output_split_key indicates that we do not need to split
if (output_split_key.Valid() && !is_split) { if (local_output_split_key != nullptr && !is_split) {
// Invalid output_split_key indicates that we do not need to split // Split occurs when the next key is larger than/equal to the cursor
if ((end == nullptr || icmp->user_comparator()->Compare( if (icmp->Compare(internal_key, local_output_split_key->Encode()) >= 0) {
ExtractUserKey(output_split_key.Encode()), is_split = true;
ExtractUserKey(*end)) < 0) && return true;
(start == nullptr || icmp->user_comparator()->Compare(
ExtractUserKey(output_split_key.Encode()),
ExtractUserKey(*start)) > 0)) {
// We may only split the output when the cursor is in the range. Split
// occurs when the next key is larger than/equal to the cursor
if (icmp->Compare(internal_key, output_split_key.Encode()) >= 0) {
is_split = true;
return true;
}
} }
} }
bool grandparant_file_switched = false; bool grandparant_file_switched = false;

Loading…
Cancel
Save