Use actual file size when checking max_compaction_bytes (#10728)

Summary:
Currently, there are places in compaction_picker where we add up the `compensated_file_size` of the files being compacted and limit the sum to be under `max_compaction_bytes`. `compensated_file_size` contains a booster for point tombstones and should be used only for determining a file's compaction priority. This PR replaces `compensated_file_size` with the actual file size in such places.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10728

Test Plan: CI

Reviewed By: ajkr

Differential Revision: D39789427

Pulled By: cbi42

fbshipit-source-id: 1f89fb6c0159c53bf01d8dc783f465959f442c81
main
Changyu Bi 2 years ago committed by Facebook GitHub Bot
parent f3cc66632b
commit fd71a82f4f
  1. 30
      db/compaction/compaction_picker.cc
  2. 8
      db/compaction/compaction_picker_level.cc
  3. 2
      include/rocksdb/universal_compaction.h

@ -27,16 +27,6 @@
namespace ROCKSDB_NAMESPACE {
namespace {
// Returns the sum of `compensated_file_size` over the entries of `files`,
// walking from the front and stopping at the first null pointer (entries
// after a null pointer are not counted).
uint64_t TotalCompensatedFileSize(const std::vector<FileMetaData*>& files) {
  uint64_t total = 0;
  for (const FileMetaData* file : files) {
    if (file == nullptr) {
      // A null entry terminates the accumulation.
      break;
    }
    total += file->compensated_file_size;
  }
  return total;
}
} // anonymous namespace
bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
size_t min_files_to_compact,
uint64_t max_compact_bytes_per_del_file,
@ -63,8 +53,6 @@ bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
return false;
}
size_t compact_bytes = static_cast<size_t>(level_files[start]->fd.file_size);
uint64_t compensated_compact_bytes =
level_files[start]->compensated_file_size;
size_t compact_bytes_per_del_file = std::numeric_limits<size_t>::max();
// Compaction range will be [start, limit).
size_t limit;
@ -73,11 +61,10 @@ bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
size_t new_compact_bytes_per_del_file = 0;
for (limit = start + 1; limit < level_files.size(); ++limit) {
compact_bytes += static_cast<size_t>(level_files[limit]->fd.file_size);
compensated_compact_bytes += level_files[limit]->compensated_file_size;
new_compact_bytes_per_del_file = compact_bytes / (limit - start);
if (level_files[limit]->being_compacted ||
new_compact_bytes_per_del_file > compact_bytes_per_del_file ||
compensated_compact_bytes > max_compaction_bytes) {
compact_bytes > max_compaction_bytes) {
break;
}
compact_bytes_per_del_file = new_compact_bytes_per_del_file;
@ -507,8 +494,8 @@ bool CompactionPicker::SetupOtherInputs(
if (!output_level_inputs->empty()) {
const uint64_t limit = mutable_cf_options.max_compaction_bytes;
const uint64_t output_level_inputs_size =
TotalCompensatedFileSize(output_level_inputs->files);
const uint64_t inputs_size = TotalCompensatedFileSize(inputs->files);
TotalFileSize(output_level_inputs->files);
const uint64_t inputs_size = TotalFileSize(inputs->files);
bool expand_inputs = false;
CompactionInputFiles expanded_inputs;
@ -527,8 +514,7 @@ bool CompactionPicker::SetupOtherInputs(
&expanded_inputs.files, base_index,
nullptr);
}
uint64_t expanded_inputs_size =
TotalCompensatedFileSize(expanded_inputs.files);
uint64_t expanded_inputs_size = TotalFileSize(expanded_inputs.files);
if (!ExpandInputsToCleanCut(cf_name, vstorage, &expanded_inputs)) {
try_overlapping_inputs = false;
}
@ -554,7 +540,7 @@ bool CompactionPicker::SetupOtherInputs(
vstorage->GetCleanInputsWithinInterval(input_level, &all_start,
&all_limit, &expanded_inputs.files,
base_index, nullptr);
expanded_inputs_size = TotalCompensatedFileSize(expanded_inputs.files);
expanded_inputs_size = TotalFileSize(expanded_inputs.files);
if (expanded_inputs.size() > inputs->size() &&
output_level_inputs_size + expanded_inputs_size < limit &&
!AreFilesInCompaction(expanded_inputs.files)) {
@ -724,18 +710,18 @@ Compaction* CompactionPicker::CompactRange(
}
largest = &inputs[i]->largest;
uint64_t s = inputs[i]->compensated_file_size;
uint64_t input_file_size = inputs[i]->fd.GetFileSize();
uint64_t output_level_total = 0;
if (output_level < vstorage->num_non_empty_levels()) {
std::vector<FileMetaData*> files;
vstorage->GetOverlappingInputsRangeBinarySearch(
output_level, smallest, largest, &files, hint_index, &hint_index);
for (const auto& file : files) {
output_level_total += file->compensated_file_size;
output_level_total += file->fd.GetFileSize();
}
}
input_level_total += s;
input_level_total += input_file_size;
if (input_level_total + output_level_total >= limit) {
covering_the_whole_range = false;

@ -318,7 +318,7 @@ void LevelCompactionBuilder::SetupOtherFilesWithRoundRobinExpansion() {
// Constraint 3 (pre-calculate the ideal max bytes to compact)
for (auto f : level_files) {
if (!f->being_compacted) {
start_lvl_bytes_no_compacting += f->compensated_file_size;
start_lvl_bytes_no_compacting += f->fd.GetFileSize();
}
}
if (start_lvl_bytes_no_compacting >
@ -341,7 +341,7 @@ void LevelCompactionBuilder::SetupOtherFilesWithRoundRobinExpansion() {
}
}
// Constraint 3
if (start_level_inputs_[0]->compensated_file_size >=
if (start_level_inputs_[0]->fd.GetFileSize() >=
start_lvl_max_bytes_to_compact) {
return;
}
@ -368,7 +368,7 @@ void LevelCompactionBuilder::SetupOtherFilesWithRoundRobinExpansion() {
curr_bytes_to_compact = 0;
for (auto start_lvl_f : tmp_start_level_inputs.files) {
curr_bytes_to_compact += start_lvl_f->compensated_file_size;
curr_bytes_to_compact += start_lvl_f->fd.GetFileSize();
}
// Check whether any output level files are locked
@ -385,7 +385,7 @@ void LevelCompactionBuilder::SetupOtherFilesWithRoundRobinExpansion() {
uint64_t start_lvl_curr_bytes_to_compact = curr_bytes_to_compact;
for (auto output_lvl_f : output_level_inputs.files) {
curr_bytes_to_compact += output_lvl_f->compensated_file_size;
curr_bytes_to_compact += output_lvl_f->fd.GetFileSize();
}
if (curr_bytes_to_compact > mutable_cf_options_.max_compaction_bytes) {
// Constraint 2

@ -59,7 +59,7 @@ class CompactionOptionsUniversal {
// A1...An B1...Bm C1...Ct
// where A1 is the newest and Ct is the oldest, and we are going to compact
// B1...Bm, we calculate the total size of all the files as total_size, as
// well as the total size of C1...Ct as total_C, the compaction output file
// well as the total size of C1...Ct as total_C, the compaction output file
// will be compressed iff
// total_C / total_size < this percentage
// Default: -1

Loading…
Cancel
Save