diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 4806eff7e..5bcf4cd29 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -334,11 +334,6 @@ struct RangeWithSize { : range(a, b), size(s) {} }; -bool SliceCompare(const Comparator* cmp, const Slice& a, const Slice& b) { - // Returns true if a < b - return cmp->Compare(ExtractUserKey(a), ExtractUserKey(b)) < 0; -} - // Generates a histogram representing potential divisions of key ranges from // the input. It adds the starting and/or ending keys of certain input files // to the working set and then finds the approximate size of data in between @@ -347,14 +342,13 @@ bool SliceCompare(const Comparator* cmp, const Slice& a, const Slice& b) { void CompactionJob::GenSubcompactionBoundaries() { auto* c = compact_->compaction; auto* cfd = c->column_family_data(); - std::set > bounds( - std::bind(&SliceCompare, cfd->user_comparator(), std::placeholders::_1, - std::placeholders::_2)); + const Comparator* cfd_comparator = cfd->user_comparator(); + std::vector bounds; int start_lvl = c->start_level(); int out_lvl = c->output_level(); // Add the starting and/or ending key of certain input files as a potential - // boundary (because we're inserting into a set, it avoids duplicates) + // boundary for (size_t lvl_idx = 0; lvl_idx < c->num_input_levels(); lvl_idx++) { int lvl = c->level(lvl_idx); if (lvl >= start_lvl && lvl <= out_lvl) { @@ -369,27 +363,37 @@ void CompactionJob::GenSubcompactionBoundaries() { // For level 0 add the starting and ending key of each file since the // files may have greatly differing key ranges (not range-partitioned) for (size_t i = 0; i < num_files; i++) { - bounds.emplace(flevel->files[i].smallest_key); - bounds.emplace(flevel->files[i].largest_key); + bounds.emplace_back(flevel->files[i].smallest_key); + bounds.emplace_back(flevel->files[i].largest_key); } } else { // For all other levels add the smallest/largest key in the level to // encompass the range covered by that level - bounds.emplace(flevel->files[0].smallest_key); - bounds.emplace(flevel->files[num_files - 1].largest_key); + bounds.emplace_back(flevel->files[0].smallest_key); + bounds.emplace_back(flevel->files[num_files - 1].largest_key); if (lvl == out_lvl) { // For the last level include the starting keys of all files since // the last level is the largest and probably has the widest key // range. Since it's range partitioned, the ending key of one file // and the starting key of the next are very close (or identical). for (size_t i = 1; i < num_files; i++) { - bounds.emplace(flevel->files[i].smallest_key); + bounds.emplace_back(flevel->files[i].smallest_key); } } } } } + std::sort(bounds.begin(), bounds.end(), + [cfd_comparator] (const Slice& a, const Slice& b) -> bool { + return cfd_comparator->Compare(ExtractUserKey(a), ExtractUserKey(b)) < 0; + }); + // Remove duplicated entries from bounds + bounds.erase(std::unique(bounds.begin(), bounds.end(), + [cfd_comparator] (const Slice& a, const Slice& b) -> bool { + return cfd_comparator->Compare(ExtractUserKey(a), ExtractUserKey(b)) == 0; + }), bounds.end()); + // Combine consecutive pairs of boundaries into ranges with an approximate // size of data covered by keys in that range uint64_t sum = 0;