From e0abec15805939de64d87bafab70ee147b5b97a5 Mon Sep 17 00:00:00 2001
From: Zhipeng Jia <zhipeng.jia@outlook.com>
Date: Tue, 22 Dec 2015 14:34:57 +0800
Subject: [PATCH] Sort a std::vector instead of using std::set for subcompaction bounds

---
 db/compaction_job.cc | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/db/compaction_job.cc b/db/compaction_job.cc
index 4806eff7e..5bcf4cd29 100644
--- a/db/compaction_job.cc
+++ b/db/compaction_job.cc
@@ -334,11 +334,6 @@ struct RangeWithSize {
       : range(a, b), size(s) {}
 };
 
-bool SliceCompare(const Comparator* cmp, const Slice& a, const Slice& b) {
-  // Returns true if a < b
-  return cmp->Compare(ExtractUserKey(a), ExtractUserKey(b)) < 0;
-}
-
 // Generates a histogram representing potential divisions of key ranges from
 // the input. It adds the starting and/or ending keys of certain input files
 // to the working set and then finds the approximate size of data in between
@@ -347,14 +342,13 @@ bool SliceCompare(const Comparator* cmp, const Slice& a, const Slice& b) {
 void CompactionJob::GenSubcompactionBoundaries() {
   auto* c = compact_->compaction;
   auto* cfd = c->column_family_data();
-  std::set<Slice, std::function<bool(const Slice& a, const Slice& b)> > bounds(
-      std::bind(&SliceCompare, cfd->user_comparator(), std::placeholders::_1,
-                std::placeholders::_2));
+  const Comparator* cfd_comparator = cfd->user_comparator();
+  std::vector<Slice> bounds;
   int start_lvl = c->start_level();
   int out_lvl = c->output_level();
 
   // Add the starting and/or ending key of certain input files as a potential
-  // boundary (because we're inserting into a set, it avoids duplicates)
+  // boundary (duplicates are removed after sorting below)
   for (size_t lvl_idx = 0; lvl_idx < c->num_input_levels(); lvl_idx++) {
     int lvl = c->level(lvl_idx);
     if (lvl >= start_lvl && lvl <= out_lvl) {
@@ -369,27 +363,37 @@ void CompactionJob::GenSubcompactionBoundaries() {
         // For level 0 add the starting and ending key of each file since the
         // files may have greatly differing key ranges (not range-partitioned)
         for (size_t i = 0; i < num_files; i++) {
-          bounds.emplace(flevel->files[i].smallest_key);
-          bounds.emplace(flevel->files[i].largest_key);
+          bounds.emplace_back(flevel->files[i].smallest_key);
+          bounds.emplace_back(flevel->files[i].largest_key);
         }
       } else {
         // For all other levels add the smallest/largest key in the level to
         // encompass the range covered by that level
-        bounds.emplace(flevel->files[0].smallest_key);
-        bounds.emplace(flevel->files[num_files - 1].largest_key);
+        bounds.emplace_back(flevel->files[0].smallest_key);
+        bounds.emplace_back(flevel->files[num_files - 1].largest_key);
         if (lvl == out_lvl) {
           // For the last level include the starting keys of all files since
           // the last level is the largest and probably has the widest key
           // range. Since it's range partitioned, the ending key of one file
           // and the starting key of the next are very close (or identical).
           for (size_t i = 1; i < num_files; i++) {
-            bounds.emplace(flevel->files[i].smallest_key);
+            bounds.emplace_back(flevel->files[i].smallest_key);
           }
         }
       }
     }
   }
 
+  std::sort(bounds.begin(), bounds.end(),
+    [cfd_comparator] (const Slice& a, const Slice& b) -> bool {
+      return cfd_comparator->Compare(ExtractUserKey(a), ExtractUserKey(b)) < 0;
+    });
+  // Remove entries with equal user keys; std::unique relies on the sort above
+  bounds.erase(std::unique(bounds.begin(), bounds.end(),
+    [cfd_comparator] (const Slice& a, const Slice& b) -> bool {
+      return cfd_comparator->Compare(ExtractUserKey(a), ExtractUserKey(b)) == 0;
+    }), bounds.end());
+
   // Combine consecutive pairs of boundaries into ranges with an approximate
   // size of data covered by keys in that range
   uint64_t sum = 0;