Cache heap::downheap() root comparison (optimize heap cmp call)

Summary:
Reduce the number of comparisons in the heap by caching which of the root's two children (left_child or right_child) is smallest, so that the next downheap() from the root can compare against that child directly instead of comparing the two children again.

With this optimization the total number of comparator calls drops significantly (roughly halved in the readseq benchmark below).

Before caching (~2 million key comparisons for iterating over the DB):
```
$ DEBUG_LEVEL=0 make db_bench -j64 && ./db_bench --benchmarks="readseq" --db="/dev/shm/heap_opt" --use_existing_db --disable_auto_compactions --cache_size=1000000000  --perf_level=2
readseq      :       0.338 micros/op 2959201 ops/sec;  327.4 MB/s user_key_comparison_count = 2000008
```
After caching (~1 million key comparisons for iterating over the DB):
```
$ DEBUG_LEVEL=0 make db_bench -j64 && ./db_bench --benchmarks="readseq" --db="/dev/shm/heap_opt" --use_existing_db --disable_auto_compactions --cache_size=1000000000 --perf_level=2
readseq      :       0.309 micros/op 3236801 ops/sec;  358.1 MB/s user_key_comparison_count = 1000011
```
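
To make the mechanism concrete, here is a minimal standalone sketch of the same idea. It is a toy min-heap, not the RocksDB class: CachedMinHeap, kUnset, and its replace_top() are illustrative names, with kUnset standing in for port::kMaxSizet.

```cpp
#include <cstddef>
#include <cstdio>
#include <initializer_list>
#include <limits>
#include <utility>
#include <vector>

// Toy min-heap, only to illustrate the caching trick.
template <typename T>
class CachedMinHeap {
 public:
  static constexpr size_t kUnset = std::numeric_limits<size_t>::max();

  void push(T v) {
    data_.push_back(std::move(v));
    upheap(data_.size() - 1);
  }
  const T& top() const { return data_.front(); }

  // Overwrite the root and sift it down; this is the hot path that benefits
  // from remembering the previous left-vs-right result.
  void replace_top(T v) {
    data_.front() = std::move(v);
    downheap(0);
  }

 private:
  void upheap(size_t index) {
    T v = std::move(data_[index]);
    while (index > 0) {
      const size_t parent = (index - 1) / 2;
      if (!(v < data_[parent])) break;
      data_[index] = std::move(data_[parent]);
      index = parent;
    }
    data_[index] = std::move(v);
    root_cmp_cache_ = kUnset;  // arbitrary slots may have moved: cache is stale
  }

  void downheap(size_t index) {
    T v = std::move(data_[index]);
    size_t picked_child = kUnset;
    while (true) {
      const size_t left = 2 * index + 1;
      if (left >= data_.size()) break;
      const size_t right = left + 1;
      picked_child = left;
      if (index == 0 && root_cmp_cache_ < data_.size()) {
        // Reuse the smaller root child found by the previous downheap(0).
        picked_child = root_cmp_cache_;
      } else if (right < data_.size() && data_[right] < data_[left]) {
        picked_child = right;
      }
      if (!(data_[picked_child] < v)) break;
      data_[index] = std::move(data_[picked_child]);
      index = picked_child;
    }
    // If we never left the root, only data_[0] was rewritten and the root's
    // children are untouched, so remember which of them is smaller.
    root_cmp_cache_ = (index == 0) ? picked_child : kUnset;
    data_[index] = std::move(v);
  }

  std::vector<T> data_;
  size_t root_cmp_cache_ = kUnset;
};

int main() {
  CachedMinHeap<int> h;
  for (int x : {10, 20, 30, 40}) h.push(x);
  h.replace_top(11);  // still below both children: downheap() stops at the
                      // root and caches which child (20 vs 30) is smaller
  h.replace_top(12);  // reuses the cached child, saving one comparison
  std::printf("top = %d\n", h.top());
  return 0;
}
```

The cache is only trusted while the root's children are untouched, so any operation that can move other elements resets it, which mirrors the reset_root_cmp_cache() calls in the diff below.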

The optimization also improves readseq throughput in the run above, from 2,959,201 ops/sec (327.4 MB/s) to 3,236,801 ops/sec (358.1 MB/s).
Closes https://github.com/facebook/rocksdb/pull/1600

Differential Revision: D4256027

Pulled By: IslamAbdelRahman

fbshipit-source-id: 76fcc66
Branch: main
Author: Islam AbdelRahman (committed by Facebook Github Bot)
Commit: 4a21b1402c (parent e39d080871)

Changed files:
  tools/db_bench_tool.cc (3 lines changed)
  util/heap.h (32 lines changed)

tools/db_bench_tool.cc:
```diff
@@ -3589,6 +3589,9 @@ class Benchmark {
     }
     delete iter;
     thread->stats.AddBytes(bytes);
+    if (FLAGS_perf_level > rocksdb::PerfLevel::kDisable) {
+      thread->stats.AddMessage(perf_context.ToString());
+    }
   }
 
   void ReadReverse(ThreadState* thread) {
```

util/heap.h:
```diff
@@ -8,6 +8,7 @@
 #include <algorithm>
 #include <cstdint>
 #include <functional>
+#include "port/port.h"
 #include "util/autovector.h"
 
 namespace rocksdb {
@@ -75,22 +76,28 @@ class BinaryHeap {
     data_.pop_back();
     if (!empty()) {
       downheap(get_root());
+    } else {
+      reset_root_cmp_cache();
     }
   }
 
   void swap(BinaryHeap &other) {
     std::swap(cmp_, other.cmp_);
     data_.swap(other.data_);
+    std::swap(root_cmp_cache_, other.root_cmp_cache_);
   }
 
   void clear() {
     data_.clear();
+    reset_root_cmp_cache();
   }
 
   bool empty() const {
     return data_.empty();
   }
 
+  void reset_root_cmp_cache() { root_cmp_cache_ = port::kMaxSizet; }
+
  private:
   static inline size_t get_root() { return 0; }
   static inline size_t get_parent(size_t index) { return (index - 1) / 2; }
@@ -108,10 +115,13 @@ class BinaryHeap {
       index = parent;
     }
     data_[index] = std::move(v);
+    reset_root_cmp_cache();
   }
 
   void downheap(size_t index) {
     T v = std::move(data_[index]);
+
+    size_t picked_child = port::kMaxSizet;
     while (1) {
       const size_t left_child = get_left(index);
       if (get_left(index) >= data_.size()) {
@@ -119,9 +129,11 @@ class BinaryHeap {
       }
       const size_t right_child = left_child + 1;
       assert(right_child == get_right(index));
-      size_t picked_child = left_child;
-      if (right_child < data_.size() &&
-          cmp_(data_[left_child], data_[right_child])) {
+      picked_child = left_child;
+      if (index == 0 && root_cmp_cache_ < data_.size()) {
+        picked_child = root_cmp_cache_;
+      } else if (right_child < data_.size() &&
+                 cmp_(data_[left_child], data_[right_child])) {
         picked_child = right_child;
       }
       if (!cmp_(v, data_[picked_child])) {
@@ -130,11 +142,25 @@ class BinaryHeap {
       data_[index] = std::move(data_[picked_child]);
       index = picked_child;
     }
+
+    if (index == 0) {
+      // We did not change anything in the tree except for the value
+      // of the root node; the left and right children did not change,
+      // so we can cache that `picked_child` is the smallest child and
+      // next time compare against it directly.
+      root_cmp_cache_ = picked_child;
+    } else {
+      // the tree changed, reset the cache
+      reset_root_cmp_cache();
+    }
+
     data_[index] = std::move(v);
   }
 
   Compare cmp_;
   autovector<T> data_;
+  // Used to reduce the number of cmp_ calls in downheap()
+  size_t root_cmp_cache_ = port::kMaxSizet;
 };
 
 }  // namespace rocksdb
```
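
For context on when the cache pays off, below is a hedged usage sketch of the access pattern that motivates it: a k-way merge that repeatedly replaces the heap top, much like the merging iterator behind readseq. Cursor, GreaterByKey, and Merge are made-up names, and the BinaryHeap members used (push, top, replace_top, pop, empty) are assumed from the existing util/heap.h interface, treating the heap as a max-heap with respect to its comparator.

```cpp
#include <cstddef>
#include <string>
#include <vector>

#include "util/heap.h"

// Hypothetical element type: a cursor into one sorted run.
struct Cursor {
  const std::vector<std::string>* run;
  size_t pos;
  const std::string& key() const { return (*run)[pos]; }
};

// Ordered so that the smallest key ends up on top of the (max-)heap.
struct GreaterByKey {
  bool operator()(const Cursor& a, const Cursor& b) const {
    return a.key() > b.key();
  }
};

// Merge k sorted runs. Each step either advances the top cursor in place
// (replace_top() -> downheap() from the root, where the cached left/right
// result can be reused) or pops it when its run is exhausted.
std::vector<std::string> Merge(const std::vector<std::vector<std::string>>& runs) {
  rocksdb::BinaryHeap<Cursor, GreaterByKey> heap;
  for (const auto& r : runs) {
    if (!r.empty()) {
      heap.push(Cursor{&r, 0});
    }
  }
  std::vector<std::string> out;
  while (!heap.empty()) {
    Cursor c = heap.top();
    out.push_back(c.key());
    if (c.pos + 1 < c.run->size()) {
      ++c.pos;
      heap.replace_top(c);  // only the root slot changes
    } else {
      heap.pop();  // this run is done
    }
  }
  return out;
}
```

In this loop most iterations take the replace_top() path, so downheap() starts (and often ends) at the root, which is exactly where the cached comparison saves work.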
