Summary: Repeat of b6655a679d (reverted in b7a2369fb2) with a proper fix for the issue that 57d216ea65
was trying to fix. Test Plan: make check for i in $(seq 100); do ./db_stress --test_batches_snapshots=1 --threads=32 --write_buffer_size=4194304 --destroy_db_initially=0 --reopen=20 --readpercent=45 --prefixpercent=5 --writepercent=35 --delpercent=5 --iterpercent=10 --db=/tmp/rocksdb_crashtest_KdCI5F --max_key=100000000 --mmap_read=0 --block_size=16384 --cache_size=1048576 --open_files=500000 --verify_checksum=1 --sync=0 --progress_reports=0 --disable_wal=0 --disable_data_sync=1 --target_file_size_base=2097152 --target_file_size_multiplier=2 --max_write_buffer_number=3 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --filter_deletes=0 --memtablerep=prefix_hash --prefix_size=7 --ops_per_thread=200 || break; done Reviewers: anthony, sdong, igor, yhchiang Reviewed By: igor, yhchiang Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D41391
parent
9a6a0bd8c9
commit
e1c99e10c1
@ -0,0 +1,140 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <algorithm> |
||||||
|
#include <cstdint> |
||||||
|
#include <functional> |
||||||
|
#include "util/autovector.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// Binary heap implementation optimized for use in multi-way merge sort.
|
||||||
|
// Comparison to std::priority_queue:
|
||||||
|
// - In libstdc++, std::priority_queue::pop() usually performs just over logN
|
||||||
|
// comparisons but never fewer.
|
||||||
|
// - std::priority_queue does not have a replace-top operation, requiring a
|
||||||
|
// pop+push. If the replacement element is the new top, this requires
|
||||||
|
// around 2logN comparisons.
|
||||||
|
// - This heap's pop() uses a "schoolbook" downheap which requires up to ~2logN
|
||||||
|
// comparisons.
|
||||||
|
// - This heap provides a replace_top() operation which requires [1, 2logN]
|
||||||
|
// comparisons. When the replacement element is also the new top, this
|
||||||
|
// takes just 1 or 2 comparisons.
|
||||||
|
//
|
||||||
|
// The last property can yield an order-of-magnitude performance improvement
|
||||||
|
// when merge-sorting real-world non-random data. If the merge operation is
|
||||||
|
// likely to take chunks of elements from the same input stream, only 1
|
||||||
|
// comparison per element is needed. In RocksDB-land, this happens when
|
||||||
|
// compacting a database where keys are not randomly distributed across L0
|
||||||
|
// files but nearby keys are likely to be in the same L0 file.
|
||||||
|
//
|
||||||
|
// The container uses the same counterintuitive ordering as
|
||||||
|
// std::priority_queue: the comparison operator is expected to provide the
|
||||||
|
// less-than relation, but top() will return the maximum.
|
||||||
|
|
||||||
|
template<typename T, typename Compare = std::less<T>> |
||||||
|
class BinaryHeap { |
||||||
|
public: |
||||||
|
BinaryHeap() { } |
||||||
|
explicit BinaryHeap(Compare cmp) : cmp_(std::move(cmp)) { } |
||||||
|
|
||||||
|
void push(const T& value) { |
||||||
|
data_.push_back(value); |
||||||
|
upheap(data_.size() - 1); |
||||||
|
} |
||||||
|
|
||||||
|
void push(T&& value) { |
||||||
|
data_.push_back(std::move(value)); |
||||||
|
upheap(data_.size() - 1); |
||||||
|
} |
||||||
|
|
||||||
|
const T& top() const { |
||||||
|
assert(!empty()); |
||||||
|
return data_.front(); |
||||||
|
} |
||||||
|
|
||||||
|
void replace_top(const T& value) { |
||||||
|
assert(!empty()); |
||||||
|
data_.front() = value; |
||||||
|
downheap(get_root()); |
||||||
|
} |
||||||
|
|
||||||
|
void replace_top(T&& value) { |
||||||
|
assert(!empty()); |
||||||
|
data_.front() = std::move(value); |
||||||
|
downheap(get_root()); |
||||||
|
} |
||||||
|
|
||||||
|
void pop() { |
||||||
|
assert(!empty()); |
||||||
|
data_.front() = std::move(data_.back()); |
||||||
|
data_.pop_back(); |
||||||
|
if (!empty()) { |
||||||
|
downheap(get_root()); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void swap(BinaryHeap &other) { |
||||||
|
std::swap(cmp_, other.cmp_); |
||||||
|
data_.swap(other.data_); |
||||||
|
} |
||||||
|
|
||||||
|
void clear() { |
||||||
|
data_.clear(); |
||||||
|
} |
||||||
|
|
||||||
|
bool empty() const { |
||||||
|
return data_.empty(); |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
static inline size_t get_root() { return 0; } |
||||||
|
static inline size_t get_parent(size_t index) { return (index - 1) / 2; } |
||||||
|
static inline size_t get_left(size_t index) { return 2 * index + 1; } |
||||||
|
static inline size_t get_right(size_t index) { return 2 * index + 2; } |
||||||
|
|
||||||
|
void upheap(size_t index) { |
||||||
|
T v = std::move(data_[index]); |
||||||
|
while (index > get_root()) { |
||||||
|
const size_t parent = get_parent(index); |
||||||
|
if (!cmp_(data_[parent], v)) { |
||||||
|
break; |
||||||
|
} |
||||||
|
data_[index] = std::move(data_[parent]); |
||||||
|
index = parent; |
||||||
|
} |
||||||
|
data_[index] = std::move(v); |
||||||
|
} |
||||||
|
|
||||||
|
void downheap(size_t index) { |
||||||
|
T v = std::move(data_[index]); |
||||||
|
while (1) { |
||||||
|
const size_t left_child = get_left(index); |
||||||
|
if (get_left(index) >= data_.size()) { |
||||||
|
break; |
||||||
|
} |
||||||
|
const size_t right_child = left_child + 1; |
||||||
|
assert(right_child == get_right(index)); |
||||||
|
size_t picked_child = left_child; |
||||||
|
if (right_child < data_.size() && |
||||||
|
cmp_(data_[left_child], data_[right_child])) { |
||||||
|
picked_child = right_child; |
||||||
|
} |
||||||
|
if (!cmp_(v, data_[picked_child])) { |
||||||
|
break; |
||||||
|
} |
||||||
|
data_[index] = std::move(data_[picked_child]); |
||||||
|
index = picked_child; |
||||||
|
} |
||||||
|
data_[index] = std::move(v); |
||||||
|
} |
||||||
|
|
||||||
|
Compare cmp_; |
||||||
|
autovector<T> data_; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,138 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include <gtest/gtest.h> |
||||||
|
|
||||||
|
#include <climits> |
||||||
|
|
||||||
|
#include <queue> |
||||||
|
#include <utility> |
||||||
|
|
||||||
|
#include "util/heap.h" |
||||||
|
|
||||||
|
// Number of pseudo-random operations performed by each test instance.
// With gflags available it is a command-line flag (--iters); otherwise it is
// a fixed constant with the same default.
#ifdef GFLAGS
#include <gflags/gflags.h>
DEFINE_int64(iters, 100000, "number of pseudo-random operations in each test");
#else
const int64_t FLAGS_iters = 100000;
#endif  // GFLAGS
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compares the custom heap implementation in util/heap.h against |
||||||
|
* std::priority_queue on a pseudo-random sequence of operations. |
||||||
|
*/ |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
using HeapTestValue = uint64_t; |
||||||
|
using Params = std::tuple<size_t, HeapTestValue, int64_t>; |
||||||
|
|
||||||
|
// Value-parameterized fixture.  Each Params tuple is read by the test body
// as (maximum heap size, maximum value, RNG seed).
class HeapTest : public ::testing::TestWithParam<Params> {};
||||||
|
|
||||||
|
TEST_P(HeapTest, Test) {
  // This test performs the same pseudorandom sequence of operations on a
  // BinaryHeap and an std::priority_queue, comparing output.  The three
  // possible operations are insert, replace top and pop.
  //
  // Insert is chosen slightly more often than the others so that the size of
  // the heap slowly grows.  Once the size hits the MAX_HEAP_SIZE limit, we
  // disallow inserting until the heap becomes empty, testing the "draining"
  // scenario.

  const auto MAX_HEAP_SIZE = std::get<0>(GetParam());
  const auto MAX_VALUE = std::get<1>(GetParam());
  const auto RNG_SEED = std::get<2>(GetParam());

  BinaryHeap<HeapTestValue> heap;
  std::priority_queue<HeapTestValue> ref;

  // The seed parameter is an int64_t; convert it explicitly to the engine's
  // seed type instead of relying on an implicit conversion.
  std::mt19937 rng(static_cast<std::mt19937::result_type>(RNG_SEED));
  std::uniform_int_distribution<HeapTestValue> value_dist(0, MAX_VALUE);
  int ndrains = 0;
  bool draining = false;  // hit max size, draining until we empty the heap
  size_t size = 0;
  for (int64_t i = 0; i < FLAGS_iters; ++i) {
    if (size == 0) {
      draining = false;
    }

    if (!draining && (size == 0 || std::bernoulli_distribution(0.4)(rng))) {
      // insert
      HeapTestValue val = value_dist(rng);
      heap.push(val);
      ref.push(val);
      ++size;
      if (size == MAX_HEAP_SIZE) {
        draining = true;
        ++ndrains;
      }
    } else if (std::bernoulli_distribution(0.5)(rng)) {
      // replace top
      HeapTestValue val = value_dist(rng);
      heap.replace_top(val);
      ref.pop();
      ref.push(val);
    } else {
      // pop
      // Checked with a gtest assertion so that it also holds in builds where
      // assert() is compiled out.
      ASSERT_GT(size, 0U);
      heap.pop();
      ref.pop();
      --size;
    }

    // After every operation, check that the public methods give the same
    // results
    ASSERT_EQ(size == 0, ref.empty());
    ASSERT_EQ(size == 0, heap.empty());
    if (size > 0) {
      ASSERT_EQ(ref.top(), heap.top());
    }
  }

  // Probabilities should be set up to occasionally hit the max heap size and
  // drain it
  ASSERT_GT(ndrains, 0);

  heap.clear();
  ASSERT_TRUE(heap.empty());
}
||||||
|
|
||||||
|
// Basic test, MAX_VALUE = 3*MAX_HEAP_SIZE (occasional duplicates)
|
||||||
|
INSTANTIATE_TEST_CASE_P( |
||||||
|
Basic, HeapTest, |
||||||
|
::testing::Values(Params(1000, 3000, 0x1b575cf05b708945)) |
||||||
|
); |
||||||
|
// Mid-size heap with small values (many duplicates)
|
||||||
|
INSTANTIATE_TEST_CASE_P( |
||||||
|
SmallValues, HeapTest, |
||||||
|
::testing::Values(Params(100, 10, 0x5ae213f7bd5dccd0)) |
||||||
|
); |
||||||
|
// Small heap, large value range (no duplicates)
|
||||||
|
INSTANTIATE_TEST_CASE_P( |
||||||
|
SmallHeap, HeapTest, |
||||||
|
::testing::Values(Params(10, ULLONG_MAX, 0x3e1fa8f4d01707cf)) |
||||||
|
); |
||||||
|
// Two-element heap
|
||||||
|
INSTANTIATE_TEST_CASE_P( |
||||||
|
TwoElementHeap, HeapTest, |
||||||
|
::testing::Values(Params(2, 5, 0x4b5e13ea988c6abc)) |
||||||
|
); |
||||||
|
// One-element heap
|
||||||
|
INSTANTIATE_TEST_CASE_P( |
||||||
|
OneElementHeap, HeapTest, |
||||||
|
::testing::Values(Params(1, 3, 0x176a1019ab0b612e)) |
||||||
|
); |
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) { |
||||||
|
::testing::InitGoogleTest(&argc, argv); |
||||||
|
#ifdef GFLAGS |
||||||
|
GFLAGS::ParseCommandLineFlags(&argc, &argv, true); |
||||||
|
#endif // GFLAGS
|
||||||
|
return RUN_ALL_TESTS(); |
||||||
|
} |
Loading…
Reference in new issue