[RocksDB] [Performance Branch] Added dynamic bloom, to be used for memtable non-existing key filtering
Summary: as title Test Plan: dynamic_bloom_test Reviewers: dhruba, sdong, kailiu CC: leveldb Differential Revision: https://reviews.facebook.net/D14385main
parent
a82f42b765
commit
3c02c363b3
@ -0,0 +1,63 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "dynamic_bloom.h" |
||||||
|
|
||||||
|
#include "rocksdb/slice.h" |
||||||
|
#include "util/hash.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
namespace { |
||||||
|
static uint32_t BloomHash(const Slice& key) { |
||||||
|
return Hash(key.data(), key.size(), 0xbc9f1d34); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Constructs a filter that hashes keys with the caller-provided hash_func.
//
// total_bits is rounded up to the next multiple of 8 so that the bit array
// occupies a whole number of bytes. hash_func must be non-null, and both
// num_probes and the rounded bit count must be positive (checked with
// assert).
DynamicBloom::DynamicBloom(uint32_t total_bits,
                           uint32_t (*hash_func)(const Slice& key),
                           uint32_t num_probes)
    : hash_func_(hash_func),
      total_bits_(((total_bits + 7) / 8) * 8),
      num_probes_(num_probes) {
  assert(hash_func_ != nullptr);
  assert(num_probes_ > 0);
  assert(total_bits_ > 0);

  // The trailing "()" value-initializes the array, so every bit starts at 0.
  const uint32_t num_bytes = total_bits_ / 8;
  data_.reset(new unsigned char[num_bytes]());
}
||||||
|
|
||||||
|
// Constructs a filter that uses the file-local default hash (BloomHash).
//
// Forwards to the three-argument constructor, which rounds total_bits up to
// a multiple of 8, asserts that num_probes and the rounded bit count are
// positive, and allocates the zeroed bit array.
DynamicBloom::DynamicBloom(uint32_t total_bits, uint32_t num_probes)
    : DynamicBloom(total_bits, &BloomHash, num_probes) {}
||||||
|
|
||||||
|
void DynamicBloom::Add(const Slice& key) { |
||||||
|
uint32_t h = hash_func_(key); |
||||||
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
|
for (uint32_t i = 0; i < num_probes_; i++) { |
||||||
|
const uint32_t bitpos = h % total_bits_; |
||||||
|
data_[bitpos/8] |= (1 << (bitpos % 8)); |
||||||
|
h += delta; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
bool DynamicBloom::MayContain(const Slice& key) { |
||||||
|
uint32_t h = hash_func_(key); |
||||||
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
|
for (uint32_t i = 0; i < num_probes_; i++) { |
||||||
|
const uint32_t bitpos = h % total_bits_; |
||||||
|
if ((data_[bitpos/8] & (1 << (bitpos % 8))) |
||||||
|
== 0) return false; |
||||||
|
h += delta; |
||||||
|
} |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
} |
@ -0,0 +1,42 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <atomic> |
||||||
|
#include <memory> |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class Slice; |
||||||
|
|
||||||
|
class DynamicBloom { |
||||||
|
|
||||||
|
public: |
||||||
|
|
||||||
|
// total_bits: fixed total bits for the bloom
|
||||||
|
// hash_func: customized hash function
|
||||||
|
// num_probes: number of hash probes for a single key
|
||||||
|
DynamicBloom(uint32_t total_bits, |
||||||
|
uint32_t (*hash_func)(const Slice& key), |
||||||
|
uint32_t num_probes = 6); |
||||||
|
|
||||||
|
explicit DynamicBloom(uint32_t total_bits, uint32_t num_probes = 6); |
||||||
|
|
||||||
|
// Assuming single threaded access to Add
|
||||||
|
void Add(const Slice& key); |
||||||
|
|
||||||
|
// Multithreaded access to MayContain is OK
|
||||||
|
bool MayContain(const Slice& key); |
||||||
|
|
||||||
|
|
||||||
|
private: |
||||||
|
uint32_t (*hash_func_)(const Slice& key); |
||||||
|
uint32_t total_bits_; |
||||||
|
uint32_t num_probes_; |
||||||
|
std::unique_ptr<unsigned char[]> data_; |
||||||
|
}; |
||||||
|
|
||||||
|
} |
@ -0,0 +1,113 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include <gflags/gflags.h> |
||||||
|
|
||||||
|
#include "dynamic_bloom.h" |
||||||
|
#include "util/logging.h" |
||||||
|
#include "util/testharness.h" |
||||||
|
#include "util/testutil.h" |
||||||
|
|
||||||
|
DEFINE_int32(bits_per_key, 10, ""); |
||||||
|
DEFINE_int32(num_probes, 6, ""); |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// Serializes i into buffer as its raw in-memory bytes and returns a Slice
// that refers to those bytes. buffer must hold at least sizeof(int) bytes
// and must outlive the returned Slice.
static Slice Key(int i, char* buffer) {
  const size_t num_bytes = sizeof(i);
  memcpy(buffer, &i, num_bytes);
  return Slice(buffer, num_bytes);
}
||||||
|
|
||||||
|
// Empty fixture type; it only names the test group used by the TEST macros
// below.
class DynamicBloomTest {};
||||||
|
|
||||||
|
// A filter with nothing added must report every key as absent.
TEST(DynamicBloomTest, EmptyFilter) {
  DynamicBloom filter(100, 2);

  const char* const kNeverAdded[] = {"hello", "world"};
  for (const char* key : kNeverAdded) {
    ASSERT_TRUE(!filter.MayContain(key));
  }
}
||||||
|
|
||||||
|
// After adding two keys, both must be reported as possibly present, and two
// keys that were never added are expected to be reported as absent.
TEST(DynamicBloomTest, Small) {
  DynamicBloom filter(100, 2);

  const char* const kAdded[] = {"hello", "world"};
  const char* const kNeverAdded[] = {"x", "foo"};

  for (const char* key : kAdded) {
    filter.Add(key);
  }
  for (const char* key : kAdded) {
    ASSERT_TRUE(filter.MayContain(key));
  }
  for (const char* key : kNeverAdded) {
    ASSERT_TRUE(!filter.MayContain(key));
  }
}
||||||
|
|
||||||
|
// Returns the next key count to test. The increment grows with the current
// value: +1 below 10, +10 below 100, +100 below 1000, and +1000 otherwise.
static int NextLength(int length) {
  int step = 1000;
  if (length < 10) {
    step = 1;
  } else if (length < 100) {
    step = 10;
  } else if (length < 1000) {
    step = 100;
  }
  return length + step;
}
||||||
|
|
||||||
|
// Builds filters for a range of key counts and checks that (a) every added
// key is always reported as possibly present and (b) the measured
// false-positive rate exceeds 1.25% for only a small share of the filters.
TEST(DynamicBloomTest, VaryingLengths) {
  char buffer[sizeof(int)];

  // Filters at or under the 1.25% false-positive threshold vs. those above.
  int good_filters = 0;
  int mediocre_filters = 0;

  fprintf(stderr, "bits_per_key: %d num_probes: %d\n",
          FLAGS_bits_per_key, FLAGS_num_probes);

  int length = 1;
  while (length <= 10000) {
    // Size the filter from the key count, but never below 64 bits.
    const uint32_t bloom_bits = std::max(length * FLAGS_bits_per_key, 64);
    DynamicBloom bloom(bloom_bits, FLAGS_num_probes);

    // Insert keys 0..length-1; each must be visible right after insertion.
    for (int k = 0; k < length; ++k) {
      bloom.Add(Key(k, buffer));
      ASSERT_TRUE(bloom.MayContain(Key(k, buffer)));
    }

    // All added keys must still match once the filter is fully populated.
    for (int k = 0; k < length; ++k) {
      ASSERT_TRUE(bloom.MayContain(Key(k, buffer)))
          << "Length " << length << "; key " << k;
    }

    // Measure the false-positive rate with 10000 keys that were never added
    // (offset by 1000000000 so they cannot collide with the inserted keys).
    int false_hits = 0;
    for (int q = 0; q < 10000; ++q) {
      const bool hit = bloom.MayContain(Key(q + 1000000000, buffer));
      if (hit) {
        ++false_hits;
      }
    }
    const double rate = false_hits / 10000.0;

    fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; \n",
            rate * 100.0, length);

    // A hard cap (ASSERT_LE(rate, 0.02), i.e. at most 2%) is intentionally
    // left disabled; filters above 1.25% are only counted.
    if (rate <= 0.0125) {
      ++good_filters;
    } else {
      ++mediocre_filters;  // Allowed, but not too often
    }

    length = NextLength(length);
  }

  fprintf(stderr, "Filters: %d good, %d mediocre\n",
          good_filters, mediocre_filters);

  ASSERT_LE(mediocre_filters, good_filters / 5);
}
||||||
|
|
||||||
|
// Different bits-per-byte
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) { |
||||||
|
google::ParseCommandLineFlags(&argc, &argv, true); |
||||||
|
|
||||||
|
return rocksdb::test::RunAllTests(); |
||||||
|
} |
Loading…
Reference in new issue