Refactor/consolidate legacy Bloom implementation details (#5784)

Summary:
Refactoring to consolidate implementation details of legacy
Bloom filters. This helps to organize and document some related,
obscure code.

Also added make/cpp var TEST_CACHE_LINE_SIZE so that it's easy to
compile and run unit tests for a non-native cache line size. (Fixed a
related test failure in db_properties_test.)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5784

Test Plan:
make check, including recently added Bloom schema unit tests
(in ./plain_table_db_test && ./bloom_test), and including runs with
TEST_CACHE_LINE_SIZE=128U and TEST_CACHE_LINE_SIZE=256U. Also ran the
schema tests with temporary fault injection into the new implementations.

Some performance testing with modified unit tests suggests a small to moderate
improvement in speed.

Differential Revision: D17381384

Pulled By: pdillinger

fbshipit-source-id: ee42586da996798910fc45ac0b6289147f16d8df
Branch: main
Author: Peter Dillinger, committed by Facebook Github Bot
Parent: 638d239507
Commit: 68626249c3
Changed files (lines changed):
  1. Makefile (5)
  2. db/db_properties_test.cc (14)
  3. port/port_posix.h (27)
  4. table/full_filter_bits_builder.h (7)
  5. table/plain/plain_table_bloom.cc (8)
  6. table/plain/plain_table_bloom.h (66)
  7. table/plain/plain_table_reader.cc (7)
  8. third-party/folly/folly/ConstexprMath.h (28)
  9. util/bloom.cc (175)
  10. util/bloom_impl.h (140)
  11. util/bloom_test.cc (3)

Makefile
@@ -332,6 +332,11 @@ ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
 endif
 endif
 
+ifdef TEST_CACHE_LINE_SIZE
+PLATFORM_CCFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
+PLATFORM_CXXFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
+endif
+
 # This (the first rule) must depend on "all".
 default: all

db/db_properties_test.cc
@@ -210,12 +210,11 @@ void VerifySimilar(uint64_t a, uint64_t b, double bias) {
   }
 }
 
-void VerifyTableProperties(const TableProperties& base_tp,
-                           const TableProperties& new_tp,
-                           double filter_size_bias = 0.1,
-                           double index_size_bias = 0.1,
-                           double data_size_bias = 0.1,
-                           double num_data_blocks_bias = 0.05) {
+void VerifyTableProperties(
+    const TableProperties& base_tp, const TableProperties& new_tp,
+    double filter_size_bias = CACHE_LINE_SIZE >= 256 ? 0.15 : 0.1,
+    double index_size_bias = 0.1, double data_size_bias = 0.1,
+    double num_data_blocks_bias = 0.05) {
   VerifySimilar(base_tp.data_size, new_tp.data_size, data_size_bias);
   VerifySimilar(base_tp.index_size, new_tp.index_size, index_size_bias);
   VerifySimilar(base_tp.filter_size, new_tp.filter_size, filter_size_bias);
@@ -266,7 +265,8 @@ void GetExpectedTableProperties(
       // discount 1 byte as value size is not encoded in value delta encoding
       (value_delta_encoding ? 1 : 0));
   expected_tp->filter_size =
-      kTableCount * (kKeysPerTable * kBloomBitsPerKey / 8);
+      kTableCount * ((kKeysPerTable * kBloomBitsPerKey + 7) / 8 +
+                     /*average-ish overhead*/ CACHE_LINE_SIZE / 2);
 }
 
 }  // anonymous namespace

port/port_posix.h
@@ -178,22 +178,31 @@ typedef pthread_once_t OnceType;
 extern void InitOnce(OnceType* once, void (*initializer)());
 
 #ifndef CACHE_LINE_SIZE
-#if defined(__s390__)
-#define CACHE_LINE_SIZE 256U
-#elif defined(__powerpc__) || defined(__aarch64__)
-#define CACHE_LINE_SIZE 128U
-#else
-#define CACHE_LINE_SIZE 64U
-#endif
+// To test behavior with non-native cache line size, e.g. for
+// Bloom filters, set TEST_CACHE_LINE_SIZE to the desired test size.
+// This disables ALIGN_AS to keep it from failing compilation.
+#ifdef TEST_CACHE_LINE_SIZE
+#define CACHE_LINE_SIZE TEST_CACHE_LINE_SIZE
+#define ALIGN_AS(n) /*empty*/
+#else
+#if defined(__s390__)
+#define CACHE_LINE_SIZE 256U
+#elif defined(__powerpc__) || defined(__aarch64__)
+#define CACHE_LINE_SIZE 128U
+#else
+#define CACHE_LINE_SIZE 64U
+#endif
+#define ALIGN_AS(n) alignas(n)
+#endif
 #endif
 
+static_assert((CACHE_LINE_SIZE & (CACHE_LINE_SIZE - 1)) == 0,
+              "Cache line size must be a power of 2 number of bytes");
+
 extern void *cacheline_aligned_alloc(size_t size);
 extern void cacheline_aligned_free(void *memblock);
-#define ALIGN_AS(n) alignas(n)
+
 #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
 extern void Crash(const std::string& srcfile, int srcline);
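
Note: a minimal sketch of how the override above is meant to behave (not part of the patch; it assumes a build where the Makefile change above passes -DTEST_CACHE_LINE_SIZE=128U, and the struct name is only illustrative):

// Hypothetical translation unit compiled with -DTEST_CACHE_LINE_SIZE=128U.
#include "port/port.h"

static_assert(CACHE_LINE_SIZE == 128U,
              "TEST_CACHE_LINE_SIZE overrides the platform default");

// Under TEST_CACHE_LINE_SIZE, ALIGN_AS(n) expands to nothing, so this struct
// keeps its natural alignment instead of requesting (and possibly failing on)
// a non-native over-alignment.
struct ALIGN_AS(CACHE_LINE_SIZE) SampleCacheLineBlock {
  char bytes[CACHE_LINE_SIZE];
};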

table/full_filter_bits_builder.h
@@ -20,8 +20,7 @@ class Slice;
 class FullFilterBitsBuilder : public FilterBitsBuilder {
  public:
-  explicit FullFilterBitsBuilder(const size_t bits_per_key,
-                                 const size_t num_probes);
+  explicit FullFilterBitsBuilder(const int bits_per_key, const int num_probes);
 
   // No Copy allowed
   FullFilterBitsBuilder(const FullFilterBitsBuilder&) = delete;
@@ -56,8 +55,8 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
  private:
   friend class FullFilterBlockTest_DuplicateEntries_Test;
 
-  size_t bits_per_key_;
-  size_t num_probes_;
+  int bits_per_key_;
+  int num_probes_;
   std::vector<uint32_t> hash_entries_;
 
   // Get totalbits that optimized for cpu cache line

table/plain/plain_table_bloom.cc
@@ -33,9 +33,9 @@ uint32_t GetTotalBitsForLocality(uint32_t total_bits) {
 PlainTableBloomV1::PlainTableBloomV1(uint32_t num_probes)
     : kTotalBits(0), kNumBlocks(0), kNumProbes(num_probes), data_(nullptr) {}
 
-void PlainTableBloomV1::SetRawData(unsigned char* raw_data, uint32_t total_bits,
+void PlainTableBloomV1::SetRawData(char* raw_data, uint32_t total_bits,
                                    uint32_t num_blocks) {
-  data_ = reinterpret_cast<uint8_t*>(raw_data);
+  data_ = raw_data;
   kTotalBits = total_bits;
   kNumBlocks = num_blocks;
 }
@@ -63,7 +63,7 @@ void PlainTableBloomV1::SetTotalBits(Allocator* allocator,
   if (kNumBlocks > 0 && cache_line_offset > 0) {
     raw += CACHE_LINE_SIZE - cache_line_offset;
   }
-  data_ = reinterpret_cast<uint8_t*>(raw);
+  data_ = raw;
 }
 
 void BloomBlockBuilder::AddKeysHashes(const std::vector<uint32_t>& keys_hashes) {

table/plain/plain_table_bloom.h
@@ -10,8 +10,11 @@
 #include "rocksdb/slice.h"
 
 #include "port/port.h"
+#include "util/bloom_impl.h"
 #include "util/hash.h"
 
+#include "third-party/folly/folly/ConstexprMath.h"
+
 #include <memory>
 
 namespace rocksdb {
@@ -50,12 +53,9 @@ class PlainTableBloomV1 {
   uint32_t GetNumBlocks() const { return kNumBlocks; }
 
-  Slice GetRawData() const {
-    return Slice(reinterpret_cast<char*>(data_), GetTotalBits() / 8);
-  }
+  Slice GetRawData() const { return Slice(data_, GetTotalBits() / 8); }
 
-  void SetRawData(unsigned char* raw_data, uint32_t total_bits,
-                  uint32_t num_blocks = 0);
+  void SetRawData(char* raw_data, uint32_t total_bits, uint32_t num_blocks = 0);
 
   uint32_t GetTotalBits() const { return kTotalBits; }
@@ -66,7 +66,10 @@ class PlainTableBloomV1 {
   uint32_t kNumBlocks;
   const uint32_t kNumProbes;
-  uint8_t* data_;
+  char* data_;
+
+  static constexpr int LOG2_CACHE_LINE_SIZE =
+      folly::constexpr_log2(CACHE_LINE_SIZE);
 };
 
 #if defined(_MSC_VER)
@@ -76,8 +79,9 @@ class PlainTableBloomV1 {
 #endif
 inline void PlainTableBloomV1::Prefetch(uint32_t h) {
   if (kNumBlocks != 0) {
-    uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
-    PREFETCH(&(data_[b / 8]), 0, 3);
+    uint32_t ignored;
+    LegacyLocalityBloomImpl</*ExtraRotates*/ true>::PrepareHashMayMatch(
+        h, kNumBlocks, data_, &ignored, LOG2_CACHE_LINE_SIZE);
   }
 }
 #if defined(_MSC_VER)
@@ -86,54 +90,22 @@ inline void PlainTableBloomV1::Prefetch(uint32_t h) {
 inline bool PlainTableBloomV1::MayContainHash(uint32_t h) const {
   assert(IsInitialized());
-  const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
   if (kNumBlocks != 0) {
-    uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
-    for (uint32_t i = 0; i < kNumProbes; ++i) {
-      // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
-      // to a simple and operation by compiler.
-      const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
-      if ((data_[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
-        return false;
-      }
-      // Rotate h so that we don't reuse the same bytes.
-      h = h / (CACHE_LINE_SIZE * 8) +
-          (h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
-      h += delta;
-    }
+    return LegacyLocalityBloomImpl<true>::HashMayMatch(
+        h, kNumBlocks, kNumProbes, data_, LOG2_CACHE_LINE_SIZE);
   } else {
-    for (uint32_t i = 0; i < kNumProbes; ++i) {
-      const uint32_t bitpos = h % kTotalBits;
-      if ((data_[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
-        return false;
-      }
-      h += delta;
-    }
+    return LegacyNoLocalityBloomImpl::HashMayMatch(h, kTotalBits, kNumProbes,
+                                                   data_);
   }
-  return true;
 }
 
 inline void PlainTableBloomV1::AddHash(uint32_t h) {
   assert(IsInitialized());
-  const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
   if (kNumBlocks != 0) {
-    uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
-    for (uint32_t i = 0; i < kNumProbes; ++i) {
-      // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
-      // to a simple and operation by compiler.
-      const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
-      data_[bitpos / 8] |= (1 << (bitpos % 8));
-      // Rotate h so that we don't reuse the same bytes.
-      h = h / (CACHE_LINE_SIZE * 8) +
-          (h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
-      h += delta;
-    }
+    LegacyLocalityBloomImpl<true>::AddHash(h, kNumBlocks, kNumProbes, data_,
+                                           LOG2_CACHE_LINE_SIZE);
   } else {
-    for (uint32_t i = 0; i < kNumProbes; ++i) {
-      const uint32_t bitpos = h % kTotalBits;
-      data_[bitpos / 8] |= (1 << (bitpos % 8));
-      h += delta;
-    }
+    LegacyNoLocalityBloomImpl::AddHash(h, kTotalBits, kNumProbes, data_);
   }
 }

table/plain/plain_table_reader.cc
@@ -367,10 +367,9 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
       }
     }
     // cast away const qualifier, because bloom_ won't be changed
-    bloom_.SetRawData(
-        const_cast<unsigned char*>(
-            reinterpret_cast<const unsigned char*>(bloom_block->data())),
-        static_cast<uint32_t>(bloom_block->size()) * 8, num_blocks);
+    bloom_.SetRawData(const_cast<char*>(bloom_block->data()),
+                      static_cast<uint32_t>(bloom_block->size()) * 8,
+                      num_blocks);
   } else {
     // Index in file but no bloom in file. Disable bloom filter in this case.
     enable_bloom_ = false;

third-party/folly/folly/ConstexprMath.h
@@ -14,4 +14,32 @@ template <typename T, typename... Ts>
 constexpr T constexpr_max(T a, T b, Ts... ts) {
   return b < a ? constexpr_max(a, ts...) : constexpr_max(b, ts...);
 }
+
+namespace detail {
+template <typename T>
+constexpr T constexpr_log2_(T a, T e) {
+  return e == T(1) ? a : constexpr_log2_(a + T(1), e / T(2));
+}
+
+template <typename T>
+constexpr T constexpr_log2_ceil_(T l2, T t) {
+  return l2 + T(T(1) << l2 < t ? 1 : 0);
+}
+
+template <typename T>
+constexpr T constexpr_square_(T t) {
+  return t * t;
+}
+} // namespace detail
+
+template <typename T>
+constexpr T constexpr_log2(T t) {
+  return detail::constexpr_log2_(T(0), t);
+}
+
+template <typename T>
+constexpr T constexpr_log2_ceil(T t) {
+  return detail::constexpr_log2_ceil_(constexpr_log2(t), t);
+}
+
 } // namespace folly
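
As a quick compile-time sanity check on the recursions above (illustrative only, not part of the patch): constexpr_log2 returns the floor of log2 and constexpr_log2_ceil rounds up.

#include "third-party/folly/folly/ConstexprMath.h"

static_assert(folly::constexpr_log2(64u) == 6u, "floor(log2(64)) == 6");
static_assert(folly::constexpr_log2(127u) == 6u, "floor(log2(127)) == 6");
static_assert(folly::constexpr_log2_ceil(64u) == 6u, "ceil(log2(64)) == 6");
static_assert(folly::constexpr_log2_ceil(65u) == 7u, "ceil(log2(65)) == 7");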

util/bloom.cc
@@ -13,19 +13,22 @@
 #include "table/block_based/block_based_filter_block.h"
 #include "table/block_based/full_filter_block.h"
 #include "table/full_filter_bits_builder.h"
+#include "third-party/folly/folly/ConstexprMath.h"
+#include "util/bloom_impl.h"
 #include "util/coding.h"
 #include "util/hash.h"
 
 namespace rocksdb {
 
+typedef LegacyLocalityBloomImpl</*ExtraRotates*/ false> LegacyFullFilterImpl;
+
 class BlockBasedFilterBlockBuilder;
 class FullFilterBlockBuilder;
 
-FullFilterBitsBuilder::FullFilterBitsBuilder(const size_t bits_per_key,
-                                             const size_t num_probes)
+FullFilterBitsBuilder::FullFilterBitsBuilder(const int bits_per_key,
+                                             const int num_probes)
     : bits_per_key_(bits_per_key), num_probes_(num_probes) {
   assert(bits_per_key_);
 }
 
 FullFilterBitsBuilder::~FullFilterBitsBuilder() {}
@@ -74,7 +77,7 @@ uint32_t FullFilterBitsBuilder::CalculateSpace(const int num_entry,
                                                uint32_t* num_lines) {
   assert(bits_per_key_);
   if (num_entry != 0) {
-    uint32_t total_bits_tmp = num_entry * static_cast<uint32_t>(bits_per_key_);
+    uint32_t total_bits_tmp = static_cast<uint32_t>(num_entry * bits_per_key_);
 
     *total_bits = GetTotalBitsForLocality(total_bits_tmp);
     *num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
@@ -124,24 +127,15 @@ inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
 #endif
   assert(num_lines > 0 && total_bits > 0);
 
-  const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
-  uint32_t b = (h % num_lines) * (CACHE_LINE_SIZE * 8);
-  for (uint32_t i = 0; i < num_probes_; ++i) {
-    // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
-    // to a simple operation by compiler.
-    const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
-    data[bitpos / 8] |= (1 << (bitpos % 8));
-    h += delta;
-  }
+  LegacyFullFilterImpl::AddHash(h, num_lines, num_probes_, data,
+                                folly::constexpr_log2(CACHE_LINE_SIZE));
 }
 
 namespace {
 class FullFilterBitsReader : public FilterBitsReader {
  public:
   explicit FullFilterBitsReader(const Slice& contents)
-      : data_(const_cast<char*>(contents.data())),
+      : data_(contents.data()),
         data_len_(static_cast<uint32_t>(contents.size())),
         num_probes_(0),
         num_lines_(0),
@@ -177,16 +171,23 @@ class FullFilterBitsReader : public FilterBitsReader {
   ~FullFilterBitsReader() override {}
 
-  bool MayMatch(const Slice& entry) override {
+  // "contents" contains the data built by a preceding call to
+  // FilterBitsBuilder::Finish. MayMatch must return true if the key was
+  // passed to FilterBitsBuilder::AddKey. This method may return true or false
+  // if the key was not on the list, but it should aim to return false with a
+  // high probability.
+  bool MayMatch(const Slice& key) override {
     if (data_len_ <= 5) {  // remain same with original filter
       return false;
     }
     // Other Error params, including a broken filter, regarded as match
     if (num_probes_ == 0 || num_lines_ == 0) return true;
-    uint32_t hash = BloomHash(entry);
-    uint32_t bit_offset;
-    FilterPrepare(hash, Slice(data_, data_len_), num_lines_, &bit_offset);
-    return HashMayMatch(hash, Slice(data_, data_len_), num_probes_, bit_offset);
+    uint32_t hash = BloomHash(key);
+    uint32_t byte_offset;
+    LegacyFullFilterImpl::PrepareHashMayMatch(
+        hash, num_lines_, data_, /*out*/ &byte_offset, log2_cache_line_size_);
+    return LegacyFullFilterImpl::HashMayMatchPrepared(
+        hash, num_probes_, data_ + byte_offset, log2_cache_line_size_);
   }
 
   virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
@@ -202,16 +203,18 @@ class FullFilterBitsReader : public FilterBitsReader {
     // Other Error params, including a broken filter, regarded as match
     if (num_probes_ == 0 || num_lines_ == 0) return;
     uint32_t hashes[MultiGetContext::MAX_BATCH_SIZE];
-    uint32_t bit_offsets[MultiGetContext::MAX_BATCH_SIZE];
+    uint32_t byte_offsets[MultiGetContext::MAX_BATCH_SIZE];
     for (int i = 0; i < num_keys; ++i) {
       hashes[i] = BloomHash(*keys[i]);
-      FilterPrepare(hashes[i], Slice(data_, data_len_), num_lines_,
-                    &bit_offsets[i]);
+      LegacyFullFilterImpl::PrepareHashMayMatch(hashes[i], num_lines_, data_,
+                                                /*out*/ &byte_offsets[i],
+                                                log2_cache_line_size_);
     }
     for (int i = 0; i < num_keys; ++i) {
-      if (!HashMayMatch(hashes[i], Slice(data_, data_len_), num_probes_,
-                        bit_offsets[i])) {
+      if (!LegacyFullFilterImpl::HashMayMatchPrepared(hashes[i], num_probes_,
+                                                      data_ + byte_offsets[i],
+                                                      log2_cache_line_size_)) {
         may_match[i] = false;
       }
     }
@@ -219,38 +222,19 @@ class FullFilterBitsReader : public FilterBitsReader {
  private:
   // Filter meta data
-  char* data_;
+  const char* data_;
   uint32_t data_len_;
-  size_t num_probes_;
+  int num_probes_;
   uint32_t num_lines_;
   uint32_t log2_cache_line_size_;
 
   // Get num_probes, and num_lines from filter
   // If filter format broken, set both to 0.
-  void GetFilterMeta(const Slice& filter, size_t* num_probes,
-                     uint32_t* num_lines);
-
-  // "filter" contains the data appended by a preceding call to
-  // FilterBitsBuilder::Finish. This method must return true if the key was
-  // passed to FilterBitsBuilder::AddKey. This method may return true or false
-  // if the key was not on the list, but it should aim to return false with a
-  // high probability.
-  //
-  // hash: target to be checked
-  // filter: the whole filter, including meta data bytes
-  // num_probes: number of probes, read before hand
-  // num_lines: filter metadata, read before hand
-  // Before calling this function, need to ensure the input meta data
-  // is valid.
-  bool HashMayMatch(const uint32_t& hash, const Slice& filter,
-                    const size_t& num_probes, const uint32_t& bit_offset);
-
-  void FilterPrepare(const uint32_t& hash, const Slice& filter,
-                     const uint32_t& num_lines, uint32_t* bit_offset);
+  void GetFilterMeta(const Slice& filter, int* num_probes, uint32_t* num_lines);
 };
 
-void FullFilterBitsReader::GetFilterMeta(const Slice& filter,
-                                         size_t* num_probes, uint32_t* num_lines) {
+void FullFilterBitsReader::GetFilterMeta(const Slice& filter, int* num_probes,
                                          uint32_t* num_lines) {
   uint32_t len = static_cast<uint32_t>(filter.size());
   if (len <= 5) {
     // filter is empty or broken
@@ -263,54 +247,6 @@ void FullFilterBitsReader::GetFilterMeta(const Slice& filter,
   *num_lines = DecodeFixed32(filter.data() + len - 4);
 }
 
-void FullFilterBitsReader::FilterPrepare(const uint32_t& hash,
-                                         const Slice& filter,
-                                         const uint32_t& num_lines,
-                                         uint32_t* bit_offset) {
-  uint32_t len = static_cast<uint32_t>(filter.size());
-  if (len <= 5) return;  // remain the same with original filter
-
-  // It is ensured the params are valid before calling it
-  assert(num_lines != 0 && (len - 5) % num_lines == 0);
-
-  uint32_t h = hash;
-  // Left shift by an extra 3 to convert bytes to bits
-  uint32_t b = (h % num_lines) << (log2_cache_line_size_ + 3);
-  PREFETCH(&filter.data()[b / 8], 0 /* rw */, 1 /* locality */);
-  PREFETCH(&filter.data()[b / 8 + (1 << log2_cache_line_size_) - 1],
-           0 /* rw */, 1 /* locality */);
-  *bit_offset = b;
-}
-
-bool FullFilterBitsReader::HashMayMatch(const uint32_t& hash,
-                                        const Slice& filter,
-                                        const size_t& num_probes,
-                                        const uint32_t& bit_offset) {
-  uint32_t len = static_cast<uint32_t>(filter.size());
-  if (len <= 5) return false;  // remain the same with original filter
-
-  // It is ensured the params are valid before calling it
-  assert(num_probes != 0);
-  const char* data = filter.data();
-
-  uint32_t h = hash;
-  const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
-  for (uint32_t i = 0; i < num_probes; ++i) {
-    // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
-    // to a simple and operation by compiler.
-    const uint32_t bitpos =
-        bit_offset + (h & ((1 << (log2_cache_line_size_ + 3)) - 1));
-    if (((data[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
-      return false;
-    }
-    h += delta;
-  }
-  return true;
-}
-
 // An implementation of filter policy
 class BloomFilterPolicy : public FilterPolicy {
  public:
@@ -326,56 +262,45 @@ class BloomFilterPolicy : public FilterPolicy {
   void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
     // Compute bloom filter size (in both bits and bytes)
-    size_t bits = n * bits_per_key_;
+    uint32_t bits = static_cast<uint32_t>(n * bits_per_key_);
 
     // For small n, we can see a very high false positive rate. Fix it
     // by enforcing a minimum bloom filter length.
     if (bits < 64) bits = 64;
 
-    size_t bytes = (bits + 7) / 8;
+    uint32_t bytes = (bits + 7) / 8;
     bits = bytes * 8;
 
     const size_t init_size = dst->size();
     dst->resize(init_size + bytes, 0);
     dst->push_back(static_cast<char>(num_probes_));  // Remember # of probes
     char* array = &(*dst)[init_size];
-    for (size_t i = 0; i < static_cast<size_t>(n); i++) {
-      // Use double-hashing to generate a sequence of hash values.
-      // See analysis in [Kirsch,Mitzenmacher 2006].
-      uint32_t h = hash_func_(keys[i]);
-      const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
-      for (size_t j = 0; j < num_probes_; j++) {
-        const uint32_t bitpos = h % bits;
-        array[bitpos / 8] |= (1 << (bitpos % 8));
-        h += delta;
-      }
+    for (int i = 0; i < n; i++) {
+      LegacyNoLocalityBloomImpl::AddHash(hash_func_(keys[i]), bits, num_probes_,
+                                         array);
     }
   }
 
   bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
     const size_t len = bloom_filter.size();
-    if (len < 2) return false;
+    if (len < 2 || len > 0xffffffffU) {
+      return false;
+    }
 
     const char* array = bloom_filter.data();
-    const size_t bits = (len - 1) * 8;
+    const uint32_t bits = static_cast<uint32_t>(len - 1) * 8;
 
     // Use the encoded k so that we can read filters generated by
     // bloom filters created using different parameters.
-    const size_t k = array[len - 1];
+    const int k = static_cast<uint8_t>(array[len - 1]);
     if (k > 30) {
       // Reserved for potentially new encodings for short bloom filters.
       // Consider it a match.
       return true;
     }
-
-    uint32_t h = hash_func_(key);
-    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
-    for (size_t j = 0; j < k; j++) {
-      const uint32_t bitpos = h % bits;
-      if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false;
-      h += delta;
-    }
-    return true;
+    // NB: using k not num_probes_
+    return LegacyNoLocalityBloomImpl::HashMayMatch(hash_func_(key), bits, k,
+                                                   array);
   }
 
   FilterBitsBuilder* GetFilterBitsBuilder() const override {
@@ -394,15 +319,15 @@ class BloomFilterPolicy : public FilterPolicy {
   bool UseBlockBasedBuilder() { return use_block_based_builder_; }
 
  private:
-  size_t bits_per_key_;
-  size_t num_probes_;
+  int bits_per_key_;
+  int num_probes_;
   uint32_t (*hash_func_)(const Slice& key);
   const bool use_block_based_builder_;
 
   void initialize() {
     // We intentionally round down to reduce probing cost a little bit
-    num_probes_ = static_cast<size_t>(bits_per_key_ * 0.69);  // 0.69 =~ ln(2)
+    num_probes_ = static_cast<int>(bits_per_key_ * 0.69);  // 0.69 =~ ln(2)
     if (num_probes_ < 1) num_probes_ = 1;
     if (num_probes_ > 30) num_probes_ = 30;

util/bloom_impl.h (new file)
@@ -0,0 +1,140 @@
// Copyright (c) 2019-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Implementation details of various Bloom filter implementations used in
// RocksDB. (DynamicBloom is in a separate file for now because it
// supports concurrent write.)

#pragma once

#include <stddef.h>
#include <stdint.h>

#include "rocksdb/slice.h"

namespace rocksdb {

// A legacy Bloom filter implementation with no locality of probes (slow).
// It uses double hashing to generate a sequence of hash values.
// Asymptotic analysis is in [Kirsch,Mitzenmacher 2006], but known to have
// subtle accuracy flaws for practical sizes [Dillinger,Manolios 2004].
//
// DO NOT REUSE - faster and more predictably accurate implementations
// are available at
// https://github.com/pdillinger/wormhashing/blob/master/bloom_simulation_tests/foo.cc
// See e.g. RocksDB DynamicBloom.
//
class LegacyNoLocalityBloomImpl {
 public:
  static inline void AddHash(uint32_t h, uint32_t total_bits,
                             int num_probes, char *data) {
    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
    for (int i = 0; i < num_probes; i++) {
      const uint32_t bitpos = h % total_bits;
      data[bitpos / 8] |= (1 << (bitpos % 8));
      h += delta;
    }
  }

  static inline bool HashMayMatch(uint32_t h, uint32_t total_bits,
                                  int num_probes, const char *data) {
    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
    for (int i = 0; i < num_probes; i++) {
      const uint32_t bitpos = h % total_bits;
      if ((data[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
        return false;
      }
      h += delta;
    }
    return true;
  }
};

// A legacy Bloom filter implementation with probes local to a single
// cache line (fast). Because SST files might be transported between
// platforms, the cache line size is a parameter rather than hard coded.
// (But if specified as a constant parameter, an optimizing compiler
// should take advantage of that.)
//
// When ExtraRotates is false, this implementation is notably deficient in
// accuracy. Specifically, it uses double hashing with a 1/512 chance of the
// increment being zero (when cache line size is 512 bits). Thus, there's a
// 1/512 chance of probing only one index, which we'd expect to incur about
// a 1/2 * 1/512 or absolute 0.1% FP rate penalty. More detail at
// https://github.com/facebook/rocksdb/issues/4120
//
// DO NOT REUSE - faster and more predictably accurate implementations
// are available at
// https://github.com/pdillinger/wormhashing/blob/master/bloom_simulation_tests/foo.cc
// See e.g. RocksDB DynamicBloom.
//
template <bool ExtraRotates>
class LegacyLocalityBloomImpl {
 private:
  static inline uint32_t GetLine(uint32_t h, uint32_t num_lines) {
    uint32_t offset_h = ExtraRotates ? (h >> 11) | (h << 21) : h;
    return offset_h % num_lines;
  }

 public:
  static inline void AddHash(uint32_t h, uint32_t num_lines,
                             int num_probes, char *data,
                             int log2_cache_line_bytes) {
    const int log2_cache_line_bits = log2_cache_line_bytes + 3;

    char *data_at_offset =
        data + (GetLine(h, num_lines) << log2_cache_line_bytes);
    const uint32_t delta = (h >> 17) | (h << 15);
    for (int i = 0; i < num_probes; ++i) {
      // Mask to bit-within-cache-line address
      const uint32_t bitpos = h & ((1 << log2_cache_line_bits) - 1);
      data_at_offset[bitpos / 8] |= (1 << (bitpos % 8));
      if (ExtraRotates) {
        h = (h >> log2_cache_line_bits) | (h << (32 - log2_cache_line_bits));
      }
      h += delta;
    }
  }

  static inline void PrepareHashMayMatch(uint32_t h, uint32_t num_lines,
                                         const char *data,
                                         uint32_t /*out*/ *byte_offset,
                                         int log2_cache_line_bytes) {
    uint32_t b = GetLine(h, num_lines) << log2_cache_line_bytes;
    PREFETCH(data + b, 0 /* rw */, 1 /* locality */);
    PREFETCH(data + b + ((1 << log2_cache_line_bytes) - 1),
             0 /* rw */, 1 /* locality */);
    *byte_offset = b;
  }

  static inline bool HashMayMatch(uint32_t h, uint32_t num_lines,
                                  int num_probes, const char *data,
                                  int log2_cache_line_bytes) {
    uint32_t b = GetLine(h, num_lines) << log2_cache_line_bytes;
    return HashMayMatchPrepared(h, num_probes,
                                data + b, log2_cache_line_bytes);
  }

  static inline bool HashMayMatchPrepared(uint32_t h, int num_probes,
                                          const char *data_at_offset,
                                          int log2_cache_line_bytes) {
    const int log2_cache_line_bits = log2_cache_line_bytes + 3;

    const uint32_t delta = (h >> 17) | (h << 15);
    for (int i = 0; i < num_probes; ++i) {
      // Mask to bit-within-cache-line address
      const uint32_t bitpos = h & ((1 << log2_cache_line_bits) - 1);
      if (((data_at_offset[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
        return false;
      }
      if (ExtraRotates) {
        h = (h >> log2_cache_line_bits) | (h << (32 - log2_cache_line_bits));
      }
      h += delta;
    }
    return true;
  }
};

}  // namespace rocksdb
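
To make the intended calling pattern of the consolidated implementations concrete, here is a hedged usage sketch (buffer sizes and hash values are made up for illustration; in RocksDB the hashes come from BloomHash and the buffers from filter blocks):

#include <vector>

#include "util/bloom_impl.h"

bool ExampleRoundTrip() {
  using rocksdb::LegacyNoLocalityBloomImpl;
  using Local = rocksdb::LegacyLocalityBloomImpl</*ExtraRotates*/ true>;

  // No-locality variant: probes may land anywhere in a total_bits-sized array.
  std::vector<char> bits(1024 / 8, 0);  // 1024-bit filter
  LegacyNoLocalityBloomImpl::AddHash(/*h=*/0x9e3779b9, /*total_bits=*/1024,
                                     /*num_probes=*/6, bits.data());
  bool found = LegacyNoLocalityBloomImpl::HashMayMatch(0x9e3779b9, 1024, 6,
                                                       bits.data());

  // Cache-local variant: all probes for a hash stay within one 64-byte line.
  constexpr int kLog2LineBytes = 6;  // 64-byte cache lines
  constexpr uint32_t kNumLines = 16;
  std::vector<char> lines(kNumLines << kLog2LineBytes, 0);
  Local::AddHash(0x12345678, kNumLines, /*num_probes=*/6, lines.data(),
                 kLog2LineBytes);
  found = found && Local::HashMayMatch(0x12345678, kNumLines, 6, lines.data(),
                                       kLog2LineBytes);
  return found;  // always true: hashes that were added must match
}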

util/bloom_test.cc
@@ -352,7 +352,8 @@ TEST_F(FullBloomTest, FullVaryingLengths) {
     }
     Build();
 
-    ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5)) << length;
+    ASSERT_LE(FilterSize(),
+              (size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5));
 
     // All added keys must match
     for (int i = 0; i < length; i++) {
