Add regression test for serialized Bloom filters (#5778)

Summary:
Check that we don't accidentally change the on-disk format of
existing Bloom filter implementations, including for each supported
CACHE_LINE_SIZE (verified by temporarily changing its value).
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5778

Test Plan: thisisthetest

Differential Revision: D17269630

Pulled By: pdillinger

fbshipit-source-id: c77017662f010a77603b7d475892b1f0d5563d8b
main
Peter Dillinger 5 years ago committed by Facebook Github Bot
parent fbab9913e2
commit 108c619acb
  1. 15
      db/plain_table_db_test.cc
  2. 164
      util/bloom_test.cc

@ -770,8 +770,19 @@ TEST_P(PlainTableDBTest, BloomSchema) {
for (unsigned i = 0; i < 32; ++i) { for (unsigned i = 0; i < 32; ++i) {
// Known pattern of Bloom filter false positives can detect schema change // Known pattern of Bloom filter false positives can detect schema change
// with high probability. Known FPs stuffed into bits: // with high probability. Known FPs stuffed into bits:
bool expect_fp = (bloom_locality ? 2421694657UL : 1785868347UL) uint32_t pattern;
& (1UL << i); if (!bloom_locality) {
pattern = 1785868347UL;
} else if (CACHE_LINE_SIZE == 64) {
pattern = 2421694657UL;
} else if (CACHE_LINE_SIZE == 128) {
pattern = 788710956UL;
} else {
ASSERT_EQ(CACHE_LINE_SIZE, 256);
pattern = 163905UL;
}
bool expect_fp = pattern & (1UL << i);
//fprintf(stderr, "expect_fp@%u: %d\n", i, (int)expect_fp);
expect_bloom_not_match = !expect_fp; expect_bloom_not_match = !expect_fp;
ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n'))); ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n')));
} }

@ -23,6 +23,7 @@ int main() {
#include "table/full_filter_bits_builder.h" #include "table/full_filter_bits_builder.h"
#include "test_util/testharness.h" #include "test_util/testharness.h"
#include "test_util/testutil.h" #include "test_util/testutil.h"
#include "util/hash.h"
#include "util/gflags_compat.h" #include "util/gflags_compat.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags; using GFLAGS_NAMESPACE::ParseCommandLineFlags;
@ -55,7 +56,7 @@ static int NextLength(int length) {
class BloomTest : public testing::Test { class BloomTest : public testing::Test {
private: private:
const FilterPolicy* policy_; std::unique_ptr<const FilterPolicy> policy_;
std::string filter_; std::string filter_;
std::vector<std::string> keys_; std::vector<std::string> keys_;
@ -63,13 +64,20 @@ class BloomTest : public testing::Test {
BloomTest() : policy_( BloomTest() : policy_(
NewBloomFilterPolicy(FLAGS_bits_per_key)) {} NewBloomFilterPolicy(FLAGS_bits_per_key)) {}
~BloomTest() override { delete policy_; }
void Reset() { void Reset() {
keys_.clear(); keys_.clear();
filter_.clear(); filter_.clear();
} }
// Replaces the filter policy under test and clears accumulated state.
// `policy` is handed to the owning policy_ pointer; when it is null, a
// policy built from FLAGS_bits_per_key is installed instead.
void ResetPolicy(const FilterPolicy* policy = nullptr) {
  policy_.reset(policy != nullptr ? policy
                                  : NewBloomFilterPolicy(FLAGS_bits_per_key));
  // Drop keys and filter bytes collected under the previous policy.
  Reset();
}
void Add(const Slice& s) { void Add(const Slice& s) {
keys_.push_back(s.ToString()); keys_.push_back(s.ToString());
} }
@ -90,6 +98,10 @@ class BloomTest : public testing::Test {
return filter_.size(); return filter_.size();
} }
// Returns a non-owning view of the current contents of filter_.
Slice FilterData() const {
  return Slice(filter_.data(), filter_.size());
}
void DumpFilter() { void DumpFilter() {
fprintf(stderr, "F("); fprintf(stderr, "F(");
for (size_t i = 0; i+1 < filter_.size(); i++) { for (size_t i = 0; i+1 < filter_.size(); i++) {
@ -173,11 +185,62 @@ TEST_F(BloomTest, VaryingLengths) {
ASSERT_LE(mediocre_filters, good_filters/5); ASSERT_LE(mediocre_filters, good_filters/5);
} }
// Ensure the implementation doesn't accidentally change in an
// incompatible way: each scenario builds a filter from a fixed key range
// and compares BloomHash of the filter data against a recorded value.
TEST_F(BloomTest, Schema) {
  // Keys [first_key, end_key) are added under a policy created with
  // bits_per_key; the built filter must hash to expected_hash.
  struct SchemaCase {
    int bits_per_key;
    int first_key;
    int end_key;
    uint32_t expected_hash;
  };
  const SchemaCase kCases[] = {
      {8, 0, 87, 3589896109U},   // num_probes = 5
      {9, 0, 87, 969445585},     // num_probes = 6
      {11, 0, 87, 1694458207},   // num_probes = 7
      {10, 0, 87, 2373646410U},  // num_probes = 6
      {10, 1, 87, 1908442116},
      {10, 1, 88, 3057004015U},
  };

  char buffer[sizeof(int)];
  for (const SchemaCase& c : kCases) {
    ResetPolicy(NewBloomFilterPolicy(c.bits_per_key));
    for (int key = c.first_key; key < c.end_key; ++key) {
      Add(Key(key, buffer));
    }
    Build();
    // Asserted directly in the test body so the first mismatch aborts
    // the whole test.
    ASSERT_EQ(BloomHash(FilterData()), c.expected_hash);
  }

  // Put the default policy back once every scenario has passed.
  ResetPolicy();
}
// Different bits-per-byte // Different bits-per-byte
class FullBloomTest : public testing::Test { class FullBloomTest : public testing::Test {
private: private:
const FilterPolicy* policy_; std::unique_ptr<const FilterPolicy> policy_;
std::unique_ptr<FilterBitsBuilder> bits_builder_; std::unique_ptr<FilterBitsBuilder> bits_builder_;
std::unique_ptr<FilterBitsReader> bits_reader_; std::unique_ptr<FilterBitsReader> bits_reader_;
std::unique_ptr<const char[]> buf_; std::unique_ptr<const char[]> buf_;
@ -190,8 +253,6 @@ class FullBloomTest : public testing::Test {
Reset(); Reset();
} }
~FullBloomTest() override { delete policy_; }
FullFilterBitsBuilder* GetFullFilterBitsBuilder() { FullFilterBitsBuilder* GetFullFilterBitsBuilder() {
return dynamic_cast<FullFilterBitsBuilder*>(bits_builder_.get()); return dynamic_cast<FullFilterBitsBuilder*>(bits_builder_.get());
} }
@ -203,6 +264,15 @@ class FullBloomTest : public testing::Test {
filter_size_ = 0; filter_size_ = 0;
} }
// Replaces the filter policy under test, then calls Reset().
// `policy` is handed to the owning policy_ pointer; when it is null, a
// policy created via NewBloomFilterPolicy(FLAGS_bits_per_key, false) is
// installed instead.
void ResetPolicy(const FilterPolicy* policy = nullptr) {
  policy_.reset(policy != nullptr
                    ? policy
                    : NewBloomFilterPolicy(FLAGS_bits_per_key, false));
  Reset();
}
void Add(const Slice& s) { void Add(const Slice& s) {
bits_builder_->AddKey(s); bits_builder_->AddKey(s);
} }
@ -217,6 +287,10 @@ class FullBloomTest : public testing::Test {
return filter_size_; return filter_size_;
} }
// Returns a non-owning view covering the first filter_size_ bytes of buf_.
Slice FilterData() {
  const char* const bytes = buf_.get();
  return Slice(bytes, filter_size_);
}
bool Matches(const Slice& s) { bool Matches(const Slice& s) {
if (bits_reader_ == nullptr) { if (bits_reader_ == nullptr) {
Build(); Build();
@ -305,6 +379,84 @@ TEST_F(FullBloomTest, FullVaryingLengths) {
ASSERT_LE(mediocre_filters, good_filters/5); ASSERT_LE(mediocre_filters, good_filters/5);
} }
namespace {
inline uint32_t SelectByCacheLineSize(uint32_t for64,
uint32_t for128,
uint32_t for256) {
(void)for64;
(void)for128;
(void)for256;
#if CACHE_LINE_SIZE == 64
return for64;
#elif CACHE_LINE_SIZE == 128
return for128;
#elif CACHE_LINE_SIZE == 256
return for256;
#else
#error "CACHE_LINE_SIZE unknown or unrecognized"
#endif
}
} // namespace
// Ensure the implementation doesn't accidentally change in an
// incompatible way: each scenario builds a filter from a fixed key range
// and compares BloomHash of the filter data against the value recorded
// for the compiled-in CACHE_LINE_SIZE.
TEST_F(FullBloomTest, Schema) {
  // Keys [first_key, end_key) are added under a policy created with
  // bits_per_key; for64/for128/for256 are the expected hashes per cache
  // line size.
  struct SchemaCase {
    int bits_per_key;
    int first_key;
    int end_key;
    uint32_t for64;
    uint32_t for128;
    uint32_t for256;
  };
  // Use enough keys so that changing bits / key by 1 is guaranteed to
  // change number of allocated cache lines. So keys > max cache line bits.
  const SchemaCase kCases[] = {
      // num_probes = 5
      {8, 0, 2087, 1302145999, 2811644657U, 756553699},
      // num_probes = 6
      {9, 0, 2087, 2092755149, 661139132, 1182970461},
      // num_probes = 7
      {11, 0, 2087, 3755609649U, 1812694762, 1449142939},
      // num_probes = 6
      {10, 0, 2087, 1478976371, 2910591341U, 1182970461},
      {10, 1, 2087, 4205696321U, 1132081253U, 2385981855U},
      {10, 1, 2088, 2885052954U, 769447944, 4175124908U},
  };

  char buffer[sizeof(int)];
  for (const SchemaCase& c : kCases) {
    ResetPolicy(NewBloomFilterPolicy(c.bits_per_key));
    for (int key = c.first_key; key < c.end_key; ++key) {
      Add(Key(key, buffer));
    }
    Build();
    // Asserted directly in the test body so the first mismatch aborts
    // the whole test.
    ASSERT_EQ(BloomHash(FilterData()),
              SelectByCacheLineSize(c.for64, c.for128, c.for256));
  }

  // Put the default policy back once every scenario has passed.
  ResetPolicy();
}
} // namespace rocksdb } // namespace rocksdb
int main(int argc, char** argv) { int main(int argc, char** argv) {

Loading…
Cancel
Save