Prepare filter tests for more implementations (#5967)

Summary:
This change sets up for alternate implementations underlying
BloomFilterPolicy:

* Refactor BloomFilterPolicy and expose in internal .h file so that it's easy to iterate over / select implementations for testing, regardless of what the best public interface will look like. Most notably updated db_bloom_filter_test to use this.
* Hide FullFilterBitsBuilder from unit tests (alternate derived classes planned); expose the part important for testing (CalculateSpace), as abstract class BuiltinFilterBitsBuilder. (Also cleaned up internally exposed interface to CalculateSpace.)
* Rename BloomTest -> BlockBasedBloomTest for clarity (despite ongoing confusion between block-based table and block-based filter)
* Assert that the block-based filter construction interface is only used on an appropriately constructed BloomFilterPolicy. (A couple of tests were updated to add ", true".)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5967

Test Plan: make check

Differential Revision: D18138704

Pulled By: pdillinger

fbshipit-source-id: 55ef9273423b0696309e251f50b8c1b5e9ec7597
main
Peter Dillinger 5 years ago committed by Facebook Github Bot
parent 351e25401b
commit 685e895652
  1. 136
      db/db_bloom_filter_test.cc
  2. 4
      include/rocksdb/filter_policy.h
  3. 2
      table/block_based/block_based_filter_block_test.cc
  4. 140
      table/block_based/filter_policy.cc
  5. 90
      table/block_based/filter_policy_internal.h
  6. 44
      util/bloom_test.cc

@ -10,9 +10,14 @@
#include "db/db_test_util.h" #include "db/db_test_util.h"
#include "port/stack_trace.h" #include "port/stack_trace.h"
#include "rocksdb/perf_context.h" #include "rocksdb/perf_context.h"
#include "table/block_based/filter_policy_internal.h"
namespace rocksdb { namespace rocksdb {
namespace {
using BFP = BloomFilterPolicy;
} // namespace
// DB tests related to bloom filter. // DB tests related to bloom filter.
class DBBloomFilterTest : public DBTestBase { class DBBloomFilterTest : public DBTestBase {
@ -20,12 +25,12 @@ class DBBloomFilterTest : public DBTestBase {
DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {} DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {}
}; };
class DBBloomFilterTestWithParam class DBBloomFilterTestWithParam : public DBTestBase,
: public DBTestBase, public testing::WithParamInterface<
public testing::WithParamInterface<std::tuple<bool, bool, uint32_t>> { std::tuple<BFP::Impl, bool, uint32_t>> {
// public testing::WithParamInterface<bool> { // public testing::WithParamInterface<bool> {
protected: protected:
bool use_block_based_filter_; BFP::Impl bfp_impl_;
bool partition_filters_; bool partition_filters_;
uint32_t format_version_; uint32_t format_version_;
@ -35,7 +40,7 @@ class DBBloomFilterTestWithParam
~DBBloomFilterTestWithParam() override {} ~DBBloomFilterTestWithParam() override {}
void SetUp() override { void SetUp() override {
use_block_based_filter_ = std::get<0>(GetParam()); bfp_impl_ = std::get<0>(GetParam());
partition_filters_ = std::get<1>(GetParam()); partition_filters_ = std::get<1>(GetParam());
format_version_ = std::get<2>(GetParam()); format_version_ = std::get<2>(GetParam());
} }
@ -71,8 +76,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
ReadOptions ropts; ReadOptions ropts;
std::string value; std::string value;
anon::OptionsOverride options_override; anon::OptionsOverride options_override;
options_override.filter_policy.reset( options_override.filter_policy.reset(new BFP(20, bfp_impl_));
NewBloomFilterPolicy(20, use_block_based_filter_));
options_override.partition_filters = partition_filters_; options_override.partition_filters = partition_filters_;
options_override.metadata_block_size = 32; options_override.metadata_block_size = 32;
Options options = CurrentOptions(options_override); Options options = CurrentOptions(options_override);
@ -432,8 +436,7 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
// trigger reset of table_factory // trigger reset of table_factory
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.no_block_cache = true; table_options.no_block_cache = true;
table_options.filter_policy.reset( table_options.filter_policy.reset(new BFP(10, bfp_impl_));
NewBloomFilterPolicy(10, use_block_based_filter_));
table_options.partition_filters = partition_filters_; table_options.partition_filters = partition_filters_;
if (partition_filters_) { if (partition_filters_) {
table_options.index_type = table_options.index_type =
@ -502,24 +505,24 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
#ifndef ROCKSDB_VALGRIND_RUN #ifndef ROCKSDB_VALGRIND_RUN
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestDefFormatVersion, FormatDef, DBBloomFilterTestDefFormatVersion,
::testing::Values(std::make_tuple(true, false, test::kDefaultFormatVersion), ::testing::Values(
std::make_tuple(false, true, test::kDefaultFormatVersion), std::make_tuple(BFP::kBlock, false, test::kDefaultFormatVersion),
std::make_tuple(false, false, std::make_tuple(BFP::kFull, true, test::kDefaultFormatVersion),
test::kDefaultFormatVersion))); std::make_tuple(BFP::kFull, false, test::kDefaultFormatVersion)));
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestWithParam, FormatDef, DBBloomFilterTestWithParam,
::testing::Values(std::make_tuple(true, false, test::kDefaultFormatVersion), ::testing::Values(
std::make_tuple(false, true, test::kDefaultFormatVersion), std::make_tuple(BFP::kBlock, false, test::kDefaultFormatVersion),
std::make_tuple(false, false, std::make_tuple(BFP::kFull, true, test::kDefaultFormatVersion),
test::kDefaultFormatVersion))); std::make_tuple(BFP::kFull, false, test::kDefaultFormatVersion)));
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
FormatLatest, DBBloomFilterTestWithParam, FormatLatest, DBBloomFilterTestWithParam,
::testing::Values(std::make_tuple(true, false, test::kLatestFormatVersion), ::testing::Values(
std::make_tuple(false, true, test::kLatestFormatVersion), std::make_tuple(BFP::kBlock, false, test::kLatestFormatVersion),
std::make_tuple(false, false, std::make_tuple(BFP::kFull, true, test::kLatestFormatVersion),
test::kLatestFormatVersion))); std::make_tuple(BFP::kFull, false, test::kLatestFormatVersion)));
#endif // ROCKSDB_VALGRIND_RUN #endif // ROCKSDB_VALGRIND_RUN
TEST_F(DBBloomFilterTest, BloomFilterRate) { TEST_F(DBBloomFilterTest, BloomFilterRate) {
@ -640,7 +643,7 @@ namespace {
class WrappedBloom : public FilterPolicy { class WrappedBloom : public FilterPolicy {
public: public:
explicit WrappedBloom(int bits_per_key) explicit WrappedBloom(int bits_per_key)
: filter_(NewBloomFilterPolicy(bits_per_key)), counter_(0) {} : filter_(NewBloomFilterPolicy(bits_per_key, true)), counter_(0) {}
~WrappedBloom() override { delete filter_; } ~WrappedBloom() override { delete filter_; }
@ -858,11 +861,11 @@ TEST_F(DBBloomFilterTest, MemtablePrefixBloomOutOfDomain) {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
class BloomStatsTestWithParam class BloomStatsTestWithParam
: public DBBloomFilterTest, : public DBBloomFilterTest,
public testing::WithParamInterface<std::tuple<bool, bool, bool>> { public testing::WithParamInterface<std::tuple<bool, BFP::Impl, bool>> {
public: public:
BloomStatsTestWithParam() { BloomStatsTestWithParam() {
use_block_table_ = std::get<0>(GetParam()); use_block_table_ = std::get<0>(GetParam());
use_block_based_builder_ = std::get<1>(GetParam()); bfp_impl_ = std::get<1>(GetParam());
partition_filters_ = std::get<2>(GetParam()); partition_filters_ = std::get<2>(GetParam());
options_.create_if_missing = true; options_.create_if_missing = true;
@ -873,13 +876,12 @@ class BloomStatsTestWithParam
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.hash_index_allow_collision = false; table_options.hash_index_allow_collision = false;
if (partition_filters_) { if (partition_filters_) {
assert(!use_block_based_builder_); assert(bfp_impl_ != BFP::kBlock);
table_options.partition_filters = partition_filters_; table_options.partition_filters = partition_filters_;
table_options.index_type = table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
} }
table_options.filter_policy.reset( table_options.filter_policy.reset(new BFP(10, bfp_impl_));
NewBloomFilterPolicy(10, use_block_based_builder_));
options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
} else { } else {
assert(!partition_filters_); // not supported in plain table assert(!partition_filters_); // not supported in plain table
@ -902,7 +904,7 @@ class BloomStatsTestWithParam
static void TearDownTestCase() {} static void TearDownTestCase() {}
bool use_block_table_; bool use_block_table_;
bool use_block_based_builder_; BFP::Impl bfp_impl_;
bool partition_filters_; bool partition_filters_;
Options options_; Options options_;
}; };
@ -1006,7 +1008,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
ASSERT_EQ(value3, iter->value().ToString()); ASSERT_EQ(value3, iter->value().ToString());
// The seek doesn't check block-based bloom filter because last index key // The seek doesn't check block-based bloom filter because last index key
// starts with the same prefix we're seeking to. // starts with the same prefix we're seeking to.
uint64_t expected_hits = use_block_based_builder_ ? 1 : 2; uint64_t expected_hits = bfp_impl_ == BFP::kBlock ? 1 : 2;
ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count);
iter->Seek(key2); iter->Seek(key2);
@ -1016,12 +1018,12 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count);
} }
INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam, INSTANTIATE_TEST_CASE_P(
::testing::Values(std::make_tuple(true, true, false), BloomStatsTestWithParam, BloomStatsTestWithParam,
std::make_tuple(true, false, false), ::testing::Values(std::make_tuple(true, BFP::kBlock, false),
std::make_tuple(true, false, true), std::make_tuple(true, BFP::kFull, false),
std::make_tuple(false, false, std::make_tuple(true, BFP::kFull, true),
false))); std::make_tuple(false, BFP::kFull, false)));
namespace { namespace {
void PrefixScanInit(DBBloomFilterTest* dbtest) { void PrefixScanInit(DBBloomFilterTest* dbtest) {
@ -1328,8 +1330,8 @@ int CountIter(std::unique_ptr<Iterator>& iter, const Slice& key) {
// into the same string, or 2) the transformed seek key is of the same length // into the same string, or 2) the transformed seek key is of the same length
// as the upper bound and two keys are adjacent according to the comparator. // as the upper bound and two keys are adjacent according to the comparator.
TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
int iteration = 0; for (auto bfp_impl : BFP::kAllImpls) {
for (bool use_block_based_builder : {true, false}) { int using_full_builder = bfp_impl != BFP::kBlock;
Options options; Options options;
options.create_if_missing = true; options.create_if_missing = true;
options.prefix_extractor.reset(NewCappedPrefixTransform(4)); options.prefix_extractor.reset(NewCappedPrefixTransform(4));
@ -1338,8 +1340,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
// Enable prefix bloom for SST files // Enable prefix bloom for SST files
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true; table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset( table_options.filter_policy.reset(new BFP(10, bfp_impl));
NewBloomFilterPolicy(10, use_block_based_builder));
table_options.index_shortening = BlockBasedTableOptions:: table_options.index_shortening = BlockBasedTableOptions::
IndexShorteningMode::kShortenSeparatorsAndSuccessor; IndexShorteningMode::kShortenSeparatorsAndSuccessor;
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
@ -1382,7 +1383,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
ASSERT_EQ(CountIter(iter, "abcdxx00"), 4); ASSERT_EQ(CountIter(iter, "abcdxx00"), 4);
// should check bloom filter since upper bound meets requirement // should check bloom filter since upper bound meets requirement
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration); 2 + using_full_builder);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
} }
{ {
@ -1396,7 +1397,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
ASSERT_EQ(CountIter(iter, "abcdxx01"), 4); ASSERT_EQ(CountIter(iter, "abcdxx01"), 4);
// should skip bloom filter since upper bound is too long // should skip bloom filter since upper bound is too long
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration); 2 + using_full_builder);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
} }
{ {
@ -1410,7 +1411,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
// should check bloom filter since upper bound matches transformed seek // should check bloom filter since upper bound matches transformed seek
// key // key
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration * 2); 2 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
} }
{ {
@ -1424,7 +1425,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0); ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0);
// should skip bloom filter since mismatch is found // should skip bloom filter since mismatch is found
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration * 2); 2 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
} }
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}})); ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}}));
@ -1438,7 +1439,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options)); std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abc"), 4); ASSERT_EQ(CountIter(iter, "abc"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration * 2); 2 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
} }
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:4"}})); ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:4"}}));
@ -1451,18 +1452,17 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options)); std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abc"), 0); ASSERT_EQ(CountIter(iter, "abc"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
3 + iteration * 2); 3 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
} }
iteration++;
} }
} }
// Create multiple SST files each with a different prefix_extractor config, // Create multiple SST files each with a different prefix_extractor config,
// verify iterators can read all SST files using the latest config. // verify iterators can read all SST files using the latest config.
TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
int iteration = 0; for (auto bfp_impl : BFP::kAllImpls) {
for (bool use_block_based_builder : {true, false}) { int using_full_builder = bfp_impl != BFP::kBlock;
Options options; Options options;
options.create_if_missing = true; options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.prefix_extractor.reset(NewFixedPrefixTransform(1));
@ -1470,8 +1470,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
options.statistics = CreateDBStatistics(); options.statistics = CreateDBStatistics();
// Enable prefix bloom for SST files // Enable prefix bloom for SST files
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.filter_policy.reset( table_options.filter_policy.reset(new BFP(10, bfp_impl));
NewBloomFilterPolicy(10, use_block_based_builder));
table_options.cache_index_and_filter_blocks = true; table_options.cache_index_and_filter_blocks = true;
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options); DestroyAndReopen(options);
@ -1497,10 +1496,10 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options)); std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "foo"), 2); ASSERT_EQ(CountIter(iter, "foo"), 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
1 + iteration); 1 + using_full_builder);
ASSERT_EQ(CountIter(iter, "gpk"), 0); ASSERT_EQ(CountIter(iter, "gpk"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
1 + iteration); 1 + using_full_builder);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
// second SST with capped:3 BF // second SST with capped:3 BF
@ -1514,13 +1513,13 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options)); std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_tmp, "foo"), 4); ASSERT_EQ(CountIter(iter_tmp, "foo"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + iteration * 2); 2 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
// both counters are incremented because BF is "not changed" for 1 of the // both counters are incremented because BF is "not changed" for 1 of the
// 2 SST files, so filter is checked once and found no match. // 2 SST files, so filter is checked once and found no match.
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
3 + iteration * 2); 3 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
} }
@ -1539,24 +1538,24 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
ASSERT_EQ(CountIter(iter_tmp, "foo"), 9); ASSERT_EQ(CountIter(iter_tmp, "foo"), 9);
// the first and last BF are checked // the first and last BF are checked
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
4 + iteration * 3); 4 + using_full_builder * 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
// only last BF is checked and not found // only last BF is checked and not found
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
5 + iteration * 3); 5 + using_full_builder * 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
} }
// iter_old can only see the first SST, so checked plus 1 // iter_old can only see the first SST, so checked plus 1
ASSERT_EQ(CountIter(iter_old, "foo"), 4); ASSERT_EQ(CountIter(iter_old, "foo"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
6 + iteration * 3); 6 + using_full_builder * 3);
// iter was created after the first setoptions call so only full filter // iter was created after the first setoptions call so only full filter
// will check the filter // will check the filter
ASSERT_EQ(CountIter(iter, "foo"), 2); ASSERT_EQ(CountIter(iter, "foo"), 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
6 + iteration * 4); 6 + using_full_builder * 4);
{ {
// keys in all three SSTs are visible to iterator // keys in all three SSTs are visible to iterator
@ -1565,11 +1564,11 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
std::unique_ptr<Iterator> iter_all(db_->NewIterator(read_options)); std::unique_ptr<Iterator> iter_all(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_all, "foo"), 9); ASSERT_EQ(CountIter(iter_all, "foo"), 9);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
7 + iteration * 5); 7 + using_full_builder * 5);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
ASSERT_EQ(CountIter(iter_all, "gpk"), 0); ASSERT_EQ(CountIter(iter_all, "gpk"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
8 + iteration * 5); 8 + using_full_builder * 5);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
} }
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
@ -1581,15 +1580,14 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
// all three SST are checked because the current options has the same as // all three SST are checked because the current options has the same as
// the remaining SST (capped:3) // the remaining SST (capped:3)
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
9 + iteration * 7); 9 + using_full_builder * 7);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
ASSERT_EQ(CountIter(iter_all, "gpk"), 0); ASSERT_EQ(CountIter(iter_all, "gpk"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
10 + iteration * 7); 10 + using_full_builder * 7);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4);
} }
// TODO(Zhongyi): Maybe also need to add Get calls to test point look up? // TODO(Zhongyi): Maybe also need to add Get calls to test point look up?
iteration++;
} }
} }
@ -1598,7 +1596,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
// as expected // as expected
TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) { TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
int iteration = 0; int iteration = 0;
for (bool use_block_based_builder : {true, false}) { for (auto bfp_impl : BFP::kAllImpls) {
Options options = CurrentOptions(); Options options = CurrentOptions();
options.create_if_missing = true; options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.prefix_extractor.reset(NewFixedPrefixTransform(1));
@ -1607,8 +1605,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
// Enable prefix bloom for SST files // Enable prefix bloom for SST files
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true; table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset( table_options.filter_policy.reset(new BFP(10, bfp_impl));
NewBloomFilterPolicy(10, use_block_based_builder));
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
CreateAndReopenWithCF({"pikachu" + std::to_string(iteration)}, options); CreateAndReopenWithCF({"pikachu" + std::to_string(iteration)}, options);
ReadOptions read_options; ReadOptions read_options;
@ -1657,8 +1654,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
// Verify it's possible to change prefix_extractor at runtime and iterators // Verify it's possible to change prefix_extractor at runtime and iterators
// behaves as expected // behaves as expected
TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) {
int iteration = 0; for (auto bfp_impl : BFP::kAllImpls) {
for (bool use_block_based_builder : {true, false}) {
Options options; Options options;
options.create_if_missing = true; options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.prefix_extractor.reset(NewFixedPrefixTransform(1));
@ -1667,8 +1663,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) {
// Enable prefix bloom for SST files // Enable prefix bloom for SST files
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true; table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset( table_options.filter_policy.reset(new BFP(10, bfp_impl));
NewBloomFilterPolicy(10, use_block_based_builder));
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options); DestroyAndReopen(options);
@ -1719,7 +1714,6 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) {
ASSERT_EQ(CountIter(iter_old, "abc"), 0); ASSERT_EQ(CountIter(iter_old, "abc"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
iteration++;
} }
} }

@ -138,8 +138,8 @@ class FilterPolicy {
// //
// bits_per_key: bits per key in bloom filter. A good value for bits_per_key // bits_per_key: bits per key in bloom filter. A good value for bits_per_key
// is 10, which yields a filter with ~ 1% false positive rate. // is 10, which yields a filter with ~ 1% false positive rate.
// use_block_based_builder: use block based filter rather than full filter. // use_block_based_builder: use deprecated block based filter (true) rather
// If you want to builder full filter, it needs to be set to false. // than full or partitioned filter (false).
// //
// Callers must delete the result after any database that is using the // Callers must delete the result after any database that is using the
// result has been closed. // result has been closed.

@ -240,7 +240,7 @@ class BlockBasedFilterBlockTest : public mock::MockBlockBasedTableTester,
public testing::Test { public testing::Test {
public: public:
BlockBasedFilterBlockTest() BlockBasedFilterBlockTest()
: mock::MockBlockBasedTableTester(NewBloomFilterPolicy(10)) {} : mock::MockBlockBasedTableTester(NewBloomFilterPolicy(10, true)) {}
}; };
TEST_F(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) { TEST_F(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) {

@ -20,9 +20,66 @@
namespace rocksdb { namespace rocksdb {
namespace {
typedef LegacyLocalityBloomImpl</*ExtraRotates*/ false> LegacyFullFilterImpl; typedef LegacyLocalityBloomImpl</*ExtraRotates*/ false> LegacyFullFilterImpl;
class BlockBasedFilterBlockBuilder;
class FullFilterBlockBuilder; class FullFilterBitsBuilder : public BuiltinFilterBitsBuilder {
public:
explicit FullFilterBitsBuilder(const int bits_per_key, const int num_probes);
// No Copy allowed
FullFilterBitsBuilder(const FullFilterBitsBuilder&) = delete;
void operator=(const FullFilterBitsBuilder&) = delete;
~FullFilterBitsBuilder() override;
void AddKey(const Slice& key) override;
// Create a filter that for hashes [0, n-1], the filter is allocated here
// When creating filter, it is ensured that
// total_bits = num_lines * CACHE_LINE_SIZE * 8
// dst len is >= 5, 1 for num_probes, 4 for num_lines
// Then total_bits = (len - 5) * 8, and cache_line_size could be calculated
// +----------------------------------------------------------------+
// | filter data with length total_bits/8 |
// +----------------------------------------------------------------+
// | |
// | ... |
// | |
// +----------------------------------------------------------------+
// | ... | num_probes : 1 byte | num_lines : 4 bytes |
// +----------------------------------------------------------------+
Slice Finish(std::unique_ptr<const char[]>* buf) override;
int CalculateNumEntry(const uint32_t bytes) override;
uint32_t CalculateSpace(const int num_entry) override {
uint32_t dont_care1;
uint32_t dont_care2;
return CalculateSpace(num_entry, &dont_care1, &dont_care2);
}
private:
friend class FullFilterBlockTest_DuplicateEntries_Test;
int bits_per_key_;
int num_probes_;
std::vector<uint32_t> hash_entries_;
// Get totalbits that optimized for cpu cache line
uint32_t GetTotalBitsForLocality(uint32_t total_bits);
// Reserve space for new filter
char* ReserveSpace(const int num_entry, uint32_t* total_bits,
uint32_t* num_lines);
// Implementation-specific variant of public CalculateSpace
uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits,
uint32_t* num_lines);
// Assuming single threaded access to this function.
void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits);
};
FullFilterBitsBuilder::FullFilterBitsBuilder(const int bits_per_key, FullFilterBitsBuilder::FullFilterBitsBuilder(const int bits_per_key,
const int num_probes) const int num_probes)
@ -41,8 +98,8 @@ FullFilterBitsBuilder::FullFilterBitsBuilder(const int bits_per_key,
Slice FullFilterBitsBuilder::Finish(std::unique_ptr<const char[]>* buf) { Slice FullFilterBitsBuilder::Finish(std::unique_ptr<const char[]>* buf) {
uint32_t total_bits, num_lines; uint32_t total_bits, num_lines;
char* data = ReserveSpace(static_cast<int>(hash_entries_.size()), char* data = ReserveSpace(static_cast<int>(hash_entries_.size()), &total_bits,
&total_bits, &num_lines); &num_lines);
assert(data); assert(data);
if (total_bits != 0 && num_lines != 0) { if (total_bits != 0 && num_lines != 0) {
@ -106,13 +163,11 @@ char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t bytes) { int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t bytes) {
assert(bits_per_key_); assert(bits_per_key_);
assert(bytes > 0); assert(bytes > 0);
uint32_t dont_care1, dont_care2;
int high = static_cast<int>(bytes * 8 / bits_per_key_ + 1); int high = static_cast<int>(bytes * 8 / bits_per_key_ + 1);
int low = 1; int low = 1;
int n = high; int n = high;
for (; n >= low; n--) { for (; n >= low; n--) {
uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2); if (CalculateSpace(n) <= bytes) {
if (sz <= bytes) {
break; break;
} }
} }
@ -131,7 +186,6 @@ inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
folly::constexpr_log2(CACHE_LINE_SIZE)); folly::constexpr_log2(CACHE_LINE_SIZE));
} }
namespace {
class AlwaysTrueFilter : public FilterBitsReader { class AlwaysTrueFilter : public FilterBitsReader {
public: public:
bool MayMatch(const Slice&) override { return true; } bool MayMatch(const Slice&) override { return true; }
@ -196,21 +250,33 @@ class FullFilterBitsReader : public FilterBitsReader {
const uint32_t log2_cache_line_size_; const uint32_t log2_cache_line_size_;
}; };
} // namespace
// All built-in implementations, listed in enum-value order.
const std::vector<BloomFilterPolicy::Impl> BloomFilterPolicy::kAllImpls{
    BloomFilterPolicy::kFull,
    BloomFilterPolicy::kBlock,
};
// An implementation of filter policy BloomFilterPolicy::BloomFilterPolicy(int bits_per_key, Impl impl)
class BloomFilterPolicy : public FilterPolicy { : bits_per_key_(bits_per_key), impl_(impl) {
public: // We intentionally round down to reduce probing cost a little bit
explicit BloomFilterPolicy(int bits_per_key, bool use_block_based_builder) num_probes_ = static_cast<int>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
: bits_per_key_(bits_per_key), if (num_probes_ < 1) num_probes_ = 1;
use_block_based_builder_(use_block_based_builder) { if (num_probes_ > 30) num_probes_ = 30;
initialize();
} }
~BloomFilterPolicy() override {} BloomFilterPolicy::~BloomFilterPolicy() {}
const char* Name() const override { return "rocksdb.BuiltinBloomFilter"; } const char* BloomFilterPolicy::Name() const {
return "rocksdb.BuiltinBloomFilter";
}
void BloomFilterPolicy::CreateFilter(const Slice* keys, int n,
std::string* dst) const {
// We should ideally only be using this deprecated interface for
// appropriately constructed BloomFilterPolicy
assert(impl_ == kBlock);
void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
// Compute bloom filter size (in both bits and bytes) // Compute bloom filter size (in both bits and bytes)
uint32_t bits = static_cast<uint32_t>(n * bits_per_key_); uint32_t bits = static_cast<uint32_t>(n * bits_per_key_);
@ -231,7 +297,8 @@ class BloomFilterPolicy : public FilterPolicy {
} }
} }
bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override { bool BloomFilterPolicy::KeyMayMatch(const Slice& key,
const Slice& bloom_filter) const {
const size_t len = bloom_filter.size(); const size_t len = bloom_filter.size();
if (len < 2 || len > 0xffffffffU) { if (len < 2 || len > 0xffffffffU) {
return false; return false;
@ -253,17 +320,18 @@ class BloomFilterPolicy : public FilterPolicy {
array); array);
} }
FilterBitsBuilder* GetFilterBitsBuilder() const override { FilterBitsBuilder* BloomFilterPolicy::GetFilterBitsBuilder() const {
if (use_block_based_builder_) { if (impl_ == kBlock) {
return nullptr; return nullptr;
} } else {
return new FullFilterBitsBuilder(bits_per_key_, num_probes_); return new FullFilterBitsBuilder(bits_per_key_, num_probes_);
} }
}
// Read metadata to determine what kind of FilterBitsReader is needed // Read metadata to determine what kind of FilterBitsReader is needed
// and return a new one. // and return a new one.
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override { FilterBitsReader* BloomFilterPolicy::GetFilterBitsReader(
const Slice& contents) const {
uint32_t len_with_meta = static_cast<uint32_t>(contents.size()); uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
if (len_with_meta <= 5) { if (len_with_meta <= 5) {
// filter is empty or broken. Treat like zero keys added. // filter is empty or broken. Treat like zero keys added.
@ -317,27 +385,13 @@ class BloomFilterPolicy : public FilterPolicy {
log2_cache_line_size); log2_cache_line_size);
} }
// If choose to use block based builder
bool UseBlockBasedBuilder() { return use_block_based_builder_; }
private:
int bits_per_key_;
int num_probes_;
const bool use_block_based_builder_;
void initialize() {
// We intentionally round down to reduce probing cost a little bit
num_probes_ = static_cast<int>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
if (num_probes_ < 1) num_probes_ = 1;
if (num_probes_ > 30) num_probes_ = 30;
}
};
} // namespace
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key, const FilterPolicy* NewBloomFilterPolicy(int bits_per_key,
bool use_block_based_builder) { bool use_block_based_builder) {
return new BloomFilterPolicy(bits_per_key, use_block_based_builder); if (use_block_based_builder) {
return new BloomFilterPolicy(bits_per_key, BloomFilterPolicy::kBlock);
} else {
return new BloomFilterPolicy(bits_per_key, BloomFilterPolicy::kFull);
}
} }
FilterPolicy::~FilterPolicy() { } FilterPolicy::~FilterPolicy() { }

@ -18,57 +18,61 @@ namespace rocksdb {
class Slice; class Slice;
class FullFilterBitsBuilder : public FilterBitsBuilder { // Exposes any extra information needed for testing built-in
// FilterBitsBuilders
class BuiltinFilterBitsBuilder : public FilterBitsBuilder {
public: public:
explicit FullFilterBitsBuilder(const int bits_per_key, const int num_probes);
// No Copy allowed
FullFilterBitsBuilder(const FullFilterBitsBuilder&) = delete;
void operator=(const FullFilterBitsBuilder&) = delete;
~FullFilterBitsBuilder();
void AddKey(const Slice& key) override;
// Create a filter that for hashes [0, n-1], the filter is allocated here
// When creating filter, it is ensured that
// total_bits = num_lines * CACHE_LINE_SIZE * 8
// dst len is >= 5, 1 for num_probes, 4 for num_lines
// Then total_bits = (len - 5) * 8, and cache_line_size could be calculated
// +----------------------------------------------------------------+
// | filter data with length total_bits/8 |
// +----------------------------------------------------------------+
// | |
// | ... |
// | |
// +----------------------------------------------------------------+
// | ... | num_probes : 1 byte | num_lines : 4 bytes |
// +----------------------------------------------------------------+
Slice Finish(std::unique_ptr<const char[]>* buf) override;
int CalculateNumEntry(const uint32_t bytes) override;
// Calculate number of bytes needed for a new filter, including // Calculate number of bytes needed for a new filter, including
// metadata. Passing the result to CalculateNumEntry should // metadata. Passing the result to CalculateNumEntry should
// return >= the num_entry passed in. // return >= the num_entry passed in.
uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits, virtual uint32_t CalculateSpace(const int num_entry) = 0;
uint32_t* num_lines); };
private: // RocksDB built-in filter policy for Bloom or Bloom-like filters.
friend class FullFilterBlockTest_DuplicateEntries_Test; // This class is considered internal API and subject to change.
int bits_per_key_; // See NewBloomFilterPolicy.
int num_probes_; class BloomFilterPolicy : public FilterPolicy {
std::vector<uint32_t> hash_entries_; public:
// An internal marker for which Bloom filter implementation to use.
// This makes it easier for tests to track or to walk over the built-in
// set of Bloom filter policies. The only variance in BloomFilterPolicy
// by implementation is in GetFilterBitsBuilder(), so an enum is practical
// here vs. subclasses.
enum Impl {
  // Bloom filter implementation used for full and partitioned filters.
  // Fixed at 0 to guard against value confusion with
  // bool use_block_based_builder.
  kFull = 0,
  // Deprecated block-based Bloom filter implementation.
  // Fixed at 1 to guard against value confusion with
  // bool use_block_based_builder.
  kBlock = 1,
};
static const std::vector<Impl> kAllImpls;
explicit BloomFilterPolicy(int bits_per_key, Impl impl);
// Get totalbits that optimized for cpu cache line ~BloomFilterPolicy() override;
uint32_t GetTotalBitsForLocality(uint32_t total_bits);
// Reserve space for new filter const char* Name() const override;
char* ReserveSpace(const int num_entry, uint32_t* total_bits,
uint32_t* num_lines);
// Assuming single threaded access to this function. // Deprecated block-based filter only
void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits); void CreateFilter(const Slice* keys, int n, std::string* dst) const override;
// Deprecated block-based filter only
bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override;
FilterBitsBuilder* GetFilterBitsBuilder() const override;
// Read metadata to determine what kind of FilterBitsReader is needed
// and return a new one. This must successfully process any filter data
// generated by a built-in FilterBitsBuilder, regardless of the impl
// chosen for this BloomFilterPolicy. Not compatible with CreateFilter.
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;
private:
int bits_per_key_;
int num_probes_;
// Selected implementation for building new SST filters
Impl impl_;
}; };
} // namespace rocksdb } // namespace rocksdb

@ -55,15 +55,15 @@ static int NextLength(int length) {
return length; return length;
} }
class BloomTest : public testing::Test { class BlockBasedBloomTest : public testing::Test {
private: private:
std::unique_ptr<const FilterPolicy> policy_; std::unique_ptr<const FilterPolicy> policy_;
std::string filter_; std::string filter_;
std::vector<std::string> keys_; std::vector<std::string> keys_;
public: public:
BloomTest() : policy_( BlockBasedBloomTest()
NewBloomFilterPolicy(FLAGS_bits_per_key)) {} : policy_(NewBloomFilterPolicy(FLAGS_bits_per_key, true)) {}
void Reset() { void Reset() {
keys_.clear(); keys_.clear();
@ -72,7 +72,7 @@ class BloomTest : public testing::Test {
void ResetPolicy(const FilterPolicy* policy = nullptr) { void ResetPolicy(const FilterPolicy* policy = nullptr) {
if (policy == nullptr) { if (policy == nullptr) {
policy_.reset(NewBloomFilterPolicy(FLAGS_bits_per_key)); policy_.reset(NewBloomFilterPolicy(FLAGS_bits_per_key, true));
} else { } else {
policy_.reset(policy); policy_.reset(policy);
} }
@ -131,12 +131,12 @@ class BloomTest : public testing::Test {
} }
}; };
TEST_F(BloomTest, EmptyFilter) { TEST_F(BlockBasedBloomTest, EmptyFilter) {
ASSERT_TRUE(! Matches("hello")); ASSERT_TRUE(! Matches("hello"));
ASSERT_TRUE(! Matches("world")); ASSERT_TRUE(! Matches("world"));
} }
TEST_F(BloomTest, Small) { TEST_F(BlockBasedBloomTest, Small) {
Add("hello"); Add("hello");
Add("world"); Add("world");
ASSERT_TRUE(Matches("hello")); ASSERT_TRUE(Matches("hello"));
@ -145,7 +145,7 @@ TEST_F(BloomTest, Small) {
ASSERT_TRUE(! Matches("foo")); ASSERT_TRUE(! Matches("foo"));
} }
TEST_F(BloomTest, VaryingLengths) { TEST_F(BlockBasedBloomTest, VaryingLengths) {
char buffer[sizeof(int)]; char buffer[sizeof(int)];
// Count number of filters that significantly exceed the false positive rate // Count number of filters that significantly exceed the false positive rate
@ -186,45 +186,45 @@ TEST_F(BloomTest, VaryingLengths) {
// Ensure the implementation doesn't accidentally change in an // Ensure the implementation doesn't accidentally change in an
// incompatible way // incompatible way
TEST_F(BloomTest, Schema) { TEST_F(BlockBasedBloomTest, Schema) {
char buffer[sizeof(int)]; char buffer[sizeof(int)];
ResetPolicy(NewBloomFilterPolicy(8)); // num_probes = 5 ResetPolicy(NewBloomFilterPolicy(8, true)); // num_probes = 5
for (int key = 0; key < 87; key++) { for (int key = 0; key < 87; key++) {
Add(Key(key, buffer)); Add(Key(key, buffer));
} }
Build(); Build();
ASSERT_EQ(BloomHash(FilterData()), 3589896109U); ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
ResetPolicy(NewBloomFilterPolicy(9)); // num_probes = 6 ResetPolicy(NewBloomFilterPolicy(9, true)); // num_probes = 6
for (int key = 0; key < 87; key++) { for (int key = 0; key < 87; key++) {
Add(Key(key, buffer)); Add(Key(key, buffer));
} }
Build(); Build();
ASSERT_EQ(BloomHash(FilterData()), 969445585); ASSERT_EQ(BloomHash(FilterData()), 969445585);
ResetPolicy(NewBloomFilterPolicy(11)); // num_probes = 7 ResetPolicy(NewBloomFilterPolicy(11, true)); // num_probes = 7
for (int key = 0; key < 87; key++) { for (int key = 0; key < 87; key++) {
Add(Key(key, buffer)); Add(Key(key, buffer));
} }
Build(); Build();
ASSERT_EQ(BloomHash(FilterData()), 1694458207); ASSERT_EQ(BloomHash(FilterData()), 1694458207);
ResetPolicy(NewBloomFilterPolicy(10)); // num_probes = 6 ResetPolicy(NewBloomFilterPolicy(10, true)); // num_probes = 6
for (int key = 0; key < 87; key++) { for (int key = 0; key < 87; key++) {
Add(Key(key, buffer)); Add(Key(key, buffer));
} }
Build(); Build();
ASSERT_EQ(BloomHash(FilterData()), 2373646410U); ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
ResetPolicy(NewBloomFilterPolicy(10)); ResetPolicy(NewBloomFilterPolicy(10, true));
for (int key = 1; key < 87; key++) { for (int key = 1; key < 87; key++) {
Add(Key(key, buffer)); Add(Key(key, buffer));
} }
Build(); Build();
ASSERT_EQ(BloomHash(FilterData()), 1908442116); ASSERT_EQ(BloomHash(FilterData()), 1908442116);
ResetPolicy(NewBloomFilterPolicy(10)); ResetPolicy(NewBloomFilterPolicy(10, true));
for (int key = 1; key < 88; key++) { for (int key = 1; key < 88; key++) {
Add(Key(key, buffer)); Add(Key(key, buffer));
} }
@ -251,8 +251,9 @@ class FullBloomTest : public testing::Test {
Reset(); Reset();
} }
FullFilterBitsBuilder* GetFullFilterBitsBuilder() { BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() {
return dynamic_cast<FullFilterBitsBuilder*>(bits_builder_.get()); // Throws on bad cast
return &dynamic_cast<BuiltinFilterBitsBuilder&>(*bits_builder_.get());
} }
void Reset() { void Reset() {
@ -322,15 +323,12 @@ class FullBloomTest : public testing::Test {
}; };
TEST_F(FullBloomTest, FilterSize) { TEST_F(FullBloomTest, FilterSize) {
uint32_t dont_care1, dont_care2; auto bits_builder = GetBuiltinFilterBitsBuilder();
auto full_bits_builder = GetFullFilterBitsBuilder();
ASSERT_TRUE(full_bits_builder != nullptr);
for (int n = 1; n < 100; n++) { for (int n = 1; n < 100; n++) {
auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2); auto space = bits_builder->CalculateSpace(n);
auto n2 = full_bits_builder->CalculateNumEntry(space); auto n2 = bits_builder->CalculateNumEntry(space);
ASSERT_GE(n2, n); ASSERT_GE(n2, n);
auto space2 = auto space2 = bits_builder->CalculateSpace(n2);
full_bits_builder->CalculateSpace(n2, &dont_care1, &dont_care2);
ASSERT_EQ(space, space2); ASSERT_EQ(space, space2);
} }
} }

Loading…
Cancel
Save