Refactor FilterPolicies toward Customizable (#9567)

Summary:
Some changes to make it easier to make FilterPolicy
customizable. In particular, create distinct classes for the different
testing-only and user-facing built-in FilterPolicy modes.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9567

Test Plan:
Tests updated, with no intended difference in the functionality
tested. No difference in test performance was seen as a result of moving to
string-based filter type configuration.

Reviewed By: mrambacher

Differential Revision: D34234694

Pulled By: pdillinger

fbshipit-source-id: 8a94931a9e04c3bcca863a4f524cfd064aaf0122
main
Peter Dillinger 3 years ago committed by Facebook GitHub Bot
parent a0c569ee1d
commit 8c681087c7
  1. 234
      db/db_bloom_filter_test.cc
  2. 28
      microbench/ribbon_bench.cc
  3. 64
      options/options_test.cc
  4. 10
      table/block_based/block_based_filter_block.cc
  5. 3
      table/block_based/block_based_table_builder.cc
  6. 439
      table/block_based/filter_policy.cc
  7. 262
      table/block_based/filter_policy_internal.h
  8. 9
      table/block_based/mock_block_based_table.h
  9. 108
      util/bloom_test.cc
  10. 17
      util/filter_bench.cc

@ -10,6 +10,7 @@
#include <cstring> #include <cstring>
#include <iomanip> #include <iomanip>
#include <sstream> #include <sstream>
#include <string>
#include "cache/cache_entry_roles.h" #include "cache/cache_entry_roles.h"
#include "cache/cache_reservation_manager.h" #include "cache/cache_reservation_manager.h"
@ -27,7 +28,17 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
namespace { namespace {
using BFP = BloomFilterPolicy; std::shared_ptr<const FilterPolicy> Create(double bits_per_key,
const std::string& name) {
return BloomLikeFilterPolicy::Create(name, bits_per_key);
}
const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kName();
const std::string kDeprecatedBlock =
DeprecatedBlockBasedBloomFilterPolicy::kName();
const std::string kFastLocalBloom = test::FastLocalBloomFilterPolicy::kName();
const std::string kStandard128Ribbon =
test::Standard128RibbonFilterPolicy::kName();
const std::string kAutoBloom = BloomFilterPolicy::kName();
} // namespace } // namespace
// DB tests related to bloom filter. // DB tests related to bloom filter.
@ -38,12 +49,13 @@ class DBBloomFilterTest : public DBTestBase {
: DBTestBase("db_bloom_filter_test", /*env_do_fsync=*/true) {} : DBTestBase("db_bloom_filter_test", /*env_do_fsync=*/true) {}
}; };
class DBBloomFilterTestWithParam : public DBTestBase, class DBBloomFilterTestWithParam
public testing::WithParamInterface< : public DBTestBase,
std::tuple<BFP::Mode, bool, uint32_t>> { public testing::WithParamInterface<
std::tuple<std::string, bool, uint32_t>> {
// public testing::WithParamInterface<bool> { // public testing::WithParamInterface<bool> {
protected: protected:
BFP::Mode bfp_impl_; std::string bfp_impl_;
bool partition_filters_; bool partition_filters_;
uint32_t format_version_; uint32_t format_version_;
@ -90,7 +102,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
ReadOptions ropts; ReadOptions ropts;
std::string value; std::string value;
anon::OptionsOverride options_override; anon::OptionsOverride options_override;
options_override.filter_policy.reset(new BFP(20, bfp_impl_)); options_override.filter_policy = Create(20, bfp_impl_);
options_override.partition_filters = partition_filters_; options_override.partition_filters = partition_filters_;
options_override.metadata_block_size = 32; options_override.metadata_block_size = 32;
Options options = CurrentOptions(options_override); Options options = CurrentOptions(options_override);
@ -477,7 +489,7 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
// trigger reset of table_factory // trigger reset of table_factory
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.no_block_cache = true; table_options.no_block_cache = true;
table_options.filter_policy.reset(new BFP(10, bfp_impl_)); table_options.filter_policy = Create(10, bfp_impl_);
table_options.partition_filters = partition_filters_; table_options.partition_filters = partition_filters_;
if (partition_filters_) { if (partition_filters_) {
table_options.index_type = table_options.index_type =
@ -573,11 +585,10 @@ class AlwaysTrueBitsBuilder : public FilterBitsBuilder {
size_t ApproximateNumEntries(size_t) override { return SIZE_MAX; } size_t ApproximateNumEntries(size_t) override { return SIZE_MAX; }
}; };
class AlwaysTrueFilterPolicy : public BloomFilterPolicy { class AlwaysTrueFilterPolicy : public BloomLikeFilterPolicy {
public: public:
explicit AlwaysTrueFilterPolicy(bool skip) explicit AlwaysTrueFilterPolicy(bool skip)
: BloomFilterPolicy(/* ignored */ 10, /* ignored */ BFP::kAutoBloom), : BloomLikeFilterPolicy(/* ignored */ 10), skip_(skip) {}
skip_(skip) {}
FilterBitsBuilder* GetBuilderWithContext( FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const override { const FilterBuildingContext&) const override {
@ -588,6 +599,10 @@ class AlwaysTrueFilterPolicy : public BloomFilterPolicy {
} }
} }
std::string GetId() const override {
return "rocksdb.test.AlwaysTrueFilterPolicy";
}
private: private:
bool skip_; bool skip_;
}; };
@ -636,7 +651,7 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) {
// Test 1: bits per key < 0.5 means skip filters -> no filter // Test 1: bits per key < 0.5 means skip filters -> no filter
// constructed or read. // constructed or read.
table_options.filter_policy.reset(new BFP(0.4, bfp_impl_)); table_options.filter_policy = Create(0.4, bfp_impl_);
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options); DestroyAndReopen(options);
PutAndGetFn(); PutAndGetFn();
@ -724,25 +739,23 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) {
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestDefFormatVersion, FormatDef, DBBloomFilterTestDefFormatVersion,
::testing::Values( ::testing::Values(
std::make_tuple(BFP::kDeprecatedBlock, false, std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion),
test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion),
std::make_tuple(BFP::kAutoBloom, true, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion)));
std::make_tuple(BFP::kAutoBloom, false, test::kDefaultFormatVersion)));
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestWithParam, FormatDef, DBBloomFilterTestWithParam,
::testing::Values( ::testing::Values(
std::make_tuple(BFP::kDeprecatedBlock, false, std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion),
test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion),
std::make_tuple(BFP::kAutoBloom, true, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion)));
std::make_tuple(BFP::kAutoBloom, false, test::kDefaultFormatVersion)));
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
FormatLatest, DBBloomFilterTestWithParam, FormatLatest, DBBloomFilterTestWithParam,
::testing::Values( ::testing::Values(
std::make_tuple(BFP::kDeprecatedBlock, false, kLatestFormatVersion), std::make_tuple(kDeprecatedBlock, false, kLatestFormatVersion),
std::make_tuple(BFP::kAutoBloom, true, kLatestFormatVersion), std::make_tuple(kAutoBloom, true, kLatestFormatVersion),
std::make_tuple(BFP::kAutoBloom, false, kLatestFormatVersion))); std::make_tuple(kAutoBloom, false, kLatestFormatVersion)));
#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
TEST_F(DBBloomFilterTest, BloomFilterRate) { TEST_F(DBBloomFilterTest, BloomFilterRate) {
@ -941,7 +954,7 @@ using FilterConstructionReserveMemoryHash = uint64_t;
class DBFilterConstructionReserveMemoryTestWithParam class DBFilterConstructionReserveMemoryTestWithParam
: public DBTestBase, : public DBTestBase,
public testing::WithParamInterface< public testing::WithParamInterface<
std::tuple<bool, BloomFilterPolicy::Mode, bool, bool>> { std::tuple<bool, std::string, bool, bool>> {
public: public:
DBFilterConstructionReserveMemoryTestWithParam() DBFilterConstructionReserveMemoryTestWithParam()
: DBTestBase("db_bloom_filter_tests", : DBTestBase("db_bloom_filter_tests",
@ -951,9 +964,8 @@ class DBFilterConstructionReserveMemoryTestWithParam
policy_(std::get<1>(GetParam())), policy_(std::get<1>(GetParam())),
partition_filters_(std::get<2>(GetParam())), partition_filters_(std::get<2>(GetParam())),
detect_filter_construct_corruption_(std::get<3>(GetParam())) { detect_filter_construct_corruption_(std::get<3>(GetParam())) {
if (!reserve_table_builder_memory_ || if (!reserve_table_builder_memory_ || policy_ == kDeprecatedBlock ||
policy_ == BloomFilterPolicy::Mode::kDeprecatedBlock || policy_ == kLegacyBloom) {
policy_ == BloomFilterPolicy::Mode::kLegacyBloom) {
// For these cases, we only interested in whether filter construction // For these cases, we only interested in whether filter construction
// cache resevation happens instead of its accuracy. Therefore we don't // cache resevation happens instead of its accuracy. Therefore we don't
// need many keys. // need many keys.
@ -966,7 +978,7 @@ class DBFilterConstructionReserveMemoryTestWithParam
// two partitions. // two partitions.
num_key_ = 18 * CacheReservationManager::GetDummyEntrySize() / num_key_ = 18 * CacheReservationManager::GetDummyEntrySize() /
sizeof(FilterConstructionReserveMemoryHash); sizeof(FilterConstructionReserveMemoryHash);
} else if (policy_ == BloomFilterPolicy::Mode::kFastLocalBloom) { } else if (policy_ == kFastLocalBloom) {
// For Bloom Filter + FullFilter case, since we design the num_key_ to // For Bloom Filter + FullFilter case, since we design the num_key_ to
// make hash entry cache reservation be a multiple of dummy entries, the // make hash entry cache reservation be a multiple of dummy entries, the
// correct behavior of charging final filter on top of it will trigger at // correct behavior of charging final filter on top of it will trigger at
@ -995,7 +1007,7 @@ class DBFilterConstructionReserveMemoryTestWithParam
constexpr std::size_t kCacheCapacity = 100 * 1024 * 1024; constexpr std::size_t kCacheCapacity = 100 * 1024 * 1024;
table_options.reserve_table_builder_memory = reserve_table_builder_memory_; table_options.reserve_table_builder_memory = reserve_table_builder_memory_;
table_options.filter_policy.reset(new BloomFilterPolicy(10, policy_)); table_options.filter_policy = Create(10, policy_);
table_options.partition_filters = partition_filters_; table_options.partition_filters = partition_filters_;
if (table_options.partition_filters) { if (table_options.partition_filters) {
table_options.index_type = table_options.index_type =
@ -1023,7 +1035,7 @@ class DBFilterConstructionReserveMemoryTestWithParam
bool ReserveTableBuilderMemory() { return reserve_table_builder_memory_; } bool ReserveTableBuilderMemory() { return reserve_table_builder_memory_; }
BloomFilterPolicy::Mode GetFilterPolicy() { return policy_; } std::string GetFilterPolicy() { return policy_; }
bool PartitionFilters() { return partition_filters_; } bool PartitionFilters() { return partition_filters_; }
@ -1035,7 +1047,7 @@ class DBFilterConstructionReserveMemoryTestWithParam
private: private:
std::size_t num_key_; std::size_t num_key_;
bool reserve_table_builder_memory_; bool reserve_table_builder_memory_;
BloomFilterPolicy::Mode policy_; std::string policy_;
bool partition_filters_; bool partition_filters_;
std::shared_ptr<FilterConstructResPeakTrackingCache> cache_; std::shared_ptr<FilterConstructResPeakTrackingCache> cache_;
bool detect_filter_construct_corruption_; bool detect_filter_construct_corruption_;
@ -1043,32 +1055,20 @@ class DBFilterConstructionReserveMemoryTestWithParam
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
BlockBasedTableOptions, DBFilterConstructionReserveMemoryTestWithParam, BlockBasedTableOptions, DBFilterConstructionReserveMemoryTestWithParam,
::testing::Values( ::testing::Values(std::make_tuple(false, kFastLocalBloom, false, false),
std::make_tuple(false, BloomFilterPolicy::Mode::kFastLocalBloom, false,
false), std::make_tuple(true, kFastLocalBloom, false, false),
std::make_tuple(true, kFastLocalBloom, false, true),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, false, std::make_tuple(true, kFastLocalBloom, true, false),
false), std::make_tuple(true, kFastLocalBloom, true, true),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, false,
true), std::make_tuple(true, kStandard128Ribbon, false, false),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, true, std::make_tuple(true, kStandard128Ribbon, false, true),
false), std::make_tuple(true, kStandard128Ribbon, true, false),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, true, std::make_tuple(true, kStandard128Ribbon, true, true),
true),
std::make_tuple(true, kDeprecatedBlock, false, false),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, std::make_tuple(true, kLegacyBloom, false, false)));
false, false),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon,
false, true),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, true,
false),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, true,
true),
std::make_tuple(true, BloomFilterPolicy::Mode::kDeprecatedBlock, false,
false),
std::make_tuple(true, BloomFilterPolicy::Mode::kLegacyBloom, false,
false)));
// TODO: Speed up this test. // TODO: Speed up this test.
// The current test inserts many keys (on the scale of dummy entry size) // The current test inserts many keys (on the scale of dummy entry size)
@ -1126,7 +1126,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
ASSERT_OK(Flush()); ASSERT_OK(Flush());
bool reserve_table_builder_memory = ReserveTableBuilderMemory(); bool reserve_table_builder_memory = ReserveTableBuilderMemory();
BloomFilterPolicy::Mode policy = GetFilterPolicy(); std::string policy = GetFilterPolicy();
bool partition_filters = PartitionFilters(); bool partition_filters = PartitionFilters();
bool detect_filter_construct_corruption = bool detect_filter_construct_corruption =
table_options.detect_filter_construct_corruption; table_options.detect_filter_construct_corruption;
@ -1141,12 +1141,11 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
return; return;
} }
if (policy == BloomFilterPolicy::Mode::kDeprecatedBlock || if (policy == kDeprecatedBlock || policy == kLegacyBloom) {
policy == BloomFilterPolicy::Mode::kLegacyBloom) {
EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0) EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0)
<< "There shouldn't be filter construction cache reservation as this " << "There shouldn't be filter construction cache reservation as this "
"feature does not support BloomFilterPolicy::Mode::kDeprecatedBlock " "feature does not support kDeprecatedBlock "
"nor BloomFilterPolicy::Mode::kLegacyBloom"; "nor kLegacyBloom";
return; return;
} }
@ -1162,17 +1161,17 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
const std::size_t predicted_hash_entries_cache_res_dummy_entry_num = const std::size_t predicted_hash_entries_cache_res_dummy_entry_num =
predicted_hash_entries_cache_res / kDummyEntrySize; predicted_hash_entries_cache_res / kDummyEntrySize;
const std::size_t predicted_final_filter_cache_res = const std::size_t predicted_final_filter_cache_res =
static_cast<std::size_t>(std::ceil( static_cast<std::size_t>(
1.0 * predicted_hash_entries_cache_res_dummy_entry_num / 6 * std::ceil(1.0 * predicted_hash_entries_cache_res_dummy_entry_num / 6 *
(policy == BloomFilterPolicy::Mode::kStandard128Ribbon ? 0.7 : 1))) * (policy == kStandard128Ribbon ? 0.7 : 1))) *
kDummyEntrySize; kDummyEntrySize;
const std::size_t predicted_banding_cache_res = const std::size_t predicted_banding_cache_res =
static_cast<std::size_t>( static_cast<std::size_t>(
std::ceil(predicted_hash_entries_cache_res_dummy_entry_num * 2.5)) * std::ceil(predicted_hash_entries_cache_res_dummy_entry_num * 2.5)) *
kDummyEntrySize; kDummyEntrySize;
if (policy == BloomFilterPolicy::Mode::kFastLocalBloom) { if (policy == kFastLocalBloom) {
/* BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter /* kFastLocalBloom + FullFilter
* p0 * p0
* / \ * / \
* b / \ * b / \
@ -1186,13 +1185,13 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
* multiple of dummy entries so that reservation for (p0 - b) * multiple of dummy entries so that reservation for (p0 - b)
* will trigger at least another dummy entry insertion. * will trigger at least another dummy entry insertion.
* *
* BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter + * kFastLocalBloom + FullFilter +
* detect_filter_construct_corruption * detect_filter_construct_corruption
* The peak p0 stays the same as * The peak p0 stays the same as
* (BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter) but just lasts * (kFastLocalBloom + FullFilter) but just lasts
* longer since we release hash entries reservation later. * longer since we release hash entries reservation later.
* *
* BloomFilterPolicy::Mode::kFastLocalBloom + PartitionedFilter * kFastLocalBloom + PartitionedFilter
* p1 * p1
* / \ * / \
* p0 b'/ \ * p0 b'/ \
@ -1209,17 +1208,17 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
* + parittioned final filter1 + parittioned final filter2 * + parittioned final filter1 + parittioned final filter2
* = hash entries + final filter * = hash entries + final filter
* *
* BloomFilterPolicy::Mode::kFastLocalBloom + PartitionedFilter + * kFastLocalBloom + PartitionedFilter +
* detect_filter_construct_corruption * detect_filter_construct_corruption
* The peak p0, p1 stay the same as * The peak p0, p1 stay the same as
* (BloomFilterPolicy::Mode::kFastLocalBloom + PartitionedFilter) but just * (kFastLocalBloom + PartitionedFilter) but just
* last longer since we release hash entries reservation later. * last longer since we release hash entries reservation later.
* *
*/ */
if (!partition_filters) { if (!partition_filters) {
EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1) EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1)
<< "Filter construction cache reservation should have only 1 peak in " << "Filter construction cache reservation should have only 1 peak in "
"case: BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter"; "case: kFastLocalBloom + FullFilter";
std::size_t filter_construction_cache_res_peak = std::size_t filter_construction_cache_res_peak =
filter_construction_cache_res_peaks[0]; filter_construction_cache_res_peaks[0];
EXPECT_GT(filter_construction_cache_res_peak, EXPECT_GT(filter_construction_cache_res_peak,
@ -1239,7 +1238,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
} else { } else {
EXPECT_GE(filter_construction_cache_res_peaks.size(), 2) EXPECT_GE(filter_construction_cache_res_peaks.size(), 2)
<< "Filter construction cache reservation should have multiple peaks " << "Filter construction cache reservation should have multiple peaks "
"in case: BloomFilterPolicy::Mode::kFastLocalBloom + " "in case: kFastLocalBloom + "
"PartitionedFilter"; "PartitionedFilter";
std::size_t predicted_filter_construction_cache_res_increments_sum = std::size_t predicted_filter_construction_cache_res_increments_sum =
predicted_hash_entries_cache_res + predicted_final_filter_cache_res; predicted_hash_entries_cache_res + predicted_final_filter_cache_res;
@ -1251,8 +1250,8 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
} }
} }
if (policy == BloomFilterPolicy::Mode::kStandard128Ribbon) { if (policy == kStandard128Ribbon) {
/* BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter /* kStandard128Ribbon + FullFilter
* p0 * p0
* / \ p1 * / \ p1
* / \/\ * / \/\
@ -1266,7 +1265,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
* will trigger at least another dummy entry insertion * will trigger at least another dummy entry insertion
* (or equivelantly to saying, creating another peak). * (or equivelantly to saying, creating another peak).
* *
* BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter + * kStandard128Ribbon + FullFilter +
* detect_filter_construct_corruption * detect_filter_construct_corruption
* *
* new p0 * new p0
@ -1287,7 +1286,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
* entries reserveration (like p0 - b' previously) until after final filter * entries reserveration (like p0 - b' previously) until after final filter
* creation and post-verification * creation and post-verification
* *
* BloomFilterPolicy::Mode::kStandard128Ribbon + PartitionedFilter * kStandard128Ribbon + PartitionedFilter
* p3 * p3
* p0 /\ p4 * p0 /\ p4
* / \ p1 / \ /\ * / \ p1 / \ /\
@ -1306,7 +1305,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
* + parittioned final filter1 + parittioned final filter2 * + parittioned final filter1 + parittioned final filter2
* = hash entries + banding + final filter * = hash entries + banding + final filter
* *
* BloomFilterPolicy::Mode::kStandard128Ribbon + PartitionedFilter + * kStandard128Ribbon + PartitionedFilter +
* detect_filter_construct_corruption * detect_filter_construct_corruption
* *
* new p3 * new p3
@ -1347,7 +1346,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
if (!detect_filter_construct_corruption) { if (!detect_filter_construct_corruption) {
EXPECT_EQ(filter_construction_cache_res_peaks.size(), 2) EXPECT_EQ(filter_construction_cache_res_peaks.size(), 2)
<< "Filter construction cache reservation should have 2 peaks in " << "Filter construction cache reservation should have 2 peaks in "
"case: BloomFilterPolicy::Mode::kStandard128Ribbon + " "case: kStandard128Ribbon + "
"FullFilter. " "FullFilter. "
"The second peak is resulted from charging the final filter " "The second peak is resulted from charging the final filter "
"after " "after "
@ -1366,7 +1365,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
} else { } else {
EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1) EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1)
<< "Filter construction cache reservation should have 1 peaks in " << "Filter construction cache reservation should have 1 peaks in "
"case: BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter " "case: kStandard128Ribbon + FullFilter "
"+ detect_filter_construct_corruption. " "+ detect_filter_construct_corruption. "
"The previous second peak now disappears since we don't " "The previous second peak now disappears since we don't "
"decrease the hash entry reservation" "decrease the hash entry reservation"
@ -1388,13 +1387,13 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
EXPECT_GE(filter_construction_cache_res_peaks.size(), 3) EXPECT_GE(filter_construction_cache_res_peaks.size(), 3)
<< "Filter construction cache reservation should have more than 3 " << "Filter construction cache reservation should have more than 3 "
"peaks " "peaks "
"in case: BloomFilterPolicy::Mode::kStandard128Ribbon + " "in case: kStandard128Ribbon + "
"PartitionedFilter"; "PartitionedFilter";
} else { } else {
EXPECT_GE(filter_construction_cache_res_peaks.size(), 2) EXPECT_GE(filter_construction_cache_res_peaks.size(), 2)
<< "Filter construction cache reservation should have more than 2 " << "Filter construction cache reservation should have more than 2 "
"peaks " "peaks "
"in case: BloomFilterPolicy::Mode::kStandard128Ribbon + " "in case: kStandard128Ribbon + "
"PartitionedFilter + detect_filter_construct_corruption"; "PartitionedFilter + detect_filter_construct_corruption";
} }
std::size_t predicted_filter_construction_cache_res_increments_sum = std::size_t predicted_filter_construction_cache_res_increments_sum =
@ -1412,8 +1411,8 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
class DBFilterConstructionCorruptionTestWithParam class DBFilterConstructionCorruptionTestWithParam
: public DBTestBase, : public DBTestBase,
public testing::WithParamInterface< public testing::WithParamInterface<
std::tuple<bool /* detect_filter_construct_corruption */, std::tuple<bool /* detect_filter_construct_corruption */, std::string,
BloomFilterPolicy::Mode, bool /* partition_filters */>> { bool /* partition_filters */>> {
public: public:
DBFilterConstructionCorruptionTestWithParam() DBFilterConstructionCorruptionTestWithParam()
: DBTestBase("db_bloom_filter_tests", : DBTestBase("db_bloom_filter_tests",
@ -1422,8 +1421,7 @@ class DBFilterConstructionCorruptionTestWithParam
BlockBasedTableOptions GetBlockBasedTableOptions() { BlockBasedTableOptions GetBlockBasedTableOptions() {
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.detect_filter_construct_corruption = std::get<0>(GetParam()); table_options.detect_filter_construct_corruption = std::get<0>(GetParam());
table_options.filter_policy.reset( table_options.filter_policy = Create(10, std::get<1>(GetParam()));
new BloomFilterPolicy(10, std::get<1>(GetParam())));
table_options.partition_filters = std::get<2>(GetParam()); table_options.partition_filters = std::get<2>(GetParam());
if (table_options.partition_filters) { if (table_options.partition_filters) {
table_options.index_type = table_options.index_type =
@ -1444,14 +1442,11 @@ class DBFilterConstructionCorruptionTestWithParam
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
DBFilterConstructionCorruptionTestWithParam, DBFilterConstructionCorruptionTestWithParam,
DBFilterConstructionCorruptionTestWithParam, DBFilterConstructionCorruptionTestWithParam,
::testing::Values( ::testing::Values(std::make_tuple(false, kFastLocalBloom, false),
std::make_tuple(false, BloomFilterPolicy::Mode::kFastLocalBloom, false), std::make_tuple(true, kFastLocalBloom, false),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, false), std::make_tuple(true, kFastLocalBloom, true),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, true), std::make_tuple(true, kStandard128Ribbon, false),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, std::make_tuple(true, kStandard128Ribbon, true)));
false),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon,
true)));
TEST_P(DBFilterConstructionCorruptionTestWithParam, DetectCorruption) { TEST_P(DBFilterConstructionCorruptionTestWithParam, DetectCorruption) {
Options options = CurrentOptions(); Options options = CurrentOptions();
@ -2139,16 +2134,12 @@ INSTANTIATE_TEST_CASE_P(DBBloomFilterTestVaryPrefixAndFormatVer,
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
namespace { namespace {
namespace BFP2 { static const std::string kPlainTable = "test_PlainTableBloom";
// Extends BFP::Mode with option to use Plain table
using PseudoMode = int;
static constexpr PseudoMode kPlainTable = -1;
} // namespace BFP2
} // namespace } // namespace
class BloomStatsTestWithParam class BloomStatsTestWithParam
: public DBBloomFilterTest, : public DBBloomFilterTest,
public testing::WithParamInterface<std::tuple<BFP2::PseudoMode, bool>> { public testing::WithParamInterface<std::tuple<std::string, bool>> {
public: public:
BloomStatsTestWithParam() { BloomStatsTestWithParam() {
bfp_impl_ = std::get<0>(GetParam()); bfp_impl_ = std::get<0>(GetParam());
@ -2159,7 +2150,7 @@ class BloomStatsTestWithParam
ROCKSDB_NAMESPACE::NewFixedPrefixTransform(4)); ROCKSDB_NAMESPACE::NewFixedPrefixTransform(4));
options_.memtable_prefix_bloom_size_ratio = options_.memtable_prefix_bloom_size_ratio =
8.0 * 1024.0 / static_cast<double>(options_.write_buffer_size); 8.0 * 1024.0 / static_cast<double>(options_.write_buffer_size);
if (bfp_impl_ == BFP2::kPlainTable) { if (bfp_impl_ == kPlainTable) {
assert(!partition_filters_); // not supported in plain table assert(!partition_filters_); // not supported in plain table
PlainTableOptions table_options; PlainTableOptions table_options;
options_.table_factory.reset(NewPlainTableFactory(table_options)); options_.table_factory.reset(NewPlainTableFactory(table_options));
@ -2167,13 +2158,12 @@ class BloomStatsTestWithParam
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.hash_index_allow_collision = false; table_options.hash_index_allow_collision = false;
if (partition_filters_) { if (partition_filters_) {
assert(bfp_impl_ != BFP::kDeprecatedBlock); assert(bfp_impl_ != kDeprecatedBlock);
table_options.partition_filters = partition_filters_; table_options.partition_filters = partition_filters_;
table_options.index_type = table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
} }
table_options.filter_policy.reset( table_options.filter_policy = Create(10, bfp_impl_);
new BFP(10, static_cast<BFP::Mode>(bfp_impl_)));
options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
} }
options_.env = env_; options_.env = env_;
@ -2191,7 +2181,7 @@ class BloomStatsTestWithParam
static void SetUpTestCase() {} static void SetUpTestCase() {}
static void TearDownTestCase() {} static void TearDownTestCase() {}
BFP2::PseudoMode bfp_impl_; std::string bfp_impl_;
bool partition_filters_; bool partition_filters_;
Options options_; Options options_;
}; };
@ -2295,7 +2285,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
ASSERT_EQ(value3, iter->value().ToString()); ASSERT_EQ(value3, iter->value().ToString());
// The seek doesn't check block-based bloom filter because last index key // The seek doesn't check block-based bloom filter because last index key
// starts with the same prefix we're seeking to. // starts with the same prefix we're seeking to.
uint64_t expected_hits = bfp_impl_ == BFP::kDeprecatedBlock ? 1 : 2; uint64_t expected_hits = bfp_impl_ == kDeprecatedBlock ? 1 : 2;
ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count);
iter->Seek(key2); iter->Seek(key2);
@ -2307,12 +2297,12 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
BloomStatsTestWithParam, BloomStatsTestWithParam, BloomStatsTestWithParam, BloomStatsTestWithParam,
::testing::Values(std::make_tuple(BFP::kDeprecatedBlock, false), ::testing::Values(std::make_tuple(kDeprecatedBlock, false),
std::make_tuple(BFP::kLegacyBloom, false), std::make_tuple(kLegacyBloom, false),
std::make_tuple(BFP::kLegacyBloom, true), std::make_tuple(kLegacyBloom, true),
std::make_tuple(BFP::kFastLocalBloom, false), std::make_tuple(kFastLocalBloom, false),
std::make_tuple(BFP::kFastLocalBloom, true), std::make_tuple(kFastLocalBloom, true),
std::make_tuple(BFP2::kPlainTable, false))); std::make_tuple(kPlainTable, false)));
namespace { namespace {
void PrefixScanInit(DBBloomFilterTest* dbtest) { void PrefixScanInit(DBBloomFilterTest* dbtest) {
@ -2620,8 +2610,8 @@ int CountIter(std::unique_ptr<Iterator>& iter, const Slice& key) {
// into the same string, or 2) the transformed seek key is of the same length // into the same string, or 2) the transformed seek key is of the same length
// as the upper bound and two keys are adjacent according to the comparator. // as the upper bound and two keys are adjacent according to the comparator.
TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
for (auto bfp_impl : BFP::kAllFixedImpls) { for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
int using_full_builder = bfp_impl != BFP::kDeprecatedBlock; int using_full_builder = bfp_impl != kDeprecatedBlock;
Options options; Options options;
options.create_if_missing = true; options.create_if_missing = true;
options.env = CurrentOptions().env; options.env = CurrentOptions().env;
@ -2631,7 +2621,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
// Enable prefix bloom for SST files // Enable prefix bloom for SST files
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true; table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset(new BFP(10, bfp_impl)); table_options.filter_policy = Create(10, bfp_impl);
table_options.index_shortening = BlockBasedTableOptions:: table_options.index_shortening = BlockBasedTableOptions::
IndexShorteningMode::kShortenSeparatorsAndSuccessor; IndexShorteningMode::kShortenSeparatorsAndSuccessor;
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
@ -2752,8 +2742,8 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
// Create multiple SST files each with a different prefix_extractor config, // Create multiple SST files each with a different prefix_extractor config,
// verify iterators can read all SST files using the latest config. // verify iterators can read all SST files using the latest config.
TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
for (auto bfp_impl : BFP::kAllFixedImpls) { for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
int using_full_builder = bfp_impl != BFP::kDeprecatedBlock; int using_full_builder = bfp_impl != kDeprecatedBlock;
Options options; Options options;
options.env = CurrentOptions().env; options.env = CurrentOptions().env;
options.create_if_missing = true; options.create_if_missing = true;
@ -2762,7 +2752,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
options.statistics = CreateDBStatistics(); options.statistics = CreateDBStatistics();
// Enable prefix bloom for SST files // Enable prefix bloom for SST files
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.filter_policy.reset(new BFP(10, bfp_impl)); table_options.filter_policy = Create(10, bfp_impl);
table_options.cache_index_and_filter_blocks = true; table_options.cache_index_and_filter_blocks = true;
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options); DestroyAndReopen(options);
@ -2888,7 +2878,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
// as expected // as expected
TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) { TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
int iteration = 0; int iteration = 0;
for (auto bfp_impl : BFP::kAllFixedImpls) { for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
Options options = CurrentOptions(); Options options = CurrentOptions();
options.create_if_missing = true; options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.prefix_extractor.reset(NewFixedPrefixTransform(1));
@ -2897,7 +2887,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
// Enable prefix bloom for SST files // Enable prefix bloom for SST files
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true; table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset(new BFP(10, bfp_impl)); table_options.filter_policy = Create(10, bfp_impl);
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
CreateAndReopenWithCF({"pikachu" + std::to_string(iteration)}, options); CreateAndReopenWithCF({"pikachu" + std::to_string(iteration)}, options);
ReadOptions read_options; ReadOptions read_options;
@ -2944,7 +2934,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
// Verify it's possible to change prefix_extractor at runtime and iterators // Verify it's possible to change prefix_extractor at runtime and iterators
// behaves as expected // behaves as expected
TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) {
for (auto bfp_impl : BFP::kAllFixedImpls) { for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
Options options; Options options;
options.env = CurrentOptions().env; options.env = CurrentOptions().env;
options.create_if_missing = true; options.create_if_missing = true;
@ -2954,7 +2944,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) {
// Enable prefix bloom for SST files // Enable prefix bloom for SST files
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true; table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset(new BFP(10, bfp_impl)); table_options.filter_policy = Create(10, bfp_impl);
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options); DestroyAndReopen(options);

@ -47,30 +47,28 @@ struct KeyMaker {
}; };
// benchmark arguments: // benchmark arguments:
// 0. filter mode // 0. filter impl (like filter_bench -impl)
// 1. filter config bits_per_key // 1. filter config bits_per_key
// 2. average data key length // 2. average data key length
// 3. data entry number // 3. data entry number
static void CustomArguments(benchmark::internal::Benchmark *b) { static void CustomArguments(benchmark::internal::Benchmark *b) {
for (int filter_mode : for (int filter_impl : {0, 2, 3}) {
{BloomFilterPolicy::kLegacyBloom, BloomFilterPolicy::kFastLocalBloom,
BloomFilterPolicy::kStandard128Ribbon}) {
for (int bits_per_key : {10, 20}) { for (int bits_per_key : {10, 20}) {
for (int key_len_avg : {10, 100}) { for (int key_len_avg : {10, 100}) {
for (int64_t entry_num : {1 << 10, 1 << 20}) { for (int64_t entry_num : {1 << 10, 1 << 20}) {
b->Args({filter_mode, bits_per_key, key_len_avg, entry_num}); b->Args({filter_impl, bits_per_key, key_len_avg, entry_num});
} }
} }
} }
} }
b->ArgNames({"filter_mode", "bits_per_key", "key_len_avg", "entry_num"}); b->ArgNames({"filter_impl", "bits_per_key", "key_len_avg", "entry_num"});
} }
static void FilterBuild(benchmark::State &state) { static void FilterBuild(benchmark::State &state) {
// setup data // setup data
auto filter = new BloomFilterPolicy( auto filter = BloomLikeFilterPolicy::Create(
static_cast<double>(state.range(1)), BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
static_cast<BloomFilterPolicy::Mode>(state.range(0))); static_cast<double>(state.range(1)));
auto tester = new mock::MockBlockBasedTableTester(filter); auto tester = new mock::MockBlockBasedTableTester(filter);
KeyMaker km(state.range(2)); KeyMaker km(state.range(2));
std::unique_ptr<const char[]> owner; std::unique_ptr<const char[]> owner;
@ -91,9 +89,9 @@ BENCHMARK(FilterBuild)->Apply(CustomArguments);
static void FilterQueryPositive(benchmark::State &state) { static void FilterQueryPositive(benchmark::State &state) {
// setup data // setup data
auto filter = new BloomFilterPolicy( auto filter = BloomLikeFilterPolicy::Create(
static_cast<double>(state.range(1)), BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
static_cast<BloomFilterPolicy::Mode>(state.range(0))); static_cast<double>(state.range(1)));
auto tester = new mock::MockBlockBasedTableTester(filter); auto tester = new mock::MockBlockBasedTableTester(filter);
KeyMaker km(state.range(2)); KeyMaker km(state.range(2));
std::unique_ptr<const char[]> owner; std::unique_ptr<const char[]> owner;
@ -119,9 +117,9 @@ BENCHMARK(FilterQueryPositive)->Apply(CustomArguments);
static void FilterQueryNegative(benchmark::State &state) { static void FilterQueryNegative(benchmark::State &state) {
// setup data // setup data
auto filter = new BloomFilterPolicy( auto filter = BloomLikeFilterPolicy::Create(
static_cast<double>(state.range(1)), BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
static_cast<BloomFilterPolicy::Mode>(state.range(0))); static_cast<double>(state.range(1)));
auto tester = new mock::MockBlockBasedTableTester(filter); auto tester = new mock::MockBlockBasedTableTester(filter);
KeyMaker km(state.range(2)); KeyMaker km(state.range(2));
std::unique_ptr<const char[]> owner; std::unique_ptr<const char[]> owner;

@ -882,7 +882,6 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) {
dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get()); dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 4567); EXPECT_EQ(bfp->GetMillibitsPerKey(), 4567);
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 5); EXPECT_EQ(bfp->GetWholeBitsPerKey(), 5);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom);
// Verify that only the lower 32bits are stored in // Verify that only the lower 32bits are stored in
// new_opt.read_amp_bytes_per_bit. // new_opt.read_amp_bytes_per_bit.
EXPECT_EQ(1U, new_opt.read_amp_bytes_per_bit); EXPECT_EQ(1U, new_opt.read_amp_bytes_per_bit);
@ -936,7 +935,6 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) {
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get()); bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000);
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom);
// use_block_based_builder=true now ignored in public API (same as false) // use_block_based_builder=true now ignored in public API (same as false)
ASSERT_OK(GetBlockBasedTableOptionsFromString( ASSERT_OK(GetBlockBasedTableOptionsFromString(
@ -944,82 +942,67 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) {
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get()); bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000);
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom);
// Back door way of enabling deprecated block-based Bloom // Back door way of enabling deprecated block-based Bloom
ASSERT_OK(GetBlockBasedTableOptionsFromString( ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, config_options, table_opt,
"filter_policy=rocksdb.internal.DeprecatedBlockBasedBloomFilter:4", "filter_policy=rocksdb.internal.DeprecatedBlockBasedBloomFilter:4",
&new_opt)); &new_opt));
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get()); auto builtin =
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); // Only whole bits used dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kDeprecatedBlock); EXPECT_EQ(builtin->GetId(),
"rocksdb.internal.DeprecatedBlockBasedBloomFilter:4");
// Test configuring using other internal names // Test configuring using other internal names
ASSERT_OK(GetBlockBasedTableOptionsFromString( ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, config_options, table_opt,
"filter_policy=rocksdb.internal.LegacyBloomFilter:3", &new_opt)); "filter_policy=rocksdb.internal.LegacyBloomFilter:3", &new_opt));
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get()); builtin =
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 3); // Only whole bits used dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kLegacyBloom); EXPECT_EQ(builtin->GetId(), "rocksdb.internal.LegacyBloomFilter:3");
ASSERT_OK(GetBlockBasedTableOptionsFromString( ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, config_options, table_opt,
"filter_policy=rocksdb.internal.FastLocalBloomFilter:1.234", &new_opt)); "filter_policy=rocksdb.internal.FastLocalBloomFilter:1.234", &new_opt));
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get()); builtin =
EXPECT_EQ(bfp->GetMillibitsPerKey(), 1234); dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom); EXPECT_EQ(builtin->GetId(), "rocksdb.internal.FastLocalBloomFilter:1.234");
ASSERT_OK(GetBlockBasedTableOptionsFromString( ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, config_options, table_opt,
"filter_policy=rocksdb.internal.Standard128RibbonFilter:1.234", "filter_policy=rocksdb.internal.Standard128RibbonFilter:1.234",
&new_opt)); &new_opt));
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get()); builtin =
EXPECT_EQ(bfp->GetMillibitsPerKey(), 1234); dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); EXPECT_EQ(builtin->GetId(), "rocksdb.internal.Standard128RibbonFilter:1.234");
// Ribbon filter policy (no Bloom hybrid) // Ribbon filter policy (no Bloom hybrid)
ASSERT_OK(GetBlockBasedTableOptionsFromString( ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, "filter_policy=ribbonfilter:5.678:-1;", config_options, table_opt, "filter_policy=ribbonfilter:5.678:-1;",
&new_opt)); &new_opt));
ASSERT_TRUE(new_opt.filter_policy != nullptr); ASSERT_TRUE(new_opt.filter_policy != nullptr);
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get()); auto rfp =
EXPECT_EQ(bfp->GetMillibitsPerKey(), 5678); dynamic_cast<const RibbonFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); EXPECT_EQ(rfp->GetMillibitsPerKey(), 5678);
EXPECT_EQ(rfp->GetBloomBeforeLevel(), -1);
// Ribbon filter policy (default Bloom hybrid) // Ribbon filter policy (default Bloom hybrid)
ASSERT_OK(GetBlockBasedTableOptionsFromString( ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, "filter_policy=ribbonfilter:6.789;", config_options, table_opt, "filter_policy=ribbonfilter:6.789;",
&new_opt)); &new_opt));
ASSERT_TRUE(new_opt.filter_policy != nullptr); ASSERT_TRUE(new_opt.filter_policy != nullptr);
auto ltfp = dynamic_cast<const LevelThresholdFilterPolicy*>( rfp = dynamic_cast<const RibbonFilterPolicy*>(new_opt.filter_policy.get());
new_opt.filter_policy.get()); EXPECT_EQ(rfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(ltfp->TEST_GetStartingLevelForB(), 0); EXPECT_EQ(rfp->GetBloomBeforeLevel(), 0);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyA());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyB());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon);
// Ribbon filter policy (custom Bloom hybrid) // Ribbon filter policy (custom Bloom hybrid)
ASSERT_OK(GetBlockBasedTableOptionsFromString( ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, "filter_policy=ribbonfilter:6.789:5;", config_options, table_opt, "filter_policy=ribbonfilter:6.789:5;",
&new_opt)); &new_opt));
ASSERT_TRUE(new_opt.filter_policy != nullptr); ASSERT_TRUE(new_opt.filter_policy != nullptr);
ltfp = dynamic_cast<const LevelThresholdFilterPolicy*>( rfp = dynamic_cast<const RibbonFilterPolicy*>(new_opt.filter_policy.get());
new_opt.filter_policy.get()); EXPECT_EQ(rfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(ltfp->TEST_GetStartingLevelForB(), 5); EXPECT_EQ(rfp->GetBloomBeforeLevel(), 5);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyA());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyB());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon);
// Check block cache options are overwritten when specified // Check block cache options are overwritten when specified
// in new format as a struct. // in new format as a struct.
@ -2877,7 +2860,6 @@ TEST_F(OptionsOldApiTest, GetBlockBasedTableOptionsFromString) {
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get()); bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000);
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom);
// Check block cache options are overwritten when specified // Check block cache options are overwritten when specified
// in new format as a struct. // in new format as a struct.

@ -15,6 +15,7 @@
#include "monitoring/perf_context_imp.h" #include "monitoring/perf_context_imp.h"
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_based_table_reader.h"
#include "util/cast_util.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/string_util.h" #include "util/string_util.h"
@ -157,9 +158,9 @@ void BlockBasedFilterBlockBuilder::GenerateFilter() {
// Generate filter for current set of keys and append to result_. // Generate filter for current set of keys and append to result_.
filter_offsets_.push_back(static_cast<uint32_t>(result_.size())); filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
BloomFilterPolicy::CreateFilter(tmp_entries_.data(), DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(
static_cast<int>(num_entries), bits_per_key_, tmp_entries_.data(), static_cast<int>(num_entries), bits_per_key_,
&result_); &result_);
tmp_entries_.clear(); tmp_entries_.clear();
entries_.clear(); entries_.clear();
@ -283,7 +284,8 @@ bool BlockBasedFilterBlockReader::MayMatch(
assert(table()); assert(table());
assert(table()->get_rep()); assert(table()->get_rep());
const bool may_match = BloomFilterPolicy::KeyMayMatch(entry, filter); const bool may_match =
DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(entry, filter);
if (may_match) { if (may_match) {
PERF_COUNTER_ADD(bloom_sst_hit_count, 1); PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
return true; return true;

@ -82,7 +82,8 @@ FilterBlockBuilder* CreateFilterBlockBuilder(
} else { } else {
// Check for backdoor deprecated block-based bloom config // Check for backdoor deprecated block-based bloom config
size_t starting_est = filter_bits_builder->EstimateEntriesAdded(); size_t starting_est = filter_bits_builder->EstimateEntriesAdded();
constexpr auto kSecretStart = BloomFilterPolicy::kSecretBitsPerKeyStart; constexpr auto kSecretStart =
DeprecatedBlockBasedBloomFilterPolicy::kSecretBitsPerKeyStart;
if (starting_est >= kSecretStart && starting_est < kSecretStart + 100) { if (starting_est >= kSecretStart && starting_est < kSecretStart + 100) {
int bits_per_key = static_cast<int>(starting_est - kSecretStart); int bits_per_key = static_cast<int>(starting_est - kSecretStart);
delete filter_bits_builder; delete filter_bits_builder;

@ -10,6 +10,7 @@
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include <array> #include <array>
#include <climits>
#include <cstring> #include <cstring>
#include <deque> #include <deque>
#include <limits> #include <limits>
@ -18,6 +19,8 @@
#include "cache/cache_entry_roles.h" #include "cache/cache_entry_roles.h"
#include "cache/cache_reservation_manager.h" #include "cache/cache_reservation_manager.h"
#include "logging/logging.h" #include "logging/logging.h"
#include "port/lang.h"
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_filter_block.h"
#include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_based_table_reader.h"
@ -29,6 +32,7 @@
#include "util/hash.h" #include "util/hash.h"
#include "util/ribbon_config.h" #include "util/ribbon_config.h"
#include "util/ribbon_impl.h" #include "util/ribbon_impl.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -1307,21 +1311,8 @@ Status XXPH3FilterBitsBuilder::MaybePostVerify(const Slice& filter_content) {
} }
} // namespace } // namespace
const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllFixedImpls = { BloomLikeFilterPolicy::BloomLikeFilterPolicy(double bits_per_key)
kLegacyBloom, : warned_(false), aggregate_rounding_balance_(0) {
kDeprecatedBlock,
kFastLocalBloom,
kStandard128Ribbon,
};
const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllUserModes = {
kDeprecatedBlock,
kAutoBloom,
kStandard128Ribbon,
};
BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
: mode_(mode), warned_(false), aggregate_rounding_balance_(0) {
// Sanitize bits_per_key // Sanitize bits_per_key
if (bits_per_key < 0.5) { if (bits_per_key < 0.5) {
// Round down to no filter // Round down to no filter
@ -1353,14 +1344,48 @@ BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000; whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000;
} }
BloomFilterPolicy::~BloomFilterPolicy() {} BloomLikeFilterPolicy::~BloomLikeFilterPolicy() {}
const char* BuiltinFilterPolicy::Name() const { const char* BuiltinFilterPolicy::Name() const {
return "rocksdb.BuiltinBloomFilter"; return "rocksdb.BuiltinBloomFilter";
} }
void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, int bits_per_key, const char* DeprecatedBlockBasedBloomFilterPolicy::kName() {
std::string* dst) { return "rocksdb.internal.DeprecatedBlockBasedBloomFilter";
}
// Identifier for this policy: kName() followed by the bits-per-key suffix
// produced by GetBitsPerKeySuffix().
std::string DeprecatedBlockBasedBloomFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  return id;
}
// Constructs the deprecated block-based Bloom policy. The requested
// bits_per_key is forwarded unchanged to the BloomLikeFilterPolicy base
// constructor; this class adds no state of its own here.
DeprecatedBlockBasedBloomFilterPolicy::DeprecatedBlockBasedBloomFilterPolicy(
double bits_per_key)
: BloomLikeFilterPolicy(bits_per_key) {}
// Returns a placeholder builder that only carries the whole-bits-per-key
// setting, or nullptr when that setting is zero. The FilterBuildingContext
// argument is not consulted. The caller owns the returned object.
FilterBitsBuilder* DeprecatedBlockBasedBloomFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext&) const {
  const auto whole_bits = GetWholeBitsPerKey();
  if (whole_bits == 0) {
    // "No filter" special case
    return nullptr;
  }

  // Internal contract: the object handed back is a fake builder. It never
  // accumulates keys; it just reports kSecretBitsPerKeyStart + bits_per_key
  // from EstimateEntriesAdded() so the bits-per-key value can be recovered
  // from that special number.
  struct SecretBitsPerKeyBuilder : public FilterBitsBuilder {
    explicit SecretBitsPerKeyBuilder(int bits_per_key)
        : encoded_estimate(kSecretBitsPerKeyStart + bits_per_key) {}

    size_t encoded_estimate;

    size_t EstimateEntriesAdded() override { return encoded_estimate; }
    void AddKey(const Slice&) override {}
    using FilterBitsBuilder::Finish;  // FIXME
    Slice Finish(std::unique_ptr<const char[]>*) override { return Slice(); }
    size_t ApproximateNumEntries(size_t) override { return 0; }
  };

  return new SecretBitsPerKeyBuilder(whole_bits);
}
void DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(const Slice* keys,
int n,
int bits_per_key,
std::string* dst) {
// Compute bloom filter size (in both bits and bytes) // Compute bloom filter size (in both bits and bytes)
uint32_t bits = static_cast<uint32_t>(n * bits_per_key); uint32_t bits = static_cast<uint32_t>(n * bits_per_key);
@ -1383,8 +1408,8 @@ void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, int bits_per_key,
} }
} }
bool BloomFilterPolicy::KeyMayMatch(const Slice& key, bool DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(
const Slice& bloom_filter) { const Slice& key, const Slice& bloom_filter) {
const size_t len = bloom_filter.size(); const size_t len = bloom_filter.size();
if (len < 2 || len > 0xffffffffU) { if (len < 2 || len > 0xffffffffU) {
return false; return false;
@ -1406,13 +1431,31 @@ bool BloomFilterPolicy::KeyMayMatch(const Slice& key,
array); array);
} }
// Constructs the user-facing Bloom policy. The requested bits_per_key is
// forwarded unchanged to the BloomLikeFilterPolicy base constructor.
BloomFilterPolicy::BloomFilterPolicy(double bits_per_key)
: BloomLikeFilterPolicy(bits_per_key) {}
FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext( FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext(
const FilterBuildingContext& context) const { const FilterBuildingContext& context) const {
if (millibits_per_key_ == 0) { if (GetMillibitsPerKey() == 0) {
// "No filter" special case // "No filter" special case
return nullptr; return nullptr;
} else if (context.table_options.format_version < 5) {
return GetLegacyBloomBuilderWithContext(context);
} else {
return GetFastLocalBloomBuilderWithContext(context);
} }
Mode cur = mode_; }
// Base name of this policy; GetId() uses it as the prefix of the identifier.
const char* BloomFilterPolicy::kName() {
  return "bloomfilter";
}
// Identifier for this policy: kName(), then the bits-per-key suffix, then a
// fixed ":false" tail.
std::string BloomFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  // Including ":false" for better forward-compatibility with 6.29 and earlier
  // which required a boolean `use_block_based_builder` parameter
  id += ":false";
  return id;
}
FilterBitsBuilder* BloomLikeFilterPolicy::GetFastLocalBloomBuilderWithContext(
const FilterBuildingContext& context) const {
bool offm = context.table_options.optimize_filters_for_memory; bool offm = context.table_options.optimize_filters_for_memory;
bool reserve_filter_construction_mem = bool reserve_filter_construction_mem =
(context.table_options.reserve_table_builder_memory && (context.table_options.reserve_table_builder_memory &&
@ -1422,80 +1465,73 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext(
cache_res_mgr = std::make_shared<CacheReservationManager>( cache_res_mgr = std::make_shared<CacheReservationManager>(
context.table_options.block_cache); context.table_options.block_cache);
} }
// Unusual code construction so that we can have just
// one exhaustive switch without (risky) recursion
for (int i = 0; i < 2; ++i) {
switch (cur) {
case kAutoBloom:
if (context.table_options.format_version < 5) {
cur = kLegacyBloom;
} else {
cur = kFastLocalBloom;
}
break;
case kDeprecatedBlock: {
if (context.info_log && !warned_.load(std::memory_order_relaxed)) {
warned_ = true;
ROCKS_LOG_WARN(context.info_log,
"Using deprecated block-based Bloom filter is "
"inefficient (%d bits per key).",
whole_bits_per_key_);
}
// Internal contract: returns a new fake builder that encodes bits per
// key into a special value from EstimateEntriesAdded()
struct B : public FilterBitsBuilder {
explicit B(int bits_per_key)
: est(kSecretBitsPerKeyStart + bits_per_key) {}
size_t est;
size_t EstimateEntriesAdded() override { return est; }
void AddKey(const Slice&) override {}
using FilterBitsBuilder::Finish; // FIXME
Slice Finish(std::unique_ptr<const char[]>*) override {
return Slice();
}
size_t ApproximateNumEntries(size_t) override { return 0; }
};
return new B(GetWholeBitsPerKey());
}
case kFastLocalBloom:
return new FastLocalBloomBitsBuilder( return new FastLocalBloomBitsBuilder(
millibits_per_key_, offm ? &aggregate_rounding_balance_ : nullptr, millibits_per_key_, offm ? &aggregate_rounding_balance_ : nullptr,
cache_res_mgr, cache_res_mgr,
context.table_options.detect_filter_construct_corruption); context.table_options.detect_filter_construct_corruption);
case kLegacyBloom: }
if (whole_bits_per_key_ >= 14 && context.info_log &&
!warned_.load(std::memory_order_relaxed)) { FilterBitsBuilder* BloomLikeFilterPolicy::GetLegacyBloomBuilderWithContext(
warned_ = true; const FilterBuildingContext& context) const {
const char* adjective; if (whole_bits_per_key_ >= 14 && context.info_log &&
if (whole_bits_per_key_ >= 20) { !warned_.load(std::memory_order_relaxed)) {
adjective = "Dramatic"; warned_ = true;
} else { const char* adjective;
adjective = "Significant"; if (whole_bits_per_key_ >= 20) {
} adjective = "Dramatic";
// For more details, see } else {
// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter adjective = "Significant";
ROCKS_LOG_WARN( }
context.info_log, // For more details, see
"Using legacy Bloom filter with high (%d) bits/key. " // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
"%s filter space and/or accuracy improvement is available " ROCKS_LOG_WARN(context.info_log,
"with format_version>=5.", "Using legacy Bloom filter with high (%d) bits/key. "
whole_bits_per_key_, adjective); "%s filter space and/or accuracy improvement is available "
} "with format_version>=5.",
return new LegacyBloomBitsBuilder(whole_bits_per_key_, whole_bits_per_key_, adjective);
context.info_log); }
case kStandard128Ribbon: return new LegacyBloomBitsBuilder(whole_bits_per_key_, context.info_log);
return new Standard128RibbonBitsBuilder( }
desired_one_in_fp_rate_, millibits_per_key_,
offm ? &aggregate_rounding_balance_ : nullptr, cache_res_mgr, FilterBitsBuilder*
context.table_options.detect_filter_construct_corruption, BloomLikeFilterPolicy::GetStandard128RibbonBuilderWithContext(
context.info_log); const FilterBuildingContext& context) const {
// FIXME: code duplication with GetFastLocalBloomBuilderWithContext
bool offm = context.table_options.optimize_filters_for_memory;
bool reserve_filter_construction_mem =
(context.table_options.reserve_table_builder_memory &&
context.table_options.block_cache);
std::shared_ptr<CacheReservationManager> cache_res_mgr;
if (reserve_filter_construction_mem) {
cache_res_mgr = std::make_shared<CacheReservationManager>(
context.table_options.block_cache);
}
return new Standard128RibbonBitsBuilder(
desired_one_in_fp_rate_, millibits_per_key_,
offm ? &aggregate_rounding_balance_ : nullptr, cache_res_mgr,
context.table_options.detect_filter_construct_corruption,
context.info_log);
}
std::string BloomLikeFilterPolicy::GetBitsPerKeySuffix() const {
std::string rv = ":" + ROCKSDB_NAMESPACE::ToString(millibits_per_key_ / 1000);
int frac = millibits_per_key_ % 1000;
if (frac > 0) {
rv.push_back('.');
rv.push_back(static_cast<char>('0' + (frac / 100)));
frac %= 100;
if (frac > 0) {
rv.push_back(static_cast<char>('0' + (frac / 10)));
frac %= 10;
if (frac > 0) {
rv.push_back(static_cast<char>('0' + frac));
}
} }
} }
assert(false); return rv;
return nullptr; // something legal
} }
FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext( FilterBitsBuilder* BuiltinFilterPolicy::GetBuilderFromContext(
const FilterBuildingContext& context) { const FilterBuildingContext& context) {
if (context.table_options.filter_policy) { if (context.table_options.filter_policy) {
return context.table_options.filter_policy->GetBuilderWithContext(context); return context.table_options.filter_policy->GetBuilderWithContext(context);
@ -1504,6 +1540,62 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext(
} }
} }
// For testing only, but always constructable with internal names
namespace test {

// ---- LegacyBloomFilterPolicy ----

// Internal name of the policy that always uses the legacy Bloom builder.
const char* LegacyBloomFilterPolicy::kName() {
  return "rocksdb.internal.LegacyBloomFilter";
}

// Identifier: kName() followed by the bits-per-key suffix.
std::string LegacyBloomFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  return id;
}

// Returns a legacy Bloom builder, or nullptr when the policy is configured
// with zero millibits per key.
FilterBitsBuilder* LegacyBloomFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext& context) const {
  if (GetMillibitsPerKey() != 0) {
    return GetLegacyBloomBuilderWithContext(context);
  }
  // "No filter" special case
  return nullptr;
}

// ---- FastLocalBloomFilterPolicy ----

// Internal name of the policy that always uses the fast local Bloom builder.
const char* FastLocalBloomFilterPolicy::kName() {
  return "rocksdb.internal.FastLocalBloomFilter";
}

// Identifier: kName() followed by the bits-per-key suffix.
std::string FastLocalBloomFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  return id;
}

// Returns a fast local Bloom builder, or nullptr when the policy is
// configured with zero millibits per key.
FilterBitsBuilder* FastLocalBloomFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext& context) const {
  if (GetMillibitsPerKey() != 0) {
    return GetFastLocalBloomBuilderWithContext(context);
  }
  // "No filter" special case
  return nullptr;
}

// ---- Standard128RibbonFilterPolicy ----

// Internal name of the policy that always uses the Standard128 Ribbon builder.
const char* Standard128RibbonFilterPolicy::kName() {
  return "rocksdb.internal.Standard128RibbonFilter";
}

// Identifier: kName() followed by the bits-per-key suffix.
std::string Standard128RibbonFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  return id;
}

// Returns a Standard128 Ribbon builder, or nullptr when the policy is
// configured with zero millibits per key.
FilterBitsBuilder* Standard128RibbonFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext& context) const {
  if (GetMillibitsPerKey() != 0) {
    return GetStandard128RibbonBuilderWithContext(context);
  }
  // "No filter" special case
  return nullptr;
}

}  // namespace test
BuiltinFilterBitsReader* BuiltinFilterPolicy::GetBuiltinFilterBitsReader( BuiltinFilterBitsReader* BuiltinFilterPolicy::GetBuiltinFilterBitsReader(
const Slice& contents) { const Slice& contents) {
uint32_t len_with_meta = static_cast<uint32_t>(contents.size()); uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
@ -1679,70 +1771,58 @@ const FilterPolicy* NewBloomFilterPolicy(double bits_per_key,
bool /*use_block_based_builder*/) { bool /*use_block_based_builder*/) {
// NOTE: use_block_based_builder now ignored so block-based filter is no // NOTE: use_block_based_builder now ignored so block-based filter is no
// longer accessible in public API. // longer accessible in public API.
BloomFilterPolicy::Mode m = BloomFilterPolicy::kAutoBloom; return new BloomFilterPolicy(bits_per_key);
assert(std::find(BloomFilterPolicy::kAllUserModes.begin(),
BloomFilterPolicy::kAllUserModes.end(),
m) != BloomFilterPolicy::kAllUserModes.end());
return new BloomFilterPolicy(bits_per_key, m);
} }
// Chooses between two filter policies based on LSM level, but RibbonFilterPolicy::RibbonFilterPolicy(double bloom_equivalent_bits_per_key,
// only for Level and Universal compaction styles. Flush is treated int bloom_before_level)
// as level -1. Policy b is considered fallback / primary policy. : BloomLikeFilterPolicy(bloom_equivalent_bits_per_key),
LevelThresholdFilterPolicy::LevelThresholdFilterPolicy( bloom_before_level_(bloom_before_level) {}
std::unique_ptr<const FilterPolicy>&& a,
std::unique_ptr<const FilterPolicy>&& b, int starting_level_for_b)
: policy_a_(std::move(a)),
policy_b_(std::move(b)),
starting_level_for_b_(starting_level_for_b) {
// Don't use this wrapper class if you were going to set to -1
assert(starting_level_for_b_ >= 0);
}
FilterBitsBuilder* LevelThresholdFilterPolicy::GetBuilderWithContext( FilterBitsBuilder* RibbonFilterPolicy::GetBuilderWithContext(
const FilterBuildingContext& context) const { const FilterBuildingContext& context) const {
// Treat unknown same as bottommost
int levelish = INT_MAX;
switch (context.compaction_style) { switch (context.compaction_style) {
case kCompactionStyleLevel: case kCompactionStyleLevel:
case kCompactionStyleUniversal: { case kCompactionStyleUniversal: {
int levelish;
if (context.reason == TableFileCreationReason::kFlush) { if (context.reason == TableFileCreationReason::kFlush) {
// Treat flush as level -1 // Treat flush as level -1
assert(context.level_at_creation == 0); assert(context.level_at_creation == 0);
levelish = -1; levelish = -1;
} else if (context.level_at_creation == -1) { } else if (context.level_at_creation == -1) {
// Unknown level // Unknown level
// Policy b considered fallback / primary assert(levelish == INT_MAX);
return policy_b_->GetBuilderWithContext(context);
} else { } else {
levelish = context.level_at_creation; levelish = context.level_at_creation;
} }
if (levelish >= starting_level_for_b_) { break;
return policy_b_->GetBuilderWithContext(context);
} else {
return policy_a_->GetBuilderWithContext(context);
}
} }
case kCompactionStyleFIFO: case kCompactionStyleFIFO:
case kCompactionStyleNone: case kCompactionStyleNone:
// Treat as bottommost
assert(levelish == INT_MAX);
break; break;
} }
// Policy b considered fallback / primary if (levelish < bloom_before_level_) {
return policy_b_->GetBuilderWithContext(context); return GetFastLocalBloomBuilderWithContext(context);
} else {
return GetStandard128RibbonBuilderWithContext(context);
}
}
// Configuration name of the Ribbon policy. It is the name compared against in
// BloomLikeFilterPolicy::Create and FilterPolicy::CreateFromString, and the
// leading part of the string produced by RibbonFilterPolicy::GetId().
const char* RibbonFilterPolicy::kName() { return "ribbonfilter"; }
std::string RibbonFilterPolicy::GetId() const {
return kName() + GetBitsPerKeySuffix() + ":" +
ROCKSDB_NAMESPACE::ToString(bloom_before_level_);
} }
const FilterPolicy* NewRibbonFilterPolicy(double bloom_equivalent_bits_per_key, const FilterPolicy* NewRibbonFilterPolicy(double bloom_equivalent_bits_per_key,
int bloom_before_level) { int bloom_before_level) {
std::unique_ptr<const FilterPolicy> ribbon_only{new BloomFilterPolicy( return new RibbonFilterPolicy(bloom_equivalent_bits_per_key,
bloom_equivalent_bits_per_key, BloomFilterPolicy::kStandard128Ribbon)}; bloom_before_level);
if (bloom_before_level > -1) {
// Could also use Bloom policy
std::unique_ptr<const FilterPolicy> bloom_only{new BloomFilterPolicy(
bloom_equivalent_bits_per_key, BloomFilterPolicy::kFastLocalBloom)};
return new LevelThresholdFilterPolicy(
std::move(bloom_only), std::move(ribbon_only), bloom_before_level);
} else {
return ribbon_only.release();
}
} }
FilterBuildingContext::FilterBuildingContext( FilterBuildingContext::FilterBuildingContext(
@ -1751,55 +1831,84 @@ FilterBuildingContext::FilterBuildingContext(
FilterPolicy::~FilterPolicy() { } FilterPolicy::~FilterPolicy() { }
std::shared_ptr<const FilterPolicy> BloomLikeFilterPolicy::Create(
const std::string& name, double bits_per_key) {
if (name == test::LegacyBloomFilterPolicy::kName()) {
return std::make_shared<test::LegacyBloomFilterPolicy>(bits_per_key);
} else if (name == test::FastLocalBloomFilterPolicy::kName()) {
return std::make_shared<test::FastLocalBloomFilterPolicy>(bits_per_key);
} else if (name == test::Standard128RibbonFilterPolicy::kName()) {
return std::make_shared<test::Standard128RibbonFilterPolicy>(bits_per_key);
} else if (name == DeprecatedBlockBasedBloomFilterPolicy::kName()) {
return std::make_shared<DeprecatedBlockBasedBloomFilterPolicy>(
bits_per_key);
} else if (name == BloomFilterPolicy::kName()) {
// For testing
return std::make_shared<BloomFilterPolicy>(bits_per_key);
} else if (name == RibbonFilterPolicy::kName()) {
// For testing
return std::make_shared<RibbonFilterPolicy>(bits_per_key,
/*bloom_before_level*/ 0);
} else {
return nullptr;
}
}
Status FilterPolicy::CreateFromString( Status FilterPolicy::CreateFromString(
const ConfigOptions& /*options*/, const std::string& value, const ConfigOptions& /*options*/, const std::string& value,
std::shared_ptr<const FilterPolicy>* policy) { std::shared_ptr<const FilterPolicy>* policy) {
const std::string kBloomName = "bloomfilter:";
const std::string kRibbonName = "ribbonfilter:";
if (value == kNullptrString) { if (value == kNullptrString) {
policy->reset(); policy->reset();
return Status::OK();
} else if (value == "rocksdb.BuiltinBloomFilter") { } else if (value == "rocksdb.BuiltinBloomFilter") {
*policy = std::make_shared<BuiltinFilterPolicy>(); *policy = std::make_shared<ReadOnlyBuiltinFilterPolicy>();
} else { return Status::OK();
}
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
const std::vector<std::string> vals = StringSplit(value, ':'); const std::vector<std::string> vals = StringSplit(value, ':');
if (vals.size() < 2) { if (vals.size() < 2) {
return Status::NotFound("Invalid filter policy name ", value); return Status::NotFound("Invalid filter policy name ", value);
}
const std::string& name = vals[0];
double bits_per_key = ParseDouble(trim(vals[1]));
if (name == BloomFilterPolicy::kName()) {
bool use_block_based_builder = false;
if (vals.size() > 2) {
use_block_based_builder =
ParseBoolean("use_block_based_builder", trim(vals[2]));
} }
const std::string& name = vals[0]; policy->reset(NewBloomFilterPolicy(bits_per_key, use_block_based_builder));
double bits_per_key = ParseDouble(trim(vals[1])); } else if (name == RibbonFilterPolicy::kName()) {
if (name == "bloomfilter") { // TODO: constants for names int bloom_before_level;
// NOTE: ignoring obsolete bool for "use_block_based_builder" if (vals.size() < 3) {
policy->reset(NewBloomFilterPolicy(bits_per_key)); bloom_before_level = 0;
} else if (name == "ribbonfilter") {
int bloom_before_level;
if (vals.size() < 3) {
bloom_before_level = 0;
} else {
bloom_before_level = ParseInt(trim(vals[2]));
}
policy->reset(NewRibbonFilterPolicy(/*bloom_equivalent*/ bits_per_key,
bloom_before_level));
} else if (name == "rocksdb.internal.DeprecatedBlockBasedBloomFilter") {
*policy = std::make_shared<BloomFilterPolicy>(
bits_per_key, BloomFilterPolicy::kDeprecatedBlock);
} else if (name == "rocksdb.internal.LegacyBloomFilter") {
*policy = std::make_shared<BloomFilterPolicy>(
bits_per_key, BloomFilterPolicy::kLegacyBloom);
} else if (name == "rocksdb.internal.FastLocalBloomFilter") {
*policy = std::make_shared<BloomFilterPolicy>(
bits_per_key, BloomFilterPolicy::kFastLocalBloom);
} else if (name == "rocksdb.internal.Standard128RibbonFilter") {
*policy = std::make_shared<BloomFilterPolicy>(
bits_per_key, BloomFilterPolicy::kStandard128Ribbon);
} else { } else {
return Status::NotFound("Invalid filter policy name ", value); bloom_before_level = ParseInt(trim(vals[2]));
} }
policy->reset(NewRibbonFilterPolicy(/*bloom_equivalent*/ bits_per_key,
bloom_before_level));
} else {
*policy = BloomLikeFilterPolicy::Create(name, bits_per_key);
}
if (*policy) {
return Status::OK();
} else {
return Status::NotFound("Invalid filter policy name ", value);
}
#else #else
return Status::NotSupported("Cannot load filter policy in LITE mode ", return Status::NotSupported("Cannot load filter policy in LITE mode ", value);
value);
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
}
return Status::OK();
} }
// Returns the configuration names of the fixed-implementation policies, in
// the order listed below. Each name is one that BloomLikeFilterPolicy::Create
// recognizes.
// NOTE(review): STATIC_AVOID_DESTRUCTION is presumably a function-local static
// that is never destroyed; confirm against the macro definition.
const std::vector<std::string>& BloomLikeFilterPolicy::GetAllFixedImpls() {
STATIC_AVOID_DESTRUCTION(std::vector<std::string>, impls){
// Match filter_bench -impl=x ordering
test::LegacyBloomFilterPolicy::kName(),
DeprecatedBlockBasedBloomFilterPolicy::kName(),
test::FastLocalBloomFilterPolicy::kName(),
test::Standard128RibbonFilterPolicy::kName(),
};
return impls;
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -46,31 +46,41 @@ class BuiltinFilterBitsReader : public FilterBitsReader {
virtual bool HashMayMatch(const uint64_t /* h */) { return true; } virtual bool HashMayMatch(const uint64_t /* h */) { return true; }
}; };
// Base class for RocksDB built-in filter policies. This can read all // Base class for RocksDB built-in filter policies. This provides the
// kinds of built-in filters (for backward compatibility with old // ability to read all kinds of built-in filters (so that old filters can
// OPTIONS files) but does not build filters, so new SST files generated // be used even when you change between built-in policies).
// under the policy will get no filters (like nullptr FilterPolicy).
// This class is considered internal API and subject to change.
class BuiltinFilterPolicy : public FilterPolicy { class BuiltinFilterPolicy : public FilterPolicy {
public: public: // overrides
static BuiltinFilterBitsReader* GetBuiltinFilterBitsReader(
const Slice& contents);
// Shared name because any built-in policy can read filters from // Shared name because any built-in policy can read filters from
// any other // any other
// FIXME when making filter policies Configurable. For now, this
// is still rocksdb.BuiltinBloomFilter
const char* Name() const override; const char* Name() const override;
// Convert to a string understood by FilterPolicy::CreateFromString
virtual std::string GetId() const = 0;
// Read metadata to determine what kind of FilterBitsReader is needed // Read metadata to determine what kind of FilterBitsReader is needed
// and return a new one. This must successfully process any filter data // and return a new one. This must successfully process any filter data
// generated by a built-in FilterBitsBuilder, regardless of the impl // generated by a built-in FilterBitsBuilder, regardless of the impl
// chosen for this BloomFilterPolicy. Not compatible with CreateFilter. // chosen for this BloomFilterPolicy. Not compatible with CreateFilter.
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override; FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;
// Does not write filters. public: // new
FilterBitsBuilder* GetBuilderWithContext( // An internal function for the implementation of
const FilterBuildingContext&) const override { // BuiltinFilterBitsReader::GetFilterBitsReader without requiring an instance
return nullptr; // or working around potential virtual overrides.
} static BuiltinFilterBitsReader* GetBuiltinFilterBitsReader(
const Slice& contents);
// Returns a new FilterBitsBuilder from the filter_policy in
// table_options of a context, or nullptr if not applicable.
// (An internal convenience function to save boilerplate.)
static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&);
protected:
// Deprecated block-based filter only (no longer in public API)
bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const;
private: private:
// For Bloom filter implementation(s) (except deprecated block-based filter) // For Bloom filter implementation(s) (except deprecated block-based filter)
@ -80,85 +90,58 @@ class BuiltinFilterPolicy : public FilterPolicy {
static BuiltinFilterBitsReader* GetRibbonBitsReader(const Slice& contents); static BuiltinFilterBitsReader* GetRibbonBitsReader(const Slice& contents);
}; };
// RocksDB built-in filter policy for Bloom or Bloom-like filters including // A "read only" filter policy used for backward compatibility with old
// Ribbon filters. // OPTIONS files, which did not specify a Bloom configuration, just
// "rocksdb.BuiltinBloomFilter". Although this can read existing filters,
// this policy does not build new filters, so new SST files generated
// under the policy will get no filters (like nullptr FilterPolicy).
// This class is considered internal API and subject to change. // This class is considered internal API and subject to change.
// See NewBloomFilterPolicy and NewRibbonFilterPolicy. class ReadOnlyBuiltinFilterPolicy : public BuiltinFilterPolicy {
class BloomFilterPolicy : public BuiltinFilterPolicy {
public: public:
// An internal marker for operating modes of BloomFilterPolicy, in terms // Convert to a string understood by FilterPolicy::CreateFromString
// of selecting an implementation. This makes it easier for tests to track virtual std::string GetId() const override { return Name(); }
// or to walk over the built-in set of Bloom filter implementations. The
// only variance in BloomFilterPolicy by mode/implementation is in
// GetFilterBitsBuilder(), so an enum is practical here vs. subclasses.
//
// This enum is essentially the union of all the different kinds of return
// value from GetFilterBitsBuilder, or "underlying implementation", and
// higher-level modes that choose an underlying implementation based on
// context information.
enum Mode {
// Legacy implementation of Bloom filter for full and partitioned filters.
// Set to 0 in case of value confusion with bool use_block_based_builder
// NOTE: TESTING ONLY as this mode does not use best compatible
// implementation
kLegacyBloom = 0,
// Deprecated block-based Bloom filter implementation.
// Set to 1 in case of value confusion with bool use_block_based_builder
// NOTE: DEPRECATED but user exposed
kDeprecatedBlock = 1,
// A fast, cache-local Bloom filter implementation. See description in
// FastLocalBloomImpl.
// NOTE: TESTING ONLY as this mode does not check format_version
kFastLocalBloom = 2,
// A Bloom alternative saving about 30% space for ~3-4x construction
// CPU time. See ribbon_alg.h and ribbon_impl.h.
kStandard128Ribbon = 3,
// Automatically choose between kLegacyBloom and kFastLocalBloom based on
// context at build time, including compatibility with format_version.
kAutoBloom = 100,
};
// All the different underlying implementations that a BloomFilterPolicy
// might use, as a mode that says "always use this implementation."
// Only appropriate for unit tests.
static const std::vector<Mode> kAllFixedImpls;
// All the different modes of BloomFilterPolicy that are exposed from
// user APIs. Only appropriate for higher-level unit tests. Integration
// tests should prefer using NewBloomFilterPolicy (user-exposed).
static const std::vector<Mode> kAllUserModes;
explicit BloomFilterPolicy(double bits_per_key, Mode mode);
~BloomFilterPolicy() override;
// For Deprecated block-based filter (no longer customizable in public API)
static void CreateFilter(const Slice* keys, int n, int bits_per_key,
std::string* dst);
static bool KeyMayMatch(const Slice& key, const Slice& bloom_filter);
// To use this function, call GetBuilderFromContext(). // Does not write filters.
//
// Neither the context nor any objects therein should be saved beyond
// the call to this function, unless it's shared_ptr.
FilterBitsBuilder* GetBuilderWithContext( FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const override; const FilterBuildingContext&) const override {
return nullptr;
}
};
// Internal contract: for kDeprecatedBlock, GetBuilderWithContext returns // RocksDB built-in filter policy for Bloom or Bloom-like filters including
// a new fake builder that encodes bits per key into a special value from // Ribbon filters.
// EstimateEntriesAdded(), using kSecretBitsPerKeyStart + bits_per_key // This class is considered internal API and subject to change.
static constexpr size_t kSecretBitsPerKeyStart = 1234567890U; // See NewBloomFilterPolicy and NewRibbonFilterPolicy.
class BloomLikeFilterPolicy : public BuiltinFilterPolicy {
public:
explicit BloomLikeFilterPolicy(double bits_per_key);
// Returns a new FilterBitsBuilder from the filter_policy in ~BloomLikeFilterPolicy() override;
// table_options of a context, or nullptr if not applicable.
// (An internal convenience function to save boilerplate.)
static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&);
// Essentially for testing only: configured millibits/key // Essentially for testing only: configured millibits/key
int GetMillibitsPerKey() const { return millibits_per_key_; } int GetMillibitsPerKey() const { return millibits_per_key_; }
// Essentially for testing only: legacy whole bits/key // Essentially for testing only: legacy whole bits/key
int GetWholeBitsPerKey() const { return whole_bits_per_key_; } int GetWholeBitsPerKey() const { return whole_bits_per_key_; }
// Testing only
Mode GetMode() const { return mode_; } // All the different underlying implementations that a BloomLikeFilterPolicy
// might use, as a configuration string name for a testing mode for
// "always use this implementation." Only appropriate for unit tests.
static const std::vector<std::string>& GetAllFixedImpls();
// Convenience function for creating by name for fixed impls
static std::shared_ptr<const FilterPolicy> Create(const std::string& name,
double bits_per_key);
protected:
// Some implementations used by aggregating policies
FilterBitsBuilder* GetLegacyBloomBuilderWithContext(
const FilterBuildingContext& context) const;
FilterBitsBuilder* GetFastLocalBloomBuilderWithContext(
const FilterBuildingContext& context) const;
FilterBitsBuilder* GetStandard128RibbonBuilderWithContext(
const FilterBuildingContext& context) const;
std::string GetBitsPerKeySuffix() const;
private: private:
// Bits per key settings are for configuring Bloom filters. // Bits per key settings are for configuring Bloom filters.
@ -177,10 +160,6 @@ class BloomFilterPolicy : public BuiltinFilterPolicy {
// example, 100 -> 1% fp rate. // example, 100 -> 1% fp rate.
double desired_one_in_fp_rate_; double desired_one_in_fp_rate_;
// Selected mode (a specific implementation or way of selecting an
// implementation) for building new SST filters.
Mode mode_;
// Whether relevant warnings have been logged already. (Remember so we // Whether relevant warnings have been logged already. (Remember so we
// only report once per BloomFilterPolicy instance, to keep the noise down.) // only report once per BloomFilterPolicy instance, to keep the noise down.)
mutable std::atomic<bool> warned_; mutable std::atomic<bool> warned_;
@ -196,28 +175,111 @@ class BloomFilterPolicy : public BuiltinFilterPolicy {
mutable std::atomic<int64_t> aggregate_rounding_balance_; mutable std::atomic<int64_t> aggregate_rounding_balance_;
}; };
// Chooses between two filter policies based on LSM level, but // For NewBloomFilterPolicy
// only for Level and Universal compaction styles. Flush is treated //
// as level -1. Policy b is considered fallback / primary policy. // This is a user-facing policy that automatically chooses between
class LevelThresholdFilterPolicy : public BuiltinFilterPolicy { // LegacyBloom and FastLocalBloom based on context at build time,
// including compatibility with format_version.
class BloomFilterPolicy : public BloomLikeFilterPolicy {
public: public:
LevelThresholdFilterPolicy(std::unique_ptr<const FilterPolicy>&& a, explicit BloomFilterPolicy(double bits_per_key);
std::unique_ptr<const FilterPolicy>&& b,
int starting_level_for_b);
// To use this function, call BuiltinFilterPolicy::GetBuilderFromContext().
//
// Neither the context nor any objects therein should be saved beyond
// the call to this function, unless it's shared_ptr.
FilterBitsBuilder* GetBuilderWithContext( FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext& context) const override; const FilterBuildingContext&) const override;
static const char* kName();
std::string GetId() const override;
};
inline int TEST_GetStartingLevelForB() const { return starting_level_for_b_; } // For NewRibbonFilterPolicy
//
// This is a user-facing policy that chooses between Standard128Ribbon
// and FastLocalBloom based on context at build time (LSM level and other
// factors in extreme cases).
class RibbonFilterPolicy : public BloomLikeFilterPolicy {
public:
explicit RibbonFilterPolicy(double bloom_equivalent_bits_per_key,
int bloom_before_level);
inline const FilterPolicy* TEST_GetPolicyA() const { return policy_a_.get(); } FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const override;
inline const FilterPolicy* TEST_GetPolicyB() const { return policy_b_.get(); } int GetBloomBeforeLevel() const { return bloom_before_level_; }
static const char* kName();
std::string GetId() const override;
private: private:
const std::unique_ptr<const FilterPolicy> policy_a_; const int bloom_before_level_;
const std::unique_ptr<const FilterPolicy> policy_b_;
int starting_level_for_b_;
}; };
// Deprecated block-based filter only. We still support reading old
// block-based filters from any BuiltinFilterPolicy, but there is no public
// option to build them. However, this class is used to build them for testing
// and for a public backdoor to building them by constructing this policy from
// a string.
class DeprecatedBlockBasedBloomFilterPolicy : public BloomLikeFilterPolicy {
public:
// bits_per_key configures the filter; it is handled by the
// BloomLikeFilterPolicy base, from which this class derives.
explicit DeprecatedBlockBasedBloomFilterPolicy(double bits_per_key);
// Internal contract: returns a new fake builder that encodes bits per key
// into a special value from EstimateEntriesAdded(), using
// kSecretBitsPerKeyStart
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const override;
// Base of the special value mentioned in the internal contract above.
// NOTE(review): presumably the bits-per-key setting is added to this
// constant; confirm in the implementation.
static constexpr size_t kSecretBitsPerKeyStart = 1234567890U;
// Configuration name of this policy; BloomLikeFilterPolicy::Create compares
// its `name` argument against it.
static const char* kName();
// Convert to a string understood by FilterPolicy::CreateFromString
std::string GetId() const override;
// Static helpers for the deprecated block-based filter format.
// NOTE(review): presumably CreateFilter builds a filter for the n keys in
// `keys` into *dst, and KeyMayMatch queries such a filter; confirm in the
// implementation.
static void CreateFilter(const Slice* keys, int n, int bits_per_key,
std::string* dst);
static bool KeyMayMatch(const Slice& key, const Slice& bloom_filter);
};
// For testing only, but always constructable with internal names
namespace test {
// Fixed-implementation policy: GetBuilderWithContext uses the legacy Bloom
// builder, or returns nullptr when the configured millibits per key is 0.
class LegacyBloomFilterPolicy : public BloomLikeFilterPolicy {
public:
explicit LegacyBloomFilterPolicy(double bits_per_key)
: BloomLikeFilterPolicy(bits_per_key) {}
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext& context) const override;
// Internal configuration name of this policy.
static const char* kName();
// kName() followed by the bits-per-key suffix.
std::string GetId() const override;
};
// Fixed-implementation policy: GetBuilderWithContext uses the fast local
// Bloom builder, or returns nullptr when the configured millibits per key is 0.
class FastLocalBloomFilterPolicy : public BloomLikeFilterPolicy {
public:
explicit FastLocalBloomFilterPolicy(double bits_per_key)
: BloomLikeFilterPolicy(bits_per_key) {}
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext& context) const override;
// Internal configuration name of this policy.
static const char* kName();
// kName() followed by the bits-per-key suffix.
std::string GetId() const override;
};
// Fixed-implementation policy: GetBuilderWithContext uses the Standard128
// Ribbon builder, or returns nullptr when the configured millibits per key
// is 0.
class Standard128RibbonFilterPolicy : public BloomLikeFilterPolicy {
public:
explicit Standard128RibbonFilterPolicy(double bloom_equiv_bits_per_key)
: BloomLikeFilterPolicy(bloom_equiv_bits_per_key) {}
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext& context) const override;
// Internal configuration name of this policy.
static const char* kName();
// kName() followed by the bits-per-key suffix.
std::string GetId() const override;
};
} // namespace test
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -4,6 +4,8 @@
// (found in the LICENSE.Apache file in the root directory). // (found in the LICENSE.Apache file in the root directory).
#pragma once #pragma once
#include <memory>
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_filter_block.h"
#include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_based_table_reader.h"
@ -30,10 +32,15 @@ class MockBlockBasedTableTester {
std::unique_ptr<BlockBasedTable> table_; std::unique_ptr<BlockBasedTable> table_;
explicit MockBlockBasedTableTester(const FilterPolicy* filter_policy) explicit MockBlockBasedTableTester(const FilterPolicy* filter_policy)
: MockBlockBasedTableTester(
std::shared_ptr<const FilterPolicy>(filter_policy)){};
explicit MockBlockBasedTableTester(
std::shared_ptr<const FilterPolicy> filter_policy)
: ioptions_(options_), : ioptions_(options_),
env_options_(options_), env_options_(options_),
icomp_(options_.comparator) { icomp_(options_.comparator) {
table_options_.filter_policy.reset(filter_policy); table_options_.filter_policy = std::move(filter_policy);
constexpr bool skip_filters = false; constexpr bool skip_filters = false;
constexpr bool immortal_table = false; constexpr bool immortal_table = false;

@ -39,6 +39,13 @@ DEFINE_int32(bits_per_key, 10, "");
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
namespace {
// Configuration names of the fixed filter implementations, held as
// std::string so tests can compare them directly against the string-valued
// test parameter (GetParam()) and pass them to BloomLikeFilterPolicy::Create.
const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kName();
const std::string kFastLocalBloom = test::FastLocalBloomFilterPolicy::kName();
const std::string kStandard128Ribbon =
test::Standard128RibbonFilterPolicy::kName();
} // namespace
static const int kVerbose = 1; static const int kVerbose = 1;
static Slice Key(int i, char* buffer) { static Slice Key(int i, char* buffer) {
@ -63,7 +70,7 @@ static int NextLength(int length) {
class BlockBasedBloomTest : public testing::Test { class BlockBasedBloomTest : public testing::Test {
private: private:
int bits_per_key_; std::unique_ptr<const DeprecatedBlockBasedBloomFilterPolicy> policy_;
std::string filter_; std::string filter_;
std::vector<std::string> keys_; std::vector<std::string> keys_;
@ -76,9 +83,7 @@ class BlockBasedBloomTest : public testing::Test {
} }
void ResetPolicy(double bits_per_key) { void ResetPolicy(double bits_per_key) {
bits_per_key_ = policy_.reset(new DeprecatedBlockBasedBloomFilterPolicy(bits_per_key));
BloomFilterPolicy(bits_per_key, BloomFilterPolicy::kDeprecatedBlock)
.GetWholeBitsPerKey();
Reset(); Reset();
} }
@ -94,9 +99,9 @@ class BlockBasedBloomTest : public testing::Test {
key_slices.push_back(Slice(keys_[i])); key_slices.push_back(Slice(keys_[i]));
} }
filter_.clear(); filter_.clear();
BloomFilterPolicy::CreateFilter(key_slices.data(), DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(
static_cast<int>(key_slices.size()), &key_slices[0], static_cast<int>(key_slices.size()),
bits_per_key_, &filter_); policy_->GetWholeBitsPerKey(), &filter_);
keys_.clear(); keys_.clear();
if (kVerbose >= 2) DumpFilter(); if (kVerbose >= 2) DumpFilter();
} }
@ -122,7 +127,7 @@ class BlockBasedBloomTest : public testing::Test {
if (!keys_.empty()) { if (!keys_.empty()) {
Build(); Build();
} }
return BloomFilterPolicy::KeyMayMatch(s, filter_); return DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(s, filter_);
} }
double FalsePositiveRate() { double FalsePositiveRate() {
@ -264,7 +269,7 @@ TEST_F(BlockBasedBloomTest, Schema) {
// Different bits-per-byte // Different bits-per-byte
class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> { class FullBloomTest : public testing::TestWithParam<std::string> {
protected: protected:
BlockBasedTableOptions table_options_; BlockBasedTableOptions table_options_;
@ -285,9 +290,9 @@ class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
return dynamic_cast<BuiltinFilterBitsBuilder*>(bits_builder_.get()); return dynamic_cast<BuiltinFilterBitsBuilder*>(bits_builder_.get());
} }
const BloomFilterPolicy* GetBloomFilterPolicy() { const BloomLikeFilterPolicy* GetBloomLikeFilterPolicy() {
// Throws on bad cast // Throws on bad cast
return &dynamic_cast<const BloomFilterPolicy&>(*policy_); return &dynamic_cast<const BloomLikeFilterPolicy&>(*policy_);
} }
void Reset() { void Reset() {
@ -299,7 +304,7 @@ class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
} }
void ResetPolicy(double bits_per_key) { void ResetPolicy(double bits_per_key) {
policy_.reset(new BloomFilterPolicy(bits_per_key, GetParam())); policy_ = BloomLikeFilterPolicy::Create(GetParam(), bits_per_key);
Reset(); Reset();
} }
@ -420,7 +425,7 @@ TEST_P(FullBloomTest, FilterSize) {
{INFINITY, 100000}, {INFINITY, 100000},
{NAN, 100000}}) { {NAN, 100000}}) {
ResetPolicy(bpk.first); ResetPolicy(bpk.first);
auto bfp = GetBloomFilterPolicy(); auto bfp = GetBloomLikeFilterPolicy();
EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey()); EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey()); EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
@ -433,7 +438,7 @@ TEST_P(FullBloomTest, FilterSize) {
computed -= 0.5; computed -= 0.5;
some_computed_less_than_denoted |= (computed < bpk.first); some_computed_less_than_denoted |= (computed < bpk.first);
ResetPolicy(computed); ResetPolicy(computed);
bfp = GetBloomFilterPolicy(); bfp = GetBloomLikeFilterPolicy();
EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey()); EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey()); EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
@ -451,7 +456,7 @@ TEST_P(FullBloomTest, FilterSize) {
size_t n2 = bits_builder->ApproximateNumEntries(space); size_t n2 = bits_builder->ApproximateNumEntries(space);
EXPECT_GE(n2, n); EXPECT_GE(n2, n);
size_t space2 = bits_builder->CalculateSpace(n2); size_t space2 = bits_builder->CalculateSpace(n2);
if (n > 12000 && GetParam() == BloomFilterPolicy::kStandard128Ribbon) { if (n > 12000 && GetParam() == kStandard128Ribbon) {
// TODO(peterd): better approximation? // TODO(peterd): better approximation?
EXPECT_GE(space2, space); EXPECT_GE(space2, space);
EXPECT_LE(space2 * 0.998, space * 1.0); EXPECT_LE(space2 * 0.998, space * 1.0);
@ -568,14 +573,14 @@ TEST_P(FullBloomTest, OptimizeForMemory) {
} }
int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8; int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) { if (GetParam() == kStandard128Ribbon) {
// ~ 30% savings vs. Bloom filter // ~ 30% savings vs. Bloom filter
ex_min_total_size = 7 * ex_min_total_size / 10; ex_min_total_size = 7 * ex_min_total_size / 10;
} }
EXPECT_GE(static_cast<int64_t>(total_size), ex_min_total_size); EXPECT_GE(static_cast<int64_t>(total_size), ex_min_total_size);
int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5); int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5);
if (GetParam() == BloomFilterPolicy::kLegacyBloom) { if (GetParam() == kLegacyBloom) {
// this config can add extra cache line to make odd number // this config can add extra cache line to make odd number
blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE; blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE;
} }
@ -583,7 +588,7 @@ TEST_P(FullBloomTest, OptimizeForMemory) {
EXPECT_GE(total_mem, total_size); EXPECT_GE(total_mem, total_size);
// optimize_filters_for_memory not implemented with legacy Bloom // optimize_filters_for_memory not implemented with legacy Bloom
if (offm && GetParam() != BloomFilterPolicy::kLegacyBloom) { if (offm && GetParam() != kLegacyBloom) {
// This value can include a small extra penalty for kExtraPadding // This value can include a small extra penalty for kExtraPadding
fprintf(stderr, "Internal fragmentation (optimized): %g%%\n", fprintf(stderr, "Internal fragmentation (optimized): %g%%\n",
(total_mem - total_size) * 100.0 / total_size); (total_mem - total_size) * 100.0 / total_size);
@ -629,8 +634,8 @@ TEST(FullBloomFilterConstructionReserveMemTest,
lo.strict_capacity_limit = true; lo.strict_capacity_limit = true;
std::shared_ptr<Cache> cache(NewLRUCache(lo)); std::shared_ptr<Cache> cache(NewLRUCache(lo));
table_options.block_cache = cache; table_options.block_cache = cache;
table_options.filter_policy.reset(new BloomFilterPolicy( table_options.filter_policy =
FLAGS_bits_per_key, BloomFilterPolicy::Mode::kStandard128Ribbon)); BloomLikeFilterPolicy::Create(kStandard128Ribbon, FLAGS_bits_per_key);
FilterBuildingContext ctx(table_options); FilterBuildingContext ctx(table_options);
std::unique_ptr<FilterBitsBuilder> filter_bits_builder( std::unique_ptr<FilterBitsBuilder> filter_bits_builder(
table_options.filter_policy->GetBuilderWithContext(ctx)); table_options.filter_policy->GetBuilderWithContext(ctx));
@ -692,35 +697,35 @@ inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
// ability to read filters generated using other cache line sizes. // ability to read filters generated using other cache line sizes.
// See RawSchema. // See RawSchema.
TEST_P(FullBloomTest, Schema) { TEST_P(FullBloomTest, Schema) {
#define EXPECT_EQ_Bloom(a, b) \ #define EXPECT_EQ_Bloom(a, b) \
{ \ { \
if (GetParam() != BloomFilterPolicy::kStandard128Ribbon) { \ if (GetParam() != kStandard128Ribbon) { \
EXPECT_EQ(a, b); \ EXPECT_EQ(a, b); \
} \ } \
} }
#define EXPECT_EQ_Ribbon(a, b) \ #define EXPECT_EQ_Ribbon(a, b) \
{ \ { \
if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) { \ if (GetParam() == kStandard128Ribbon) { \
EXPECT_EQ(a, b); \ EXPECT_EQ(a, b); \
} \ } \
} }
#define EXPECT_EQ_FastBloom(a, b) \ #define EXPECT_EQ_FastBloom(a, b) \
{ \ { \
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) { \ if (GetParam() == kFastLocalBloom) { \
EXPECT_EQ(a, b); \ EXPECT_EQ(a, b); \
} \ } \
} }
#define EXPECT_EQ_LegacyBloom(a, b) \ #define EXPECT_EQ_LegacyBloom(a, b) \
{ \ { \
if (GetParam() == BloomFilterPolicy::kLegacyBloom) { \ if (GetParam() == kLegacyBloom) { \
EXPECT_EQ(a, b); \ EXPECT_EQ(a, b); \
} \ } \
} }
#define EXPECT_EQ_NotLegacy(a, b) \ #define EXPECT_EQ_NotLegacy(a, b) \
{ \ { \
if (GetParam() != BloomFilterPolicy::kLegacyBloom) { \ if (GetParam() != kLegacyBloom) { \
EXPECT_EQ(a, b); \ EXPECT_EQ(a, b); \
} \ } \
} }
char buffer[sizeof(int)]; char buffer[sizeof(int)];
@ -1259,9 +1264,8 @@ TEST_P(FullBloomTest, CorruptFilters) {
} }
INSTANTIATE_TEST_CASE_P(Full, FullBloomTest, INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
testing::Values(BloomFilterPolicy::kLegacyBloom, testing::Values(kLegacyBloom, kFastLocalBloom,
BloomFilterPolicy::kFastLocalBloom, kStandard128Ribbon));
BloomFilterPolicy::kStandard128Ribbon));
static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) { static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) {
union { union {

@ -31,6 +31,7 @@ int main() {
#include "util/random.h" #include "util/random.h"
#include "util/stderr_logger.h" #include "util/stderr_logger.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
#include "util/string_util.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags; using GFLAGS_NAMESPACE::ParseCommandLineFlags;
using GFLAGS_NAMESPACE::RegisterFlagValidator; using GFLAGS_NAMESPACE::RegisterFlagValidator;
@ -140,6 +141,7 @@ using ROCKSDB_NAMESPACE::Arena;
using ROCKSDB_NAMESPACE::BlockContents; using ROCKSDB_NAMESPACE::BlockContents;
using ROCKSDB_NAMESPACE::BloomFilterPolicy; using ROCKSDB_NAMESPACE::BloomFilterPolicy;
using ROCKSDB_NAMESPACE::BloomHash; using ROCKSDB_NAMESPACE::BloomHash;
using ROCKSDB_NAMESPACE::BloomLikeFilterPolicy;
using ROCKSDB_NAMESPACE::BuiltinFilterBitsBuilder; using ROCKSDB_NAMESPACE::BuiltinFilterBitsBuilder;
using ROCKSDB_NAMESPACE::CachableEntry; using ROCKSDB_NAMESPACE::CachableEntry;
using ROCKSDB_NAMESPACE::Cache; using ROCKSDB_NAMESPACE::Cache;
@ -147,6 +149,7 @@ using ROCKSDB_NAMESPACE::EncodeFixed32;
using ROCKSDB_NAMESPACE::FastRange32; using ROCKSDB_NAMESPACE::FastRange32;
using ROCKSDB_NAMESPACE::FilterBitsReader; using ROCKSDB_NAMESPACE::FilterBitsReader;
using ROCKSDB_NAMESPACE::FilterBuildingContext; using ROCKSDB_NAMESPACE::FilterBuildingContext;
using ROCKSDB_NAMESPACE::FilterPolicy;
using ROCKSDB_NAMESPACE::FullFilterBlockReader; using ROCKSDB_NAMESPACE::FullFilterBlockReader;
using ROCKSDB_NAMESPACE::GetSliceHash; using ROCKSDB_NAMESPACE::GetSliceHash;
using ROCKSDB_NAMESPACE::GetSliceHash64; using ROCKSDB_NAMESPACE::GetSliceHash64;
@ -287,6 +290,16 @@ static uint32_t DryRunHash64(Slice &s) {
return Lower32of64(GetSliceHash64(s)); return Lower32of64(GetSliceHash64(s));
} }
// Returns the filter policy shared by the benchmark, building it on the
// first call from the -impl and -bits_per_key flags and caching it in a
// function-local static for later calls.
//
// FLAGS_impl is used as a position in BloomLikeFilterPolicy::GetAllFixedImpls()
// to pick the implementation name handed to BloomLikeFilterPolicy::Create.
// NOTE(review): `.at()` presumably range-checks the index (std::vector-like
// container) -- confirm that FLAGS_impl is validated before this is reached.
const std::shared_ptr<const FilterPolicy> &GetPolicy() {
  static std::shared_ptr<const FilterPolicy> cached_policy;
  if (cached_policy) {
    // Already constructed on an earlier call.
    return cached_policy;
  }
  // Bind the list first so the selected name stays valid for the duration
  // of the Create() call.
  const auto &fixed_impls = BloomLikeFilterPolicy::GetAllFixedImpls();
  cached_policy = BloomLikeFilterPolicy::Create(fixed_impls.at(FLAGS_impl),
                                                FLAGS_bits_per_key);
  return cached_policy;
}
struct FilterBench : public MockBlockBasedTableTester { struct FilterBench : public MockBlockBasedTableTester {
std::vector<KeyMaker> kms_; std::vector<KeyMaker> kms_;
std::vector<FilterInfo> infos_; std::vector<FilterInfo> infos_;
@ -297,9 +310,7 @@ struct FilterBench : public MockBlockBasedTableTester {
StderrLogger stderr_logger_; StderrLogger stderr_logger_;
FilterBench() FilterBench()
: MockBlockBasedTableTester(new BloomFilterPolicy( : MockBlockBasedTableTester(GetPolicy()),
FLAGS_bits_per_key,
static_cast<BloomFilterPolicy::Mode>(FLAGS_impl))),
random_(FLAGS_seed), random_(FLAGS_seed),
m_queries_(0) { m_queries_(0) {
for (uint32_t i = 0; i < FLAGS_batch_size; ++i) { for (uint32_t i = 0; i < FLAGS_batch_size; ++i) {

Loading…
Cancel
Save