From 8c681087c79744f02692fc736ad501d173540527 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 16 Feb 2022 08:27:37 -0800 Subject: [PATCH] Refactor FilterPolicies toward Customizable (#9567) Summary: Some changes to make it easier to make FilterPolicy customizable. Especially, create distinct classes for the different testing-only and user-facing built-in FilterPolicy modes. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9567 Test Plan: tests updated, with no intended difference in functionality tested. No difference in test performance seen as a result of moving to string-based filter type configuration. Reviewed By: mrambacher Differential Revision: D34234694 Pulled By: pdillinger fbshipit-source-id: 8a94931a9e04c3bcca863a4f524cfd064aaf0122 --- db/db_bloom_filter_test.cc | 234 +++++----- microbench/ribbon_bench.cc | 28 +- options/options_test.cc | 64 +-- table/block_based/block_based_filter_block.cc | 10 +- .../block_based/block_based_table_builder.cc | 3 +- table/block_based/filter_policy.cc | 439 +++++++++++------- table/block_based/filter_policy_internal.h | 262 +++++++---- table/block_based/mock_block_based_table.h | 9 +- util/bloom_test.cc | 108 ++--- util/filter_bench.cc | 17 +- 10 files changed, 670 insertions(+), 504 deletions(-) diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index 534d2f656..df51163e5 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include "cache/cache_entry_roles.h" #include "cache/cache_reservation_manager.h" @@ -27,7 +28,17 @@ namespace ROCKSDB_NAMESPACE { namespace { -using BFP = BloomFilterPolicy; +std::shared_ptr Create(double bits_per_key, + const std::string& name) { + return BloomLikeFilterPolicy::Create(name, bits_per_key); +} +const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kName(); +const std::string kDeprecatedBlock = + DeprecatedBlockBasedBloomFilterPolicy::kName(); +const 
std::string kFastLocalBloom = test::FastLocalBloomFilterPolicy::kName(); +const std::string kStandard128Ribbon = + test::Standard128RibbonFilterPolicy::kName(); +const std::string kAutoBloom = BloomFilterPolicy::kName(); } // namespace // DB tests related to bloom filter. @@ -38,12 +49,13 @@ class DBBloomFilterTest : public DBTestBase { : DBTestBase("db_bloom_filter_test", /*env_do_fsync=*/true) {} }; -class DBBloomFilterTestWithParam : public DBTestBase, - public testing::WithParamInterface< - std::tuple> { +class DBBloomFilterTestWithParam + : public DBTestBase, + public testing::WithParamInterface< + std::tuple> { // public testing::WithParamInterface { protected: - BFP::Mode bfp_impl_; + std::string bfp_impl_; bool partition_filters_; uint32_t format_version_; @@ -90,7 +102,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) { ReadOptions ropts; std::string value; anon::OptionsOverride options_override; - options_override.filter_policy.reset(new BFP(20, bfp_impl_)); + options_override.filter_policy = Create(20, bfp_impl_); options_override.partition_filters = partition_filters_; options_override.metadata_block_size = 32; Options options = CurrentOptions(options_override); @@ -477,7 +489,7 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) { // trigger reset of table_factory BlockBasedTableOptions table_options; table_options.no_block_cache = true; - table_options.filter_policy.reset(new BFP(10, bfp_impl_)); + table_options.filter_policy = Create(10, bfp_impl_); table_options.partition_filters = partition_filters_; if (partition_filters_) { table_options.index_type = @@ -573,11 +585,10 @@ class AlwaysTrueBitsBuilder : public FilterBitsBuilder { size_t ApproximateNumEntries(size_t) override { return SIZE_MAX; } }; -class AlwaysTrueFilterPolicy : public BloomFilterPolicy { +class AlwaysTrueFilterPolicy : public BloomLikeFilterPolicy { public: explicit AlwaysTrueFilterPolicy(bool skip) - : BloomFilterPolicy(/* ignored */ 10, /* ignored */ BFP::kAutoBloom), 
- skip_(skip) {} + : BloomLikeFilterPolicy(/* ignored */ 10), skip_(skip) {} FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext&) const override { @@ -588,6 +599,10 @@ class AlwaysTrueFilterPolicy : public BloomFilterPolicy { } } + std::string GetId() const override { + return "rocksdb.test.AlwaysTrueFilterPolicy"; + } + private: bool skip_; }; @@ -636,7 +651,7 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) { // Test 1: bits per key < 0.5 means skip filters -> no filter // constructed or read. - table_options.filter_policy.reset(new BFP(0.4, bfp_impl_)); + table_options.filter_policy = Create(0.4, bfp_impl_); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); PutAndGetFn(); @@ -724,25 +739,23 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) { INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestDefFormatVersion, ::testing::Values( - std::make_tuple(BFP::kDeprecatedBlock, false, - test::kDefaultFormatVersion), - std::make_tuple(BFP::kAutoBloom, true, test::kDefaultFormatVersion), - std::make_tuple(BFP::kAutoBloom, false, test::kDefaultFormatVersion))); + std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion), + std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion), + std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion))); INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestWithParam, ::testing::Values( - std::make_tuple(BFP::kDeprecatedBlock, false, - test::kDefaultFormatVersion), - std::make_tuple(BFP::kAutoBloom, true, test::kDefaultFormatVersion), - std::make_tuple(BFP::kAutoBloom, false, test::kDefaultFormatVersion))); + std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion), + std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion), + std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion))); INSTANTIATE_TEST_CASE_P( FormatLatest, DBBloomFilterTestWithParam, ::testing::Values( - 
std::make_tuple(BFP::kDeprecatedBlock, false, kLatestFormatVersion), - std::make_tuple(BFP::kAutoBloom, true, kLatestFormatVersion), - std::make_tuple(BFP::kAutoBloom, false, kLatestFormatVersion))); + std::make_tuple(kDeprecatedBlock, false, kLatestFormatVersion), + std::make_tuple(kAutoBloom, true, kLatestFormatVersion), + std::make_tuple(kAutoBloom, false, kLatestFormatVersion))); #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) TEST_F(DBBloomFilterTest, BloomFilterRate) { @@ -941,7 +954,7 @@ using FilterConstructionReserveMemoryHash = uint64_t; class DBFilterConstructionReserveMemoryTestWithParam : public DBTestBase, public testing::WithParamInterface< - std::tuple> { + std::tuple> { public: DBFilterConstructionReserveMemoryTestWithParam() : DBTestBase("db_bloom_filter_tests", @@ -951,9 +964,8 @@ class DBFilterConstructionReserveMemoryTestWithParam policy_(std::get<1>(GetParam())), partition_filters_(std::get<2>(GetParam())), detect_filter_construct_corruption_(std::get<3>(GetParam())) { - if (!reserve_table_builder_memory_ || - policy_ == BloomFilterPolicy::Mode::kDeprecatedBlock || - policy_ == BloomFilterPolicy::Mode::kLegacyBloom) { + if (!reserve_table_builder_memory_ || policy_ == kDeprecatedBlock || + policy_ == kLegacyBloom) { // For these cases, we only interested in whether filter construction // cache resevation happens instead of its accuracy. Therefore we don't // need many keys. @@ -966,7 +978,7 @@ class DBFilterConstructionReserveMemoryTestWithParam // two partitions. 
num_key_ = 18 * CacheReservationManager::GetDummyEntrySize() / sizeof(FilterConstructionReserveMemoryHash); - } else if (policy_ == BloomFilterPolicy::Mode::kFastLocalBloom) { + } else if (policy_ == kFastLocalBloom) { // For Bloom Filter + FullFilter case, since we design the num_key_ to // make hash entry cache reservation be a multiple of dummy entries, the // correct behavior of charging final filter on top of it will trigger at @@ -995,7 +1007,7 @@ class DBFilterConstructionReserveMemoryTestWithParam constexpr std::size_t kCacheCapacity = 100 * 1024 * 1024; table_options.reserve_table_builder_memory = reserve_table_builder_memory_; - table_options.filter_policy.reset(new BloomFilterPolicy(10, policy_)); + table_options.filter_policy = Create(10, policy_); table_options.partition_filters = partition_filters_; if (table_options.partition_filters) { table_options.index_type = @@ -1023,7 +1035,7 @@ class DBFilterConstructionReserveMemoryTestWithParam bool ReserveTableBuilderMemory() { return reserve_table_builder_memory_; } - BloomFilterPolicy::Mode GetFilterPolicy() { return policy_; } + std::string GetFilterPolicy() { return policy_; } bool PartitionFilters() { return partition_filters_; } @@ -1035,7 +1047,7 @@ class DBFilterConstructionReserveMemoryTestWithParam private: std::size_t num_key_; bool reserve_table_builder_memory_; - BloomFilterPolicy::Mode policy_; + std::string policy_; bool partition_filters_; std::shared_ptr cache_; bool detect_filter_construct_corruption_; @@ -1043,32 +1055,20 @@ class DBFilterConstructionReserveMemoryTestWithParam INSTANTIATE_TEST_CASE_P( BlockBasedTableOptions, DBFilterConstructionReserveMemoryTestWithParam, - ::testing::Values( - std::make_tuple(false, BloomFilterPolicy::Mode::kFastLocalBloom, false, - false), - - std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, false, - false), - std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, false, - true), - std::make_tuple(true, 
BloomFilterPolicy::Mode::kFastLocalBloom, true, - false), - std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, true, - true), - - std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, - false, false), - std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, - false, true), - std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, true, - false), - std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, true, - true), - - std::make_tuple(true, BloomFilterPolicy::Mode::kDeprecatedBlock, false, - false), - std::make_tuple(true, BloomFilterPolicy::Mode::kLegacyBloom, false, - false))); + ::testing::Values(std::make_tuple(false, kFastLocalBloom, false, false), + + std::make_tuple(true, kFastLocalBloom, false, false), + std::make_tuple(true, kFastLocalBloom, false, true), + std::make_tuple(true, kFastLocalBloom, true, false), + std::make_tuple(true, kFastLocalBloom, true, true), + + std::make_tuple(true, kStandard128Ribbon, false, false), + std::make_tuple(true, kStandard128Ribbon, false, true), + std::make_tuple(true, kStandard128Ribbon, true, false), + std::make_tuple(true, kStandard128Ribbon, true, true), + + std::make_tuple(true, kDeprecatedBlock, false, false), + std::make_tuple(true, kLegacyBloom, false, false))); // TODO: Speed up this test. 
// The current test inserts many keys (on the scale of dummy entry size) @@ -1126,7 +1126,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { ASSERT_OK(Flush()); bool reserve_table_builder_memory = ReserveTableBuilderMemory(); - BloomFilterPolicy::Mode policy = GetFilterPolicy(); + std::string policy = GetFilterPolicy(); bool partition_filters = PartitionFilters(); bool detect_filter_construct_corruption = table_options.detect_filter_construct_corruption; @@ -1141,12 +1141,11 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { return; } - if (policy == BloomFilterPolicy::Mode::kDeprecatedBlock || - policy == BloomFilterPolicy::Mode::kLegacyBloom) { + if (policy == kDeprecatedBlock || policy == kLegacyBloom) { EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0) << "There shouldn't be filter construction cache reservation as this " - "feature does not support BloomFilterPolicy::Mode::kDeprecatedBlock " - "nor BloomFilterPolicy::Mode::kLegacyBloom"; + "feature does not support kDeprecatedBlock " + "nor kLegacyBloom"; return; } @@ -1162,17 +1161,17 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { const std::size_t predicted_hash_entries_cache_res_dummy_entry_num = predicted_hash_entries_cache_res / kDummyEntrySize; const std::size_t predicted_final_filter_cache_res = - static_cast(std::ceil( - 1.0 * predicted_hash_entries_cache_res_dummy_entry_num / 6 * - (policy == BloomFilterPolicy::Mode::kStandard128Ribbon ? 0.7 : 1))) * + static_cast( + std::ceil(1.0 * predicted_hash_entries_cache_res_dummy_entry_num / 6 * + (policy == kStandard128Ribbon ? 
0.7 : 1))) * kDummyEntrySize; const std::size_t predicted_banding_cache_res = static_cast( std::ceil(predicted_hash_entries_cache_res_dummy_entry_num * 2.5)) * kDummyEntrySize; - if (policy == BloomFilterPolicy::Mode::kFastLocalBloom) { - /* BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter + if (policy == kFastLocalBloom) { + /* kFastLocalBloom + FullFilter * p0 * / \ * b / \ @@ -1186,13 +1185,13 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { * multiple of dummy entries so that reservation for (p0 - b) * will trigger at least another dummy entry insertion. * - * BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter + + * kFastLocalBloom + FullFilter + * detect_filter_construct_corruption * The peak p0 stays the same as - * (BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter) but just lasts + * (kFastLocalBloom + FullFilter) but just lasts * longer since we release hash entries reservation later. * - * BloomFilterPolicy::Mode::kFastLocalBloom + PartitionedFilter + * kFastLocalBloom + PartitionedFilter * p1 * / \ * p0 b'/ \ @@ -1209,17 +1208,17 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { * + parittioned final filter1 + parittioned final filter2 * = hash entries + final filter * - * BloomFilterPolicy::Mode::kFastLocalBloom + PartitionedFilter + + * kFastLocalBloom + PartitionedFilter + * detect_filter_construct_corruption * The peak p0, p1 stay the same as - * (BloomFilterPolicy::Mode::kFastLocalBloom + PartitionedFilter) but just + * (kFastLocalBloom + PartitionedFilter) but just * last longer since we release hash entries reservation later. 
* */ if (!partition_filters) { EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1) << "Filter construction cache reservation should have only 1 peak in " - "case: BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter"; + "case: kFastLocalBloom + FullFilter"; std::size_t filter_construction_cache_res_peak = filter_construction_cache_res_peaks[0]; EXPECT_GT(filter_construction_cache_res_peak, @@ -1239,7 +1238,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { } else { EXPECT_GE(filter_construction_cache_res_peaks.size(), 2) << "Filter construction cache reservation should have multiple peaks " - "in case: BloomFilterPolicy::Mode::kFastLocalBloom + " + "in case: kFastLocalBloom + " "PartitionedFilter"; std::size_t predicted_filter_construction_cache_res_increments_sum = predicted_hash_entries_cache_res + predicted_final_filter_cache_res; @@ -1251,8 +1250,8 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { } } - if (policy == BloomFilterPolicy::Mode::kStandard128Ribbon) { - /* BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter + if (policy == kStandard128Ribbon) { + /* kStandard128Ribbon + FullFilter * p0 * / \ p1 * / \/\ @@ -1266,7 +1265,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { * will trigger at least another dummy entry insertion * (or equivelantly to saying, creating another peak). 
* - * BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter + + * kStandard128Ribbon + FullFilter + * detect_filter_construct_corruption * * new p0 @@ -1287,7 +1286,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { * entries reserveration (like p0 - b' previously) until after final filter * creation and post-verification * - * BloomFilterPolicy::Mode::kStandard128Ribbon + PartitionedFilter + * kStandard128Ribbon + PartitionedFilter * p3 * p0 /\ p4 * / \ p1 / \ /\ @@ -1306,7 +1305,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { * + parittioned final filter1 + parittioned final filter2 * = hash entries + banding + final filter * - * BloomFilterPolicy::Mode::kStandard128Ribbon + PartitionedFilter + + * kStandard128Ribbon + PartitionedFilter + * detect_filter_construct_corruption * * new p3 @@ -1347,7 +1346,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { if (!detect_filter_construct_corruption) { EXPECT_EQ(filter_construction_cache_res_peaks.size(), 2) << "Filter construction cache reservation should have 2 peaks in " - "case: BloomFilterPolicy::Mode::kStandard128Ribbon + " + "case: kStandard128Ribbon + " "FullFilter. " "The second peak is resulted from charging the final filter " "after " @@ -1366,7 +1365,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { } else { EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1) << "Filter construction cache reservation should have 1 peaks in " - "case: BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter " + "case: kStandard128Ribbon + FullFilter " "+ detect_filter_construct_corruption. 
" "The previous second peak now disappears since we don't " "decrease the hash entry reservation" @@ -1388,13 +1387,13 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { EXPECT_GE(filter_construction_cache_res_peaks.size(), 3) << "Filter construction cache reservation should have more than 3 " "peaks " - "in case: BloomFilterPolicy::Mode::kStandard128Ribbon + " + "in case: kStandard128Ribbon + " "PartitionedFilter"; } else { EXPECT_GE(filter_construction_cache_res_peaks.size(), 2) << "Filter construction cache reservation should have more than 2 " "peaks " - "in case: BloomFilterPolicy::Mode::kStandard128Ribbon + " + "in case: kStandard128Ribbon + " "PartitionedFilter + detect_filter_construct_corruption"; } std::size_t predicted_filter_construction_cache_res_increments_sum = @@ -1412,8 +1411,8 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) { class DBFilterConstructionCorruptionTestWithParam : public DBTestBase, public testing::WithParamInterface< - std::tuple> { + std::tuple> { public: DBFilterConstructionCorruptionTestWithParam() : DBTestBase("db_bloom_filter_tests", @@ -1422,8 +1421,7 @@ class DBFilterConstructionCorruptionTestWithParam BlockBasedTableOptions GetBlockBasedTableOptions() { BlockBasedTableOptions table_options; table_options.detect_filter_construct_corruption = std::get<0>(GetParam()); - table_options.filter_policy.reset( - new BloomFilterPolicy(10, std::get<1>(GetParam()))); + table_options.filter_policy = Create(10, std::get<1>(GetParam())); table_options.partition_filters = std::get<2>(GetParam()); if (table_options.partition_filters) { table_options.index_type = @@ -1444,14 +1442,11 @@ class DBFilterConstructionCorruptionTestWithParam INSTANTIATE_TEST_CASE_P( DBFilterConstructionCorruptionTestWithParam, DBFilterConstructionCorruptionTestWithParam, - ::testing::Values( - std::make_tuple(false, BloomFilterPolicy::Mode::kFastLocalBloom, false), - std::make_tuple(true, 
BloomFilterPolicy::Mode::kFastLocalBloom, false), - std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, true), - std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, - false), - std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, - true))); + ::testing::Values(std::make_tuple(false, kFastLocalBloom, false), + std::make_tuple(true, kFastLocalBloom, false), + std::make_tuple(true, kFastLocalBloom, true), + std::make_tuple(true, kStandard128Ribbon, false), + std::make_tuple(true, kStandard128Ribbon, true))); TEST_P(DBFilterConstructionCorruptionTestWithParam, DetectCorruption) { Options options = CurrentOptions(); @@ -2139,16 +2134,12 @@ INSTANTIATE_TEST_CASE_P(DBBloomFilterTestVaryPrefixAndFormatVer, #ifndef ROCKSDB_LITE namespace { -namespace BFP2 { -// Extends BFP::Mode with option to use Plain table -using PseudoMode = int; -static constexpr PseudoMode kPlainTable = -1; -} // namespace BFP2 +static const std::string kPlainTable = "test_PlainTableBloom"; } // namespace class BloomStatsTestWithParam : public DBBloomFilterTest, - public testing::WithParamInterface> { + public testing::WithParamInterface> { public: BloomStatsTestWithParam() { bfp_impl_ = std::get<0>(GetParam()); @@ -2159,7 +2150,7 @@ class BloomStatsTestWithParam ROCKSDB_NAMESPACE::NewFixedPrefixTransform(4)); options_.memtable_prefix_bloom_size_ratio = 8.0 * 1024.0 / static_cast(options_.write_buffer_size); - if (bfp_impl_ == BFP2::kPlainTable) { + if (bfp_impl_ == kPlainTable) { assert(!partition_filters_); // not supported in plain table PlainTableOptions table_options; options_.table_factory.reset(NewPlainTableFactory(table_options)); @@ -2167,13 +2158,12 @@ class BloomStatsTestWithParam BlockBasedTableOptions table_options; table_options.hash_index_allow_collision = false; if (partition_filters_) { - assert(bfp_impl_ != BFP::kDeprecatedBlock); + assert(bfp_impl_ != kDeprecatedBlock); table_options.partition_filters = partition_filters_; 
table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; } - table_options.filter_policy.reset( - new BFP(10, static_cast(bfp_impl_))); + table_options.filter_policy = Create(10, bfp_impl_); options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); } options_.env = env_; @@ -2191,7 +2181,7 @@ class BloomStatsTestWithParam static void SetUpTestCase() {} static void TearDownTestCase() {} - BFP2::PseudoMode bfp_impl_; + std::string bfp_impl_; bool partition_filters_; Options options_; }; @@ -2295,7 +2285,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { ASSERT_EQ(value3, iter->value().ToString()); // The seek doesn't check block-based bloom filter because last index key // starts with the same prefix we're seeking to. - uint64_t expected_hits = bfp_impl_ == BFP::kDeprecatedBlock ? 1 : 2; + uint64_t expected_hits = bfp_impl_ == kDeprecatedBlock ? 1 : 2; ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); iter->Seek(key2); @@ -2307,12 +2297,12 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { INSTANTIATE_TEST_CASE_P( BloomStatsTestWithParam, BloomStatsTestWithParam, - ::testing::Values(std::make_tuple(BFP::kDeprecatedBlock, false), - std::make_tuple(BFP::kLegacyBloom, false), - std::make_tuple(BFP::kLegacyBloom, true), - std::make_tuple(BFP::kFastLocalBloom, false), - std::make_tuple(BFP::kFastLocalBloom, true), - std::make_tuple(BFP2::kPlainTable, false))); + ::testing::Values(std::make_tuple(kDeprecatedBlock, false), + std::make_tuple(kLegacyBloom, false), + std::make_tuple(kLegacyBloom, true), + std::make_tuple(kFastLocalBloom, false), + std::make_tuple(kFastLocalBloom, true), + std::make_tuple(kPlainTable, false))); namespace { void PrefixScanInit(DBBloomFilterTest* dbtest) { @@ -2620,8 +2610,8 @@ int CountIter(std::unique_ptr& iter, const Slice& key) { // into the same string, or 2) the transformed seek key is of the same length // as the upper bound and two keys are adjacent according 
to the comparator. TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { - for (auto bfp_impl : BFP::kAllFixedImpls) { - int using_full_builder = bfp_impl != BFP::kDeprecatedBlock; + for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { + int using_full_builder = bfp_impl != kDeprecatedBlock; Options options; options.create_if_missing = true; options.env = CurrentOptions().env; @@ -2631,7 +2621,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { // Enable prefix bloom for SST files BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy.reset(new BFP(10, bfp_impl)); + table_options.filter_policy = Create(10, bfp_impl); table_options.index_shortening = BlockBasedTableOptions:: IndexShorteningMode::kShortenSeparatorsAndSuccessor; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); @@ -2752,8 +2742,8 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { // Create multiple SST files each with a different prefix_extractor config, // verify iterators can read all SST files using the latest config. 
TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { - for (auto bfp_impl : BFP::kAllFixedImpls) { - int using_full_builder = bfp_impl != BFP::kDeprecatedBlock; + for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { + int using_full_builder = bfp_impl != kDeprecatedBlock; Options options; options.env = CurrentOptions().env; options.create_if_missing = true; @@ -2762,7 +2752,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { options.statistics = CreateDBStatistics(); // Enable prefix bloom for SST files BlockBasedTableOptions table_options; - table_options.filter_policy.reset(new BFP(10, bfp_impl)); + table_options.filter_policy = Create(10, bfp_impl); table_options.cache_index_and_filter_blocks = true; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); @@ -2888,7 +2878,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { // as expected TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) { int iteration = 0; - for (auto bfp_impl : BFP::kAllFixedImpls) { + for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { Options options = CurrentOptions(); options.create_if_missing = true; options.prefix_extractor.reset(NewFixedPrefixTransform(1)); @@ -2897,7 +2887,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) { // Enable prefix bloom for SST files BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy.reset(new BFP(10, bfp_impl)); + table_options.filter_policy = Create(10, bfp_impl); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu" + std::to_string(iteration)}, options); ReadOptions read_options; @@ -2944,7 +2934,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) { // Verify it's possible to change prefix_extractor at runtime and iterators // behaves as expected TEST_F(DBBloomFilterTest, 
DynamicBloomFilterOptions) { - for (auto bfp_impl : BFP::kAllFixedImpls) { + for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { Options options; options.env = CurrentOptions().env; options.create_if_missing = true; @@ -2954,7 +2944,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { // Enable prefix bloom for SST files BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy.reset(new BFP(10, bfp_impl)); + table_options.filter_policy = Create(10, bfp_impl); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); diff --git a/microbench/ribbon_bench.cc b/microbench/ribbon_bench.cc index 19e736027..ee981db58 100644 --- a/microbench/ribbon_bench.cc +++ b/microbench/ribbon_bench.cc @@ -47,30 +47,28 @@ struct KeyMaker { }; // benchmark arguments: -// 0. filter mode +// 0. filter impl (like filter_bench -impl) // 1. filter config bits_per_key // 2. average data key length // 3. 
data entry number static void CustomArguments(benchmark::internal::Benchmark *b) { - for (int filter_mode : - {BloomFilterPolicy::kLegacyBloom, BloomFilterPolicy::kFastLocalBloom, - BloomFilterPolicy::kStandard128Ribbon}) { + for (int filter_impl : {0, 2, 3}) { for (int bits_per_key : {10, 20}) { for (int key_len_avg : {10, 100}) { for (int64_t entry_num : {1 << 10, 1 << 20}) { - b->Args({filter_mode, bits_per_key, key_len_avg, entry_num}); + b->Args({filter_impl, bits_per_key, key_len_avg, entry_num}); } } } } - b->ArgNames({"filter_mode", "bits_per_key", "key_len_avg", "entry_num"}); + b->ArgNames({"filter_impl", "bits_per_key", "key_len_avg", "entry_num"}); } static void FilterBuild(benchmark::State &state) { // setup data - auto filter = new BloomFilterPolicy( - static_cast(state.range(1)), - static_cast(state.range(0))); + auto filter = BloomLikeFilterPolicy::Create( + BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)), + static_cast(state.range(1))); auto tester = new mock::MockBlockBasedTableTester(filter); KeyMaker km(state.range(2)); std::unique_ptr owner; @@ -91,9 +89,9 @@ BENCHMARK(FilterBuild)->Apply(CustomArguments); static void FilterQueryPositive(benchmark::State &state) { // setup data - auto filter = new BloomFilterPolicy( - static_cast(state.range(1)), - static_cast(state.range(0))); + auto filter = BloomLikeFilterPolicy::Create( + BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)), + static_cast(state.range(1))); auto tester = new mock::MockBlockBasedTableTester(filter); KeyMaker km(state.range(2)); std::unique_ptr owner; @@ -119,9 +117,9 @@ BENCHMARK(FilterQueryPositive)->Apply(CustomArguments); static void FilterQueryNegative(benchmark::State &state) { // setup data - auto filter = new BloomFilterPolicy( - static_cast(state.range(1)), - static_cast(state.range(0))); + auto filter = BloomLikeFilterPolicy::Create( + BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)), + static_cast(state.range(1))); auto tester = new 
mock::MockBlockBasedTableTester(filter); KeyMaker km(state.range(2)); std::unique_ptr owner; diff --git a/options/options_test.cc b/options/options_test.cc index 9869a0ae1..3dd068925 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -882,7 +882,6 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) { dynamic_cast(new_opt.filter_policy.get()); EXPECT_EQ(bfp->GetMillibitsPerKey(), 4567); EXPECT_EQ(bfp->GetWholeBitsPerKey(), 5); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom); // Verify that only the lower 32bits are stored in // new_opt.read_amp_bytes_per_bit. EXPECT_EQ(1U, new_opt.read_amp_bytes_per_bit); @@ -936,7 +935,6 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) { bfp = dynamic_cast(new_opt.filter_policy.get()); EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom); // use_block_based_builder=true now ignored in public API (same as false) ASSERT_OK(GetBlockBasedTableOptionsFromString( @@ -944,82 +942,67 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) { bfp = dynamic_cast(new_opt.filter_policy.get()); EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom); // Back door way of enabling deprecated block-based Bloom ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "filter_policy=rocksdb.internal.DeprecatedBlockBasedBloomFilter:4", &new_opt)); - bfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); // Only whole bits used - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kDeprecatedBlock); + auto builtin = + dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(builtin->GetId(), + "rocksdb.internal.DeprecatedBlockBasedBloomFilter:4"); // Test configuring using other internal names ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, 
"filter_policy=rocksdb.internal.LegacyBloomFilter:3", &new_opt)); - bfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(bfp->GetWholeBitsPerKey(), 3); // Only whole bits used - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kLegacyBloom); + builtin = + dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(builtin->GetId(), "rocksdb.internal.LegacyBloomFilter:3"); ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "filter_policy=rocksdb.internal.FastLocalBloomFilter:1.234", &new_opt)); - bfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 1234); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom); + builtin = + dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(builtin->GetId(), "rocksdb.internal.FastLocalBloomFilter:1.234"); ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "filter_policy=rocksdb.internal.Standard128RibbonFilter:1.234", &new_opt)); - bfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 1234); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); + builtin = + dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(builtin->GetId(), "rocksdb.internal.Standard128RibbonFilter:1.234"); // Ribbon filter policy (no Bloom hybrid) ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "filter_policy=ribbonfilter:5.678:-1;", &new_opt)); ASSERT_TRUE(new_opt.filter_policy != nullptr); - bfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 5678); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); + auto rfp = + dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(rfp->GetMillibitsPerKey(), 5678); + EXPECT_EQ(rfp->GetBloomBeforeLevel(), -1); // Ribbon filter policy (default Bloom hybrid) ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "filter_policy=ribbonfilter:6.789;", &new_opt)); 
ASSERT_TRUE(new_opt.filter_policy != nullptr); - auto ltfp = dynamic_cast( - new_opt.filter_policy.get()); - EXPECT_EQ(ltfp->TEST_GetStartingLevelForB(), 0); - - bfp = dynamic_cast(ltfp->TEST_GetPolicyA()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom); - - bfp = dynamic_cast(ltfp->TEST_GetPolicyB()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); + rfp = dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(rfp->GetMillibitsPerKey(), 6789); + EXPECT_EQ(rfp->GetBloomBeforeLevel(), 0); // Ribbon filter policy (custom Bloom hybrid) ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "filter_policy=ribbonfilter:6.789:5;", &new_opt)); ASSERT_TRUE(new_opt.filter_policy != nullptr); - ltfp = dynamic_cast( - new_opt.filter_policy.get()); - EXPECT_EQ(ltfp->TEST_GetStartingLevelForB(), 5); - - bfp = dynamic_cast(ltfp->TEST_GetPolicyA()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom); - - bfp = dynamic_cast(ltfp->TEST_GetPolicyB()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); + rfp = dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(rfp->GetMillibitsPerKey(), 6789); + EXPECT_EQ(rfp->GetBloomBeforeLevel(), 5); // Check block cache options are overwritten when specified // in new format as a struct. @@ -2877,7 +2860,6 @@ TEST_F(OptionsOldApiTest, GetBlockBasedTableOptionsFromString) { bfp = dynamic_cast(new_opt.filter_policy.get()); EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom); // Check block cache options are overwritten when specified // in new format as a struct. 
diff --git a/table/block_based/block_based_filter_block.cc b/table/block_based/block_based_filter_block.cc index 710a9cb49..c56936474 100644 --- a/table/block_based/block_based_filter_block.cc +++ b/table/block_based/block_based_filter_block.cc @@ -15,6 +15,7 @@ #include "monitoring/perf_context_imp.h" #include "rocksdb/filter_policy.h" #include "table/block_based/block_based_table_reader.h" +#include "util/cast_util.h" #include "util/coding.h" #include "util/string_util.h" @@ -157,9 +158,9 @@ void BlockBasedFilterBlockBuilder::GenerateFilter() { // Generate filter for current set of keys and append to result_. filter_offsets_.push_back(static_cast(result_.size())); - BloomFilterPolicy::CreateFilter(tmp_entries_.data(), - static_cast(num_entries), bits_per_key_, - &result_); + DeprecatedBlockBasedBloomFilterPolicy::CreateFilter( + tmp_entries_.data(), static_cast(num_entries), bits_per_key_, + &result_); tmp_entries_.clear(); entries_.clear(); @@ -283,7 +284,8 @@ bool BlockBasedFilterBlockReader::MayMatch( assert(table()); assert(table()->get_rep()); - const bool may_match = BloomFilterPolicy::KeyMayMatch(entry, filter); + const bool may_match = + DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(entry, filter); if (may_match) { PERF_COUNTER_ADD(bloom_sst_hit_count, 1); return true; diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index f8aec88b1..6c622555d 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -82,7 +82,8 @@ FilterBlockBuilder* CreateFilterBlockBuilder( } else { // Check for backdoor deprecated block-based bloom config size_t starting_est = filter_bits_builder->EstimateEntriesAdded(); - constexpr auto kSecretStart = BloomFilterPolicy::kSecretBitsPerKeyStart; + constexpr auto kSecretStart = + DeprecatedBlockBasedBloomFilterPolicy::kSecretBitsPerKeyStart; if (starting_est >= kSecretStart && starting_est < kSecretStart + 100) { 
int bits_per_key = static_cast(starting_est - kSecretStart); delete filter_bits_builder; diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index 2e03779af..7e2cbc6d7 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -10,6 +10,7 @@ #include "rocksdb/filter_policy.h" #include +#include #include #include #include @@ -18,6 +19,8 @@ #include "cache/cache_entry_roles.h" #include "cache/cache_reservation_manager.h" #include "logging/logging.h" +#include "port/lang.h" +#include "rocksdb/rocksdb_namespace.h" #include "rocksdb/slice.h" #include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_reader.h" @@ -29,6 +32,7 @@ #include "util/hash.h" #include "util/ribbon_config.h" #include "util/ribbon_impl.h" +#include "util/string_util.h" namespace ROCKSDB_NAMESPACE { @@ -1307,21 +1311,8 @@ Status XXPH3FilterBitsBuilder::MaybePostVerify(const Slice& filter_content) { } } // namespace -const std::vector BloomFilterPolicy::kAllFixedImpls = { - kLegacyBloom, - kDeprecatedBlock, - kFastLocalBloom, - kStandard128Ribbon, -}; - -const std::vector BloomFilterPolicy::kAllUserModes = { - kDeprecatedBlock, - kAutoBloom, - kStandard128Ribbon, -}; - -BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode) - : mode_(mode), warned_(false), aggregate_rounding_balance_(0) { +BloomLikeFilterPolicy::BloomLikeFilterPolicy(double bits_per_key) + : warned_(false), aggregate_rounding_balance_(0) { // Sanitize bits_per_key if (bits_per_key < 0.5) { // Round down to no filter @@ -1353,14 +1344,48 @@ BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode) whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000; } -BloomFilterPolicy::~BloomFilterPolicy() {} +BloomLikeFilterPolicy::~BloomLikeFilterPolicy() {} const char* BuiltinFilterPolicy::Name() const { return "rocksdb.BuiltinBloomFilter"; } -void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, int 
bits_per_key, - std::string* dst) { +const char* DeprecatedBlockBasedBloomFilterPolicy::kName() { + return "rocksdb.internal.DeprecatedBlockBasedBloomFilter"; +} + +std::string DeprecatedBlockBasedBloomFilterPolicy::GetId() const { + return kName() + GetBitsPerKeySuffix(); +} + +DeprecatedBlockBasedBloomFilterPolicy::DeprecatedBlockBasedBloomFilterPolicy( + double bits_per_key) + : BloomLikeFilterPolicy(bits_per_key) {} + +FilterBitsBuilder* DeprecatedBlockBasedBloomFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext&) const { + if (GetWholeBitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + // Internal contract: returns a new fake builder that encodes bits per key + // into a special value from EstimateEntriesAdded() + struct B : public FilterBitsBuilder { + explicit B(int bits_per_key) : est(kSecretBitsPerKeyStart + bits_per_key) {} + size_t est; + size_t EstimateEntriesAdded() override { return est; } + void AddKey(const Slice&) override {} + using FilterBitsBuilder::Finish; // FIXME + Slice Finish(std::unique_ptr*) override { return Slice(); } + size_t ApproximateNumEntries(size_t) override { return 0; } + }; + return new B(GetWholeBitsPerKey()); +} + +void DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(const Slice* keys, + int n, + int bits_per_key, + std::string* dst) { // Compute bloom filter size (in both bits and bytes) uint32_t bits = static_cast(n * bits_per_key); @@ -1383,8 +1408,8 @@ void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, int bits_per_key, } } -bool BloomFilterPolicy::KeyMayMatch(const Slice& key, - const Slice& bloom_filter) { +bool DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch( + const Slice& key, const Slice& bloom_filter) { const size_t len = bloom_filter.size(); if (len < 2 || len > 0xffffffffU) { return false; @@ -1406,13 +1431,31 @@ bool BloomFilterPolicy::KeyMayMatch(const Slice& key, array); } +BloomFilterPolicy::BloomFilterPolicy(double bits_per_key) + : 
BloomLikeFilterPolicy(bits_per_key) {} + FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext( const FilterBuildingContext& context) const { - if (millibits_per_key_ == 0) { + if (GetMillibitsPerKey() == 0) { // "No filter" special case return nullptr; + } else if (context.table_options.format_version < 5) { + return GetLegacyBloomBuilderWithContext(context); + } else { + return GetFastLocalBloomBuilderWithContext(context); } - Mode cur = mode_; +} + +const char* BloomFilterPolicy::kName() { return "bloomfilter"; } + +std::string BloomFilterPolicy::GetId() const { + // Including ":false" for better forward-compatibility with 6.29 and earlier + // which required a boolean `use_block_based_builder` parameter + return kName() + GetBitsPerKeySuffix() + ":false"; +} + +FilterBitsBuilder* BloomLikeFilterPolicy::GetFastLocalBloomBuilderWithContext( + const FilterBuildingContext& context) const { bool offm = context.table_options.optimize_filters_for_memory; bool reserve_filter_construction_mem = (context.table_options.reserve_table_builder_memory && @@ -1422,80 +1465,73 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext( cache_res_mgr = std::make_shared( context.table_options.block_cache); } - // Unusual code construction so that we can have just - // one exhaustive switch without (risky) recursion - for (int i = 0; i < 2; ++i) { - switch (cur) { - case kAutoBloom: - if (context.table_options.format_version < 5) { - cur = kLegacyBloom; - } else { - cur = kFastLocalBloom; - } - break; - case kDeprecatedBlock: { - if (context.info_log && !warned_.load(std::memory_order_relaxed)) { - warned_ = true; - ROCKS_LOG_WARN(context.info_log, - "Using deprecated block-based Bloom filter is " - "inefficient (%d bits per key).", - whole_bits_per_key_); - } - // Internal contract: returns a new fake builder that encodes bits per - // key into a special value from EstimateEntriesAdded() - struct B : public FilterBitsBuilder { - explicit B(int bits_per_key) - : 
est(kSecretBitsPerKeyStart + bits_per_key) {} - size_t est; - size_t EstimateEntriesAdded() override { return est; } - void AddKey(const Slice&) override {} - using FilterBitsBuilder::Finish; // FIXME - Slice Finish(std::unique_ptr*) override { - return Slice(); - } - size_t ApproximateNumEntries(size_t) override { return 0; } - }; - return new B(GetWholeBitsPerKey()); - } - case kFastLocalBloom: return new FastLocalBloomBitsBuilder( millibits_per_key_, offm ? &aggregate_rounding_balance_ : nullptr, cache_res_mgr, context.table_options.detect_filter_construct_corruption); - case kLegacyBloom: - if (whole_bits_per_key_ >= 14 && context.info_log && - !warned_.load(std::memory_order_relaxed)) { - warned_ = true; - const char* adjective; - if (whole_bits_per_key_ >= 20) { - adjective = "Dramatic"; - } else { - adjective = "Significant"; - } - // For more details, see - // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter - ROCKS_LOG_WARN( - context.info_log, - "Using legacy Bloom filter with high (%d) bits/key. " - "%s filter space and/or accuracy improvement is available " - "with format_version>=5.", - whole_bits_per_key_, adjective); - } - return new LegacyBloomBitsBuilder(whole_bits_per_key_, - context.info_log); - case kStandard128Ribbon: - return new Standard128RibbonBitsBuilder( - desired_one_in_fp_rate_, millibits_per_key_, - offm ? 
&aggregate_rounding_balance_ : nullptr, cache_res_mgr, - context.table_options.detect_filter_construct_corruption, - context.info_log); +} + +FilterBitsBuilder* BloomLikeFilterPolicy::GetLegacyBloomBuilderWithContext( + const FilterBuildingContext& context) const { + if (whole_bits_per_key_ >= 14 && context.info_log && + !warned_.load(std::memory_order_relaxed)) { + warned_ = true; + const char* adjective; + if (whole_bits_per_key_ >= 20) { + adjective = "Dramatic"; + } else { + adjective = "Significant"; + } + // For more details, see + // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter + ROCKS_LOG_WARN(context.info_log, + "Using legacy Bloom filter with high (%d) bits/key. " + "%s filter space and/or accuracy improvement is available " + "with format_version>=5.", + whole_bits_per_key_, adjective); + } + return new LegacyBloomBitsBuilder(whole_bits_per_key_, context.info_log); +} + +FilterBitsBuilder* +BloomLikeFilterPolicy::GetStandard128RibbonBuilderWithContext( + const FilterBuildingContext& context) const { + // FIXME: code duplication with GetFastLocalBloomBuilderWithContext + bool offm = context.table_options.optimize_filters_for_memory; + bool reserve_filter_construction_mem = + (context.table_options.reserve_table_builder_memory && + context.table_options.block_cache); + std::shared_ptr cache_res_mgr; + if (reserve_filter_construction_mem) { + cache_res_mgr = std::make_shared( + context.table_options.block_cache); + } + return new Standard128RibbonBitsBuilder( + desired_one_in_fp_rate_, millibits_per_key_, + offm ? 
&aggregate_rounding_balance_ : nullptr, cache_res_mgr, + context.table_options.detect_filter_construct_corruption, + context.info_log); +} + +std::string BloomLikeFilterPolicy::GetBitsPerKeySuffix() const { + std::string rv = ":" + ROCKSDB_NAMESPACE::ToString(millibits_per_key_ / 1000); + int frac = millibits_per_key_ % 1000; + if (frac > 0) { + rv.push_back('.'); + rv.push_back(static_cast('0' + (frac / 100))); + frac %= 100; + if (frac > 0) { + rv.push_back(static_cast('0' + (frac / 10))); + frac %= 10; + if (frac > 0) { + rv.push_back(static_cast('0' + frac)); + } } } - assert(false); - return nullptr; // something legal + return rv; } -FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext( +FilterBitsBuilder* BuiltinFilterPolicy::GetBuilderFromContext( const FilterBuildingContext& context) { if (context.table_options.filter_policy) { return context.table_options.filter_policy->GetBuilderWithContext(context); @@ -1504,6 +1540,62 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext( } } +// For testing only, but always constructable with internal names +namespace test { + +const char* LegacyBloomFilterPolicy::kName() { + return "rocksdb.internal.LegacyBloomFilter"; +} + +std::string LegacyBloomFilterPolicy::GetId() const { + return kName() + GetBitsPerKeySuffix(); +} + +FilterBitsBuilder* LegacyBloomFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + return GetLegacyBloomBuilderWithContext(context); +} + +const char* FastLocalBloomFilterPolicy::kName() { + return "rocksdb.internal.FastLocalBloomFilter"; +} + +std::string FastLocalBloomFilterPolicy::GetId() const { + return kName() + GetBitsPerKeySuffix(); +} + +FilterBitsBuilder* FastLocalBloomFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + return 
GetFastLocalBloomBuilderWithContext(context); +} + +const char* Standard128RibbonFilterPolicy::kName() { + return "rocksdb.internal.Standard128RibbonFilter"; +} + +std::string Standard128RibbonFilterPolicy::GetId() const { + return kName() + GetBitsPerKeySuffix(); +} + +FilterBitsBuilder* Standard128RibbonFilterPolicy::GetBuilderWithContext( + const FilterBuildingContext& context) const { + if (GetMillibitsPerKey() == 0) { + // "No filter" special case + return nullptr; + } + return GetStandard128RibbonBuilderWithContext(context); +} + +} // namespace test + BuiltinFilterBitsReader* BuiltinFilterPolicy::GetBuiltinFilterBitsReader( const Slice& contents) { uint32_t len_with_meta = static_cast(contents.size()); @@ -1679,70 +1771,58 @@ const FilterPolicy* NewBloomFilterPolicy(double bits_per_key, bool /*use_block_based_builder*/) { // NOTE: use_block_based_builder now ignored so block-based filter is no // longer accessible in public API. - BloomFilterPolicy::Mode m = BloomFilterPolicy::kAutoBloom; - assert(std::find(BloomFilterPolicy::kAllUserModes.begin(), - BloomFilterPolicy::kAllUserModes.end(), - m) != BloomFilterPolicy::kAllUserModes.end()); - return new BloomFilterPolicy(bits_per_key, m); + return new BloomFilterPolicy(bits_per_key); } -// Chooses between two filter policies based on LSM level, but -// only for Level and Universal compaction styles. Flush is treated -// as level -1. Policy b is considered fallback / primary policy. 
-LevelThresholdFilterPolicy::LevelThresholdFilterPolicy( - std::unique_ptr&& a, - std::unique_ptr&& b, int starting_level_for_b) - : policy_a_(std::move(a)), - policy_b_(std::move(b)), - starting_level_for_b_(starting_level_for_b) { - // Don't use this wrapper class if you were going to set to -1 - assert(starting_level_for_b_ >= 0); -} +RibbonFilterPolicy::RibbonFilterPolicy(double bloom_equivalent_bits_per_key, + int bloom_before_level) + : BloomLikeFilterPolicy(bloom_equivalent_bits_per_key), + bloom_before_level_(bloom_before_level) {} -FilterBitsBuilder* LevelThresholdFilterPolicy::GetBuilderWithContext( +FilterBitsBuilder* RibbonFilterPolicy::GetBuilderWithContext( const FilterBuildingContext& context) const { + // Treat unknown same as bottommost + int levelish = INT_MAX; + switch (context.compaction_style) { case kCompactionStyleLevel: case kCompactionStyleUniversal: { - int levelish; if (context.reason == TableFileCreationReason::kFlush) { // Treat flush as level -1 assert(context.level_at_creation == 0); levelish = -1; } else if (context.level_at_creation == -1) { // Unknown level - // Policy b considered fallback / primary - return policy_b_->GetBuilderWithContext(context); + assert(levelish == INT_MAX); } else { levelish = context.level_at_creation; } - if (levelish >= starting_level_for_b_) { - return policy_b_->GetBuilderWithContext(context); - } else { - return policy_a_->GetBuilderWithContext(context); - } + break; } case kCompactionStyleFIFO: case kCompactionStyleNone: + // Treat as bottommost + assert(levelish == INT_MAX); break; } - // Policy b considered fallback / primary - return policy_b_->GetBuilderWithContext(context); + if (levelish < bloom_before_level_) { + return GetFastLocalBloomBuilderWithContext(context); + } else { + return GetStandard128RibbonBuilderWithContext(context); + } +} + +const char* RibbonFilterPolicy::kName() { return "ribbonfilter"; } + +std::string RibbonFilterPolicy::GetId() const { + return kName() + 
GetBitsPerKeySuffix() + ":" + + ROCKSDB_NAMESPACE::ToString(bloom_before_level_); } const FilterPolicy* NewRibbonFilterPolicy(double bloom_equivalent_bits_per_key, int bloom_before_level) { - std::unique_ptr ribbon_only{new BloomFilterPolicy( - bloom_equivalent_bits_per_key, BloomFilterPolicy::kStandard128Ribbon)}; - if (bloom_before_level > -1) { - // Could also use Bloom policy - std::unique_ptr bloom_only{new BloomFilterPolicy( - bloom_equivalent_bits_per_key, BloomFilterPolicy::kFastLocalBloom)}; - return new LevelThresholdFilterPolicy( - std::move(bloom_only), std::move(ribbon_only), bloom_before_level); - } else { - return ribbon_only.release(); - } + return new RibbonFilterPolicy(bloom_equivalent_bits_per_key, + bloom_before_level); } FilterBuildingContext::FilterBuildingContext( @@ -1751,55 +1831,84 @@ FilterBuildingContext::FilterBuildingContext( FilterPolicy::~FilterPolicy() { } +std::shared_ptr BloomLikeFilterPolicy::Create( + const std::string& name, double bits_per_key) { + if (name == test::LegacyBloomFilterPolicy::kName()) { + return std::make_shared(bits_per_key); + } else if (name == test::FastLocalBloomFilterPolicy::kName()) { + return std::make_shared(bits_per_key); + } else if (name == test::Standard128RibbonFilterPolicy::kName()) { + return std::make_shared(bits_per_key); + } else if (name == DeprecatedBlockBasedBloomFilterPolicy::kName()) { + return std::make_shared( + bits_per_key); + } else if (name == BloomFilterPolicy::kName()) { + // For testing + return std::make_shared(bits_per_key); + } else if (name == RibbonFilterPolicy::kName()) { + // For testing + return std::make_shared(bits_per_key, + /*bloom_before_level*/ 0); + } else { + return nullptr; + } +} + Status FilterPolicy::CreateFromString( const ConfigOptions& /*options*/, const std::string& value, std::shared_ptr* policy) { - const std::string kBloomName = "bloomfilter:"; - const std::string kRibbonName = "ribbonfilter:"; if (value == kNullptrString) { policy->reset(); + return 
Status::OK(); } else if (value == "rocksdb.BuiltinBloomFilter") { - *policy = std::make_shared(); - } else { + *policy = std::make_shared(); + return Status::OK(); + } #ifndef ROCKSDB_LITE - const std::vector vals = StringSplit(value, ':'); - if (vals.size() < 2) { - return Status::NotFound("Invalid filter policy name ", value); + const std::vector vals = StringSplit(value, ':'); + if (vals.size() < 2) { + return Status::NotFound("Invalid filter policy name ", value); + } + const std::string& name = vals[0]; + double bits_per_key = ParseDouble(trim(vals[1])); + if (name == BloomFilterPolicy::kName()) { + bool use_block_based_builder = false; + if (vals.size() > 2) { + use_block_based_builder = + ParseBoolean("use_block_based_builder", trim(vals[2])); } - const std::string& name = vals[0]; - double bits_per_key = ParseDouble(trim(vals[1])); - if (name == "bloomfilter") { // TODO: constants for names - // NOTE: ignoring obsolete bool for "use_block_based_builder" - policy->reset(NewBloomFilterPolicy(bits_per_key)); - } else if (name == "ribbonfilter") { - int bloom_before_level; - if (vals.size() < 3) { - bloom_before_level = 0; - } else { - bloom_before_level = ParseInt(trim(vals[2])); - } - policy->reset(NewRibbonFilterPolicy(/*bloom_equivalent*/ bits_per_key, - bloom_before_level)); - } else if (name == "rocksdb.internal.DeprecatedBlockBasedBloomFilter") { - *policy = std::make_shared( - bits_per_key, BloomFilterPolicy::kDeprecatedBlock); - } else if (name == "rocksdb.internal.LegacyBloomFilter") { - *policy = std::make_shared( - bits_per_key, BloomFilterPolicy::kLegacyBloom); - } else if (name == "rocksdb.internal.FastLocalBloomFilter") { - *policy = std::make_shared( - bits_per_key, BloomFilterPolicy::kFastLocalBloom); - } else if (name == "rocksdb.internal.Standard128RibbonFilter") { - *policy = std::make_shared( - bits_per_key, BloomFilterPolicy::kStandard128Ribbon); + policy->reset(NewBloomFilterPolicy(bits_per_key, use_block_based_builder)); + } else if 
(name == RibbonFilterPolicy::kName()) { + int bloom_before_level; + if (vals.size() < 3) { + bloom_before_level = 0; } else { - return Status::NotFound("Invalid filter policy name ", value); + bloom_before_level = ParseInt(trim(vals[2])); } + policy->reset(NewRibbonFilterPolicy(/*bloom_equivalent*/ bits_per_key, + bloom_before_level)); + } else { + *policy = BloomLikeFilterPolicy::Create(name, bits_per_key); + } + if (*policy) { + return Status::OK(); + } else { + return Status::NotFound("Invalid filter policy name ", value); + } #else - return Status::NotSupported("Cannot load filter policy in LITE mode ", - value); + return Status::NotSupported("Cannot load filter policy in LITE mode ", value); #endif // ROCKSDB_LITE - } - return Status::OK(); } + +const std::vector& BloomLikeFilterPolicy::GetAllFixedImpls() { + STATIC_AVOID_DESTRUCTION(std::vector, impls){ + // Match filter_bench -impl=x ordering + test::LegacyBloomFilterPolicy::kName(), + DeprecatedBlockBasedBloomFilterPolicy::kName(), + test::FastLocalBloomFilterPolicy::kName(), + test::Standard128RibbonFilterPolicy::kName(), + }; + return impls; +} + } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h index efe46ca64..4db0389f6 100644 --- a/table/block_based/filter_policy_internal.h +++ b/table/block_based/filter_policy_internal.h @@ -46,31 +46,41 @@ class BuiltinFilterBitsReader : public FilterBitsReader { virtual bool HashMayMatch(const uint64_t /* h */) { return true; } }; -// Base class for RocksDB built-in filter policies. This can read all -// kinds of built-in filters (for backward compatibility with old -// OPTIONS files) but does not build filters, so new SST files generated -// under the policy will get no filters (like nullptr FilterPolicy). -// This class is considered internal API and subject to change. +// Base class for RocksDB built-in filter policies. 
This provides the +// ability to read all kinds of built-in filters (so that old filters can +// be used even when you change between built-in policies). class BuiltinFilterPolicy : public FilterPolicy { - public: - static BuiltinFilterBitsReader* GetBuiltinFilterBitsReader( - const Slice& contents); - + public: // overrides // Shared name because any built-in policy can read filters from // any other + // FIXME when making filter policies Configurable. For now, this + // is still rocksdb.BuiltinBloomFilter const char* Name() const override; + // Convert to a string understood by FilterPolicy::CreateFromString + virtual std::string GetId() const = 0; + // Read metadata to determine what kind of FilterBitsReader is needed // and return a new one. This must successfully process any filter data // generated by a built-in FilterBitsBuilder, regardless of the impl // chosen for this BloomFilterPolicy. Not compatible with CreateFilter. FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override; - // Does not write filters. - FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext&) const override { - return nullptr; - } + public: // new + // An internal function for the implementation of + // BuiltinFilterBitsReader::GetFilterBitsReader without requiring an instance + // or working around potential virtual overrides. + static BuiltinFilterBitsReader* GetBuiltinFilterBitsReader( + const Slice& contents); + + // Returns a new FilterBitsBuilder from the filter_policy in + // table_options of a context, or nullptr if not applicable. + // (An internal convenience function to save boilerplate.) 
+ static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&); + + protected: + // Deprecated block-based filter only (no longer in public API) + bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const; private: // For Bloom filter implementation(s) (except deprecated block-based filter) @@ -80,85 +90,58 @@ class BuiltinFilterPolicy : public FilterPolicy { static BuiltinFilterBitsReader* GetRibbonBitsReader(const Slice& contents); }; -// RocksDB built-in filter policy for Bloom or Bloom-like filters including -// Ribbon filters. +// A "read only" filter policy used for backward compatibility with old +// OPTIONS files, which did not specify a Bloom configuration, just +// "rocksdb.BuiltinBloomFilter". Although this can read existing filters, +// this policy does not build new filters, so new SST files generated +// under the policy will get no filters (like nullptr FilterPolicy). // This class is considered internal API and subject to change. -// See NewBloomFilterPolicy and NewRibbonFilterPolicy. -class BloomFilterPolicy : public BuiltinFilterPolicy { +class ReadOnlyBuiltinFilterPolicy : public BuiltinFilterPolicy { public: - // An internal marker for operating modes of BloomFilterPolicy, in terms - // of selecting an implementation. This makes it easier for tests to track - // or to walk over the built-in set of Bloom filter implementations. The - // only variance in BloomFilterPolicy by mode/implementation is in - // GetFilterBitsBuilder(), so an enum is practical here vs. subclasses. - // - // This enum is essentially the union of all the different kinds of return - // value from GetFilterBitsBuilder, or "underlying implementation", and - // higher-level modes that choose an underlying implementation based on - // context information. - enum Mode { - // Legacy implementation of Bloom filter for full and partitioned filters. 
- // Set to 0 in case of value confusion with bool use_block_based_builder - // NOTE: TESTING ONLY as this mode does not use best compatible - // implementation - kLegacyBloom = 0, - // Deprecated block-based Bloom filter implementation. - // Set to 1 in case of value confusion with bool use_block_based_builder - // NOTE: DEPRECATED but user exposed - kDeprecatedBlock = 1, - // A fast, cache-local Bloom filter implementation. See description in - // FastLocalBloomImpl. - // NOTE: TESTING ONLY as this mode does not check format_version - kFastLocalBloom = 2, - // A Bloom alternative saving about 30% space for ~3-4x construction - // CPU time. See ribbon_alg.h and ribbon_impl.h. - kStandard128Ribbon = 3, - // Automatically choose between kLegacyBloom and kFastLocalBloom based on - // context at build time, including compatibility with format_version. - kAutoBloom = 100, - }; - // All the different underlying implementations that a BloomFilterPolicy - // might use, as a mode that says "always use this implementation." - // Only appropriate for unit tests. - static const std::vector kAllFixedImpls; - - // All the different modes of BloomFilterPolicy that are exposed from - // user APIs. Only appropriate for higher-level unit tests. Integration - // tests should prefer using NewBloomFilterPolicy (user-exposed). - static const std::vector kAllUserModes; - - explicit BloomFilterPolicy(double bits_per_key, Mode mode); - - ~BloomFilterPolicy() override; - - // For Deprecated block-based filter (no longer customizable in public API) - static void CreateFilter(const Slice* keys, int n, int bits_per_key, - std::string* dst); - static bool KeyMayMatch(const Slice& key, const Slice& bloom_filter); + // Convert to a string understood by FilterPolicy::CreateFromString + virtual std::string GetId() const override { return Name(); } - // To use this function, call GetBuilderFromContext(). 
- // - // Neither the context nor any objects therein should be saved beyond - // the call to this function, unless it's shared_ptr. + // Does not write filters. FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext&) const override; + const FilterBuildingContext&) const override { + return nullptr; + } +}; - // Internal contract: for kDeprecatedBlock, GetBuilderWithContext returns - // a new fake builder that encodes bits per key into a special value from - // EstimateEntriesAdded(), using kSecretBitsPerKeyStart + bits_per_key - static constexpr size_t kSecretBitsPerKeyStart = 1234567890U; +// RocksDB built-in filter policy for Bloom or Bloom-like filters including +// Ribbon filters. +// This class is considered internal API and subject to change. +// See NewBloomFilterPolicy and NewRibbonFilterPolicy. +class BloomLikeFilterPolicy : public BuiltinFilterPolicy { + public: + explicit BloomLikeFilterPolicy(double bits_per_key); - // Returns a new FilterBitsBuilder from the filter_policy in - // table_options of a context, or nullptr if not applicable. - // (An internal convenience function to save boilerplate.) - static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&); + ~BloomLikeFilterPolicy() override; // Essentially for testing only: configured millibits/key int GetMillibitsPerKey() const { return millibits_per_key_; } // Essentially for testing only: legacy whole bits/key int GetWholeBitsPerKey() const { return whole_bits_per_key_; } - // Testing only - Mode GetMode() const { return mode_; } + + // All the different underlying implementations that a BloomLikeFilterPolicy + // might use, as a configuration string name for a testing mode for + // "always use this implementation." Only appropriate for unit tests. 
+ static const std::vector& GetAllFixedImpls(); + + // Convenience function for creating by name for fixed impls + static std::shared_ptr Create(const std::string& name, + double bits_per_key); + + protected: + // Some implementations used by aggregating policies + FilterBitsBuilder* GetLegacyBloomBuilderWithContext( + const FilterBuildingContext& context) const; + FilterBitsBuilder* GetFastLocalBloomBuilderWithContext( + const FilterBuildingContext& context) const; + FilterBitsBuilder* GetStandard128RibbonBuilderWithContext( + const FilterBuildingContext& context) const; + + std::string GetBitsPerKeySuffix() const; private: // Bits per key settings are for configuring Bloom filters. @@ -177,10 +160,6 @@ class BloomFilterPolicy : public BuiltinFilterPolicy { // example, 100 -> 1% fp rate. double desired_one_in_fp_rate_; - // Selected mode (a specific implementation or way of selecting an - // implementation) for building new SST filters. - Mode mode_; - // Whether relevant warnings have been logged already. (Remember so we // only report once per BloomFilterPolicy instance, to keep the noise down.) mutable std::atomic warned_; @@ -196,28 +175,111 @@ class BloomFilterPolicy : public BuiltinFilterPolicy { mutable std::atomic aggregate_rounding_balance_; }; -// Chooses between two filter policies based on LSM level, but -// only for Level and Universal compaction styles. Flush is treated -// as level -1. Policy b is considered fallback / primary policy. -class LevelThresholdFilterPolicy : public BuiltinFilterPolicy { +// For NewBloomFilterPolicy +// +// This is a user-facing policy that automatically chooses between +// LegacyBloom and FastLocalBloom based on context at build time, +// including compatibility with format_version. 
+class BloomFilterPolicy : public BloomLikeFilterPolicy { public: - LevelThresholdFilterPolicy(std::unique_ptr&& a, - std::unique_ptr&& b, - int starting_level_for_b); + explicit BloomFilterPolicy(double bits_per_key); + // To use this function, call BuiltinFilterPolicy::GetBuilderFromContext(). + // + // Neither the context nor any objects therein should be saved beyond + // the call to this function, unless it's shared_ptr. FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext& context) const override; + const FilterBuildingContext&) const override; + + static const char* kName(); + std::string GetId() const override; +}; - inline int TEST_GetStartingLevelForB() const { return starting_level_for_b_; } +// For NewRibbonFilterPolicy +// +// This is a user-facing policy that chooses between Standard128Ribbon +// and FastLocalBloom based on context at build time (LSM level and other +// factors in extreme cases). +class RibbonFilterPolicy : public BloomLikeFilterPolicy { + public: + explicit RibbonFilterPolicy(double bloom_equivalent_bits_per_key, + int bloom_before_level); - inline const FilterPolicy* TEST_GetPolicyA() const { return policy_a_.get(); } + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext&) const override; - inline const FilterPolicy* TEST_GetPolicyB() const { return policy_b_.get(); } + int GetBloomBeforeLevel() const { return bloom_before_level_; } + + static const char* kName(); + std::string GetId() const override; private: - const std::unique_ptr policy_a_; - const std::unique_ptr policy_b_; - int starting_level_for_b_; + const int bloom_before_level_; }; +// Deprecated block-based filter only. We still support reading old +// block-based filters from any BuiltinFilterPolicy, but there is no public +// option to build them. However, this class is used to build them for testing +// and for a public backdoor to building them by constructing this policy from +// a string. 
+class DeprecatedBlockBasedBloomFilterPolicy : public BloomLikeFilterPolicy { + public: + explicit DeprecatedBlockBasedBloomFilterPolicy(double bits_per_key); + + // Internal contract: returns a new fake builder that encodes bits per key + // into a special value from EstimateEntriesAdded(), using + // kSecretBitsPerKeyStart + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext&) const override; + static constexpr size_t kSecretBitsPerKeyStart = 1234567890U; + + static const char* kName(); + std::string GetId() const override; + + static void CreateFilter(const Slice* keys, int n, int bits_per_key, + std::string* dst); + static bool KeyMayMatch(const Slice& key, const Slice& bloom_filter); +}; + +// For testing only, but always constructable with internal names +namespace test { + +class LegacyBloomFilterPolicy : public BloomLikeFilterPolicy { + public: + explicit LegacyBloomFilterPolicy(double bits_per_key) + : BloomLikeFilterPolicy(bits_per_key) {} + + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext& context) const override; + + static const char* kName(); + std::string GetId() const override; +}; + +class FastLocalBloomFilterPolicy : public BloomLikeFilterPolicy { + public: + explicit FastLocalBloomFilterPolicy(double bits_per_key) + : BloomLikeFilterPolicy(bits_per_key) {} + + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext& context) const override; + + static const char* kName(); + std::string GetId() const override; +}; + +class Standard128RibbonFilterPolicy : public BloomLikeFilterPolicy { + public: + explicit Standard128RibbonFilterPolicy(double bloom_equiv_bits_per_key) + : BloomLikeFilterPolicy(bloom_equiv_bits_per_key) {} + + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext& context) const override; + + static const char* kName(); + std::string GetId() const override; +}; + +} // namespace test + } // namespace ROCKSDB_NAMESPACE diff --git 
a/table/block_based/mock_block_based_table.h b/table/block_based/mock_block_based_table.h index 1d6ec9fee..4dd6e392b 100644 --- a/table/block_based/mock_block_based_table.h +++ b/table/block_based/mock_block_based_table.h @@ -4,6 +4,8 @@ // (found in the LICENSE.Apache file in the root directory). #pragma once +#include + #include "rocksdb/filter_policy.h" #include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_reader.h" @@ -30,10 +32,15 @@ class MockBlockBasedTableTester { std::unique_ptr table_; explicit MockBlockBasedTableTester(const FilterPolicy* filter_policy) + : MockBlockBasedTableTester( + std::shared_ptr(filter_policy)){}; + + explicit MockBlockBasedTableTester( + std::shared_ptr filter_policy) : ioptions_(options_), env_options_(options_), icomp_(options_.comparator) { - table_options_.filter_policy.reset(filter_policy); + table_options_.filter_policy = std::move(filter_policy); constexpr bool skip_filters = false; constexpr bool immortal_table = false; diff --git a/util/bloom_test.cc b/util/bloom_test.cc index 0fa1e3b14..791f71ba4 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -39,6 +39,13 @@ DEFINE_int32(bits_per_key, 10, ""); namespace ROCKSDB_NAMESPACE { +namespace { +const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kName(); +const std::string kFastLocalBloom = test::FastLocalBloomFilterPolicy::kName(); +const std::string kStandard128Ribbon = + test::Standard128RibbonFilterPolicy::kName(); +} // namespace + static const int kVerbose = 1; static Slice Key(int i, char* buffer) { @@ -63,7 +70,7 @@ static int NextLength(int length) { class BlockBasedBloomTest : public testing::Test { private: - int bits_per_key_; + std::unique_ptr policy_; std::string filter_; std::vector keys_; @@ -76,9 +83,7 @@ class BlockBasedBloomTest : public testing::Test { } void ResetPolicy(double bits_per_key) { - bits_per_key_ = - BloomFilterPolicy(bits_per_key, BloomFilterPolicy::kDeprecatedBlock) - 
.GetWholeBitsPerKey(); + policy_.reset(new DeprecatedBlockBasedBloomFilterPolicy(bits_per_key)); Reset(); } @@ -94,9 +99,9 @@ class BlockBasedBloomTest : public testing::Test { key_slices.push_back(Slice(keys_[i])); } filter_.clear(); - BloomFilterPolicy::CreateFilter(key_slices.data(), - static_cast(key_slices.size()), - bits_per_key_, &filter_); + DeprecatedBlockBasedBloomFilterPolicy::CreateFilter( + &key_slices[0], static_cast(key_slices.size()), + policy_->GetWholeBitsPerKey(), &filter_); keys_.clear(); if (kVerbose >= 2) DumpFilter(); } @@ -122,7 +127,7 @@ class BlockBasedBloomTest : public testing::Test { if (!keys_.empty()) { Build(); } - return BloomFilterPolicy::KeyMayMatch(s, filter_); + return DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(s, filter_); } double FalsePositiveRate() { @@ -264,7 +269,7 @@ TEST_F(BlockBasedBloomTest, Schema) { // Different bits-per-byte -class FullBloomTest : public testing::TestWithParam { +class FullBloomTest : public testing::TestWithParam { protected: BlockBasedTableOptions table_options_; @@ -285,9 +290,9 @@ class FullBloomTest : public testing::TestWithParam { return dynamic_cast(bits_builder_.get()); } - const BloomFilterPolicy* GetBloomFilterPolicy() { + const BloomLikeFilterPolicy* GetBloomLikeFilterPolicy() { // Throws on bad cast - return &dynamic_cast(*policy_); + return &dynamic_cast(*policy_); } void Reset() { @@ -299,7 +304,7 @@ class FullBloomTest : public testing::TestWithParam { } void ResetPolicy(double bits_per_key) { - policy_.reset(new BloomFilterPolicy(bits_per_key, GetParam())); + policy_ = BloomLikeFilterPolicy::Create(GetParam(), bits_per_key); Reset(); } @@ -420,7 +425,7 @@ TEST_P(FullBloomTest, FilterSize) { {INFINITY, 100000}, {NAN, 100000}}) { ResetPolicy(bpk.first); - auto bfp = GetBloomFilterPolicy(); + auto bfp = GetBloomLikeFilterPolicy(); EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey()); EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey()); @@ -433,7 +438,7 @@ 
TEST_P(FullBloomTest, FilterSize) { computed -= 0.5; some_computed_less_than_denoted |= (computed < bpk.first); ResetPolicy(computed); - bfp = GetBloomFilterPolicy(); + bfp = GetBloomLikeFilterPolicy(); EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey()); EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey()); @@ -451,7 +456,7 @@ TEST_P(FullBloomTest, FilterSize) { size_t n2 = bits_builder->ApproximateNumEntries(space); EXPECT_GE(n2, n); size_t space2 = bits_builder->CalculateSpace(n2); - if (n > 12000 && GetParam() == BloomFilterPolicy::kStandard128Ribbon) { + if (n > 12000 && GetParam() == kStandard128Ribbon) { // TODO(peterd): better approximation? EXPECT_GE(space2, space); EXPECT_LE(space2 * 0.998, space * 1.0); @@ -568,14 +573,14 @@ TEST_P(FullBloomTest, OptimizeForMemory) { } int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8; - if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) { + if (GetParam() == kStandard128Ribbon) { // ~ 30% savings vs. Bloom filter ex_min_total_size = 7 * ex_min_total_size / 10; } EXPECT_GE(static_cast(total_size), ex_min_total_size); int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5); - if (GetParam() == BloomFilterPolicy::kLegacyBloom) { + if (GetParam() == kLegacyBloom) { // this config can add extra cache line to make odd number blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE; } @@ -583,7 +588,7 @@ TEST_P(FullBloomTest, OptimizeForMemory) { EXPECT_GE(total_mem, total_size); // optimize_filters_for_memory not implemented with legacy Bloom - if (offm && GetParam() != BloomFilterPolicy::kLegacyBloom) { + if (offm && GetParam() != kLegacyBloom) { // This value can include a small extra penalty for kExtraPadding fprintf(stderr, "Internal fragmentation (optimized): %g%%\n", (total_mem - total_size) * 100.0 / total_size); @@ -629,8 +634,8 @@ TEST(FullBloomFilterConstructionReserveMemTest, lo.strict_capacity_limit = true; std::shared_ptr cache(NewLRUCache(lo)); table_options.block_cache 
= cache; - table_options.filter_policy.reset(new BloomFilterPolicy( - FLAGS_bits_per_key, BloomFilterPolicy::Mode::kStandard128Ribbon)); + table_options.filter_policy = + BloomLikeFilterPolicy::Create(kStandard128Ribbon, FLAGS_bits_per_key); FilterBuildingContext ctx(table_options); std::unique_ptr filter_bits_builder( table_options.filter_policy->GetBuilderWithContext(ctx)); @@ -692,35 +697,35 @@ inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128, // ability to read filters generated using other cache line sizes. // See RawSchema. TEST_P(FullBloomTest, Schema) { -#define EXPECT_EQ_Bloom(a, b) \ - { \ - if (GetParam() != BloomFilterPolicy::kStandard128Ribbon) { \ - EXPECT_EQ(a, b); \ - } \ - } -#define EXPECT_EQ_Ribbon(a, b) \ - { \ - if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) { \ - EXPECT_EQ(a, b); \ - } \ - } -#define EXPECT_EQ_FastBloom(a, b) \ - { \ - if (GetParam() == BloomFilterPolicy::kFastLocalBloom) { \ - EXPECT_EQ(a, b); \ - } \ - } -#define EXPECT_EQ_LegacyBloom(a, b) \ - { \ - if (GetParam() == BloomFilterPolicy::kLegacyBloom) { \ - EXPECT_EQ(a, b); \ - } \ - } -#define EXPECT_EQ_NotLegacy(a, b) \ - { \ - if (GetParam() != BloomFilterPolicy::kLegacyBloom) { \ - EXPECT_EQ(a, b); \ - } \ +#define EXPECT_EQ_Bloom(a, b) \ + { \ + if (GetParam() != kStandard128Ribbon) { \ + EXPECT_EQ(a, b); \ + } \ + } +#define EXPECT_EQ_Ribbon(a, b) \ + { \ + if (GetParam() == kStandard128Ribbon) { \ + EXPECT_EQ(a, b); \ + } \ + } +#define EXPECT_EQ_FastBloom(a, b) \ + { \ + if (GetParam() == kFastLocalBloom) { \ + EXPECT_EQ(a, b); \ + } \ + } +#define EXPECT_EQ_LegacyBloom(a, b) \ + { \ + if (GetParam() == kLegacyBloom) { \ + EXPECT_EQ(a, b); \ + } \ + } +#define EXPECT_EQ_NotLegacy(a, b) \ + { \ + if (GetParam() != kLegacyBloom) { \ + EXPECT_EQ(a, b); \ + } \ } char buffer[sizeof(int)]; @@ -1259,9 +1264,8 @@ TEST_P(FullBloomTest, CorruptFilters) { } INSTANTIATE_TEST_CASE_P(Full, FullBloomTest, - 
testing::Values(BloomFilterPolicy::kLegacyBloom, - BloomFilterPolicy::kFastLocalBloom, - BloomFilterPolicy::kStandard128Ribbon)); + testing::Values(kLegacyBloom, kFastLocalBloom, + kStandard128Ribbon)); static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) { union { diff --git a/util/filter_bench.cc b/util/filter_bench.cc index 5cbad3970..6160cac8c 100644 --- a/util/filter_bench.cc +++ b/util/filter_bench.cc @@ -31,6 +31,7 @@ int main() { #include "util/random.h" #include "util/stderr_logger.h" #include "util/stop_watch.h" +#include "util/string_util.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; using GFLAGS_NAMESPACE::RegisterFlagValidator; @@ -140,6 +141,7 @@ using ROCKSDB_NAMESPACE::Arena; using ROCKSDB_NAMESPACE::BlockContents; using ROCKSDB_NAMESPACE::BloomFilterPolicy; using ROCKSDB_NAMESPACE::BloomHash; +using ROCKSDB_NAMESPACE::BloomLikeFilterPolicy; using ROCKSDB_NAMESPACE::BuiltinFilterBitsBuilder; using ROCKSDB_NAMESPACE::CachableEntry; using ROCKSDB_NAMESPACE::Cache; @@ -147,6 +149,7 @@ using ROCKSDB_NAMESPACE::EncodeFixed32; using ROCKSDB_NAMESPACE::FastRange32; using ROCKSDB_NAMESPACE::FilterBitsReader; using ROCKSDB_NAMESPACE::FilterBuildingContext; +using ROCKSDB_NAMESPACE::FilterPolicy; using ROCKSDB_NAMESPACE::FullFilterBlockReader; using ROCKSDB_NAMESPACE::GetSliceHash; using ROCKSDB_NAMESPACE::GetSliceHash64; @@ -287,6 +290,16 @@ static uint32_t DryRunHash64(Slice &s) { return Lower32of64(GetSliceHash64(s)); } +const std::shared_ptr &GetPolicy() { + static std::shared_ptr policy; + if (!policy) { + policy = BloomLikeFilterPolicy::Create( + BloomLikeFilterPolicy::GetAllFixedImpls().at(FLAGS_impl), + FLAGS_bits_per_key); + } + return policy; +} + struct FilterBench : public MockBlockBasedTableTester { std::vector kms_; std::vector infos_; @@ -297,9 +310,7 @@ struct FilterBench : public MockBlockBasedTableTester { StderrLogger stderr_logger_; FilterBench() - : MockBlockBasedTableTester(new BloomFilterPolicy( - FLAGS_bits_per_key, - 
static_cast(FLAGS_impl))), + : MockBlockBasedTableTester(GetPolicy()), random_(FLAGS_seed), m_queries_(0) { for (uint32_t i = 0; i < FLAGS_batch_size; ++i) {