From 126c22371495c79df2306becf79da4d16e444faa Mon Sep 17 00:00:00 2001
From: Peter Dillinger
Date: Thu, 16 Jun 2022 15:51:33 -0700
Subject: [PATCH] Remove deprecated block-based filter (#10184)

Summary:
In https://github.com/facebook/rocksdb/issues/9535, release 7.0, we hid the old block-based filter from being created through the public API because of its inefficiency. Although we normally maintain read compatibility on old DBs forever, filters are not required for reading a DB, only for optimizing read performance. Thus, it should be acceptable to remove this code and the substantial maintenance burden it carries as useful features (such as user timestamp) are developed and validated.

This change completely removes the code for reading and writing the old block-based filters, a net removal of about 1370 lines of code that are no longer needed. The corresponding options have been removed from the testing / benchmarking tools. The prior existence of the feature is only evident in a couple of places:
* `CacheEntryRole::kDeprecatedFilterBlock` - We can update this public API enum in a major release to minimize source code incompatibilities.
* A warning is logged when an old table file that used the old block-based filter is opened. This is provided as a courtesy, and would be a pain to unit test, so manual testing should suffice. Unfortunately, sst_dump does not tell you whether a file uses a block-based filter, and the structure of the code makes it very difficult to fix.
* To detect that case, `kObsoleteFilterBlockPrefix` (renamed from `kFilterBlockPrefix`) for the metaindex is maintained (for now).

Other notes:
* In some cases where numbers are associated with filter configurations, we have had to update the assigned numbers so that they all correspond to something that exists.
* Fixed a potential stat counting bug by assuming `filter_checked = false` for cases like `filter == nullptr`, rather than assuming `filter_checked = true`.
* Removed the obsolete `block_offset` and `prefix_extractor` parameters from several functions.
* Removed some unnecessary checks `if (!table_prefix_extractor() && !prefix_extractor)` because the caller guarantees the prefix extractor exists and is compatible Pull Request resolved: https://github.com/facebook/rocksdb/pull/10184 Test Plan: tests updated, manually test new warning in LOG using base version to generate a DB Reviewed By: riversand963 Differential Revision: D37212647 Pulled By: pdillinger fbshipit-source-id: 06ee020d8de3b81260ffc36ad0c1202cbf463a80 --- CMakeLists.txt | 2 - HISTORY.md | 1 + Makefile | 3 - TARGETS | 8 - db/db_block_cache_test.cc | 11 +- db/db_bloom_filter_test.cc | 101 ++--- db_stress_tool/db_stress_common.h | 1 - db_stress_tool/db_stress_gflags.cc | 4 - db_stress_tool/db_stress_test_base.cc | 11 +- include/rocksdb/cache.h | 2 +- options/options_test.cc | 12 +- src.mk | 2 - table/block_based/block_based_filter_block.cc | 358 ------------------ table/block_based/block_based_filter_block.h | 127 ------- .../block_based_filter_block_test.cc | 219 ----------- .../block_based/block_based_table_builder.cc | 37 +- table/block_based/block_based_table_reader.cc | 148 ++------ table/block_based/block_based_table_reader.h | 4 +- .../block_based_table_reader_sync_and_async.h | 2 +- table/block_based/block_like_traits.h | 13 +- table/block_based/block_type.h | 1 - table/block_based/filter_block.h | 38 +- .../block_based/filter_block_reader_common.cc | 5 +- table/block_based/filter_policy.cc | 91 ----- table/block_based/filter_policy_internal.h | 32 +- table/block_based/full_filter_block.cc | 33 +- table/block_based/full_filter_block.h | 24 +- table/block_based/full_filter_block_test.cc | 68 ++-- table/block_based/mock_block_based_table.h | 1 - table/block_based/partitioned_filter_block.cc | 65 ++-- table/block_based/partitioned_filter_block.h | 33 +- .../partitioned_filter_block_test.cc | 49 ++- table/block_fetcher.cc | 1 - table/table_test.cc | 21 +- tools/db_bench_tool.cc | 16 - tools/db_crashtest.py | 6 - util/bloom_test.cc | 201 ---------- util/filter_bench.cc | 16 +- utilities/cache_dump_load_impl.cc | 28 +- utilities/cache_dump_load_impl.h | 2 +- 40 files changed, 219 insertions(+), 1578 deletions(-) delete mode 100644 table/block_based/block_based_filter_block.cc delete mode 100644 table/block_based/block_based_filter_block.h delete mode 100644 table/block_based/block_based_filter_block_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 07f89a9cb..91fbade1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -738,7 +738,6 @@ set(SOURCES table/adaptive/adaptive_table_factory.cc table/block_based/binary_search_index_reader.cc table/block_based/block.cc - table/block_based/block_based_filter_block.cc table/block_based/block_based_table_builder.cc table/block_based/block_based_table_factory.cc table/block_based/block_based_table_iterator.cc @@ -1321,7 +1320,6 @@ if(WITH_TESTS) options/customizable_test.cc options/options_settable_test.cc options/options_test.cc - table/block_based/block_based_filter_block_test.cc table/block_based/block_based_table_reader_test.cc table/block_based/block_test.cc table/block_based/data_block_hash_index_test.cc diff --git a/HISTORY.md b/HISTORY.md index 4b80bc66b..fadf57eb9 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -37,6 +37,7 @@ ### Behavior changes * DB::Open(), DB::OpenAsSecondary() will fail if a Logger cannot be created (#9984) +* Removed support for reading Bloom filters using obsolete block-based filter format. (Support for writing such filters was dropped in 7.0.) 
For good read performance on old DBs using these filters, a full compaction is required. ## 7.3.0 (05/20/2022) ### Bug Fixes diff --git a/Makefile b/Makefile index 7494987c5..187abe6a1 100644 --- a/Makefile +++ b/Makefile @@ -1594,9 +1594,6 @@ random_access_file_reader_test: $(OBJ_DIR)/file/random_access_file_reader_test.o file_reader_writer_test: $(OBJ_DIR)/util/file_reader_writer_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -block_based_filter_block_test: $(OBJ_DIR)/table/block_based/block_based_filter_block_test.o $(TEST_LIBRARY) $(LIBRARY) - $(AM_LINK) - block_based_table_reader_test: table/block_based/block_based_table_reader_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) diff --git a/TARGETS b/TARGETS index 6f311616c..d19f5eb6d 100644 --- a/TARGETS +++ b/TARGETS @@ -165,7 +165,6 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[ "table/adaptive/adaptive_table_factory.cc", "table/block_based/binary_search_index_reader.cc", "table/block_based/block.cc", - "table/block_based/block_based_filter_block.cc", "table/block_based/block_based_table_builder.cc", "table/block_based/block_based_table_factory.cc", "table/block_based/block_based_table_iterator.cc", @@ -494,7 +493,6 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[ "table/adaptive/adaptive_table_factory.cc", "table/block_based/binary_search_index_reader.cc", "table/block_based/block.cc", - "table/block_based/block_based_filter_block.cc", "table/block_based/block_based_table_builder.cc", "table/block_based/block_based_table_factory.cc", "table/block_based/block_based_table_iterator.cc", @@ -4800,12 +4798,6 @@ cpp_unittest_wrapper(name="blob_garbage_meter_test", extra_compiler_flags=[]) -cpp_unittest_wrapper(name="block_based_filter_block_test", - srcs=["table/block_based/block_based_filter_block_test.cc"], - deps=[":rocksdb_test_lib"], - extra_compiler_flags=[]) - - cpp_unittest_wrapper(name="block_based_table_reader_test", srcs=["table/block_based/block_based_table_reader_test.cc"], deps=[":rocksdb_test_lib"], diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 9ab2d3db6..353db60dd 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -670,7 +670,7 @@ class DBBlockCacheTest1 : public DBTestBase, }; INSTANTIATE_TEST_CASE_P(DBBlockCacheTest1, DBBlockCacheTest1, - ::testing::Values(1, 2, 3)); + ::testing::Values(1, 2)); TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { Options options = CurrentOptions(); @@ -686,13 +686,10 @@ TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { table_options.partition_filters = true; table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); break; - case 2: // block-based filter - table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); - break; - case 3: // full filter - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + case 2: // full filter + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); break; default: assert(false); diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index db93596a4..a2a2a7a21 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -37,8 +37,6 @@ std::shared_ptr Create(double bits_per_key, return BloomLikeFilterPolicy::Create(name, bits_per_key); } const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName(); -const std::string kDeprecatedBlock = - 
DeprecatedBlockBasedBloomFilterPolicy::kClassName(); const std::string kFastLocalBloom = test::FastLocalBloomFilterPolicy::kClassName(); const std::string kStandard128Ribbon = @@ -189,7 +187,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) { options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + bbto.filter_policy.reset(NewBloomFilterPolicy(10)); if (partition_filters) { bbto.partition_filters = true; bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; @@ -267,7 +265,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) { options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + bbto.filter_policy.reset(NewBloomFilterPolicy(10)); if (partition_filters) { bbto.partition_filters = true; bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; @@ -331,7 +329,7 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + bbto.filter_policy.reset(NewBloomFilterPolicy(10)); bbto.whole_key_filtering = false; if (partition_filters) { bbto.partition_filters = true; @@ -747,7 +745,6 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) { INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestDefFormatVersion, ::testing::Values( - std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion), std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion))); @@ -755,7 +752,6 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestWithParam, ::testing::Values( - std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion), std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion))); @@ -763,7 +759,6 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( FormatLatest, DBBloomFilterTestWithParam, ::testing::Values( - std::make_tuple(kDeprecatedBlock, false, kLatestFormatVersion), std::make_tuple(kAutoBloom, true, kLatestFormatVersion), std::make_tuple(kAutoBloom, false, kLatestFormatVersion), std::make_tuple(kAutoRibbon, false, kLatestFormatVersion))); @@ -829,8 +824,6 @@ std::shared_ptr kCompatibilityRibbonPolicy{ NewRibbonFilterPolicy(20, -1)}; std::vector kCompatibilityConfigs = { - {Create(20, kDeprecatedBlock), false, - BlockBasedTableOptions().format_version}, {kCompatibilityBloomPolicy, false, BlockBasedTableOptions().format_version}, {kCompatibilityBloomPolicy, true, BlockBasedTableOptions().format_version}, {kCompatibilityBloomPolicy, false, /* legacy Bloom */ 4U}, @@ -878,9 +871,8 @@ TEST_F(DBBloomFilterTest, BloomFilterCompatibility) { ASSERT_EQ("val", Get(prefix + "Z")); // Filter positive // Filter negative, with high probability ASSERT_EQ("NOT_FOUND", Get(prefix + "Q")); - // FULL_POSITIVE does not include block-based filter case (j == 0) EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), - j == 0 ? 
0 : 2); + 2); EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), 1); } } @@ -904,7 +896,7 @@ class ChargeFilterConstructionTestWithParam detect_filter_construct_corruption_(std::get<3>(GetParam())) { if (charge_filter_construction_ == CacheEntryRoleOptions::Decision::kDisabled || - policy_ == kDeprecatedBlock || policy_ == kLegacyBloom) { + policy_ == kLegacyBloom) { // For these cases, we only interested in whether filter construction // cache charging happens instead of its accuracy. Therefore we don't // need many keys. @@ -1031,8 +1023,6 @@ INSTANTIATE_TEST_CASE_P( std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, kStandard128Ribbon, true, true), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, - kDeprecatedBlock, false, false), std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, kLegacyBloom, false, false))); @@ -1109,11 +1099,10 @@ TEST_P(ChargeFilterConstructionTestWithParam, Basic) { return; } - if (policy == kDeprecatedBlock || policy == kLegacyBloom) { + if (policy == kLegacyBloom) { EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0) << "There shouldn't be filter construction cache charging as this " - "feature does not support kDeprecatedBlock " - "nor kLegacyBloom"; + "feature does not support kLegacyBloom"; return; } @@ -1798,10 +1787,9 @@ class SliceTransformLimitedDomain : public SliceTransform { } }; -TEST_F(DBBloomFilterTest, PrefixExtractorFullFilter) { +TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter1) { BlockBasedTableOptions bbto; - // Full Filter Block - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false)); + bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); bbto.whole_key_filtering = false; Options options = CurrentOptions(); @@ -1827,10 +1815,9 @@ TEST_F(DBBloomFilterTest, PrefixExtractorFullFilter) { ASSERT_EQ(Get("zzzzz_AAAA"), "val5"); } -TEST_F(DBBloomFilterTest, PrefixExtractorBlockFilter) { +TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter2) { BlockBasedTableOptions bbto; - // Block Filter Block - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, true)); + bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); Options options = CurrentOptions(); options.prefix_extractor = std::make_shared(); @@ -2221,7 +2208,6 @@ class BloomStatsTestWithParam } else { BlockBasedTableOptions table_options; if (partition_filters_) { - assert(bfp_impl_ != kDeprecatedBlock); table_options.partition_filters = partition_filters_; table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; @@ -2346,9 +2332,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(value3, iter->value().ToString()); - // The seek doesn't check block-based bloom filter because last index key - // starts with the same prefix we're seeking to. - uint64_t expected_hits = bfp_impl_ == kDeprecatedBlock ? 
1 : 2; + uint64_t expected_hits = 2; ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); iter->Seek(key2); @@ -2360,8 +2344,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { INSTANTIATE_TEST_CASE_P( BloomStatsTestWithParam, BloomStatsTestWithParam, - ::testing::Values(std::make_tuple(kDeprecatedBlock, false), - std::make_tuple(kLegacyBloom, false), + ::testing::Values(std::make_tuple(kLegacyBloom, false), std::make_tuple(kLegacyBloom, true), std::make_tuple(kFastLocalBloom, false), std::make_tuple(kFastLocalBloom, true), @@ -2486,7 +2469,7 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { options.level_compaction_dynamic_level_bytes = true; BlockBasedTableOptions bbto; bbto.cache_index_and_filter_blocks = true; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, true)); + bbto.filter_policy.reset(NewBloomFilterPolicy(10)); bbto.whole_key_filtering = true; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); options.optimize_filters_for_hits = true; @@ -2674,7 +2657,6 @@ int CountIter(std::unique_ptr& iter, const Slice& key) { // as the upper bound and two keys are adjacent according to the comparator. TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { - int using_full_builder = bfp_impl != kDeprecatedBlock; Options options; options.create_if_missing = true; options.env = CurrentOptions().env; @@ -2726,8 +2708,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abcdxx00"), 4); // should check bloom filter since upper bound meets requirement - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } { @@ -2740,8 +2721,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abcdxx01"), 4); // should skip bloom filter since upper bound is too long - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } { @@ -2754,8 +2734,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { ASSERT_EQ(CountIter(iter, "abcdxx02"), 4); // should check bloom filter since upper bound matches transformed seek // key - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } { @@ -2768,8 +2747,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0); // should skip bloom filter since mismatch is found - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}})); @@ -2782,8 +2760,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr 
iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abc"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:4"}})); @@ -2795,8 +2772,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abc"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 3 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); } } @@ -2806,7 +2782,6 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { // verify iterators can read all SST files using the latest config. TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { - int using_full_builder = bfp_impl != kDeprecatedBlock; Options options; options.env = CurrentOptions().env; options.create_if_missing = true; @@ -2840,11 +2815,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "foo"), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 1 + using_full_builder); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2); ASSERT_EQ(CountIter(iter, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 1 + using_full_builder); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); // second SST with capped:3 BF @@ -2857,14 +2830,12 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { // BF is cappped:3 now std::unique_ptr iter_tmp(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_tmp, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); // both counters are incremented because BF is "not changed" for 1 of the // 2 SST files, so filter is checked once and found no match. 
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 3 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); } @@ -2882,25 +2853,21 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { std::unique_ptr iter_tmp(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_tmp, "foo"), 9); // the first and last BF are checked - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 4 + using_full_builder * 3); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 7); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); // only last BF is checked and not found - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 5 + using_full_builder * 3); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 8); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); } // iter_old can only see the first SST, so checked plus 1 ASSERT_EQ(CountIter(iter_old, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 6 + using_full_builder * 3); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9); // iter was created after the first setoptions call so only full filter // will check the filter ASSERT_EQ(CountIter(iter, "foo"), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 6 + using_full_builder * 4); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 10); { // keys in all three SSTs are visible to iterator @@ -2908,12 +2875,10 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { // so +2 for checked counter std::unique_ptr iter_all(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_all, "foo"), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 7 + using_full_builder * 5); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); ASSERT_EQ(CountIter(iter_all, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 8 + using_full_builder * 5); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 13); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); @@ -2924,12 +2889,10 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { ASSERT_EQ(CountIter(iter_all, "foo"), 6); // all three SST are checked because the current options has the same as // the remaining SST (capped:3) - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 9 + using_full_builder * 7); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 16); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); ASSERT_EQ(CountIter(iter_all, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 10 + using_full_builder * 7); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 17); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4); } // TODO(Zhongyi): Maybe also need to add Get calls to test point look up? 
diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 64b39b741..1dd99884f 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -152,7 +152,6 @@ DECLARE_double(experimental_mempurge_threshold); DECLARE_bool(enable_write_thread_adaptive_yield); DECLARE_int32(reopen); DECLARE_double(bloom_bits); -DECLARE_bool(use_block_based_filter); DECLARE_int32(ribbon_starting_level); DECLARE_bool(partition_filters); DECLARE_bool(optimize_filters_for_memory); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index c2594beb6..b68902812 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -469,10 +469,6 @@ DEFINE_double(bloom_bits, 10, "Bloom filter bits per key. " "Negative means use default settings."); -DEFINE_bool(use_block_based_filter, false, - "use block based filter" - "instead of full filter for block based table"); - DEFINE_int32( ribbon_starting_level, 999, "Use Bloom filter on levels below specified and Ribbon beginning on level " diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index b9ac4734b..466cef209 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -36,16 +36,7 @@ std::shared_ptr CreateFilterPolicy() { return BlockBasedTableOptions().filter_policy; } const FilterPolicy* new_policy; - if (FLAGS_use_block_based_filter) { - if (FLAGS_ribbon_starting_level < 999) { - fprintf( - stderr, - "Cannot combine use_block_based_filter and ribbon_starting_level\n"); - exit(1); - } else { - new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, true); - } - } else if (FLAGS_ribbon_starting_level >= 999) { + if (FLAGS_ribbon_starting_level >= 999) { // Use Bloom API new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, false); } else { diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 593a53eb4..6ec2e3332 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -553,7 +553,7 @@ enum class CacheEntryRole { kFilterBlock, // Block-based table metadata block for partitioned filter kFilterMetaBlock, - // Block-based table deprecated filter block (old "block-based" filter) + // OBSOLETE / DEPRECATED: old/removed block-based filter kDeprecatedFilterBlock, // Block-based table index block kIndexBlock, diff --git a/options/options_test.cc b/options/options_test.cc index 7c688f290..264500758 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -999,21 +999,11 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) { EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); - // Back door way of enabling deprecated block-based Bloom - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "filter_policy=rocksdb.internal.DeprecatedBlockBasedBloomFilter:4", - &new_opt)); - auto builtin = - dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(builtin->GetId(), - "rocksdb.internal.DeprecatedBlockBasedBloomFilter:4"); - // Test configuring using other internal names ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "filter_policy=rocksdb.internal.LegacyBloomFilter:3", &new_opt)); - builtin = + auto builtin = dynamic_cast(new_opt.filter_policy.get()); EXPECT_EQ(builtin->GetId(), "rocksdb.internal.LegacyBloomFilter:3"); diff --git a/src.mk b/src.mk index dbd9fa15a..882d6fa48 100644 --- a/src.mk +++ b/src.mk @@ -156,7 +156,6 @@ LIB_SOURCES = \ 
table/adaptive/adaptive_table_factory.cc \ table/block_based/binary_search_index_reader.cc \ table/block_based/block.cc \ - table/block_based/block_based_filter_block.cc \ table/block_based/block_based_table_builder.cc \ table/block_based/block_based_table_factory.cc \ table/block_based/block_based_table_iterator.cc \ @@ -525,7 +524,6 @@ TEST_MAIN_SOURCES = \ options/customizable_test.cc \ options/options_settable_test.cc \ options/options_test.cc \ - table/block_based/block_based_filter_block_test.cc \ table/block_based/block_based_table_reader_test.cc \ table/block_based/block_test.cc \ table/block_based/data_block_hash_index_test.cc \ diff --git a/table/block_based/block_based_filter_block.cc b/table/block_based/block_based_filter_block.cc deleted file mode 100644 index 3650621b0..000000000 --- a/table/block_based/block_based_filter_block.cc +++ /dev/null @@ -1,358 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "table/block_based/block_based_filter_block.h" - -#include - -#include "db/dbformat.h" -#include "monitoring/perf_context_imp.h" -#include "rocksdb/filter_policy.h" -#include "table/block_based/block_based_table_reader.h" -#include "util/cast_util.h" -#include "util/coding.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { - -void AppendItem(std::string* props, const std::string& key, - const std::string& value) { - char cspace = ' '; - std::string value_str(""); - size_t i = 0; - const size_t dataLength = 64; - const size_t tabLength = 2; - const size_t offLength = 16; - - value_str.append(&value[i], std::min(size_t(dataLength), value.size())); - i += dataLength; - while (i < value.size()) { - value_str.append("\n"); - value_str.append(offLength, cspace); - value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i)); - i += dataLength; - } - - std::string result(""); - if (key.size() < (offLength - tabLength)) - result.append(size_t((offLength - tabLength)) - key.size(), cspace); - result.append(key); - - props->append(result + ": " + value_str + "\n"); -} - -template -void AppendItem(std::string* props, const TKey& key, const std::string& value) { - std::string key_str = std::to_string(key); - AppendItem(props, key_str, value); -} -} // namespace - -// See doc/table_format.txt for an explanation of the filter block format. 
- -// Generate new filter every 2KB of data -static const size_t kFilterBaseLg = 11; -static const size_t kFilterBase = 1 << kFilterBaseLg; - -BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder( - const SliceTransform* prefix_extractor, - const BlockBasedTableOptions& table_opt, int bits_per_key) - : prefix_extractor_(prefix_extractor), - whole_key_filtering_(table_opt.whole_key_filtering), - bits_per_key_(bits_per_key), - prev_prefix_start_(0), - prev_prefix_size_(0), - total_added_in_built_(0) {} - -void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) { - uint64_t filter_index = (block_offset / kFilterBase); - assert(filter_index >= filter_offsets_.size()); - while (filter_index > filter_offsets_.size()) { - GenerateFilter(); - } -} - -size_t BlockBasedFilterBlockBuilder::EstimateEntriesAdded() { - return total_added_in_built_ + start_.size(); -} - -void BlockBasedFilterBlockBuilder::Add(const Slice& key_without_ts) { - if (prefix_extractor_ && prefix_extractor_->InDomain(key_without_ts)) { - AddPrefix(key_without_ts); - } - - if (whole_key_filtering_) { - AddKey(key_without_ts); - } -} - -// Add key to filter if needed -inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) { - start_.push_back(entries_.size()); - entries_.append(key.data(), key.size()); -} - -// Add prefix to filter if needed -inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) { - // get slice for most recently added entry - Slice prev; - if (prev_prefix_size_ > 0) { - prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_); - } - - Slice prefix = prefix_extractor_->Transform(key); - // insert prefix only when it's different from the previous prefix. - if (prev.size() == 0 || prefix != prev) { - prev_prefix_start_ = entries_.size(); - prev_prefix_size_ = prefix.size(); - AddKey(prefix); - } -} - -Slice BlockBasedFilterBlockBuilder::Finish( - const BlockHandle& /*tmp*/, Status* status, - std::unique_ptr* /* filter_data */) { - // In this impl we ignore BlockHandle and filter_data - *status = Status::OK(); - - if (!start_.empty()) { - GenerateFilter(); - } - - // Append array of per-filter offsets - const uint32_t array_offset = static_cast(result_.size()); - for (size_t i = 0; i < filter_offsets_.size(); i++) { - PutFixed32(&result_, filter_offsets_[i]); - } - - PutFixed32(&result_, array_offset); - result_.push_back(kFilterBaseLg); // Save encoding parameter in result - return Slice(result_); -} - -void BlockBasedFilterBlockBuilder::GenerateFilter() { - const size_t num_entries = start_.size(); - if (num_entries == 0) { - // Fast path if there are no keys for this filter - filter_offsets_.push_back(static_cast(result_.size())); - return; - } - total_added_in_built_ += num_entries; - - // Make list of keys from flattened key structure - start_.push_back(entries_.size()); // Simplify length computation - tmp_entries_.resize(num_entries); - for (size_t i = 0; i < num_entries; i++) { - const char* base = entries_.data() + start_[i]; - size_t length = start_[i + 1] - start_[i]; - tmp_entries_[i] = Slice(base, length); - } - - // Generate filter for current set of keys and append to result_. 
- filter_offsets_.push_back(static_cast(result_.size())); - DeprecatedBlockBasedBloomFilterPolicy::CreateFilter( - tmp_entries_.data(), static_cast(num_entries), bits_per_key_, - &result_); - - tmp_entries_.clear(); - entries_.clear(); - start_.clear(); - prev_prefix_start_ = 0; - prev_prefix_size_ = 0; -} - -BlockBasedFilterBlockReader::BlockBasedFilterBlockReader( - const BlockBasedTable* t, CachableEntry&& filter_block) - : FilterBlockReaderCommon(t, std::move(filter_block)) { - assert(table()); - assert(table()->get_rep()); - assert(table()->get_rep()->filter_policy); -} - -std::unique_ptr BlockBasedFilterBlockReader::Create( - const BlockBasedTable* table, const ReadOptions& ro, - FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, - bool pin, BlockCacheLookupContext* lookup_context) { - assert(table); - assert(table->get_rep()); - assert(!pin || prefetch); - - CachableEntry filter_block; - if (prefetch || !use_cache) { - const Status s = ReadFilterBlock( - table, prefetch_buffer, ro, use_cache, nullptr /* get_context */, - lookup_context, &filter_block, BlockType::kDeprecatedFilter); - if (!s.ok()) { - IGNORE_STATUS_IF_ERROR(s); - return std::unique_ptr(); - } - - if (use_cache && !pin) { - filter_block.Reset(); - } - } - - return std::unique_ptr( - new BlockBasedFilterBlockReader(table, std::move(filter_block))); -} - -bool BlockBasedFilterBlockReader::KeyMayMatch( - const Slice& key, const SliceTransform* /* prefix_extractor */, - uint64_t block_offset, const bool no_io, - const Slice* const /*const_ikey_ptr*/, GetContext* get_context, - BlockCacheLookupContext* lookup_context) { - assert(block_offset != kNotValid); - if (!whole_key_filtering()) { - return true; - } - return MayMatch(key, block_offset, no_io, get_context, lookup_context); -} - -bool BlockBasedFilterBlockReader::PrefixMayMatch( - const Slice& prefix, const SliceTransform* /* prefix_extractor */, - uint64_t block_offset, const bool no_io, - const Slice* const /*const_ikey_ptr*/, GetContext* get_context, - BlockCacheLookupContext* lookup_context) { - assert(block_offset != kNotValid); - return MayMatch(prefix, block_offset, no_io, get_context, lookup_context); -} - -bool BlockBasedFilterBlockReader::ParseFieldsFromBlock( - const BlockContents& contents, const char** data, const char** offset, - size_t* num, size_t* base_lg) { - assert(data); - assert(offset); - assert(num); - assert(base_lg); - - const size_t n = contents.data.size(); - if (n < 5) { // 1 byte for base_lg and 4 for start of offset array - return false; - } - - const uint32_t last_word = DecodeFixed32(contents.data.data() + n - 5); - if (last_word > n - 5) { - return false; - } - - *data = contents.data.data(); - *offset = (*data) + last_word; - *num = (n - 5 - last_word) / 4; - *base_lg = contents.data[n - 1]; - - return true; -} - -bool BlockBasedFilterBlockReader::MayMatch( - const Slice& entry, uint64_t block_offset, bool no_io, - GetContext* get_context, BlockCacheLookupContext* lookup_context) const { - CachableEntry filter_block; - - const Status s = - GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block, - BlockType::kDeprecatedFilter); - if (!s.ok()) { - IGNORE_STATUS_IF_ERROR(s); - return true; - } - - assert(filter_block.GetValue()); - - const char* data = nullptr; - const char* offset = nullptr; - size_t num = 0; - size_t base_lg = 0; - if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num, - &base_lg)) { - return true; // Errors are treated as potential matches - } - - const uint64_t 
index = block_offset >> base_lg; - if (index < num) { - const uint32_t start = DecodeFixed32(offset + index * 4); - const uint32_t limit = DecodeFixed32(offset + index * 4 + 4); - if (start <= limit && limit <= (uint32_t)(offset - data)) { - const Slice filter = Slice(data + start, limit - start); - - assert(table()); - assert(table()->get_rep()); - - const bool may_match = - DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(entry, filter); - if (may_match) { - PERF_COUNTER_ADD(bloom_sst_hit_count, 1); - return true; - } else { - PERF_COUNTER_ADD(bloom_sst_miss_count, 1); - return false; - } - } else if (start == limit) { - // Empty filters do not match any entries - return false; - } - } - return true; // Errors are treated as potential matches -} - -size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const { - size_t usage = ApproximateFilterBlockMemoryUsage(); -#ifdef ROCKSDB_MALLOC_USABLE_SIZE - usage += malloc_usable_size(const_cast(this)); -#else - usage += sizeof(*this); -#endif // ROCKSDB_MALLOC_USABLE_SIZE - return usage; -} - -std::string BlockBasedFilterBlockReader::ToString() const { - CachableEntry filter_block; - - const Status s = - GetOrReadFilterBlock(false /* no_io */, nullptr /* get_context */, - nullptr /* lookup_context */, &filter_block, - BlockType::kDeprecatedFilter); - if (!s.ok()) { - IGNORE_STATUS_IF_ERROR(s); - return std::string("Unable to retrieve filter block"); - } - - assert(filter_block.GetValue()); - - const char* data = nullptr; - const char* offset = nullptr; - size_t num = 0; - size_t base_lg = 0; - if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num, - &base_lg)) { - return std::string("Error parsing filter block"); - } - - std::string result; - result.reserve(1024); - - std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks"); - AppendItem(&result, s_fb, std::to_string(num)); - AppendItem(&result, s_bo, s_hd); - - for (size_t index = 0; index < num; index++) { - uint32_t start = DecodeFixed32(offset + index * 4); - uint32_t limit = DecodeFixed32(offset + index * 4 + 4); - - if (start != limit) { - result.append(" filter block # " + std::to_string(index + 1) + "\n"); - Slice filter = Slice(data + start, limit - start); - AppendItem(&result, start, filter.ToString(true)); - } - } - return result; -} - -} // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/block_based_filter_block.h b/table/block_based/block_based_filter_block.h deleted file mode 100644 index 156da0492..000000000 --- a/table/block_based/block_based_filter_block.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A filter block is stored near the end of a Table file. It contains -// filters (e.g., bloom filters) for all data blocks in the table combined -// into a single filter block. 
- -#pragma once - -#include -#include - -#include -#include -#include - -#include "rocksdb/options.h" -#include "rocksdb/slice.h" -#include "rocksdb/slice_transform.h" -#include "table/block_based/filter_block_reader_common.h" -#include "table/block_based/filter_policy_internal.h" -#include "table/format.h" -#include "util/hash.h" - -namespace ROCKSDB_NAMESPACE { - -// A BlockBasedFilterBlockBuilder is used to construct all of the filters for a -// particular Table. It generates a single string which is stored as -// a special block in the Table. -// -// The sequence of calls to BlockBasedFilterBlockBuilder must match the regexp: -// (StartBlock Add*)* Finish -class BlockBasedFilterBlockBuilder : public FilterBlockBuilder { - public: - BlockBasedFilterBlockBuilder(const SliceTransform* prefix_extractor, - const BlockBasedTableOptions& table_opt, - int bits_per_key); - // No copying allowed - BlockBasedFilterBlockBuilder(const BlockBasedFilterBlockBuilder&) = delete; - void operator=(const BlockBasedFilterBlockBuilder&) = delete; - - virtual bool IsBlockBased() override { return true; } - virtual void StartBlock(uint64_t block_offset) override; - virtual void Add(const Slice& key_without_ts) override; - virtual bool IsEmpty() const override { - return start_.empty() && filter_offsets_.empty(); - } - virtual size_t EstimateEntriesAdded() override; - virtual Slice Finish( - const BlockHandle& tmp, Status* status, - std::unique_ptr* filter_data = nullptr) override; - using FilterBlockBuilder::Finish; - - private: - void AddKey(const Slice& key); - void AddPrefix(const Slice& key); - void GenerateFilter(); - - // important: all of these might point to invalid addresses - // at the time of destruction of this filter block. destructor - // should NOT dereference them. - const SliceTransform* prefix_extractor_; - bool whole_key_filtering_; - int bits_per_key_; - - size_t prev_prefix_start_; // the position of the last appended prefix - // to "entries_". - size_t prev_prefix_size_; // the length of the last appended prefix to - // "entries_". - std::string entries_; // Flattened entry contents - std::vector start_; // Starting index in entries_ of each entry - std::string result_; // Filter data computed so far - std::vector tmp_entries_; // policy_->CreateFilter() argument - std::vector filter_offsets_; - uint64_t total_added_in_built_; // Total keys added to filters built so far -}; - -// A FilterBlockReader is used to parse filter from SST table. 
-// KeyMayMatch and PrefixMayMatch would trigger filter checking -class BlockBasedFilterBlockReader - : public FilterBlockReaderCommon { - public: - BlockBasedFilterBlockReader(const BlockBasedTable* t, - CachableEntry&& filter_block); - // No copying allowed - BlockBasedFilterBlockReader(const BlockBasedFilterBlockReader&) = delete; - void operator=(const BlockBasedFilterBlockReader&) = delete; - - static std::unique_ptr Create( - const BlockBasedTable* table, const ReadOptions& ro, - FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, - bool pin, BlockCacheLookupContext* lookup_context); - - bool IsBlockBased() override { return true; } - - bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - const Slice* const const_ikey_ptr, GetContext* get_context, - BlockCacheLookupContext* lookup_context) override; - bool PrefixMayMatch(const Slice& prefix, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - const Slice* const const_ikey_ptr, - GetContext* get_context, - BlockCacheLookupContext* lookup_context) override; - size_t ApproximateMemoryUsage() const override; - - // convert this object to a human readable form - std::string ToString() const override; - - private: - static bool ParseFieldsFromBlock(const BlockContents& contents, - const char** data, const char** offset, - size_t* num, size_t* base_lg); - - bool MayMatch(const Slice& entry, uint64_t block_offset, bool no_io, - GetContext* get_context, - BlockCacheLookupContext* lookup_context) const; -}; - -} // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/block_based_filter_block_test.cc b/table/block_based/block_based_filter_block_test.cc deleted file mode 100644 index 943a3e941..000000000 --- a/table/block_based/block_based_filter_block_test.cc +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include "table/block_based/block_based_filter_block.h" -#include "rocksdb/filter_policy.h" -#include "table/block_based/block_based_table_reader.h" -#include "table/block_based/mock_block_based_table.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/coding.h" -#include "util/hash.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -// Test for block based filter block -// use new interface in FilterPolicy to create filter builder/reader -class BlockBasedFilterBlockTest : public mock::MockBlockBasedTableTester, - public testing::Test { - public: - BlockBasedFilterBlockTest() - : mock::MockBlockBasedTableTester(NewBloomFilterPolicy(10, true)) {} -}; - -TEST_F(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) { - FilterBlockBuilder* builder = - new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10); - Slice slice(builder->Finish()); - ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(slice)); - - CachableEntry block( - new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, - true /* own_value */); - - FilterBlockReader* reader = - new BlockBasedFilterBlockReader(table_.get(), std::move(block)); - ASSERT_TRUE(reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/10000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - delete builder; - delete reader; -} - -TEST_F(BlockBasedFilterBlockTest, BlockBasedSingleChunk) { - FilterBlockBuilder* builder = - new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10); - builder->StartBlock(100); - builder->Add("foo"); - builder->Add("bar"); - builder->Add("box"); - builder->StartBlock(200); - builder->Add("box"); - builder->StartBlock(300); - builder->Add("hello"); - Slice slice(builder->Finish()); - - CachableEntry block( - new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, - true /* own_value */); - - FilterBlockReader* reader = - new BlockBasedFilterBlockReader(table_.get(), std::move(block)); - ASSERT_TRUE(reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "other", /*prefix_extractor=*/nullptr, 
/*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - delete builder; - delete reader; -} - -TEST_F(BlockBasedFilterBlockTest, BlockBasedMultiChunk) { - FilterBlockBuilder* builder = - new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10); - - // First filter - builder->StartBlock(0); - builder->Add("foo"); - builder->StartBlock(2000); - builder->Add("bar"); - - // Second filter - builder->StartBlock(3100); - builder->Add("box"); - - // Third filter is empty - - // Last filter - builder->StartBlock(9000); - builder->Add("box"); - builder->Add("hello"); - - Slice slice(builder->Finish()); - - CachableEntry block( - new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, - true /* own_value */); - - FilterBlockReader* reader = - new BlockBasedFilterBlockReader(table_.get(), std::move(block)); - - // Check first filter - ASSERT_TRUE(reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/2000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - // Check second filter - ASSERT_TRUE(reader->KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - // Check third filter (empty) - ASSERT_TRUE(!reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - // Check last filter - ASSERT_TRUE(reader->KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, 
/*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - delete builder; - delete reader; -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 5d56499db..80e3aefb2 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -37,7 +37,6 @@ #include "rocksdb/table.h" #include "rocksdb/types.h" #include "table/block_based/block.h" -#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_factory.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" @@ -80,17 +79,6 @@ FilterBlockBuilder* CreateFilterBlockBuilder( if (filter_bits_builder == nullptr) { return nullptr; } else { - // Check for backdoor deprecated block-based bloom config - size_t starting_est = filter_bits_builder->EstimateEntriesAdded(); - constexpr auto kSecretStart = - DeprecatedBlockBasedBloomFilterPolicy::kSecretBitsPerKeyStart; - if (starting_est >= kSecretStart && starting_est < kSecretStart + 100) { - int bits_per_key = static_cast(starting_est - kSecretStart); - delete filter_bits_builder; - return new BlockBasedFilterBlockBuilder(mopt.prefix_extractor.get(), - table_opt, bits_per_key); - } - // END check for backdoor deprecated block-based bloom config if (table_opt.partition_filters) { assert(p_index_builder != nullptr); // Since after partition cut request from filter builder it takes time @@ -904,10 +892,6 @@ BlockBasedTableBuilder::BlockBasedTableBuilder( rep_ = new Rep(sanitized_table_options, tbo, file); - if (rep_->filter_builder != nullptr) { - rep_->filter_builder->StartBlock(0); - } - TEST_SYNC_POINT_CALLBACK( "BlockBasedTableBuilder::BlockBasedTableBuilder:PreSetupBaseCacheKey", const_cast(&rep_->props)); @@ -1094,9 +1078,6 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents, WriteRawBlock(block_contents, type, handle, block_type, &raw_block_contents); r->compressed_output.clear(); if (is_data_block) { - if (r->filter_builder != nullptr) { - r->filter_builder->StartBlock(r->get_offset()); - } r->props.data_size = r->get_offset(); ++r->props.num_data_blocks; } @@ -1385,9 +1366,6 @@ void BlockBasedTableBuilder::BGWorkWriteRawBlock() { break; } - if (r->filter_builder != nullptr) { - r->filter_builder->StartBlock(r->get_offset()); - } r->props.data_size = r->get_offset(); ++r->props.num_data_blocks; @@ -1496,9 +1474,6 @@ Status BlockBasedTableBuilder::InsertBlockInCacheHelper( case BlockType::kFilterPartitionIndex: s = InsertBlockInCache(block_contents, handle, block_type); break; - case BlockType::kDeprecatedFilter: - s = InsertBlockInCache(block_contents, handle, 
block_type); - break; case BlockType::kFilter: s = InsertBlockInCache(block_contents, handle, block_type); @@ -1568,7 +1543,6 @@ void BlockBasedTableBuilder::WriteFilterBlock( return; } BlockHandle filter_block_handle; - bool is_block_based_filter = rep_->filter_builder->IsBlockBased(); bool is_partitioned_filter = rep_->table_options.partition_filters; if (ok()) { rep_->props.num_filter_entries += @@ -1595,8 +1569,7 @@ void BlockBasedTableBuilder::WriteFilterBlock( rep_->props.filter_size += filter_content.size(); - BlockType btype = is_block_based_filter ? BlockType::kDeprecatedFilter - : is_partitioned_filter && /* last */ s.ok() + BlockType btype = is_partitioned_filter && /* last */ s.ok() ? BlockType::kFilterPartitionIndex : BlockType::kFilter; WriteRawBlock(filter_content, kNoCompression, &filter_block_handle, btype, @@ -1608,10 +1581,8 @@ void BlockBasedTableBuilder::WriteFilterBlock( // Add mapping from ".Name" to location // of filter data. std::string key; - key = is_block_based_filter ? BlockBasedTable::kFilterBlockPrefix - : is_partitioned_filter - ? BlockBasedTable::kPartitionedFilterBlockPrefix - : BlockBasedTable::kFullFilterBlockPrefix; + key = is_partitioned_filter ? BlockBasedTable::kPartitionedFilterBlockPrefix + : BlockBasedTable::kFullFilterBlockPrefix; key.append(rep_->table_options.filter_policy->CompatibilityName()); meta_index_builder->Add(key, filter_block_handle); } @@ -2114,7 +2085,7 @@ const char* BlockBasedTableBuilder::GetFileChecksumFuncName() const { } } -const std::string BlockBasedTable::kFilterBlockPrefix = "filter."; +const std::string BlockBasedTable::kObsoleteFilterBlockPrefix = "filter."; const std::string BlockBasedTable::kFullFilterBlockPrefix = "fullfilter."; const std::string BlockBasedTable::kPartitionedFilterBlockPrefix = "partitionedfilter."; diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 7074df313..6c1bb431d 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -46,7 +46,6 @@ #include "rocksdb/trace_record.h" #include "table/block_based/binary_search_index_reader.h" #include "table/block_based/block.h" -#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_factory.h" #include "table/block_based/block_based_table_iterator.h" #include "table/block_based/block_like_traits.h" @@ -196,7 +195,6 @@ void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type, switch (block_type) { case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: PERF_COUNTER_ADD(block_cache_filter_hit_count, 1); if (get_context) { @@ -255,7 +253,6 @@ void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type, switch (block_type) { case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: if (get_context) { ++get_context->get_context_stats_.num_cache_filter_miss; } else { @@ -312,7 +309,6 @@ void BlockBasedTable::UpdateCacheInsertionMetrics( switch (block_type) { case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: if (get_context) { ++get_context->get_context_stats_.num_cache_filter_add; if (redundant) { @@ -954,7 +950,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( {std::make_pair(Rep::FilterType::kFullFilter, kFullFilterBlockPrefix), std::make_pair(Rep::FilterType::kPartitionedFilter, kPartitionedFilterBlockPrefix), - 
std::make_pair(Rep::FilterType::kBlockFilter, kFilterBlockPrefix)}) { + std::make_pair(Rep::FilterType::kNoFilter, + kObsoleteFilterBlockPrefix)}) { if (builtin_compatible) { // This code is only here to deal with a hiccup in early 7.0.x where // there was an unintentional name change in the SST files metadata. @@ -967,7 +964,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( test::LegacyBloomFilterPolicy::kClassName(), test::FastLocalBloomFilterPolicy::kClassName(), test::Standard128RibbonFilterPolicy::kClassName(), - DeprecatedBlockBasedBloomFilterPolicy::kClassName(), + "rocksdb.internal.DeprecatedBlockBasedBloomFilter", BloomFilterPolicy::kClassName(), RibbonFilterPolicy::kClassName(), }; @@ -985,6 +982,13 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( Status s = rep_->filter_handle.DecodeFrom(&v); if (s.ok()) { rep_->filter_type = filter_type; + if (filter_type == Rep::FilterType::kNoFilter) { + ROCKS_LOG_WARN(rep_->ioptions.logger, + "Detected obsolete filter type in %s. Read " + "performance might suffer until DB is fully " + "re-compacted.", + rep_->file->file_name().c_str()); + } break; } } @@ -995,6 +999,13 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle) .ok()) { rep_->filter_type = filter_type; + if (filter_type == Rep::FilterType::kNoFilter) { + ROCKS_LOG_WARN( + rep_->ioptions.logger, + "Detected obsolete filter type in %s. Read performance might " + "suffer until DB is fully re-compacted.", + rep_->file->file_name().c_str()); + } break; } } @@ -1459,10 +1470,6 @@ std::unique_ptr BlockBasedTable::CreateFilterBlockReader( return PartitionedFilterBlockReader::Create( this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context); - case Rep::FilterType::kBlockFilter: - return BlockBasedFilterBlockReader::Create( - this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context); - case Rep::FilterType::kFullFilter: return FullFilterBlockReader::Create(this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context); @@ -1608,7 +1615,6 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache( break; case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: ++get_context->get_context_stats_.num_filter_read; break; default: @@ -1649,7 +1655,6 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache( break; case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: trace_block_type = TraceType::kBlockTraceFilterBlock; break; case BlockType::kCompressionDictionary: @@ -1765,7 +1770,6 @@ Status BlockBasedTable::RetrieveBlock( break; case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: ++(get_context->get_context_stats_.num_filter_read); break; default: @@ -1889,75 +1893,16 @@ bool BlockBasedTable::PrefixMayMatch( bool may_match = true; - // First, try check with full filter FilterBlockReader* const filter = rep_->filter.get(); - bool filter_checked = true; + bool filter_checked = false; if (filter != nullptr) { const bool no_io = read_options.read_tier == kBlockCacheTier; - if (!filter->IsBlockBased()) { - const Slice* const const_ikey_ptr = &internal_key; - may_match = filter->RangeMayExist( - read_options.iterate_upper_bound, user_key_without_ts, - prefix_extractor, rep_->internal_comparator.user_comparator(), - const_ikey_ptr, &filter_checked, need_upper_bound_check, no_io, - lookup_context); - } else { - // if prefix_extractor changed for block 
based filter, skip filter - if (need_upper_bound_check) { - return true; - } - auto prefix = prefix_extractor->Transform(user_key_without_ts); - InternalKey internal_key_prefix(prefix, kMaxSequenceNumber, kTypeValue); - auto internal_prefix = internal_key_prefix.Encode(); - - // To prevent any io operation in this method, we set `read_tier` to make - // sure we always read index or filter only when they have already been - // loaded to memory. - ReadOptions no_io_read_options; - no_io_read_options.read_tier = kBlockCacheTier; - - // Then, try find it within each block - // we already know prefix_extractor and prefix_extractor_name must match - // because `CheckPrefixMayMatch` first checks `check_filter_ == true` - std::unique_ptr> iiter(NewIndexIterator( - no_io_read_options, - /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, - /*get_context=*/nullptr, lookup_context)); - iiter->Seek(internal_prefix); - - if (!iiter->Valid()) { - // we're past end of file - // if it's incomplete, it means that we avoided I/O - // and we're not really sure that we're past the end - // of the file - may_match = iiter->status().IsIncomplete(); - } else if ((rep_->index_key_includes_seq ? ExtractUserKey(iiter->key()) - : iiter->key()) - .starts_with(ExtractUserKey(internal_prefix))) { - // we need to check for this subtle case because our only - // guarantee is that "the key is a string >= last key in that data - // block" according to the doc/table_format.txt spec. - // - // Suppose iiter->key() starts with the desired prefix; it is not - // necessarily the case that the corresponding data block will - // contain the prefix, since iiter->key() need not be in the - // block. However, the next data block may contain the prefix, so - // we return true to play it safe. - may_match = true; - } else if (filter->IsBlockBased()) { - // iiter->key() does NOT start with the desired prefix. Because - // Seek() finds the first key that is >= the seek target, this - // means that iiter->key() > prefix. Thus, any data blocks coming - // after the data block corresponding to iiter->key() cannot - // possibly contain the key. Thus, the corresponding data block - // is the only on could potentially contain the prefix. 
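A concrete instance of the subtlety the deleted comment above describes, under an assumed key layout chosen only for illustration: suppose data block 1 holds "alpha" through "apple" and data block 2 holds "apricot" through "banana", with "apr" used as block 1's index separator (the spec in doc/table_format.txt only requires it to be >= "apple", the last key of that block). Seeking the index with prefix "apr" lands on block 1's separator "apr", which starts with the prefix even though block 1 stores no "apr*" key; the matching keys live in block 2. That is why the branch being removed answered may_match = true in this case rather than trusting block 1's per-block filter alone.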
- BlockHandle handle = iiter->value().handle; - may_match = filter->PrefixMayMatch( - prefix, prefix_extractor, handle.offset(), no_io, - /*const_key_ptr=*/nullptr, /*get_context=*/nullptr, lookup_context); - } - } + const Slice* const const_ikey_ptr = &internal_key; + may_match = filter->RangeMayExist( + read_options.iterate_upper_bound, user_key_without_ts, prefix_extractor, + rep_->internal_comparator.user_comparator(), const_ikey_ptr, + &filter_checked, need_upper_bound_check, no_io, lookup_context); } if (filter_checked) { @@ -2030,7 +1975,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch( FilterBlockReader* filter, const Slice& internal_key, const bool no_io, const SliceTransform* prefix_extractor, GetContext* get_context, BlockCacheLookupContext* lookup_context) const { - if (filter == nullptr || filter->IsBlockBased()) { + if (filter == nullptr) { return true; } Slice user_key = ExtractUserKey(internal_key); @@ -2039,15 +1984,13 @@ bool BlockBasedTable::FullFilterKeyMayMatch( size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size(); Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz); if (rep_->whole_key_filtering) { - may_match = - filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid, - no_io, const_ikey_ptr, get_context, lookup_context); + may_match = filter->KeyMayMatch(user_key_without_ts, no_io, const_ikey_ptr, + get_context, lookup_context); } else if (!PrefixExtractorChanged(prefix_extractor) && prefix_extractor->InDomain(user_key_without_ts) && !filter->PrefixMayMatch( - prefix_extractor->Transform(user_key_without_ts), - prefix_extractor, kNotValid, no_io, const_ikey_ptr, - get_context, lookup_context)) { + prefix_extractor->Transform(user_key_without_ts), no_io, + const_ikey_ptr, get_context, lookup_context)) { // FIXME ^^^: there should be no reason for Get() to depend on current // prefix_extractor at all. It should always use table_prefix_extractor. may_match = false; @@ -2063,14 +2006,13 @@ void BlockBasedTable::FullFilterKeysMayMatch( FilterBlockReader* filter, MultiGetRange* range, const bool no_io, const SliceTransform* prefix_extractor, BlockCacheLookupContext* lookup_context) const { - if (filter == nullptr || filter->IsBlockBased()) { + if (filter == nullptr) { return; } uint64_t before_keys = range->KeysLeft(); assert(before_keys > 0); // Caller should ensure if (rep_->whole_key_filtering) { - filter->KeysMayMatch(range, prefix_extractor, kNotValid, no_io, - lookup_context); + filter->KeysMayMatch(range, no_io, lookup_context); uint64_t after_keys = range->KeysLeft(); if (after_keys) { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE, after_keys); @@ -2086,8 +2028,7 @@ void BlockBasedTable::FullFilterKeysMayMatch( } else if (!PrefixExtractorChanged(prefix_extractor)) { // FIXME ^^^: there should be no reason for MultiGet() to depend on current // prefix_extractor at all. It should always use table_prefix_extractor. 
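As a reference for the narrowed query interface used throughout these hunks, here is a minimal sketch of a whole-key probe once the block_offset and prefix_extractor parameters are gone. Only the FilterBlockReader::KeyMayMatch call reflects the patch; the helper name and how its arguments are obtained are illustrative.

#include "table/block_based/filter_block.h"

namespace ROCKSDB_NAMESPACE {
// Sketch only: whole-key filter probe after this patch. `filter` may be
// nullptr when the table has no (readable) filter block.
bool KeyMayBePresent(FilterBlockReader* filter,
                     const Slice& user_key_without_ts,
                     const Slice& internal_key, GetContext* get_context,
                     BlockCacheLookupContext* lookup_context) {
  if (filter == nullptr) {
    return true;  // no filter: every key may be present
  }
  constexpr bool no_io = false;  // permit loading the filter block
  return filter->KeyMayMatch(user_key_without_ts, no_io,
                             /*const_ikey_ptr=*/&internal_key, get_context,
                             lookup_context);
}
}  // namespace ROCKSDB_NAMESPACE

Treating a missing filter as "may match" is also why the default above flips to filter_checked = false: if no filter was consulted, nothing should be counted toward the filter statistics.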
- filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false, - lookup_context); + filter->PrefixesMayMatch(range, prefix_extractor, false, lookup_context); RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys); uint64_t after_keys = range->KeysLeft(); uint64_t filtered_keys = before_keys - after_keys; @@ -2152,22 +2093,6 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) { IndexValue v = iiter->value(); - bool not_exist_in_filter = - filter != nullptr && filter->IsBlockBased() == true && - !filter->KeyMayMatch(ExtractUserKeyAndStripTimestamp(key, ts_sz), - prefix_extractor, v.handle.offset(), no_io, - /*const_ikey_ptr=*/nullptr, get_context, - &lookup_context); - - if (not_exist_in_filter) { - // Not found - // TODO: think about interaction with Merge. If a user key cannot - // cross one data block, we should be fine. - RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); - break; - } - if (!v.first_internal_key.empty() && !skip_filters && UserComparatorWrapper(rep_->internal_comparator.user_comparator()) .CompareWithoutTimestamp( @@ -2272,7 +2197,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, break; } } - if (matched && filter != nullptr && !filter->IsBlockBased()) { + if (matched && filter != nullptr) { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, rep_->level); @@ -2425,10 +2350,6 @@ Status BlockBasedTable::VerifyChecksumInBlocks( BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName( const Slice& meta_block_name) { - if (meta_block_name.starts_with(kFilterBlockPrefix)) { - return BlockType::kDeprecatedFilter; - } - if (meta_block_name.starts_with(kFullFilterBlockPrefix)) { return BlockType::kFilter; } @@ -2457,6 +2378,11 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName( return BlockType::kHashIndexMetadata; } + if (meta_block_name.starts_with(kObsoleteFilterBlockPrefix)) { + // Obsolete but possible in old files + return BlockType::kInvalid; + } + assert(false); return BlockType::kInvalid; } diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 692e519d0..51981f145 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -38,7 +38,6 @@ namespace ROCKSDB_NAMESPACE { class Cache; class FilterBlockReader; -class BlockBasedFilterBlockReader; class FullFilterBlockReader; class Footer; class InternalKeyComparator; @@ -67,7 +66,7 @@ using KVPairBlock = std::vector>; // loaded blocks in the memory. 
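The renamed prefix constants declared just below feed a simple metaindex naming scheme, summarized here as a sketch that mirrors the updated WriteFilterBlock() logic earlier in this patch; the CompatibilityName() value shown in the comment is only an example of what a built-in policy reports.

#include <string>

#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"

namespace ROCKSDB_NAMESPACE {
// Sketch only: the metaindex key that locates a table's filter block.
std::string FilterMetaIndexKey(bool is_partitioned_filter,
                               const FilterPolicy& policy) {
  std::string key = is_partitioned_filter
                        ? BlockBasedTable::kPartitionedFilterBlockPrefix
                        : BlockBasedTable::kFullFilterBlockPrefix;
  key.append(policy.CompatibilityName());
  return key;  // e.g. "fullfilter.rocksdb.BuiltinBloomFilter"
}
}  // namespace ROCKSDB_NAMESPACE

Old files instead carry the bare "filter." prefix (now kObsoleteFilterBlockPrefix), which the reader matches only to log the re-compaction warning added above; it no longer resolves to a usable filter.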
class BlockBasedTable : public TableReader { public: - static const std::string kFilterBlockPrefix; + static const std::string kObsoleteFilterBlockPrefix; static const std::string kFullFilterBlockPrefix; static const std::string kPartitionedFilterBlockPrefix; @@ -585,7 +584,6 @@ struct BlockBasedTable::Rep { enum class FilterType { kNoFilter, kFullFilter, - kBlockFilter, kPartitionedFilter, }; FilterType filter_type; diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h index 9eeea40da..450979dd7 100644 --- a/table/block_based/block_based_table_reader_sync_and_async.h +++ b/table/block_based/block_based_table_reader_sync_and_async.h @@ -729,7 +729,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) iiter->Next(); } while (iiter->Valid()); - if (matched && filter != nullptr && !filter->IsBlockBased()) { + if (matched && filter != nullptr) { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, rep_->level); diff --git a/table/block_based/block_like_traits.h b/table/block_based/block_like_traits.h index 8889b896c..aeb255114 100644 --- a/table/block_based/block_like_traits.h +++ b/table/block_based/block_like_traits.h @@ -72,15 +72,10 @@ class BlocklikeTraits { return Status::OK(); } - static Cache::CacheItemHelper* GetCacheItemHelper(BlockType block_type) { - if (block_type == BlockType::kDeprecatedFilter) { - return GetCacheItemHelperForRole< - BlockContents, CacheEntryRole::kDeprecatedFilterBlock>(); - } else { - // E.g. compressed cache - return GetCacheItemHelperForRole(); - } + static Cache::CacheItemHelper* GetCacheItemHelper(BlockType /*block_type*/) { + // E.g. compressed cache + return GetCacheItemHelperForRole(); } }; diff --git a/table/block_based/block_type.h b/table/block_based/block_type.h index 516510b1a..a9d6a1a77 100644 --- a/table/block_based/block_type.h +++ b/table/block_based/block_type.h @@ -20,7 +20,6 @@ enum class BlockType : uint8_t { kData, kFilter, // for second level partitioned filters and full filters kFilterPartitionIndex, // for top-level index of filter partitions - kDeprecatedFilter, // for old, deprecated block-based filter kProperties, kCompressionDictionary, kRangeDeletion, diff --git a/table/block_based/filter_block.h b/table/block_based/filter_block.h index e767df7f1..0d67dd2f8 100644 --- a/table/block_based/filter_block.h +++ b/table/block_based/filter_block.h @@ -10,11 +10,6 @@ // A filter block is stored near the end of a Table file. It contains // filters (e.g., bloom filters) for all data blocks in the table combined // into a single filter block. -// -// It is a base class for BlockBasedFilter and FullFilter. -// These two are both used in BlockBasedTable. The first one contain filter -// For a part of keys in sst file, the second contain filter for all keys -// in sst file. #pragma once @@ -44,12 +39,10 @@ using MultiGetRange = MultiGetContext::Range; // A FilterBlockBuilder is used to construct all of the filters for a // particular Table. It generates a single string which is stored as -// a special block in the Table. +// a special block in the Table, or partitioned into smaller filters. // // The sequence of calls to FilterBlockBuilder must match the regexp: -// (StartBlock Add*)* Finish -// -// BlockBased/Full FilterBlock would be called in the same way. 
+// Add* Finish class FilterBlockBuilder { public: explicit FilterBlockBuilder() {} @@ -59,8 +52,6 @@ class FilterBlockBuilder { virtual ~FilterBlockBuilder() {} - virtual bool IsBlockBased() = 0; // If is blockbased filter - virtual void StartBlock(uint64_t block_offset) = 0; // Start new block filter virtual void Add( const Slice& key_without_ts) = 0; // Add a key to current filter virtual bool IsEmpty() const = 0; // Empty == none added @@ -108,8 +99,6 @@ class FilterBlockReader { FilterBlockReader(const FilterBlockReader&) = delete; FilterBlockReader& operator=(const FilterBlockReader&) = delete; - virtual bool IsBlockBased() = 0; // If is blockbased filter - /** * If no_io is set, then it returns true if it cannot answer the query without * reading data from disk. This is used in PartitionedFilterBlockReader to @@ -120,23 +109,19 @@ class FilterBlockReader { * built upon InternalKey and must be provided via const_ikey_ptr when running * queries. */ - virtual bool KeyMayMatch(const Slice& key, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + virtual bool KeyMayMatch(const Slice& key, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) = 0; - virtual void KeysMayMatch(MultiGetRange* range, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + virtual void KeysMayMatch(MultiGetRange* range, const bool no_io, BlockCacheLookupContext* lookup_context) { for (auto iter = range->begin(); iter != range->end(); ++iter) { const Slice ukey_without_ts = iter->ukey_without_ts; const Slice ikey = iter->ikey; GetContext* const get_context = iter->get_context; - if (!KeyMayMatch(ukey_without_ts, prefix_extractor, block_offset, no_io, - &ikey, get_context, lookup_context)) { + if (!KeyMayMatch(ukey_without_ts, no_io, &ikey, get_context, + lookup_context)) { range->SkipKey(iter); } } @@ -145,25 +130,22 @@ class FilterBlockReader { /** * no_io and const_ikey_ptr here means the same as in KeyMayMatch */ - virtual bool PrefixMayMatch(const Slice& prefix, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + virtual bool PrefixMayMatch(const Slice& prefix, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) = 0; virtual void PrefixesMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + const bool no_io, BlockCacheLookupContext* lookup_context) { for (auto iter = range->begin(); iter != range->end(); ++iter) { const Slice ukey_without_ts = iter->ukey_without_ts; const Slice ikey = iter->ikey; GetContext* const get_context = iter->get_context; if (prefix_extractor->InDomain(ukey_without_ts) && - !PrefixMayMatch(prefix_extractor->Transform(ukey_without_ts), - prefix_extractor, block_offset, no_io, &ikey, - get_context, lookup_context)) { + !PrefixMayMatch(prefix_extractor->Transform(ukey_without_ts), no_io, + &ikey, get_context, lookup_context)) { range->SkipKey(iter); } } diff --git a/table/block_based/filter_block_reader_common.cc b/table/block_based/filter_block_reader_common.cc index fe0da0e4d..51d34fa00 100644 --- a/table/block_based/filter_block_reader_common.cc +++ b/table/block_based/filter_block_reader_common.cc @@ -112,9 +112,8 @@ bool FilterBlockReaderCommon::RangeMayExist( return true; } else { *filter_checked = true; - return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io, - const_ikey_ptr, 
/* get_context */ nullptr, - lookup_context); + return PrefixMayMatch(prefix, no_io, const_ikey_ptr, + /* get_context */ nullptr, lookup_context); } } diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index c887c59a0..020ce87d2 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -24,7 +24,6 @@ #include "rocksdb/rocksdb_namespace.h" #include "rocksdb/slice.h" #include "rocksdb/utilities/object_registry.h" -#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/filter_policy_internal.h" #include "table/block_based/full_filter_block.h" @@ -1375,87 +1374,10 @@ const char* ReadOnlyBuiltinFilterPolicy::kClassName() { return kBuiltinFilterMetadataName; } -const char* DeprecatedBlockBasedBloomFilterPolicy::kClassName() { - return "rocksdb.internal.DeprecatedBlockBasedBloomFilter"; -} - std::string BloomLikeFilterPolicy::GetId() const { return Name() + GetBitsPerKeySuffix(); } -DeprecatedBlockBasedBloomFilterPolicy::DeprecatedBlockBasedBloomFilterPolicy( - double bits_per_key) - : BloomLikeFilterPolicy(bits_per_key) {} - -FilterBitsBuilder* DeprecatedBlockBasedBloomFilterPolicy::GetBuilderWithContext( - const FilterBuildingContext&) const { - if (GetWholeBitsPerKey() == 0) { - // "No filter" special case - return nullptr; - } - // Internal contract: returns a new fake builder that encodes bits per key - // into a special value from EstimateEntriesAdded() - struct B : public FilterBitsBuilder { - explicit B(int bits_per_key) : est(kSecretBitsPerKeyStart + bits_per_key) {} - size_t est; - size_t EstimateEntriesAdded() override { return est; } - void AddKey(const Slice&) override {} - using FilterBitsBuilder::Finish; // FIXME - Slice Finish(std::unique_ptr*) override { return Slice(); } - size_t ApproximateNumEntries(size_t) override { return 0; } - }; - return new B(GetWholeBitsPerKey()); -} - -void DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(const Slice* keys, - int n, - int bits_per_key, - std::string* dst) { - // Compute bloom filter size (in both bits and bytes) - uint32_t bits = static_cast(n * bits_per_key); - - // For small n, we can see a very high false positive rate. Fix it - // by enforcing a minimum bloom filter length. - if (bits < 64) bits = 64; - - uint32_t bytes = (bits + 7) / 8; - bits = bytes * 8; - - int num_probes = LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key); - - const size_t init_size = dst->size(); - dst->resize(init_size + bytes, 0); - dst->push_back(static_cast(num_probes)); // Remember # of probes - char* array = &(*dst)[init_size]; - for (int i = 0; i < n; i++) { - LegacyNoLocalityBloomImpl::AddHash(BloomHash(keys[i]), bits, num_probes, - array); - } -} - -bool DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch( - const Slice& key, const Slice& bloom_filter) { - const size_t len = bloom_filter.size(); - if (len < 2 || len > 0xffffffffU) { - return false; - } - - const char* array = bloom_filter.data(); - const uint32_t bits = static_cast(len - 1) * 8; - - // Use the encoded k so that we can read filters generated by - // bloom filters created using different parameters. - const int k = static_cast(array[len - 1]); - if (k > 30) { - // Reserved for potentially new encodings for short bloom filters. - // Consider it a match. 
- return true; - } - // NB: using stored k not num_probes for whole_bits_per_key_ - return LegacyNoLocalityBloomImpl::HashMayMatch(BloomHash(key), bits, k, - array); -} - BloomFilterPolicy::BloomFilterPolicy(double bits_per_key) : BloomLikeFilterPolicy(bits_per_key) {} @@ -1876,9 +1798,6 @@ std::shared_ptr BloomLikeFilterPolicy::Create( return std::make_shared(bits_per_key); } else if (name == test::Standard128RibbonFilterPolicy::kClassName()) { return std::make_shared(bits_per_key); - } else if (name == DeprecatedBlockBasedBloomFilterPolicy::kClassName()) { - return std::make_shared( - bits_per_key); } else if (name == BloomFilterPolicy::kClassName()) { // For testing return std::make_shared(bits_per_key); @@ -1996,15 +1915,6 @@ static int RegisterBuiltinFilterPolicies(ObjectLibrary& library, uri)); return guard->get(); }); - library.AddFactory( - FilterPatternEntryWithBits( - DeprecatedBlockBasedBloomFilterPolicy::kClassName()), - [](const std::string& uri, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(NewBuiltinFilterPolicyWithBits< - DeprecatedBlockBasedBloomFilterPolicy>(uri)); - return guard->get(); - }); size_t num_types; return static_cast(library.GetFactoryCount(&num_types)); } @@ -2055,7 +1965,6 @@ const std::vector& BloomLikeFilterPolicy::GetAllFixedImpls() { STATIC_AVOID_DESTRUCTION(std::vector, impls){ // Match filter_bench -impl=x ordering test::LegacyBloomFilterPolicy::kClassName(), - DeprecatedBlockBasedBloomFilterPolicy::kClassName(), test::FastLocalBloomFilterPolicy::kClassName(), test::Standard128RibbonFilterPolicy::kClassName(), }; diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h index 06566f871..9bc3a2482 100644 --- a/table/block_based/filter_policy_internal.h +++ b/table/block_based/filter_policy_internal.h @@ -131,7 +131,7 @@ class BuiltinFilterPolicy : public FilterPolicy { // Read metadata to determine what kind of FilterBitsReader is needed // and return a new one. This must successfully process any filter data // generated by a built-in FilterBitsBuilder, regardless of the impl - // chosen for this BloomFilterPolicy. Not compatible with CreateFilter. + // chosen for this BloomFilterPolicy. FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override; static const char* kClassName(); bool IsInstanceOf(const std::string& id) const override; @@ -151,12 +151,8 @@ class BuiltinFilterPolicy : public FilterPolicy { // (An internal convenience function to save boilerplate.) static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&); - protected: - // Deprecated block-based filter only (no longer in public API) - bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const; - private: - // For Bloom filter implementation(s) (except deprecated block-based filter) + // For Bloom filter implementation(s) static BuiltinFilterBitsReader* GetBloomBitsReader(const Slice& contents); // For Ribbon filter implementation(s) @@ -300,30 +296,6 @@ class RibbonFilterPolicy : public BloomLikeFilterPolicy { const int bloom_before_level_; }; -// Deprecated block-based filter only. We still support reading old -// block-based filters from any BuiltinFilterPolicy, but there is no public -// option to build them. However, this class is used to build them for testing -// and for a public backdoor to building them by constructing this policy from -// a string. 
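With the string backdoor described in the comment above removed along with the policy class below, the supported way to get a Bloom or Ribbon filter is the public API, as the updated table_test.cc and db_bench changes later in this patch show. A minimal configuration sketch follows; the helper name and the 10.0 bits-per-key value are illustrative, while the option and factory names are the public RocksDB API.

#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

namespace ROCKSDB_NAMESPACE {
// Sketch only: configuring a full (or partitioned) filter via public options.
void ConfigureBloomFilter(Options* options) {
  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10.0));
  // Ribbon alternative with Bloom-equivalent accuracy:
  //   table_options.filter_policy.reset(NewRibbonFilterPolicy(10.0));
  options->table_factory.reset(NewBlockBasedTableFactory(table_options));
}
}  // namespace ROCKSDB_NAMESPACE

SST files that still contain an old block-based filter remain readable; they simply run without filter benefit, per the warning added in the table reader, until the DB is fully re-compacted.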
-class DeprecatedBlockBasedBloomFilterPolicy : public BloomLikeFilterPolicy { - public: - explicit DeprecatedBlockBasedBloomFilterPolicy(double bits_per_key); - - // Internal contract: returns a new fake builder that encodes bits per key - // into a special value from EstimateEntriesAdded(), using - // kSecretBitsPerKeyStart - FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext&) const override; - static constexpr size_t kSecretBitsPerKeyStart = 1234567890U; - - static const char* kClassName(); - const char* Name() const override { return kClassName(); } - - static void CreateFilter(const Slice* keys, int n, int bits_per_key, - std::string* dst); - static bool KeyMayMatch(const Slice& key, const Slice& bloom_filter); -}; - // For testing only, but always constructable with internal names namespace test { diff --git a/table/block_based/full_filter_block.cc b/table/block_based/full_filter_block.cc index 9bb9212e6..4ca650edc 100644 --- a/table/block_based/full_filter_block.cc +++ b/table/block_based/full_filter_block.cc @@ -125,14 +125,8 @@ FullFilterBlockReader::FullFilterBlockReader( } bool FullFilterBlockReader::KeyMayMatch( - const Slice& key, const SliceTransform* /*prefix_extractor*/, - uint64_t block_offset, const bool no_io, - const Slice* const /*const_ikey_ptr*/, GetContext* get_context, - BlockCacheLookupContext* lookup_context) { -#ifdef NDEBUG - (void)block_offset; -#endif - assert(block_offset == kNotValid); + const Slice& key, const bool no_io, const Slice* const /*const_ikey_ptr*/, + GetContext* get_context, BlockCacheLookupContext* lookup_context) { if (!whole_key_filtering()) { return true; } @@ -167,14 +161,9 @@ std::unique_ptr FullFilterBlockReader::Create( } bool FullFilterBlockReader::PrefixMayMatch( - const Slice& prefix, const SliceTransform* /* prefix_extractor */, - uint64_t block_offset, const bool no_io, + const Slice& prefix, const bool no_io, const Slice* const /*const_ikey_ptr*/, GetContext* get_context, BlockCacheLookupContext* lookup_context) { -#ifdef NDEBUG - (void)block_offset; -#endif - assert(block_offset == kNotValid); return MayMatch(prefix, no_io, get_context, lookup_context); } @@ -204,17 +193,12 @@ bool FullFilterBlockReader::MayMatch( return false; } } - return true; // remain the same with block_based filter + return true; } void FullFilterBlockReader::KeysMayMatch( - MultiGetRange* range, const SliceTransform* /*prefix_extractor*/, - uint64_t block_offset, const bool no_io, + MultiGetRange* range, const bool no_io, BlockCacheLookupContext* lookup_context) { -#ifdef NDEBUG - (void)block_offset; -#endif - assert(block_offset == kNotValid); if (!whole_key_filtering()) { // Simply return. Don't skip any key - consider all keys as likely to be // present @@ -225,12 +209,7 @@ void FullFilterBlockReader::KeysMayMatch( void FullFilterBlockReader::PrefixesMayMatch( MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - BlockCacheLookupContext* lookup_context) { -#ifdef NDEBUG - (void)block_offset; -#endif - assert(block_offset == kNotValid); + const bool no_io, BlockCacheLookupContext* lookup_context) { MayMatch(range, no_io, prefix_extractor, lookup_context); } diff --git a/table/block_based/full_filter_block.h b/table/block_based/full_filter_block.h index 3753a1c3d..ea7f06e60 100644 --- a/table/block_based/full_filter_block.h +++ b/table/block_based/full_filter_block.h @@ -49,8 +49,6 @@ class FullFilterBlockBuilder : public FilterBlockBuilder { // directly. 
and be deleted here ~FullFilterBlockBuilder() {} - virtual bool IsBlockBased() override { return false; } - virtual void StartBlock(uint64_t /*block_offset*/) override {} virtual void Add(const Slice& key_without_ts) override; virtual bool IsEmpty() const override { return !any_added_; } virtual size_t EstimateEntriesAdded() override; @@ -107,28 +105,28 @@ class FullFilterBlockReader FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context); - bool IsBlockBased() override { return false; } - - bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + bool KeyMayMatch(const Slice& key, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; - bool PrefixMayMatch(const Slice& prefix, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + bool PrefixMayMatch(const Slice& prefix, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; - void KeysMayMatch(MultiGetRange* range, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + void KeysMayMatch(MultiGetRange* range, const bool no_io, BlockCacheLookupContext* lookup_context) override; + // Used in partitioned filter code + void KeysMayMatch2(MultiGetRange* range, + const SliceTransform* /*prefix_extractor*/, + const bool no_io, + BlockCacheLookupContext* lookup_context) { + KeysMayMatch(range, no_io, lookup_context); + } void PrefixesMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + const bool no_io, BlockCacheLookupContext* lookup_context) override; size_t ApproximateMemoryUsage() const override; private: diff --git a/table/block_based/full_filter_block_test.cc b/table/block_based/full_filter_block_test.cc index 24d870d4c..ab0bdc3e0 100644 --- a/table/block_based/full_filter_block_test.cc +++ b/table/block_based/full_filter_block_test.cc @@ -115,8 +115,7 @@ TEST_F(PluginFullFilterBlockTest, PluginEmptyBuilder) { FullFilterBlockReader reader(table_.get(), std::move(block)); // Remain same symantic with blockbased filter - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); @@ -137,39 +136,34 @@ TEST_F(PluginFullFilterBlockTest, PluginSingleChunk) { nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); FullFilterBlockReader reader(table_.get(), std::move(block)); - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("bar", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("box", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr, - 
/*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("hello", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch("missing", + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch("other", + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } class FullFilterBlockTest : public mock::MockBlockBasedTableTester, @@ -191,8 +185,7 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) { FullFilterBlockReader reader(table_.get(), std::move(block)); // Remain same symantic with blockbased filter - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); @@ -292,39 +285,34 @@ TEST_F(FullFilterBlockTest, SingleChunk) { nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); FullFilterBlockReader reader(table_.get(), std::move(block)); - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("bar", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("box", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("hello", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch("missing", + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, + /*get_context=*/nullptr, + 
/*lookup_context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch("other", + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/mock_block_based_table.h b/table/block_based/mock_block_based_table.h index 4dd6e392b..13f3dfaee 100644 --- a/table/block_based/mock_block_based_table.h +++ b/table/block_based/mock_block_based_table.h @@ -7,7 +7,6 @@ #include #include "rocksdb/filter_policy.h" -#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/filter_policy_internal.h" diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 6fc6ddd1e..c80cd10fd 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -216,58 +216,41 @@ std::unique_ptr PartitionedFilterBlockReader::Create( } bool PartitionedFilterBlockReader::KeyMayMatch( - const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, + const Slice& key, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) { assert(const_ikey_ptr != nullptr); - assert(block_offset == kNotValid); if (!whole_key_filtering()) { return true; } - return MayMatch(key, prefix_extractor, block_offset, no_io, const_ikey_ptr, - get_context, lookup_context, + return MayMatch(key, no_io, const_ikey_ptr, get_context, lookup_context, &FullFilterBlockReader::KeyMayMatch); } void PartitionedFilterBlockReader::KeysMayMatch( - MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + MultiGetRange* range, const bool no_io, BlockCacheLookupContext* lookup_context) { - assert(block_offset == kNotValid); if (!whole_key_filtering()) { return; // Any/all may match } - MayMatch(range, prefix_extractor, block_offset, no_io, lookup_context, - &FullFilterBlockReader::KeysMayMatch); + MayMatch(range, nullptr, no_io, lookup_context, + &FullFilterBlockReader::KeysMayMatch2); } bool PartitionedFilterBlockReader::PrefixMayMatch( - const Slice& prefix, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, + const Slice& prefix, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) { assert(const_ikey_ptr != nullptr); - assert(block_offset == kNotValid); - if (!table_prefix_extractor() && !prefix_extractor) { - return true; - } - - return MayMatch(prefix, prefix_extractor, block_offset, no_io, const_ikey_ptr, - get_context, lookup_context, + return MayMatch(prefix, no_io, const_ikey_ptr, get_context, lookup_context, &FullFilterBlockReader::PrefixMayMatch); } void PartitionedFilterBlockReader::PrefixesMayMatch( MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - BlockCacheLookupContext* lookup_context) { - assert(block_offset == kNotValid); - if (!table_prefix_extractor() && !prefix_extractor) { - return; // Any/all may match - } - - MayMatch(range, prefix_extractor, block_offset, no_io, lookup_context, + const bool no_io, BlockCacheLookupContext* lookup_context) { + assert(prefix_extractor); + MayMatch(range, prefix_extractor, no_io, lookup_context, &FullFilterBlockReader::PrefixesMayMatch); } @@ -331,8 +314,7 @@ 
Status PartitionedFilterBlockReader::GetFilterPartitionBlock( } bool PartitionedFilterBlockReader::MayMatch( - const Slice& slice, const SliceTransform* prefix_extractor, - uint64_t block_offset, bool no_io, const Slice* const_ikey_ptr, + const Slice& slice, bool no_io, const Slice* const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context, FilterFunction filter_function) const { CachableEntry filter_block; @@ -364,14 +346,13 @@ bool PartitionedFilterBlockReader::MayMatch( FullFilterBlockReader filter_partition(table(), std::move(filter_partition_block)); - return (filter_partition.*filter_function)( - slice, prefix_extractor, block_offset, no_io, const_ikey_ptr, get_context, - lookup_context); + return (filter_partition.*filter_function)(slice, no_io, const_ikey_ptr, + get_context, lookup_context); } void PartitionedFilterBlockReader::MayMatch( - MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, bool no_io, BlockCacheLookupContext* lookup_context, + MultiGetRange* range, const SliceTransform* prefix_extractor, bool no_io, + BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const { CachableEntry filter_block; Status s = @@ -399,9 +380,8 @@ void PartitionedFilterBlockReader::MayMatch( if (!prev_filter_handle.IsNull() && this_filter_handle != prev_filter_handle) { MultiGetRange subrange(*range, start_iter_same_handle, iter); - MayMatchPartition(&subrange, prefix_extractor, block_offset, - prev_filter_handle, no_io, lookup_context, - filter_function); + MayMatchPartition(&subrange, prefix_extractor, prev_filter_handle, no_io, + lookup_context, filter_function); range->AddSkipsFrom(subrange); start_iter_same_handle = iter; } @@ -416,16 +396,15 @@ void PartitionedFilterBlockReader::MayMatch( } if (!prev_filter_handle.IsNull()) { MultiGetRange subrange(*range, start_iter_same_handle, range->end()); - MayMatchPartition(&subrange, prefix_extractor, block_offset, - prev_filter_handle, no_io, lookup_context, - filter_function); + MayMatchPartition(&subrange, prefix_extractor, prev_filter_handle, no_io, + lookup_context, filter_function); range->AddSkipsFrom(subrange); } } void PartitionedFilterBlockReader::MayMatchPartition( MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, BlockHandle filter_handle, bool no_io, + BlockHandle filter_handle, bool no_io, BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const { CachableEntry filter_partition_block; @@ -439,8 +418,8 @@ void PartitionedFilterBlockReader::MayMatchPartition( FullFilterBlockReader filter_partition(table(), std::move(filter_partition_block)); - (filter_partition.*filter_function)(range, prefix_extractor, block_offset, - no_io, lookup_context); + (filter_partition.*filter_function)(range, prefix_extractor, no_io, + lookup_context); } size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const { diff --git a/table/block_based/partitioned_filter_block.h b/table/block_based/partitioned_filter_block.h index 302b30f19..3c00fa83e 100644 --- a/table/block_based/partitioned_filter_block.h +++ b/table/block_based/partitioned_filter_block.h @@ -109,25 +109,19 @@ class PartitionedFilterBlockReader : public FilterBlockReaderCommon { FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context); - bool IsBlockBased() override { return false; } - bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, - uint64_t 
block_offset, const bool no_io, + bool KeyMayMatch(const Slice& key, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; - void KeysMayMatch(MultiGetRange* range, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + void KeysMayMatch(MultiGetRange* range, const bool no_io, BlockCacheLookupContext* lookup_context) override; - bool PrefixMayMatch(const Slice& prefix, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + bool PrefixMayMatch(const Slice& prefix, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; void PrefixesMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + const bool no_io, BlockCacheLookupContext* lookup_context) override; size_t ApproximateMemoryUsage() const override; @@ -142,27 +136,22 @@ class PartitionedFilterBlockReader : public FilterBlockReaderCommon { CachableEntry* filter_block) const; using FilterFunction = bool (FullFilterBlockReader::*)( - const Slice& slice, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - const Slice* const const_ikey_ptr, GetContext* get_context, - BlockCacheLookupContext* lookup_context); - bool MayMatch(const Slice& slice, const SliceTransform* prefix_extractor, - uint64_t block_offset, bool no_io, const Slice* const_ikey_ptr, + const Slice& slice, const bool no_io, const Slice* const const_ikey_ptr, + GetContext* get_context, BlockCacheLookupContext* lookup_context); + bool MayMatch(const Slice& slice, bool no_io, const Slice* const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context, FilterFunction filter_function) const; using FilterManyFunction = void (FullFilterBlockReader::*)( MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - BlockCacheLookupContext* lookup_context); + const bool no_io, BlockCacheLookupContext* lookup_context); void MayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, bool no_io, - BlockCacheLookupContext* lookup_context, + bool no_io, BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const; void MayMatchPartition(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, BlockHandle filter_handle, - bool no_io, BlockCacheLookupContext* lookup_context, + BlockHandle filter_handle, bool no_io, + BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const; Status CacheDependencies(const ReadOptions& ro, bool pin) override; diff --git a/table/block_based/partitioned_filter_block_test.cc b/table/block_based/partitioned_filter_block_test.cc index d7ae4bfd1..d68e5d063 100644 --- a/table/block_based/partitioned_filter_block_test.cc +++ b/table/block_based/partitioned_filter_block_test.cc @@ -161,8 +161,7 @@ class PartitionedFilterBlockTest } void VerifyReader(PartitionedFilterBlockBuilder* builder, - PartitionedIndexBuilder* pib, bool empty = false, - const SliceTransform* prefix_extractor = nullptr) { + PartitionedIndexBuilder* pib, bool empty = false) { std::unique_ptr reader( NewReader(builder, pib)); // Querying added keys @@ -170,31 +169,31 @@ class PartitionedFilterBlockTest for (auto key : keys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - 
ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io, - &ikey_slice, /*get_context=*/nullptr, + ASSERT_TRUE(reader->KeyMayMatch(key, !no_io, &ikey_slice, + /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } { // querying a key twice auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - ASSERT_TRUE(reader->KeyMayMatch( - keys[0], prefix_extractor, kNotValid, !no_io, &ikey_slice, - /*get_context=*/nullptr, /*lookup_context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch(keys[0], !no_io, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } // querying missing keys for (auto key : missing_keys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); if (empty) { - ASSERT_TRUE(reader->KeyMayMatch( - key, prefix_extractor, kNotValid, !no_io, &ikey_slice, - /*get_context=*/nullptr, /*lookup_context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch(key, !no_io, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } else { // assuming a good hash function - ASSERT_FALSE(reader->KeyMayMatch( - key, prefix_extractor, kNotValid, !no_io, &ikey_slice, - /*get_context=*/nullptr, /*lookup_context=*/nullptr)); + ASSERT_FALSE(reader->KeyMayMatch(key, !no_io, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } } } @@ -343,20 +342,20 @@ TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { for (auto key : pkeys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - ASSERT_TRUE(reader->PrefixMayMatch( - prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid, - /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); + ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key), + /*no_io=*/false, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } // Non-existent keys but with the same prefix const std::string pnonkeys[4] = {"p-key9", "p-key11", "p-key21", "p-key31"}; for (auto key : pnonkeys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - ASSERT_TRUE(reader->PrefixMayMatch( - prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid, - /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); + ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key), + /*no_io=*/false, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } } @@ -391,10 +390,10 @@ TEST_P(PartitionedFilterBlockTest, PrefixInWrongPartitionBug) { auto prefix = prefix_extractor->Transform(key); auto ikey = InternalKey(prefix, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - ASSERT_TRUE(reader->PrefixMayMatch( - prefix, prefix_extractor.get(), kNotValid, - /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); + ASSERT_TRUE(reader->PrefixMayMatch(prefix, + /*no_io=*/false, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } } diff --git a/table/block_fetcher.cc b/table/block_fetcher.cc index 4ca4063d7..dc32c887c 100644 --- a/table/block_fetcher.cc +++ b/table/block_fetcher.cc @@ -285,7 +285,6 @@ IOStatus BlockFetcher::ReadBlockContents() { switch (block_type_) { case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: PERF_COUNTER_ADD(filter_block_read_count, 1); break; diff --git 
a/table/table_test.cc b/table/table_test.cc index 39f6e1974..f8f5bda49 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -3524,11 +3524,10 @@ TEST_P(BlockBasedTableTest, InvalidOptions) { } TEST_P(BlockBasedTableTest, BlockReadCountTest) { - // bloom_filter_type = 0 -- block-based filter (not available in public API) // bloom_filter_type = 1 -- full filter using use_block_based_builder=false // bloom_filter_type = 2 -- full filter using use_block_based_builder=true // because of API change to hide block-based filter - for (int bloom_filter_type = 0; bloom_filter_type <= 2; ++bloom_filter_type) { + for (int bloom_filter_type = 1; bloom_filter_type <= 2; ++bloom_filter_type) { for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2; ++index_and_filter_in_cache) { Options options; @@ -3537,22 +3536,8 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) { BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_cache = NewLRUCache(1, 0); table_options.cache_index_and_filter_blocks = index_and_filter_in_cache; - if (bloom_filter_type == 0) { -#ifndef ROCKSDB_LITE - // Use back-door way of enabling obsolete block-based Bloom - ASSERT_OK(FilterPolicy::CreateFromString( - ConfigOptions(), - "rocksdb.internal.DeprecatedBlockBasedBloomFilter:10", - &table_options.filter_policy)); -#else - // Skip this case in LITE build - continue; -#endif - } else { - // Public API - table_options.filter_policy.reset( - NewBloomFilterPolicy(10, bloom_filter_type == 2)); - } + table_options.filter_policy.reset( + NewBloomFilterPolicy(10, bloom_filter_type == 2)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); std::vector keys; stl_wrappers::KVMap kvmap; diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index bd9e43b43..9cebbbca9 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -1605,9 +1605,6 @@ DEFINE_double(cuckoo_hash_ratio, 0.9, "Hash ratio for Cuckoo SST table."); DEFINE_bool(use_hash_search, false, "if use kHashSearch " "instead of kBinarySearch. " "This is valid if only we use BlockTable"); -DEFINE_bool(use_block_based_filter, false, "if use kBlockBasedFilter " - "instead of kFullFilter for filter block. " - "This is valid if only we use BlockTable"); DEFINE_string(merge_operator, "", "The merge operator to use with the database." "If a new merge operator is specified, be sure to use fresh" " database The possible merge operators are defined in" @@ -4525,19 +4522,6 @@ class Benchmark { table_options->filter_policy = BlockBasedTableOptions().filter_policy; } else if (FLAGS_bloom_bits == 0) { table_options->filter_policy.reset(); - } else if (FLAGS_use_block_based_filter) { - // Use back-door way of enabling obsolete block-based Bloom - Status s = FilterPolicy::CreateFromString( - ConfigOptions(), - "rocksdb.internal.DeprecatedBlockBasedBloomFilter:" + - std::to_string(FLAGS_bloom_bits), - &table_options->filter_policy); - if (!s.ok()) { - fprintf(stderr, - "failure creating obsolete block-based filter: %s\n", - s.ToString().c_str()); - exit(1); - } } else { table_options->filter_policy.reset( FLAGS_use_ribbon_filter ? 
NewRibbonFilterPolicy(FLAGS_bloom_bits) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index e0b4d36c2..cea8232d5 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -119,7 +119,6 @@ default_params = { "use_merge": lambda: random.randint(0, 1), # 999 -> use Bloom API "ribbon_starting_level": lambda: random.choice([random.randint(-1, 10), 999]), - "use_block_based_filter": lambda: random.randint(0, 1), "value_size_mult": 32, "verify_checksum": 1, "write_buffer_size": 4 * 1024 * 1024, @@ -358,7 +357,6 @@ ts_params = { "use_blob_db": 0, "enable_compaction_filter": 0, "ingest_external_file_one_in": 0, - "use_block_based_filter": 0, } multiops_txn_default_params = { @@ -497,10 +495,6 @@ def finalize_and_sanitize(src_params): if dest_params["partition_filters"] == 1: if dest_params["index_type"] != 2: dest_params["partition_filters"] = 0 - else: - dest_params["use_block_based_filter"] = 0 - if dest_params["ribbon_starting_level"] < 999: - dest_params["use_block_based_filter"] = 0 if dest_params.get("atomic_flush", 0) == 1: # disable pipelined write when atomic flush is used. dest_params["enable_pipelined_write"] = 0 diff --git a/util/bloom_test.cc b/util/bloom_test.cc index 91d640521..0bb50cbef 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -69,207 +69,6 @@ static int NextLength(int length) { return length; } -class BlockBasedBloomTest : public testing::Test { - private: - std::unique_ptr policy_; - std::string filter_; - std::vector keys_; - - public: - BlockBasedBloomTest() { ResetPolicy(); } - - void Reset() { - keys_.clear(); - filter_.clear(); - } - - void ResetPolicy(double bits_per_key) { - policy_.reset(new DeprecatedBlockBasedBloomFilterPolicy(bits_per_key)); - Reset(); - } - - void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); } - - void Add(const Slice& s) { - keys_.push_back(s.ToString()); - } - - void Build() { - std::vector key_slices; - for (size_t i = 0; i < keys_.size(); i++) { - key_slices.push_back(Slice(keys_[i])); - } - filter_.clear(); - DeprecatedBlockBasedBloomFilterPolicy::CreateFilter( - &key_slices[0], static_cast(key_slices.size()), - policy_->GetWholeBitsPerKey(), &filter_); - keys_.clear(); - if (kVerbose >= 2) DumpFilter(); - } - - size_t FilterSize() const { - return filter_.size(); - } - - Slice FilterData() const { return Slice(filter_); } - - void DumpFilter() { - fprintf(stderr, "F("); - for (size_t i = 0; i+1 < filter_.size(); i++) { - const unsigned int c = static_cast(filter_[i]); - for (int j = 0; j < 8; j++) { - fprintf(stderr, "%c", (c & (1 <= 1) { - fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", - rate*100.0, length, static_cast(FilterSize())); - } - if (FLAGS_bits_per_key == 10) { - ASSERT_LE(rate, 0.02); // Must not be over 2% - if (rate > 0.0125) { - mediocre_filters++; // Allowed, but not too often - } else { - good_filters++; - } - } - } - if (FLAGS_bits_per_key == 10 && kVerbose >= 1) { - fprintf(stderr, "Filters: %d good, %d mediocre\n", - good_filters, mediocre_filters); - } - ASSERT_LE(mediocre_filters, good_filters/5); -} - -// Ensure the implementation doesn't accidentally change in an -// incompatible way -TEST_F(BlockBasedBloomTest, Schema) { - char buffer[sizeof(int)]; - - ResetPolicy(8); // num_probes = 5 - for (int key = 0; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 3589896109U); - - ResetPolicy(9); // num_probes = 6 - for (int key = 0; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - 
ASSERT_EQ(BloomHash(FilterData()), 969445585U); - - ResetPolicy(11); // num_probes = 7 - for (int key = 0; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 1694458207U); - - ResetPolicy(10); // num_probes = 6 - for (int key = 0; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 2373646410U); - - ResetPolicy(10); - for (int key = /*CHANGED*/ 1; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 1908442116U); - - ResetPolicy(10); - for (int key = 1; key < /*CHANGED*/ 88; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 3057004015U); - - // With new fractional bits_per_key, check that we are rounding to - // whole bits per key for old Bloom filters. - ResetPolicy(9.5); // Treated as 10 - for (int key = 1; key < 88; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U); - - ResetPolicy(10.499); // Treated as 10 - for (int key = 1; key < 88; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U); - - ResetPolicy(); -} - -// Different bits-per-byte - class FullBloomTest : public testing::TestWithParam { protected: BlockBasedTableOptions table_options_; diff --git a/util/filter_bench.cc b/util/filter_bench.cc index bca4071d8..294f4367d 100644 --- a/util/filter_bench.cc +++ b/util/filter_bench.cc @@ -84,8 +84,8 @@ DEFINE_bool(new_builder, false, DEFINE_uint32(impl, 0, "Select filter implementation. Without -use_plain_table_bloom:" - "0 = legacy full Bloom filter, 1 = block-based Bloom filter, " - "2 = format_version 5 Bloom filter, 3 = Ribbon128 filter. With " + "0 = legacy full Bloom filter, " + "1 = format_version 5 Bloom filter, 2 = Ribbon128 filter. 
               "-use_plain_table_bloom: 0 = no locality, 1 = locality.");
 
 DEFINE_bool(net_includes_hashing, false,
@@ -358,13 +358,9 @@ void FilterBench::Go() {
           "-impl must currently be >= 0 and <= 1 for Plain table");
     }
   } else {
-    if (FLAGS_impl == 1) {
+    if (FLAGS_impl > 2) {
       throw std::runtime_error(
-          "Block-based filter not currently supported by filter_bench");
-    }
-    if (FLAGS_impl > 3) {
-      throw std::runtime_error(
-          "-impl must currently be 0, 2, or 3 for Block-based table");
+          "-impl must currently be >= 0 and <= 2 for Block-based table");
     }
   }
 
@@ -603,7 +599,7 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
   auto dry_run_hash_fn = DryRunNoHash;
   if (!FLAGS_net_includes_hashing) {
-    if (FLAGS_impl < 2 || FLAGS_use_plain_table_bloom) {
+    if (FLAGS_impl == 0 || FLAGS_use_plain_table_bloom) {
       dry_run_hash_fn = DryRunHash32;
     } else {
       dry_run_hash_fn = DryRunHash64;
@@ -728,8 +724,6 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
       } else {
         may_match = info.full_block_reader_->KeyMayMatch(
             batch_slices[i],
-            /*prefix_extractor=*/nullptr,
-            /*block_offset=*/ROCKSDB_NAMESPACE::kNotValid,
             /*no_io=*/false, /*const_ikey_ptr=*/nullptr,
             /*get_context=*/nullptr,
             /*lookup_context=*/nullptr);
diff --git a/utilities/cache_dump_load_impl.cc b/utilities/cache_dump_load_impl.cc
index 0c2e2e2f5..7745e1618 100644
--- a/utilities/cache_dump_load_impl.cc
+++ b/utilities/cache_dump_load_impl.cc
@@ -139,11 +139,6 @@ CacheDumperImpl::DumpOneBlockCallBack() {
       block_start = (static_cast<Block*>(value))->data();
       block_len = (static_cast<Block*>(value))->size();
       break;
-    case CacheEntryRole::kDeprecatedFilterBlock:
-      type = CacheDumpUnitType::kDeprecatedFilterBlock;
-      block_start = (static_cast<BlockContents*>(value))->data.data();
-      block_len = (static_cast<BlockContents*>(value))->data.size();
-      break;
     case CacheEntryRole::kFilterBlock:
       type = CacheDumpUnitType::kFilter;
       block_start = (static_cast<ParsedFullFilterBlock*>(value))
@@ -163,6 +158,10 @@ CacheDumperImpl::DumpOneBlockCallBack() {
       block_start = (static_cast<Block*>(value))->data();
       block_len = (static_cast<Block*>(value))->size();
       break;
+    case CacheEntryRole::kDeprecatedFilterBlock:
+      // Obsolete
+      filter_out = true;
+      break;
     case CacheEntryRole::kMisc:
      filter_out = true;
       break;
@@ -322,22 +321,6 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() {
     // according to the block type, get the helper callback function and create
     // the corresponding block
     switch (dump_unit.type) {
-      case CacheDumpUnitType::kDeprecatedFilterBlock: {
-        helper = BlocklikeTraits<BlockContents>::GetCacheItemHelper(
-            BlockType::kDeprecatedFilter);
-        std::unique_ptr<BlockContents> block_holder;
-        block_holder.reset(BlocklikeTraits<BlockContents>::Create(
-            std::move(raw_block_contents), 0, statistics, false,
-            toptions_.filter_policy.get()));
-        // Insert the block to secondary cache.
-        // Note that, if we cannot get the correct helper callback, the block
-        // will not be inserted.
-        if (helper != nullptr) {
-          s = secondary_cache_->Insert(dump_unit.key,
-                                       (void*)(block_holder.get()), helper);
-        }
-        break;
-      }
       case CacheDumpUnitType::kFilter: {
         helper = BlocklikeTraits<ParsedFullFilterBlock>::GetCacheItemHelper(
             BlockType::kFilter);
@@ -390,6 +373,9 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() {
       }
       case CacheDumpUnitType::kFooter:
         break;
+      case CacheDumpUnitType::kDeprecatedFilterBlock:
+        // Obsolete
+        break;
       default:
         continue;
     }
diff --git a/utilities/cache_dump_load_impl.h b/utilities/cache_dump_load_impl.h
index 6377a858f..ad8cd045a 100644
--- a/utilities/cache_dump_load_impl.h
+++ b/utilities/cache_dump_load_impl.h
@@ -36,7 +36,7 @@ enum CacheDumpUnitType : unsigned char {
   kHashIndexMetadata = 9,
   kMetaIndex = 10,
   kIndex = 11,
-  kDeprecatedFilterBlock = 12,
+  kDeprecatedFilterBlock = 12,  // OBSOLETE / DEPRECATED
   kFilterMetaBlock = 13,
   kBlockTypeMax,
 };