Remove deprecated block-based filter (#10184)

Summary:
In https://github.com/facebook/rocksdb/issues/9535 (release 7.0), we hid the old block-based filter from being created through
the public API because of its inefficiency. Although we normally maintain read compatibility
on old DBs forever, filters are not required for reading a DB, only for optimizing read
performance. Thus, it should be acceptable to remove this code and the substantial
maintenance burden it carries as useful features (such as user timestamps) are developed
and validated.

This change completely removes the code for reading and writing the old block-based
filters, a net removal of about 1370 lines of code that are no longer needed. The
corresponding options have been removed from testing and benchmarking tools. The prior
existence of the feature is now evident in only a couple of places:
* `CacheEntryRole::kDeprecatedFilterBlock` - We can update this public API enum in
a major release to minimize source code incompatibilities.
* A warning is logged when an old table file that used the old block-based filter is
opened. This is provided as a courtesy and would be a pain to unit test, so manual testing
should suffice (a hedged sketch of checking for the warning follows this list). Unfortunately,
sst_dump does not tell you whether a file uses a block-based filter, and the structure of the
code makes it very difficult to fix.
* To detect that case, `kObsoleteFilterBlockPrefix` (renamed from `kFilterBlockPrefix`)
for the metaindex is maintained (for now).
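
As a hedged illustration of the manual check for that warning (not part of this PR), the sketch below opens an old DB with the new version and scans its info LOG for the new message; the DB path and the open-then-scan approach are assumptions for illustration only.

```cpp
// Hedged sketch: open an old DB built with the base version, then look for
// the courtesy warning in its info LOG. The path is hypothetical, and with
// default options the table files (and thus the warning) should be touched
// at DB::Open time already; a read would trigger it otherwise.
#include <fstream>
#include <iostream>
#include <string>

#include "rocksdb/db.h"

int main() {
  const std::string kDbPath = "/tmp/old_db_with_block_based_filter";  // assumed
  rocksdb::DB* db = nullptr;
  rocksdb::Options options;  // defaults; DB must already exist
  rocksdb::Status s = rocksdb::DB::Open(options, kDbPath, &db);
  if (!s.ok()) {
    std::cerr << "Open failed: " << s.ToString() << std::endl;
    return 1;
  }
  delete db;

  // Scan the default info LOG for the warning text added by this change.
  std::ifstream log(kDbPath + "/LOG");
  std::string line;
  while (std::getline(log, line)) {
    if (line.find("Detected obsolete filter type") != std::string::npos) {
      std::cout << line << std::endl;
    }
  }
  return 0;
}
```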

Other notes:
* In some cases where numbers are associated with filter configurations, we have had to
update the assigned numbers so that they all correspond to something that exists.
* Fixed a potential stat counting bug by assuming `filter_checked = false` for cases
like `filter == nullptr`, rather than assuming `filter_checked = true` (see the sketch after this list).
* Removed obsolete `block_offset` and `prefix_extractor` parameters from several
functions.
* Removed some unnecessary checks (`if (!table_prefix_extractor() && !prefix_extractor)`)
because the caller guarantees the prefix extractor exists and is compatible.
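
The stat-counting fix above amounts to a defaulting pattern; here is a minimal hedged sketch with simplified stand-in names (not the actual reader code) showing why defaulting to `filter_checked = false` only counts filters that were really consulted.

```cpp
// Hedged sketch of the counting fix: default to "not checked" and mark the
// filter as checked only when it is actually consulted. Names are simplified
// stand-ins, not the real BlockBasedTable/FilterBlockReader members.
struct StandInStats {
  long prefix_checked = 0;  // cf. BLOOM_FILTER_PREFIX_CHECKED
  long prefix_useful = 0;   // cf. BLOOM_FILTER_PREFIX_USEFUL
};

bool PrefixMayMatchSketch(const void* filter, bool filter_says_may_match,
                          StandInStats* stats) {
  bool may_match = true;
  bool filter_checked = false;  // previously assumed true
  if (filter != nullptr) {
    filter_checked = true;              // the filter really was consulted
    may_match = filter_says_may_match;  // result of the (simulated) lookup
  }
  if (filter_checked) {
    ++stats->prefix_checked;
    if (!may_match) {
      ++stats->prefix_useful;
    }
  }
  return may_match;
}
```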

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10184

Test Plan:
Tests updated; manually tested the new warning in the LOG by using a base version to
generate a DB.

Reviewed By: riversand963

Differential Revision: D37212647

Pulled By: pdillinger

fbshipit-source-id: 06ee020d8de3b81260ffc36ad0c1202cbf463a80
Branch: main
Author: Peter Dillinger (committed by Facebook GitHub Bot)
Commit: 126c223714
Parent: a6691d0f65
40 changed files (changed line counts in parentheses):
1. CMakeLists.txt (2)
2. HISTORY.md (1)
3. Makefile (3)
4. TARGETS (8)
5. db/db_block_cache_test.cc (11)
6. db/db_bloom_filter_test.cc (101)
7. db_stress_tool/db_stress_common.h (1)
8. db_stress_tool/db_stress_gflags.cc (4)
9. db_stress_tool/db_stress_test_base.cc (11)
10. include/rocksdb/cache.h (2)
11. options/options_test.cc (12)
12. src.mk (2)
13. table/block_based/block_based_filter_block.cc (358)
14. table/block_based/block_based_filter_block.h (127)
15. table/block_based/block_based_filter_block_test.cc (219)
16. table/block_based/block_based_table_builder.cc (35)
17. table/block_based/block_based_table_reader.cc (144)
18. table/block_based/block_based_table_reader.h (4)
19. table/block_based/block_based_table_reader_sync_and_async.h (2)
20. table/block_based/block_like_traits.h (7)
21. table/block_based/block_type.h (1)
22. table/block_based/filter_block.h (38)
23. table/block_based/filter_block_reader_common.cc (5)
24. table/block_based/filter_policy.cc (91)
25. table/block_based/filter_policy_internal.h (32)
26. table/block_based/full_filter_block.cc (33)
27. table/block_based/full_filter_block.h (24)
28. table/block_based/full_filter_block_test.cc (60)
29. table/block_based/mock_block_based_table.h (1)
30. table/block_based/partitioned_filter_block.cc (65)
31. table/block_based/partitioned_filter_block.h (33)
32. table/block_based/partitioned_filter_block_test.cc (43)
33. table/block_fetcher.cc (1)
34. table/table_test.cc (17)
35. tools/db_bench_tool.cc (16)
36. tools/db_crashtest.py (6)
37. util/bloom_test.cc (201)
38. util/filter_bench.cc (16)
39. utilities/cache_dump_load_impl.cc (28)
40. utilities/cache_dump_load_impl.h (2)

@ -738,7 +738,6 @@ set(SOURCES
table/adaptive/adaptive_table_factory.cc
table/block_based/binary_search_index_reader.cc
table/block_based/block.cc
table/block_based/block_based_filter_block.cc
table/block_based/block_based_table_builder.cc
table/block_based/block_based_table_factory.cc
table/block_based/block_based_table_iterator.cc
@ -1321,7 +1320,6 @@ if(WITH_TESTS)
options/customizable_test.cc
options/options_settable_test.cc
options/options_test.cc
table/block_based/block_based_filter_block_test.cc
table/block_based/block_based_table_reader_test.cc
table/block_based/block_test.cc
table/block_based/data_block_hash_index_test.cc

@ -37,6 +37,7 @@
### Behavior changes
* DB::Open(), DB::OpenAsSecondary() will fail if a Logger cannot be created (#9984)
* Removed support for reading Bloom filters using obsolete block-based filter format. (Support for writing such filters was dropped in 7.0.) For good read performance on old DBs using these filters, a full compaction is required.
## 7.3.0 (05/20/2022)
### Bug Fixes
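
As a companion to the HISTORY note above about needing a full compaction: a hedged sketch (not part of this change) of how an application might force old files to be rewritten with the currently configured filter. The bottommost-level setting is a standard option; the wrapper function is hypothetical.

```cpp
// Hedged sketch: force a full compaction over the whole key range so table
// files that still carry the obsolete block-based filter are rewritten with
// the currently configured filter.
#include "rocksdb/db.h"
#include "rocksdb/options.h"

rocksdb::Status RecompactAll(rocksdb::DB* db) {
  rocksdb::CompactRangeOptions cro;
  // Also rewrite files already sitting on the bottommost level.
  cro.bottommost_level_compaction =
      rocksdb::BottommostLevelCompaction::kForce;
  // nullptr begin/end covers the entire key range.
  return db->CompactRange(cro, nullptr, nullptr);
}
```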

@ -1594,9 +1594,6 @@ random_access_file_reader_test: $(OBJ_DIR)/file/random_access_file_reader_test.o
file_reader_writer_test: $(OBJ_DIR)/util/file_reader_writer_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
block_based_filter_block_test: $(OBJ_DIR)/table/block_based/block_based_filter_block_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
block_based_table_reader_test: table/block_based/block_based_table_reader_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)

@ -165,7 +165,6 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
"table/adaptive/adaptive_table_factory.cc",
"table/block_based/binary_search_index_reader.cc",
"table/block_based/block.cc",
"table/block_based/block_based_filter_block.cc",
"table/block_based/block_based_table_builder.cc",
"table/block_based/block_based_table_factory.cc",
"table/block_based/block_based_table_iterator.cc",
@ -494,7 +493,6 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
"table/adaptive/adaptive_table_factory.cc",
"table/block_based/binary_search_index_reader.cc",
"table/block_based/block.cc",
"table/block_based/block_based_filter_block.cc",
"table/block_based/block_based_table_builder.cc",
"table/block_based/block_based_table_factory.cc",
"table/block_based/block_based_table_iterator.cc",
@ -4800,12 +4798,6 @@ cpp_unittest_wrapper(name="blob_garbage_meter_test",
extra_compiler_flags=[])
cpp_unittest_wrapper(name="block_based_filter_block_test",
srcs=["table/block_based/block_based_filter_block_test.cc"],
deps=[":rocksdb_test_lib"],
extra_compiler_flags=[])
cpp_unittest_wrapper(name="block_based_table_reader_test",
srcs=["table/block_based/block_based_table_reader_test.cc"],
deps=[":rocksdb_test_lib"],

@ -670,7 +670,7 @@ class DBBlockCacheTest1 : public DBTestBase,
};
INSTANTIATE_TEST_CASE_P(DBBlockCacheTest1, DBBlockCacheTest1,
::testing::Values(1, 2, 3));
::testing::Values(1, 2));
TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) {
Options options = CurrentOptions();
@ -686,13 +686,10 @@ TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) {
table_options.partition_filters = true;
table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
break;
case 2: // block-based filter
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
break;
case 3: // full filter
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
case 2: // full filter
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
break;
default:
assert(false);

@ -37,8 +37,6 @@ std::shared_ptr<const FilterPolicy> Create(double bits_per_key,
return BloomLikeFilterPolicy::Create(name, bits_per_key);
}
const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName();
const std::string kDeprecatedBlock =
DeprecatedBlockBasedBloomFilterPolicy::kClassName();
const std::string kFastLocalBloom =
test::FastLocalBloomFilterPolicy::kClassName();
const std::string kStandard128Ribbon =
@ -189,7 +187,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) {
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
get_perf_context()->EnablePerLevelPerfContext();
BlockBasedTableOptions bbto;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
bbto.filter_policy.reset(NewBloomFilterPolicy(10));
if (partition_filters) {
bbto.partition_filters = true;
bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
@ -267,7 +265,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
get_perf_context()->EnablePerLevelPerfContext();
BlockBasedTableOptions bbto;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
bbto.filter_policy.reset(NewBloomFilterPolicy(10));
if (partition_filters) {
bbto.partition_filters = true;
bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
@ -331,7 +329,7 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
get_perf_context()->EnablePerLevelPerfContext();
BlockBasedTableOptions bbto;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
bbto.filter_policy.reset(NewBloomFilterPolicy(10));
bbto.whole_key_filtering = false;
if (partition_filters) {
bbto.partition_filters = true;
@ -747,7 +745,6 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) {
INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestDefFormatVersion,
::testing::Values(
std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion),
std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion),
std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion),
std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion)));
@ -755,7 +752,6 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestWithParam,
::testing::Values(
std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion),
std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion),
std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion),
std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion)));
@ -763,7 +759,6 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
FormatLatest, DBBloomFilterTestWithParam,
::testing::Values(
std::make_tuple(kDeprecatedBlock, false, kLatestFormatVersion),
std::make_tuple(kAutoBloom, true, kLatestFormatVersion),
std::make_tuple(kAutoBloom, false, kLatestFormatVersion),
std::make_tuple(kAutoRibbon, false, kLatestFormatVersion)));
@ -829,8 +824,6 @@ std::shared_ptr<const FilterPolicy> kCompatibilityRibbonPolicy{
NewRibbonFilterPolicy(20, -1)};
std::vector<CompatibilityConfig> kCompatibilityConfigs = {
{Create(20, kDeprecatedBlock), false,
BlockBasedTableOptions().format_version},
{kCompatibilityBloomPolicy, false, BlockBasedTableOptions().format_version},
{kCompatibilityBloomPolicy, true, BlockBasedTableOptions().format_version},
{kCompatibilityBloomPolicy, false, /* legacy Bloom */ 4U},
@ -878,9 +871,8 @@ TEST_F(DBBloomFilterTest, BloomFilterCompatibility) {
ASSERT_EQ("val", Get(prefix + "Z")); // Filter positive
// Filter negative, with high probability
ASSERT_EQ("NOT_FOUND", Get(prefix + "Q"));
// FULL_POSITIVE does not include block-based filter case (j == 0)
EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE),
j == 0 ? 0 : 2);
2);
EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
}
}
@ -904,7 +896,7 @@ class ChargeFilterConstructionTestWithParam
detect_filter_construct_corruption_(std::get<3>(GetParam())) {
if (charge_filter_construction_ ==
CacheEntryRoleOptions::Decision::kDisabled ||
policy_ == kDeprecatedBlock || policy_ == kLegacyBloom) {
policy_ == kLegacyBloom) {
// For these cases, we only interested in whether filter construction
// cache charging happens instead of its accuracy. Therefore we don't
// need many keys.
@ -1031,8 +1023,6 @@ INSTANTIATE_TEST_CASE_P(
std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
kStandard128Ribbon, true, true),
std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
kDeprecatedBlock, false, false),
std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, kLegacyBloom,
false, false)));
@ -1109,11 +1099,10 @@ TEST_P(ChargeFilterConstructionTestWithParam, Basic) {
return;
}
if (policy == kDeprecatedBlock || policy == kLegacyBloom) {
if (policy == kLegacyBloom) {
EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0)
<< "There shouldn't be filter construction cache charging as this "
"feature does not support kDeprecatedBlock "
"nor kLegacyBloom";
"feature does not support kLegacyBloom";
return;
}
@ -1798,10 +1787,9 @@ class SliceTransformLimitedDomain : public SliceTransform {
}
};
TEST_F(DBBloomFilterTest, PrefixExtractorFullFilter) {
TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter1) {
BlockBasedTableOptions bbto;
// Full Filter Block
bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false));
bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
bbto.whole_key_filtering = false;
Options options = CurrentOptions();
@ -1827,10 +1815,9 @@ TEST_F(DBBloomFilterTest, PrefixExtractorFullFilter) {
ASSERT_EQ(Get("zzzzz_AAAA"), "val5");
}
TEST_F(DBBloomFilterTest, PrefixExtractorBlockFilter) {
TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter2) {
BlockBasedTableOptions bbto;
// Block Filter Block
bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, true));
bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
Options options = CurrentOptions();
options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
@ -2221,7 +2208,6 @@ class BloomStatsTestWithParam
} else {
BlockBasedTableOptions table_options;
if (partition_filters_) {
assert(bfp_impl_ != kDeprecatedBlock);
table_options.partition_filters = partition_filters_;
table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
@ -2346,9 +2332,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(value3, iter->value().ToString());
// The seek doesn't check block-based bloom filter because last index key
// starts with the same prefix we're seeking to.
uint64_t expected_hits = bfp_impl_ == kDeprecatedBlock ? 1 : 2;
uint64_t expected_hits = 2;
ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count);
iter->Seek(key2);
@ -2360,8 +2344,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
INSTANTIATE_TEST_CASE_P(
BloomStatsTestWithParam, BloomStatsTestWithParam,
::testing::Values(std::make_tuple(kDeprecatedBlock, false),
std::make_tuple(kLegacyBloom, false),
::testing::Values(std::make_tuple(kLegacyBloom, false),
std::make_tuple(kLegacyBloom, true),
std::make_tuple(kFastLocalBloom, false),
std::make_tuple(kFastLocalBloom, true),
@ -2486,7 +2469,7 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) {
options.level_compaction_dynamic_level_bytes = true;
BlockBasedTableOptions bbto;
bbto.cache_index_and_filter_blocks = true;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, true));
bbto.filter_policy.reset(NewBloomFilterPolicy(10));
bbto.whole_key_filtering = true;
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
options.optimize_filters_for_hits = true;
@ -2674,7 +2657,6 @@ int CountIter(std::unique_ptr<Iterator>& iter, const Slice& key) {
// as the upper bound and two keys are adjacent according to the comparator.
TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
int using_full_builder = bfp_impl != kDeprecatedBlock;
Options options;
options.create_if_missing = true;
options.env = CurrentOptions().env;
@ -2726,8 +2708,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abcdxx00"), 4);
// should check bloom filter since upper bound meets requirement
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + using_full_builder);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
{
@ -2740,8 +2721,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abcdxx01"), 4);
// should skip bloom filter since upper bound is too long
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + using_full_builder);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
{
@ -2754,8 +2734,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
ASSERT_EQ(CountIter(iter, "abcdxx02"), 4);
// should check bloom filter since upper bound matches transformed seek
// key
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
{
@ -2768,8 +2747,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0);
// should skip bloom filter since mismatch is found
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}}));
@ -2782,8 +2760,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abc"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
}
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:4"}}));
@ -2795,8 +2772,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "abc"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
3 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
}
}
@ -2806,7 +2782,6 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
// verify iterators can read all SST files using the latest config.
TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
int using_full_builder = bfp_impl != kDeprecatedBlock;
Options options;
options.env = CurrentOptions().env;
options.create_if_missing = true;
@ -2840,11 +2815,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
read_options.iterate_upper_bound = &upper_bound;
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter, "foo"), 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
1 + using_full_builder);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2);
ASSERT_EQ(CountIter(iter, "gpk"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
1 + using_full_builder);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
// second SST with capped:3 BF
@ -2857,14 +2830,12 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
// BF is cappped:3 now
std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_tmp, "foo"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
2 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
// both counters are incremented because BF is "not changed" for 1 of the
// 2 SST files, so filter is checked once and found no match.
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
3 + using_full_builder * 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
}
@ -2882,25 +2853,21 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_tmp, "foo"), 9);
// the first and last BF are checked
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
4 + using_full_builder * 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 7);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
// only last BF is checked and not found
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
5 + using_full_builder * 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 8);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
}
// iter_old can only see the first SST, so checked plus 1
ASSERT_EQ(CountIter(iter_old, "foo"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
6 + using_full_builder * 3);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9);
// iter was created after the first setoptions call so only full filter
// will check the filter
ASSERT_EQ(CountIter(iter, "foo"), 2);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
6 + using_full_builder * 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 10);
{
// keys in all three SSTs are visible to iterator
@ -2908,12 +2875,10 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
// so +2 for checked counter
std::unique_ptr<Iterator> iter_all(db_->NewIterator(read_options));
ASSERT_EQ(CountIter(iter_all, "foo"), 9);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
7 + using_full_builder * 5);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
ASSERT_EQ(CountIter(iter_all, "gpk"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
8 + using_full_builder * 5);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 13);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
}
ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
@ -2924,12 +2889,10 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
ASSERT_EQ(CountIter(iter_all, "foo"), 6);
// all three SST are checked because the current options has the same as
// the remaining SST (capped:3)
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
9 + using_full_builder * 7);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 16);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
ASSERT_EQ(CountIter(iter_all, "gpk"), 0);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
10 + using_full_builder * 7);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 17);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4);
}
// TODO(Zhongyi): Maybe also need to add Get calls to test point look up?
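
The assertions above read Bloom tickers through the test helper `TestGetTickerCount`; the same counters are available to applications from the Statistics object. A hedged sketch using the public statistics API (setup of `options.statistics` via `CreateDBStatistics()` is assumed):

```cpp
// Hedged sketch: read the same tickers the tests assert on, but via the
// public Statistics API instead of the test helper.
#include <cstdint>
#include <iostream>

#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

void LogBloomPrefixTickers(const rocksdb::Options& options) {
  const std::shared_ptr<rocksdb::Statistics>& stats = options.statistics;
  if (!stats) {
    return;  // statistics were not enabled
  }
  std::cout << "prefix checked: "
            << stats->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_CHECKED)
            << ", prefix useful: "
            << stats->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_USEFUL)
            << std::endl;
}
```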

@ -152,7 +152,6 @@ DECLARE_double(experimental_mempurge_threshold);
DECLARE_bool(enable_write_thread_adaptive_yield);
DECLARE_int32(reopen);
DECLARE_double(bloom_bits);
DECLARE_bool(use_block_based_filter);
DECLARE_int32(ribbon_starting_level);
DECLARE_bool(partition_filters);
DECLARE_bool(optimize_filters_for_memory);

@ -469,10 +469,6 @@ DEFINE_double(bloom_bits, 10,
"Bloom filter bits per key. "
"Negative means use default settings.");
DEFINE_bool(use_block_based_filter, false,
"use block based filter"
"instead of full filter for block based table");
DEFINE_int32(
ribbon_starting_level, 999,
"Use Bloom filter on levels below specified and Ribbon beginning on level "

@ -36,16 +36,7 @@ std::shared_ptr<const FilterPolicy> CreateFilterPolicy() {
return BlockBasedTableOptions().filter_policy;
}
const FilterPolicy* new_policy;
if (FLAGS_use_block_based_filter) {
if (FLAGS_ribbon_starting_level < 999) {
fprintf(
stderr,
"Cannot combine use_block_based_filter and ribbon_starting_level\n");
exit(1);
} else {
new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, true);
}
} else if (FLAGS_ribbon_starting_level >= 999) {
if (FLAGS_ribbon_starting_level >= 999) {
// Use Bloom API
new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, false);
} else {
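
With `use_block_based_filter` removed, the stress tool's filter selection above reduces to a Bloom-vs-Ribbon decision. A hedged standalone sketch of that decision using only the public API (the parameters are stand-ins for the gflags):

```cpp
// Hedged sketch mirroring the simplified CreateFilterPolicy() logic above.
// Parameters are stand-ins for FLAGS_bloom_bits / FLAGS_ribbon_starting_level.
#include "rocksdb/filter_policy.h"

const rocksdb::FilterPolicy* MakeStressFilterPolicy(double bloom_bits,
                                                    int ribbon_starting_level) {
  if (bloom_bits < 0) {
    return nullptr;  // stand-in for "use default settings"
  }
  if (ribbon_starting_level >= 999) {
    // Bloom on all levels.
    return rocksdb::NewBloomFilterPolicy(bloom_bits,
                                         /*use_block_based_builder=*/false);
  }
  // Bloom before the given level, Ribbon from that level on.
  return rocksdb::NewRibbonFilterPolicy(bloom_bits, ribbon_starting_level);
}
```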

@ -553,7 +553,7 @@ enum class CacheEntryRole {
kFilterBlock,
// Block-based table metadata block for partitioned filter
kFilterMetaBlock,
// Block-based table deprecated filter block (old "block-based" filter)
// OBSOLETE / DEPRECATED: old/removed block-based filter
kDeprecatedFilterBlock,
// Block-based table index block
kIndexBlock,
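
Even though `kDeprecatedFilterBlock` stays in the enum, no entries of that role should be created after this change. A hedged sketch of inspecting per-role block cache usage; the property name `kBlockCacheEntryStats` and the shape of its output are assumptions here, not something this diff establishes:

```cpp
// Hedged sketch: dump per-role block cache entry statistics. Assumes the
// DB::Properties::kBlockCacheEntryStats map property; entries attributed to
// the deprecated filter block role are expected to be absent going forward.
#include <iostream>
#include <map>
#include <string>

#include "rocksdb/db.h"

void DumpBlockCacheEntryStats(rocksdb::DB* db) {
  std::map<std::string, std::string> stats;
  if (db->GetMapProperty(rocksdb::DB::Properties::kBlockCacheEntryStats,
                         &stats)) {
    for (const auto& kv : stats) {
      std::cout << kv.first << " = " << kv.second << std::endl;
    }
  }
}
```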

@ -999,21 +999,11 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) {
EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000);
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4);
// Back door way of enabling deprecated block-based Bloom
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt,
"filter_policy=rocksdb.internal.DeprecatedBlockBasedBloomFilter:4",
&new_opt));
auto builtin =
dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(builtin->GetId(),
"rocksdb.internal.DeprecatedBlockBasedBloomFilter:4");
// Test configuring using other internal names
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt,
"filter_policy=rocksdb.internal.LegacyBloomFilter:3", &new_opt));
builtin =
auto builtin =
dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(builtin->GetId(), "rocksdb.internal.LegacyBloomFilter:3");
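
For contrast with the internal policy names exercised above, a hedged sketch of configuring a supported full Bloom filter through the same string-based options API (assuming the long-standing `bloomfilter:<bits>:<flag>` syntax; the values are arbitrary):

```cpp
// Hedged sketch: configure a supported Bloom filter via the options-string
// API used by the test above. Syntax and values are illustrative assumptions.
#include "rocksdb/convenience.h"
#include "rocksdb/table.h"

bool ConfigureBloomFromString(rocksdb::BlockBasedTableOptions* result) {
  rocksdb::ConfigOptions config_options;
  rocksdb::BlockBasedTableOptions base;
  rocksdb::Status s = rocksdb::GetBlockBasedTableOptionsFromString(
      config_options, base, "filter_policy=bloomfilter:10:false", result);
  return s.ok() && result->filter_policy != nullptr;
}
```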

@ -156,7 +156,6 @@ LIB_SOURCES = \
table/adaptive/adaptive_table_factory.cc \
table/block_based/binary_search_index_reader.cc \
table/block_based/block.cc \
table/block_based/block_based_filter_block.cc \
table/block_based/block_based_table_builder.cc \
table/block_based/block_based_table_factory.cc \
table/block_based/block_based_table_iterator.cc \
@ -525,7 +524,6 @@ TEST_MAIN_SOURCES = \
options/customizable_test.cc \
options/options_settable_test.cc \
options/options_test.cc \
table/block_based/block_based_filter_block_test.cc \
table/block_based/block_based_table_reader_test.cc \
table/block_based/block_test.cc \
table/block_based/data_block_hash_index_test.cc \

@ -1,358 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/block_based/block_based_filter_block.h"
#include <algorithm>
#include "db/dbformat.h"
#include "monitoring/perf_context_imp.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"
#include "util/cast_util.h"
#include "util/coding.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
namespace {
void AppendItem(std::string* props, const std::string& key,
const std::string& value) {
char cspace = ' ';
std::string value_str("");
size_t i = 0;
const size_t dataLength = 64;
const size_t tabLength = 2;
const size_t offLength = 16;
value_str.append(&value[i], std::min(size_t(dataLength), value.size()));
i += dataLength;
while (i < value.size()) {
value_str.append("\n");
value_str.append(offLength, cspace);
value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i));
i += dataLength;
}
std::string result("");
if (key.size() < (offLength - tabLength))
result.append(size_t((offLength - tabLength)) - key.size(), cspace);
result.append(key);
props->append(result + ": " + value_str + "\n");
}
template <class TKey>
void AppendItem(std::string* props, const TKey& key, const std::string& value) {
std::string key_str = std::to_string(key);
AppendItem(props, key_str, value);
}
} // namespace
// See doc/table_format.txt for an explanation of the filter block format.
// Generate new filter every 2KB of data
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;
BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
const SliceTransform* prefix_extractor,
const BlockBasedTableOptions& table_opt, int bits_per_key)
: prefix_extractor_(prefix_extractor),
whole_key_filtering_(table_opt.whole_key_filtering),
bits_per_key_(bits_per_key),
prev_prefix_start_(0),
prev_prefix_size_(0),
total_added_in_built_(0) {}
void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) {
uint64_t filter_index = (block_offset / kFilterBase);
assert(filter_index >= filter_offsets_.size());
while (filter_index > filter_offsets_.size()) {
GenerateFilter();
}
}
size_t BlockBasedFilterBlockBuilder::EstimateEntriesAdded() {
return total_added_in_built_ + start_.size();
}
void BlockBasedFilterBlockBuilder::Add(const Slice& key_without_ts) {
if (prefix_extractor_ && prefix_extractor_->InDomain(key_without_ts)) {
AddPrefix(key_without_ts);
}
if (whole_key_filtering_) {
AddKey(key_without_ts);
}
}
// Add key to filter if needed
inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) {
start_.push_back(entries_.size());
entries_.append(key.data(), key.size());
}
// Add prefix to filter if needed
inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
// get slice for most recently added entry
Slice prev;
if (prev_prefix_size_ > 0) {
prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_);
}
Slice prefix = prefix_extractor_->Transform(key);
// insert prefix only when it's different from the previous prefix.
if (prev.size() == 0 || prefix != prev) {
prev_prefix_start_ = entries_.size();
prev_prefix_size_ = prefix.size();
AddKey(prefix);
}
}
Slice BlockBasedFilterBlockBuilder::Finish(
const BlockHandle& /*tmp*/, Status* status,
std::unique_ptr<const char[]>* /* filter_data */) {
// In this impl we ignore BlockHandle and filter_data
*status = Status::OK();
if (!start_.empty()) {
GenerateFilter();
}
// Append array of per-filter offsets
const uint32_t array_offset = static_cast<uint32_t>(result_.size());
for (size_t i = 0; i < filter_offsets_.size(); i++) {
PutFixed32(&result_, filter_offsets_[i]);
}
PutFixed32(&result_, array_offset);
result_.push_back(kFilterBaseLg); // Save encoding parameter in result
return Slice(result_);
}
void BlockBasedFilterBlockBuilder::GenerateFilter() {
const size_t num_entries = start_.size();
if (num_entries == 0) {
// Fast path if there are no keys for this filter
filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
return;
}
total_added_in_built_ += num_entries;
// Make list of keys from flattened key structure
start_.push_back(entries_.size()); // Simplify length computation
tmp_entries_.resize(num_entries);
for (size_t i = 0; i < num_entries; i++) {
const char* base = entries_.data() + start_[i];
size_t length = start_[i + 1] - start_[i];
tmp_entries_[i] = Slice(base, length);
}
// Generate filter for current set of keys and append to result_.
filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(
tmp_entries_.data(), static_cast<int>(num_entries), bits_per_key_,
&result_);
tmp_entries_.clear();
entries_.clear();
start_.clear();
prev_prefix_start_ = 0;
prev_prefix_size_ = 0;
}
BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
const BlockBasedTable* t, CachableEntry<BlockContents>&& filter_block)
: FilterBlockReaderCommon(t, std::move(filter_block)) {
assert(table());
assert(table()->get_rep());
assert(table()->get_rep()->filter_policy);
}
std::unique_ptr<FilterBlockReader> BlockBasedFilterBlockReader::Create(
const BlockBasedTable* table, const ReadOptions& ro,
FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
bool pin, BlockCacheLookupContext* lookup_context) {
assert(table);
assert(table->get_rep());
assert(!pin || prefetch);
CachableEntry<BlockContents> filter_block;
if (prefetch || !use_cache) {
const Status s = ReadFilterBlock(
table, prefetch_buffer, ro, use_cache, nullptr /* get_context */,
lookup_context, &filter_block, BlockType::kDeprecatedFilter);
if (!s.ok()) {
IGNORE_STATUS_IF_ERROR(s);
return std::unique_ptr<FilterBlockReader>();
}
if (use_cache && !pin) {
filter_block.Reset();
}
}
return std::unique_ptr<FilterBlockReader>(
new BlockBasedFilterBlockReader(table, std::move(filter_block)));
}
bool BlockBasedFilterBlockReader::KeyMayMatch(
const Slice& key, const SliceTransform* /* prefix_extractor */,
uint64_t block_offset, const bool no_io,
const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
BlockCacheLookupContext* lookup_context) {
assert(block_offset != kNotValid);
if (!whole_key_filtering()) {
return true;
}
return MayMatch(key, block_offset, no_io, get_context, lookup_context);
}
bool BlockBasedFilterBlockReader::PrefixMayMatch(
const Slice& prefix, const SliceTransform* /* prefix_extractor */,
uint64_t block_offset, const bool no_io,
const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
BlockCacheLookupContext* lookup_context) {
assert(block_offset != kNotValid);
return MayMatch(prefix, block_offset, no_io, get_context, lookup_context);
}
bool BlockBasedFilterBlockReader::ParseFieldsFromBlock(
const BlockContents& contents, const char** data, const char** offset,
size_t* num, size_t* base_lg) {
assert(data);
assert(offset);
assert(num);
assert(base_lg);
const size_t n = contents.data.size();
if (n < 5) { // 1 byte for base_lg and 4 for start of offset array
return false;
}
const uint32_t last_word = DecodeFixed32(contents.data.data() + n - 5);
if (last_word > n - 5) {
return false;
}
*data = contents.data.data();
*offset = (*data) + last_word;
*num = (n - 5 - last_word) / 4;
*base_lg = contents.data[n - 1];
return true;
}
bool BlockBasedFilterBlockReader::MayMatch(
const Slice& entry, uint64_t block_offset, bool no_io,
GetContext* get_context, BlockCacheLookupContext* lookup_context) const {
CachableEntry<BlockContents> filter_block;
const Status s =
GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block,
BlockType::kDeprecatedFilter);
if (!s.ok()) {
IGNORE_STATUS_IF_ERROR(s);
return true;
}
assert(filter_block.GetValue());
const char* data = nullptr;
const char* offset = nullptr;
size_t num = 0;
size_t base_lg = 0;
if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num,
&base_lg)) {
return true; // Errors are treated as potential matches
}
const uint64_t index = block_offset >> base_lg;
if (index < num) {
const uint32_t start = DecodeFixed32(offset + index * 4);
const uint32_t limit = DecodeFixed32(offset + index * 4 + 4);
if (start <= limit && limit <= (uint32_t)(offset - data)) {
const Slice filter = Slice(data + start, limit - start);
assert(table());
assert(table()->get_rep());
const bool may_match =
DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(entry, filter);
if (may_match) {
PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
return true;
} else {
PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
return false;
}
} else if (start == limit) {
// Empty filters do not match any entries
return false;
}
}
return true; // Errors are treated as potential matches
}
size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
size_t usage = ApproximateFilterBlockMemoryUsage();
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
usage += malloc_usable_size(const_cast<BlockBasedFilterBlockReader*>(this));
#else
usage += sizeof(*this);
#endif // ROCKSDB_MALLOC_USABLE_SIZE
return usage;
}
std::string BlockBasedFilterBlockReader::ToString() const {
CachableEntry<BlockContents> filter_block;
const Status s =
GetOrReadFilterBlock(false /* no_io */, nullptr /* get_context */,
nullptr /* lookup_context */, &filter_block,
BlockType::kDeprecatedFilter);
if (!s.ok()) {
IGNORE_STATUS_IF_ERROR(s);
return std::string("Unable to retrieve filter block");
}
assert(filter_block.GetValue());
const char* data = nullptr;
const char* offset = nullptr;
size_t num = 0;
size_t base_lg = 0;
if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num,
&base_lg)) {
return std::string("Error parsing filter block");
}
std::string result;
result.reserve(1024);
std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
AppendItem(&result, s_fb, std::to_string(num));
AppendItem(&result, s_bo, s_hd);
for (size_t index = 0; index < num; index++) {
uint32_t start = DecodeFixed32(offset + index * 4);
uint32_t limit = DecodeFixed32(offset + index * 4 + 4);
if (start != limit) {
result.append(" filter block # " + std::to_string(index + 1) + "\n");
Slice filter = Slice(data + start, limit - start);
AppendItem(&result, start, filter.ToString(true));
}
}
return result;
}
} // namespace ROCKSDB_NAMESPACE

@ -1,127 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A filter block is stored near the end of a Table file. It contains
// filters (e.g., bloom filters) for all data blocks in the table combined
// into a single filter block.
#pragma once
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <string>
#include <vector>
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "table/block_based/filter_block_reader_common.h"
#include "table/block_based/filter_policy_internal.h"
#include "table/format.h"
#include "util/hash.h"
namespace ROCKSDB_NAMESPACE {
// A BlockBasedFilterBlockBuilder is used to construct all of the filters for a
// particular Table. It generates a single string which is stored as
// a special block in the Table.
//
// The sequence of calls to BlockBasedFilterBlockBuilder must match the regexp:
// (StartBlock Add*)* Finish
class BlockBasedFilterBlockBuilder : public FilterBlockBuilder {
public:
BlockBasedFilterBlockBuilder(const SliceTransform* prefix_extractor,
const BlockBasedTableOptions& table_opt,
int bits_per_key);
// No copying allowed
BlockBasedFilterBlockBuilder(const BlockBasedFilterBlockBuilder&) = delete;
void operator=(const BlockBasedFilterBlockBuilder&) = delete;
virtual bool IsBlockBased() override { return true; }
virtual void StartBlock(uint64_t block_offset) override;
virtual void Add(const Slice& key_without_ts) override;
virtual bool IsEmpty() const override {
return start_.empty() && filter_offsets_.empty();
}
virtual size_t EstimateEntriesAdded() override;
virtual Slice Finish(
const BlockHandle& tmp, Status* status,
std::unique_ptr<const char[]>* filter_data = nullptr) override;
using FilterBlockBuilder::Finish;
private:
void AddKey(const Slice& key);
void AddPrefix(const Slice& key);
void GenerateFilter();
// important: all of these might point to invalid addresses
// at the time of destruction of this filter block. destructor
// should NOT dereference them.
const SliceTransform* prefix_extractor_;
bool whole_key_filtering_;
int bits_per_key_;
size_t prev_prefix_start_; // the position of the last appended prefix
// to "entries_".
size_t prev_prefix_size_; // the length of the last appended prefix to
// "entries_".
std::string entries_; // Flattened entry contents
std::vector<size_t> start_; // Starting index in entries_ of each entry
std::string result_; // Filter data computed so far
std::vector<Slice> tmp_entries_; // policy_->CreateFilter() argument
std::vector<uint32_t> filter_offsets_;
uint64_t total_added_in_built_; // Total keys added to filters built so far
};
// A FilterBlockReader is used to parse filter from SST table.
// KeyMayMatch and PrefixMayMatch would trigger filter checking
class BlockBasedFilterBlockReader
: public FilterBlockReaderCommon<BlockContents> {
public:
BlockBasedFilterBlockReader(const BlockBasedTable* t,
CachableEntry<BlockContents>&& filter_block);
// No copying allowed
BlockBasedFilterBlockReader(const BlockBasedFilterBlockReader&) = delete;
void operator=(const BlockBasedFilterBlockReader&) = delete;
static std::unique_ptr<FilterBlockReader> Create(
const BlockBasedTable* table, const ReadOptions& ro,
FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
bool pin, BlockCacheLookupContext* lookup_context);
bool IsBlockBased() override { return true; }
bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr, GetContext* get_context,
BlockCacheLookupContext* lookup_context) override;
bool PrefixMayMatch(const Slice& prefix,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr,
GetContext* get_context,
BlockCacheLookupContext* lookup_context) override;
size_t ApproximateMemoryUsage() const override;
// convert this object to a human readable form
std::string ToString() const override;
private:
static bool ParseFieldsFromBlock(const BlockContents& contents,
const char** data, const char** offset,
size_t* num, size_t* base_lg);
bool MayMatch(const Slice& entry, uint64_t block_offset, bool no_io,
GetContext* get_context,
BlockCacheLookupContext* lookup_context) const;
};
} // namespace ROCKSDB_NAMESPACE

@ -1,219 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/block_based/block_based_filter_block.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/mock_block_based_table.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/coding.h"
#include "util/hash.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
// Test for block based filter block
// use new interface in FilterPolicy to create filter builder/reader
class BlockBasedFilterBlockTest : public mock::MockBlockBasedTableTester,
public testing::Test {
public:
BlockBasedFilterBlockTest()
: mock::MockBlockBasedTableTester(NewBloomFilterPolicy(10, true)) {}
};
TEST_F(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) {
FilterBlockBuilder* builder =
new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10);
Slice slice(builder->Finish());
ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(slice));
CachableEntry<BlockContents> block(
new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */,
true /* own_value */);
FilterBlockReader* reader =
new BlockBasedFilterBlockReader(table_.get(), std::move(block));
ASSERT_TRUE(reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/10000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
delete builder;
delete reader;
}
TEST_F(BlockBasedFilterBlockTest, BlockBasedSingleChunk) {
FilterBlockBuilder* builder =
new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10);
builder->StartBlock(100);
builder->Add("foo");
builder->Add("bar");
builder->Add("box");
builder->StartBlock(200);
builder->Add("box");
builder->StartBlock(300);
builder->Add("hello");
Slice slice(builder->Finish());
CachableEntry<BlockContents> block(
new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */,
true /* own_value */);
FilterBlockReader* reader =
new BlockBasedFilterBlockReader(table_.get(), std::move(block));
ASSERT_TRUE(reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"missing", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"other", /*prefix_extractor=*/nullptr, /*block_offset=*/100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
delete builder;
delete reader;
}
TEST_F(BlockBasedFilterBlockTest, BlockBasedMultiChunk) {
FilterBlockBuilder* builder =
new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10);
// First filter
builder->StartBlock(0);
builder->Add("foo");
builder->StartBlock(2000);
builder->Add("bar");
// Second filter
builder->StartBlock(3100);
builder->Add("box");
// Third filter is empty
// Last filter
builder->StartBlock(9000);
builder->Add("box");
builder->Add("hello");
Slice slice(builder->Finish());
CachableEntry<BlockContents> block(
new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */,
true /* own_value */);
FilterBlockReader* reader =
new BlockBasedFilterBlockReader(table_.get(), std::move(block));
// Check first filter
ASSERT_TRUE(reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/2000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0},
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
// Check second filter
ASSERT_TRUE(reader->KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/3100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
// Check third filter (empty)
ASSERT_TRUE(!reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/4100,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
// Check last filter
ASSERT_TRUE(reader->KeyMayMatch(
"box", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(
"hello", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"foo", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader->KeyMayMatch(
"bar", /*prefix_extractor=*/nullptr, /*block_offset=*/9000,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
delete builder;
delete reader;
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

@ -37,7 +37,6 @@
#include "rocksdb/table.h"
#include "rocksdb/types.h"
#include "table/block_based/block.h"
#include "table/block_based/block_based_filter_block.h"
#include "table/block_based/block_based_table_factory.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/block_builder.h"
@ -80,17 +79,6 @@ FilterBlockBuilder* CreateFilterBlockBuilder(
if (filter_bits_builder == nullptr) {
return nullptr;
} else {
// Check for backdoor deprecated block-based bloom config
size_t starting_est = filter_bits_builder->EstimateEntriesAdded();
constexpr auto kSecretStart =
DeprecatedBlockBasedBloomFilterPolicy::kSecretBitsPerKeyStart;
if (starting_est >= kSecretStart && starting_est < kSecretStart + 100) {
int bits_per_key = static_cast<int>(starting_est - kSecretStart);
delete filter_bits_builder;
return new BlockBasedFilterBlockBuilder(mopt.prefix_extractor.get(),
table_opt, bits_per_key);
}
// END check for backdoor deprecated block-based bloom config
if (table_opt.partition_filters) {
assert(p_index_builder != nullptr);
// Since after partition cut request from filter builder it takes time
@ -904,10 +892,6 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
rep_ = new Rep(sanitized_table_options, tbo, file);
if (rep_->filter_builder != nullptr) {
rep_->filter_builder->StartBlock(0);
}
TEST_SYNC_POINT_CALLBACK(
"BlockBasedTableBuilder::BlockBasedTableBuilder:PreSetupBaseCacheKey",
const_cast<TableProperties*>(&rep_->props));
@ -1094,9 +1078,6 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents,
WriteRawBlock(block_contents, type, handle, block_type, &raw_block_contents);
r->compressed_output.clear();
if (is_data_block) {
if (r->filter_builder != nullptr) {
r->filter_builder->StartBlock(r->get_offset());
}
r->props.data_size = r->get_offset();
++r->props.num_data_blocks;
}
@ -1385,9 +1366,6 @@ void BlockBasedTableBuilder::BGWorkWriteRawBlock() {
break;
}
if (r->filter_builder != nullptr) {
r->filter_builder->StartBlock(r->get_offset());
}
r->props.data_size = r->get_offset();
++r->props.num_data_blocks;
@ -1496,9 +1474,6 @@ Status BlockBasedTableBuilder::InsertBlockInCacheHelper(
case BlockType::kFilterPartitionIndex:
s = InsertBlockInCache<Block>(block_contents, handle, block_type);
break;
case BlockType::kDeprecatedFilter:
s = InsertBlockInCache<BlockContents>(block_contents, handle, block_type);
break;
case BlockType::kFilter:
s = InsertBlockInCache<ParsedFullFilterBlock>(block_contents, handle,
block_type);
@ -1568,7 +1543,6 @@ void BlockBasedTableBuilder::WriteFilterBlock(
return;
}
BlockHandle filter_block_handle;
bool is_block_based_filter = rep_->filter_builder->IsBlockBased();
bool is_partitioned_filter = rep_->table_options.partition_filters;
if (ok()) {
rep_->props.num_filter_entries +=
@ -1595,8 +1569,7 @@ void BlockBasedTableBuilder::WriteFilterBlock(
rep_->props.filter_size += filter_content.size();
BlockType btype = is_block_based_filter ? BlockType::kDeprecatedFilter
: is_partitioned_filter && /* last */ s.ok()
BlockType btype = is_partitioned_filter && /* last */ s.ok()
? BlockType::kFilterPartitionIndex
: BlockType::kFilter;
WriteRawBlock(filter_content, kNoCompression, &filter_block_handle, btype,
@ -1608,9 +1581,7 @@ void BlockBasedTableBuilder::WriteFilterBlock(
// Add mapping from "<filter_block_prefix>.Name" to location
// of filter data.
std::string key;
key = is_block_based_filter ? BlockBasedTable::kFilterBlockPrefix
: is_partitioned_filter
? BlockBasedTable::kPartitionedFilterBlockPrefix
key = is_partitioned_filter ? BlockBasedTable::kPartitionedFilterBlockPrefix
: BlockBasedTable::kFullFilterBlockPrefix;
key.append(rep_->table_options.filter_policy->CompatibilityName());
meta_index_builder->Add(key, filter_block_handle);
@ -2114,7 +2085,7 @@ const char* BlockBasedTableBuilder::GetFileChecksumFuncName() const {
}
}
const std::string BlockBasedTable::kFilterBlockPrefix = "filter.";
const std::string BlockBasedTable::kObsoleteFilterBlockPrefix = "filter.";
const std::string BlockBasedTable::kFullFilterBlockPrefix = "fullfilter.";
const std::string BlockBasedTable::kPartitionedFilterBlockPrefix =
"partitionedfilter.";

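The metaindex key written above is the filter-block prefix concatenated with the policy's `CompatibilityName()`; the obsolete "filter." prefix is now only recognized on read, never written. A hedged sketch of how the key is formed (the example compatibility name is an assumption):

```cpp
// Hedged sketch of the metaindex key for the filter block after this change.
// The example compatibility name is illustrative, not taken from the diff.
#include <string>

std::string FilterMetaIndexKey(bool is_partitioned_filter,
                               const std::string& compatibility_name) {
  std::string key = is_partitioned_filter
                        ? "partitionedfilter."  // kPartitionedFilterBlockPrefix
                        : "fullfilter.";        // kFullFilterBlockPrefix
  key.append(compatibility_name);  // e.g. a Bloom policy's CompatibilityName()
  return key;
}
```
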
@ -46,7 +46,6 @@
#include "rocksdb/trace_record.h"
#include "table/block_based/binary_search_index_reader.h"
#include "table/block_based/block.h"
#include "table/block_based/block_based_filter_block.h"
#include "table/block_based/block_based_table_factory.h"
#include "table/block_based/block_based_table_iterator.h"
#include "table/block_based/block_like_traits.h"
@ -196,7 +195,6 @@ void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type,
switch (block_type) {
case BlockType::kFilter:
case BlockType::kFilterPartitionIndex:
case BlockType::kDeprecatedFilter:
PERF_COUNTER_ADD(block_cache_filter_hit_count, 1);
if (get_context) {
@ -255,7 +253,6 @@ void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type,
switch (block_type) {
case BlockType::kFilter:
case BlockType::kFilterPartitionIndex:
case BlockType::kDeprecatedFilter:
if (get_context) {
++get_context->get_context_stats_.num_cache_filter_miss;
} else {
@ -312,7 +309,6 @@ void BlockBasedTable::UpdateCacheInsertionMetrics(
switch (block_type) {
case BlockType::kFilter:
case BlockType::kFilterPartitionIndex:
case BlockType::kDeprecatedFilter:
if (get_context) {
++get_context->get_context_stats_.num_cache_filter_add;
if (redundant) {
@ -954,7 +950,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
{std::make_pair(Rep::FilterType::kFullFilter, kFullFilterBlockPrefix),
std::make_pair(Rep::FilterType::kPartitionedFilter,
kPartitionedFilterBlockPrefix),
std::make_pair(Rep::FilterType::kBlockFilter, kFilterBlockPrefix)}) {
std::make_pair(Rep::FilterType::kNoFilter,
kObsoleteFilterBlockPrefix)}) {
if (builtin_compatible) {
// This code is only here to deal with a hiccup in early 7.0.x where
// there was an unintentional name change in the SST files metadata.
@ -967,7 +964,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
test::LegacyBloomFilterPolicy::kClassName(),
test::FastLocalBloomFilterPolicy::kClassName(),
test::Standard128RibbonFilterPolicy::kClassName(),
DeprecatedBlockBasedBloomFilterPolicy::kClassName(),
"rocksdb.internal.DeprecatedBlockBasedBloomFilter",
BloomFilterPolicy::kClassName(),
RibbonFilterPolicy::kClassName(),
};
@ -985,6 +982,13 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
Status s = rep_->filter_handle.DecodeFrom(&v);
if (s.ok()) {
rep_->filter_type = filter_type;
if (filter_type == Rep::FilterType::kNoFilter) {
ROCKS_LOG_WARN(rep_->ioptions.logger,
"Detected obsolete filter type in %s. Read "
"performance might suffer until DB is fully "
"re-compacted.",
rep_->file->file_name().c_str());
}
break;
}
}
@ -995,6 +999,13 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle)
.ok()) {
rep_->filter_type = filter_type;
if (filter_type == Rep::FilterType::kNoFilter) {
ROCKS_LOG_WARN(
rep_->ioptions.logger,
"Detected obsolete filter type in %s. Read performance might "
"suffer until DB is fully re-compacted.",
rep_->file->file_name().c_str());
}
break;
}
}
@ -1459,10 +1470,6 @@ std::unique_ptr<FilterBlockReader> BlockBasedTable::CreateFilterBlockReader(
return PartitionedFilterBlockReader::Create(
this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context);
case Rep::FilterType::kBlockFilter:
return BlockBasedFilterBlockReader::Create(
this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context);
case Rep::FilterType::kFullFilter:
return FullFilterBlockReader::Create(this, ro, prefetch_buffer, use_cache,
prefetch, pin, lookup_context);
@ -1608,7 +1615,6 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
break;
case BlockType::kFilter:
case BlockType::kFilterPartitionIndex:
case BlockType::kDeprecatedFilter:
++get_context->get_context_stats_.num_filter_read;
break;
default:
@ -1649,7 +1655,6 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
break;
case BlockType::kFilter:
case BlockType::kFilterPartitionIndex:
case BlockType::kDeprecatedFilter:
trace_block_type = TraceType::kBlockTraceFilterBlock;
break;
case BlockType::kCompressionDictionary:
@ -1765,7 +1770,6 @@ Status BlockBasedTable::RetrieveBlock(
break;
case BlockType::kFilter:
case BlockType::kFilterPartitionIndex:
case BlockType::kDeprecatedFilter:
++(get_context->get_context_stats_.num_filter_read);
break;
default:
@ -1889,75 +1893,16 @@ bool BlockBasedTable::PrefixMayMatch(
bool may_match = true;
// First, try check with full filter
FilterBlockReader* const filter = rep_->filter.get();
bool filter_checked = true;
bool filter_checked = false;
if (filter != nullptr) {
const bool no_io = read_options.read_tier == kBlockCacheTier;
if (!filter->IsBlockBased()) {
const Slice* const const_ikey_ptr = &internal_key;
may_match = filter->RangeMayExist(
read_options.iterate_upper_bound, user_key_without_ts,
prefix_extractor, rep_->internal_comparator.user_comparator(),
const_ikey_ptr, &filter_checked, need_upper_bound_check, no_io,
lookup_context);
} else {
// if prefix_extractor changed for block based filter, skip filter
if (need_upper_bound_check) {
return true;
}
auto prefix = prefix_extractor->Transform(user_key_without_ts);
InternalKey internal_key_prefix(prefix, kMaxSequenceNumber, kTypeValue);
auto internal_prefix = internal_key_prefix.Encode();
// To prevent any io operation in this method, we set `read_tier` to make
// sure we always read index or filter only when they have already been
// loaded to memory.
ReadOptions no_io_read_options;
no_io_read_options.read_tier = kBlockCacheTier;
// Then, try find it within each block
// we already know prefix_extractor and prefix_extractor_name must match
// because `CheckPrefixMayMatch` first checks `check_filter_ == true`
std::unique_ptr<InternalIteratorBase<IndexValue>> iiter(NewIndexIterator(
no_io_read_options,
/*need_upper_bound_check=*/false, /*input_iter=*/nullptr,
/*get_context=*/nullptr, lookup_context));
iiter->Seek(internal_prefix);
if (!iiter->Valid()) {
// we're past end of file
// if it's incomplete, it means that we avoided I/O
// and we're not really sure that we're past the end
// of the file
may_match = iiter->status().IsIncomplete();
} else if ((rep_->index_key_includes_seq ? ExtractUserKey(iiter->key())
: iiter->key())
.starts_with(ExtractUserKey(internal_prefix))) {
// we need to check for this subtle case because our only
// guarantee is that "the key is a string >= last key in that data
// block" according to the doc/table_format.txt spec.
//
// Suppose iiter->key() starts with the desired prefix; it is not
// necessarily the case that the corresponding data block will
// contain the prefix, since iiter->key() need not be in the
// block. However, the next data block may contain the prefix, so
// we return true to play it safe.
may_match = true;
} else if (filter->IsBlockBased()) {
// iiter->key() does NOT start with the desired prefix. Because
// Seek() finds the first key that is >= the seek target, this
// means that iiter->key() > prefix. Thus, any data blocks coming
// after the data block corresponding to iiter->key() cannot
// possibly contain the key. Thus, the corresponding data block
// is the only on could potentially contain the prefix.
BlockHandle handle = iiter->value().handle;
may_match = filter->PrefixMayMatch(
prefix, prefix_extractor, handle.offset(), no_io,
/*const_key_ptr=*/nullptr, /*get_context=*/nullptr, lookup_context);
}
}
read_options.iterate_upper_bound, user_key_without_ts, prefix_extractor,
rep_->internal_comparator.user_comparator(), const_ikey_ptr,
&filter_checked, need_upper_bound_check, no_io, lookup_context);
}
if (filter_checked) {
@ -2030,7 +1975,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
FilterBlockReader* filter, const Slice& internal_key, const bool no_io,
const SliceTransform* prefix_extractor, GetContext* get_context,
BlockCacheLookupContext* lookup_context) const {
if (filter == nullptr || filter->IsBlockBased()) {
if (filter == nullptr) {
return true;
}
Slice user_key = ExtractUserKey(internal_key);
@ -2039,15 +1984,13 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size();
Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz);
if (rep_->whole_key_filtering) {
may_match =
filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid,
no_io, const_ikey_ptr, get_context, lookup_context);
may_match = filter->KeyMayMatch(user_key_without_ts, no_io, const_ikey_ptr,
get_context, lookup_context);
} else if (!PrefixExtractorChanged(prefix_extractor) &&
prefix_extractor->InDomain(user_key_without_ts) &&
!filter->PrefixMayMatch(
prefix_extractor->Transform(user_key_without_ts),
prefix_extractor, kNotValid, no_io, const_ikey_ptr,
get_context, lookup_context)) {
prefix_extractor->Transform(user_key_without_ts), no_io,
const_ikey_ptr, get_context, lookup_context)) {
// FIXME ^^^: there should be no reason for Get() to depend on current
// prefix_extractor at all. It should always use table_prefix_extractor.
may_match = false;
@ -2063,14 +2006,13 @@ void BlockBasedTable::FullFilterKeysMayMatch(
FilterBlockReader* filter, MultiGetRange* range, const bool no_io,
const SliceTransform* prefix_extractor,
BlockCacheLookupContext* lookup_context) const {
if (filter == nullptr || filter->IsBlockBased()) {
if (filter == nullptr) {
return;
}
uint64_t before_keys = range->KeysLeft();
assert(before_keys > 0); // Caller should ensure
if (rep_->whole_key_filtering) {
filter->KeysMayMatch(range, prefix_extractor, kNotValid, no_io,
lookup_context);
filter->KeysMayMatch(range, no_io, lookup_context);
uint64_t after_keys = range->KeysLeft();
if (after_keys) {
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE, after_keys);
@ -2086,8 +2028,7 @@ void BlockBasedTable::FullFilterKeysMayMatch(
} else if (!PrefixExtractorChanged(prefix_extractor)) {
// FIXME ^^^: there should be no reason for MultiGet() to depend on current
// prefix_extractor at all. It should always use table_prefix_extractor.
filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false,
lookup_context);
filter->PrefixesMayMatch(range, prefix_extractor, false, lookup_context);
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys);
uint64_t after_keys = range->KeysLeft();
uint64_t filtered_keys = before_keys - after_keys;
@ -2152,22 +2093,6 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
IndexValue v = iiter->value();
bool not_exist_in_filter =
filter != nullptr && filter->IsBlockBased() == true &&
!filter->KeyMayMatch(ExtractUserKeyAndStripTimestamp(key, ts_sz),
prefix_extractor, v.handle.offset(), no_io,
/*const_ikey_ptr=*/nullptr, get_context,
&lookup_context);
if (not_exist_in_filter) {
// Not found
// TODO: think about interaction with Merge. If a user key cannot
// cross one data block, we should be fine.
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL);
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
break;
}
if (!v.first_internal_key.empty() && !skip_filters &&
UserComparatorWrapper(rep_->internal_comparator.user_comparator())
.CompareWithoutTimestamp(
@ -2272,7 +2197,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
break;
}
}
if (matched && filter != nullptr && !filter->IsBlockBased()) {
if (matched && filter != nullptr) {
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE);
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1,
rep_->level);
@ -2425,10 +2350,6 @@ Status BlockBasedTable::VerifyChecksumInBlocks(
BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName(
const Slice& meta_block_name) {
if (meta_block_name.starts_with(kFilterBlockPrefix)) {
return BlockType::kDeprecatedFilter;
}
if (meta_block_name.starts_with(kFullFilterBlockPrefix)) {
return BlockType::kFilter;
}
@ -2457,6 +2378,11 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName(
return BlockType::kHashIndexMetadata;
}
if (meta_block_name.starts_with(kObsoleteFilterBlockPrefix)) {
// Obsolete but possible in old files
return BlockType::kInvalid;
}
assert(false);
return BlockType::kInvalid;
}

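Note: since the new warning advises re-compaction, here is a hedged sketch (not part of the patch; the function name is assumed) of how a user could force that rewrite through the public API so old files stop carrying the obsolete block-based filter:

#include "rocksdb/db.h"
#include "rocksdb/options.h"

rocksdb::Status RewriteWithCurrentFilter(rocksdb::DB* db) {
  rocksdb::CompactRangeOptions cro;
  // Also rewrite files already at the bottommost level so every table file
  // gets a fresh full or partitioned filter block.
  cro.bottommost_level_compaction =
      rocksdb::BottommostLevelCompaction::kForceOptimized;
  return db->CompactRange(cro, /*begin=*/nullptr, /*end=*/nullptr);
}
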
@ -38,7 +38,6 @@ namespace ROCKSDB_NAMESPACE {
class Cache;
class FilterBlockReader;
class BlockBasedFilterBlockReader;
class FullFilterBlockReader;
class Footer;
class InternalKeyComparator;
@ -67,7 +66,7 @@ using KVPairBlock = std::vector<std::pair<std::string, std::string>>;
// loaded blocks in the memory.
class BlockBasedTable : public TableReader {
public:
static const std::string kFilterBlockPrefix;
static const std::string kObsoleteFilterBlockPrefix;
static const std::string kFullFilterBlockPrefix;
static const std::string kPartitionedFilterBlockPrefix;
@ -585,7 +584,6 @@ struct BlockBasedTable::Rep {
enum class FilterType {
kNoFilter,
kFullFilter,
kBlockFilter,
kPartitionedFilter,
};
FilterType filter_type;

@ -729,7 +729,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
iiter->Next();
} while (iiter->Valid());
if (matched && filter != nullptr && !filter->IsBlockBased()) {
if (matched && filter != nullptr) {
RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE);
PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1,
rep_->level);

@ -72,16 +72,11 @@ class BlocklikeTraits<BlockContents> {
return Status::OK();
}
static Cache::CacheItemHelper* GetCacheItemHelper(BlockType block_type) {
if (block_type == BlockType::kDeprecatedFilter) {
return GetCacheItemHelperForRole<
BlockContents, CacheEntryRole::kDeprecatedFilterBlock>();
} else {
static Cache::CacheItemHelper* GetCacheItemHelper(BlockType /*block_type*/) {
// E.g. compressed cache
return GetCacheItemHelperForRole<BlockContents,
CacheEntryRole::kOtherBlock>();
}
}
};
template <>

@ -20,7 +20,6 @@ enum class BlockType : uint8_t {
kData,
kFilter, // for second level partitioned filters and full filters
kFilterPartitionIndex, // for top-level index of filter partitions
kDeprecatedFilter, // for old, deprecated block-based filter
kProperties,
kCompressionDictionary,
kRangeDeletion,

@ -10,11 +10,6 @@
// A filter block is stored near the end of a Table file. It contains
// filters (e.g., bloom filters) for all data blocks in the table combined
// into a single filter block.
//
// It is a base class for BlockBasedFilter and FullFilter.
// These two are both used in BlockBasedTable. The first one contain filter
// For a part of keys in sst file, the second contain filter for all keys
// in sst file.
#pragma once
@ -44,12 +39,10 @@ using MultiGetRange = MultiGetContext::Range;
// A FilterBlockBuilder is used to construct all of the filters for a
// particular Table. It generates a single string which is stored as
// a special block in the Table.
// a special block in the Table, or partitioned into smaller filters.
//
// The sequence of calls to FilterBlockBuilder must match the regexp:
// (StartBlock Add*)* Finish
//
// BlockBased/Full FilterBlock would be called in the same way.
// Add* Finish
class FilterBlockBuilder {
public:
explicit FilterBlockBuilder() {}
@ -59,8 +52,6 @@ class FilterBlockBuilder {
virtual ~FilterBlockBuilder() {}
virtual bool IsBlockBased() = 0; // If is blockbased filter
virtual void StartBlock(uint64_t block_offset) = 0; // Start new block filter
virtual void Add(
const Slice& key_without_ts) = 0; // Add a key to current filter
virtual bool IsEmpty() const = 0; // Empty == none added
@ -108,8 +99,6 @@ class FilterBlockReader {
FilterBlockReader(const FilterBlockReader&) = delete;
FilterBlockReader& operator=(const FilterBlockReader&) = delete;
virtual bool IsBlockBased() = 0; // If is blockbased filter
/**
* If no_io is set, then it returns true if it cannot answer the query without
* reading data from disk. This is used in PartitionedFilterBlockReader to
@ -120,23 +109,19 @@ class FilterBlockReader {
* built upon InternalKey and must be provided via const_ikey_ptr when running
* queries.
*/
virtual bool KeyMayMatch(const Slice& key,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
virtual bool KeyMayMatch(const Slice& key, const bool no_io,
const Slice* const const_ikey_ptr,
GetContext* get_context,
BlockCacheLookupContext* lookup_context) = 0;
virtual void KeysMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
virtual void KeysMayMatch(MultiGetRange* range, const bool no_io,
BlockCacheLookupContext* lookup_context) {
for (auto iter = range->begin(); iter != range->end(); ++iter) {
const Slice ukey_without_ts = iter->ukey_without_ts;
const Slice ikey = iter->ikey;
GetContext* const get_context = iter->get_context;
if (!KeyMayMatch(ukey_without_ts, prefix_extractor, block_offset, no_io,
&ikey, get_context, lookup_context)) {
if (!KeyMayMatch(ukey_without_ts, no_io, &ikey, get_context,
lookup_context)) {
range->SkipKey(iter);
}
}
@ -145,25 +130,22 @@ class FilterBlockReader {
/**
* no_io and const_ikey_ptr here means the same as in KeyMayMatch
*/
virtual bool PrefixMayMatch(const Slice& prefix,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
virtual bool PrefixMayMatch(const Slice& prefix, const bool no_io,
const Slice* const const_ikey_ptr,
GetContext* get_context,
BlockCacheLookupContext* lookup_context) = 0;
virtual void PrefixesMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const bool no_io,
BlockCacheLookupContext* lookup_context) {
for (auto iter = range->begin(); iter != range->end(); ++iter) {
const Slice ukey_without_ts = iter->ukey_without_ts;
const Slice ikey = iter->ikey;
GetContext* const get_context = iter->get_context;
if (prefix_extractor->InDomain(ukey_without_ts) &&
!PrefixMayMatch(prefix_extractor->Transform(ukey_without_ts),
prefix_extractor, block_offset, no_io, &ikey,
get_context, lookup_context)) {
!PrefixMayMatch(prefix_extractor->Transform(ukey_without_ts), no_io,
&ikey, get_context, lookup_context)) {
range->SkipKey(iter);
}
}

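Note: the header above narrows the builder contract to "Add* Finish". A toy illustration of that call sequence (this is not the RocksDB interface; the class and its placeholder "filter" encoding are assumptions for the sketch):

#include <string>
#include <vector>

class ToyFilterBlockBuilder {
 public:
  void Add(const std::string& key_without_ts) { keys_.push_back(key_without_ts); }
  bool IsEmpty() const { return keys_.empty(); }
  std::string Finish() {
    // A real builder serializes filter bits here (and may partition them);
    // this placeholder just concatenates keys to show the call sequence.
    std::string payload;
    for (const auto& k : keys_) {
      payload += k;
      payload += '\0';
    }
    keys_.clear();
    return payload;
  }

 private:
  std::vector<std::string> keys_;
};

There is no per-data-block StartBlock() step anymore, which is also what let block_offset disappear from the reader-side queries.
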
@ -112,9 +112,8 @@ bool FilterBlockReaderCommon<TBlocklike>::RangeMayExist(
return true;
} else {
*filter_checked = true;
return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io,
const_ikey_ptr, /* get_context */ nullptr,
lookup_context);
return PrefixMayMatch(prefix, no_io, const_ikey_ptr,
/* get_context */ nullptr, lookup_context);
}
}

@ -24,7 +24,6 @@
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/slice.h"
#include "rocksdb/utilities/object_registry.h"
#include "table/block_based/block_based_filter_block.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/filter_policy_internal.h"
#include "table/block_based/full_filter_block.h"
@ -1375,87 +1374,10 @@ const char* ReadOnlyBuiltinFilterPolicy::kClassName() {
return kBuiltinFilterMetadataName;
}
const char* DeprecatedBlockBasedBloomFilterPolicy::kClassName() {
return "rocksdb.internal.DeprecatedBlockBasedBloomFilter";
}
std::string BloomLikeFilterPolicy::GetId() const {
return Name() + GetBitsPerKeySuffix();
}
DeprecatedBlockBasedBloomFilterPolicy::DeprecatedBlockBasedBloomFilterPolicy(
double bits_per_key)
: BloomLikeFilterPolicy(bits_per_key) {}
FilterBitsBuilder* DeprecatedBlockBasedBloomFilterPolicy::GetBuilderWithContext(
const FilterBuildingContext&) const {
if (GetWholeBitsPerKey() == 0) {
// "No filter" special case
return nullptr;
}
// Internal contract: returns a new fake builder that encodes bits per key
// into a special value from EstimateEntriesAdded()
struct B : public FilterBitsBuilder {
explicit B(int bits_per_key) : est(kSecretBitsPerKeyStart + bits_per_key) {}
size_t est;
size_t EstimateEntriesAdded() override { return est; }
void AddKey(const Slice&) override {}
using FilterBitsBuilder::Finish; // FIXME
Slice Finish(std::unique_ptr<const char[]>*) override { return Slice(); }
size_t ApproximateNumEntries(size_t) override { return 0; }
};
return new B(GetWholeBitsPerKey());
}
void DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(const Slice* keys,
int n,
int bits_per_key,
std::string* dst) {
// Compute bloom filter size (in both bits and bytes)
uint32_t bits = static_cast<uint32_t>(n * bits_per_key);
// For small n, we can see a very high false positive rate. Fix it
// by enforcing a minimum bloom filter length.
if (bits < 64) bits = 64;
uint32_t bytes = (bits + 7) / 8;
bits = bytes * 8;
int num_probes = LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key);
const size_t init_size = dst->size();
dst->resize(init_size + bytes, 0);
dst->push_back(static_cast<char>(num_probes)); // Remember # of probes
char* array = &(*dst)[init_size];
for (int i = 0; i < n; i++) {
LegacyNoLocalityBloomImpl::AddHash(BloomHash(keys[i]), bits, num_probes,
array);
}
}
bool DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(
const Slice& key, const Slice& bloom_filter) {
const size_t len = bloom_filter.size();
if (len < 2 || len > 0xffffffffU) {
return false;
}
const char* array = bloom_filter.data();
const uint32_t bits = static_cast<uint32_t>(len - 1) * 8;
// Use the encoded k so that we can read filters generated by
// bloom filters created using different parameters.
const int k = static_cast<uint8_t>(array[len - 1]);
if (k > 30) {
// Reserved for potentially new encodings for short bloom filters.
// Consider it a match.
return true;
}
// NB: using stored k not num_probes for whole_bits_per_key_
return LegacyNoLocalityBloomImpl::HashMayMatch(BloomHash(key), bits, k,
array);
}
BloomFilterPolicy::BloomFilterPolicy(double bits_per_key)
: BloomLikeFilterPolicy(bits_per_key) {}
@ -1876,9 +1798,6 @@ std::shared_ptr<const FilterPolicy> BloomLikeFilterPolicy::Create(
return std::make_shared<test::FastLocalBloomFilterPolicy>(bits_per_key);
} else if (name == test::Standard128RibbonFilterPolicy::kClassName()) {
return std::make_shared<test::Standard128RibbonFilterPolicy>(bits_per_key);
} else if (name == DeprecatedBlockBasedBloomFilterPolicy::kClassName()) {
return std::make_shared<DeprecatedBlockBasedBloomFilterPolicy>(
bits_per_key);
} else if (name == BloomFilterPolicy::kClassName()) {
// For testing
return std::make_shared<BloomFilterPolicy>(bits_per_key);
@ -1996,15 +1915,6 @@ static int RegisterBuiltinFilterPolicies(ObjectLibrary& library,
uri));
return guard->get();
});
library.AddFactory<const FilterPolicy>(
FilterPatternEntryWithBits(
DeprecatedBlockBasedBloomFilterPolicy::kClassName()),
[](const std::string& uri, std::unique_ptr<const FilterPolicy>* guard,
std::string* /* errmsg */) {
guard->reset(NewBuiltinFilterPolicyWithBits<
DeprecatedBlockBasedBloomFilterPolicy>(uri));
return guard->get();
});
size_t num_types;
return static_cast<int>(library.GetFactoryCount(&num_types));
}
@ -2055,7 +1965,6 @@ const std::vector<std::string>& BloomLikeFilterPolicy::GetAllFixedImpls() {
STATIC_AVOID_DESTRUCTION(std::vector<std::string>, impls){
// Match filter_bench -impl=x ordering
test::LegacyBloomFilterPolicy::kClassName(),
DeprecatedBlockBasedBloomFilterPolicy::kClassName(),
test::FastLocalBloomFilterPolicy::kClassName(),
test::Standard128RibbonFilterPolicy::kClassName(),
};

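Note: with the registration above removed, "rocksdb.internal.DeprecatedBlockBasedBloomFilter:N" no longer resolves. A sketch (wrapper name assumed) of constructing the still-supported builtin policies from a string via the public API:

#include <memory>
#include <string>

#include "rocksdb/convenience.h"
#include "rocksdb/filter_policy.h"

rocksdb::Status MakePolicyFromString(
    const std::string& spec,  // e.g. "bloomfilter:10" or "ribbonfilter:10"
    std::shared_ptr<const rocksdb::FilterPolicy>* policy) {
  rocksdb::ConfigOptions config_options;
  return rocksdb::FilterPolicy::CreateFromString(config_options, spec, policy);
}
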
@ -131,7 +131,7 @@ class BuiltinFilterPolicy : public FilterPolicy {
// Read metadata to determine what kind of FilterBitsReader is needed
// and return a new one. This must successfully process any filter data
// generated by a built-in FilterBitsBuilder, regardless of the impl
// chosen for this BloomFilterPolicy. Not compatible with CreateFilter.
// chosen for this BloomFilterPolicy.
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;
static const char* kClassName();
bool IsInstanceOf(const std::string& id) const override;
@ -151,12 +151,8 @@ class BuiltinFilterPolicy : public FilterPolicy {
// (An internal convenience function to save boilerplate.)
static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&);
protected:
// Deprecated block-based filter only (no longer in public API)
bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const;
private:
// For Bloom filter implementation(s) (except deprecated block-based filter)
// For Bloom filter implementation(s)
static BuiltinFilterBitsReader* GetBloomBitsReader(const Slice& contents);
// For Ribbon filter implementation(s)
@ -300,30 +296,6 @@ class RibbonFilterPolicy : public BloomLikeFilterPolicy {
const int bloom_before_level_;
};
// Deprecated block-based filter only. We still support reading old
// block-based filters from any BuiltinFilterPolicy, but there is no public
// option to build them. However, this class is used to build them for testing
// and for a public backdoor to building them by constructing this policy from
// a string.
class DeprecatedBlockBasedBloomFilterPolicy : public BloomLikeFilterPolicy {
public:
explicit DeprecatedBlockBasedBloomFilterPolicy(double bits_per_key);
// Internal contract: returns a new fake builder that encodes bits per key
// into a special value from EstimateEntriesAdded(), using
// kSecretBitsPerKeyStart
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const override;
static constexpr size_t kSecretBitsPerKeyStart = 1234567890U;
static const char* kClassName();
const char* Name() const override { return kClassName(); }
static void CreateFilter(const Slice* keys, int n, int bits_per_key,
std::string* dst);
static bool KeyMayMatch(const Slice& key, const Slice& bloom_filter);
};
// For testing only, but always constructable with internal names
namespace test {

@ -125,14 +125,8 @@ FullFilterBlockReader::FullFilterBlockReader(
}
bool FullFilterBlockReader::KeyMayMatch(
const Slice& key, const SliceTransform* /*prefix_extractor*/,
uint64_t block_offset, const bool no_io,
const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
BlockCacheLookupContext* lookup_context) {
#ifdef NDEBUG
(void)block_offset;
#endif
assert(block_offset == kNotValid);
const Slice& key, const bool no_io, const Slice* const /*const_ikey_ptr*/,
GetContext* get_context, BlockCacheLookupContext* lookup_context) {
if (!whole_key_filtering()) {
return true;
}
@ -167,14 +161,9 @@ std::unique_ptr<FilterBlockReader> FullFilterBlockReader::Create(
}
bool FullFilterBlockReader::PrefixMayMatch(
const Slice& prefix, const SliceTransform* /* prefix_extractor */,
uint64_t block_offset, const bool no_io,
const Slice& prefix, const bool no_io,
const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
BlockCacheLookupContext* lookup_context) {
#ifdef NDEBUG
(void)block_offset;
#endif
assert(block_offset == kNotValid);
return MayMatch(prefix, no_io, get_context, lookup_context);
}
@ -204,17 +193,12 @@ bool FullFilterBlockReader::MayMatch(
return false;
}
}
return true; // remain the same with block_based filter
return true;
}
void FullFilterBlockReader::KeysMayMatch(
MultiGetRange* range, const SliceTransform* /*prefix_extractor*/,
uint64_t block_offset, const bool no_io,
MultiGetRange* range, const bool no_io,
BlockCacheLookupContext* lookup_context) {
#ifdef NDEBUG
(void)block_offset;
#endif
assert(block_offset == kNotValid);
if (!whole_key_filtering()) {
// Simply return. Don't skip any key - consider all keys as likely to be
// present
@ -225,12 +209,7 @@ void FullFilterBlockReader::KeysMayMatch(
void FullFilterBlockReader::PrefixesMayMatch(
MultiGetRange* range, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
BlockCacheLookupContext* lookup_context) {
#ifdef NDEBUG
(void)block_offset;
#endif
assert(block_offset == kNotValid);
const bool no_io, BlockCacheLookupContext* lookup_context) {
MayMatch(range, no_io, prefix_extractor, lookup_context);
}

@ -49,8 +49,6 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
// directly. and be deleted here
~FullFilterBlockBuilder() {}
virtual bool IsBlockBased() override { return false; }
virtual void StartBlock(uint64_t /*block_offset*/) override {}
virtual void Add(const Slice& key_without_ts) override;
virtual bool IsEmpty() const override { return !any_added_; }
virtual size_t EstimateEntriesAdded() override;
@ -107,28 +105,28 @@ class FullFilterBlockReader
FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
bool pin, BlockCacheLookupContext* lookup_context);
bool IsBlockBased() override { return false; }
bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
bool KeyMayMatch(const Slice& key, const bool no_io,
const Slice* const const_ikey_ptr, GetContext* get_context,
BlockCacheLookupContext* lookup_context) override;
bool PrefixMayMatch(const Slice& prefix,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
bool PrefixMayMatch(const Slice& prefix, const bool no_io,
const Slice* const const_ikey_ptr,
GetContext* get_context,
BlockCacheLookupContext* lookup_context) override;
void KeysMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
void KeysMayMatch(MultiGetRange* range, const bool no_io,
BlockCacheLookupContext* lookup_context) override;
// Used in partitioned filter code
void KeysMayMatch2(MultiGetRange* range,
const SliceTransform* /*prefix_extractor*/,
const bool no_io,
BlockCacheLookupContext* lookup_context) {
KeysMayMatch(range, no_io, lookup_context);
}
void PrefixesMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const bool no_io,
BlockCacheLookupContext* lookup_context) override;
size_t ApproximateMemoryUsage() const override;
private:

@ -115,8 +115,7 @@ TEST_F(PluginFullFilterBlockTest, PluginEmptyBuilder) {
FullFilterBlockReader reader(table_.get(), std::move(block));
// Remain same semantics as blockbased filter
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("foo",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
@ -137,38 +136,33 @@ TEST_F(PluginFullFilterBlockTest, PluginSingleChunk) {
nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */);
FullFilterBlockReader reader(table_.get(), std::move(block));
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("foo",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("bar",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("box",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("hello",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("foo",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
ASSERT_TRUE(!reader.KeyMayMatch("missing",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
ASSERT_TRUE(!reader.KeyMayMatch("other",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
@ -191,8 +185,7 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) {
FullFilterBlockReader reader(table_.get(), std::move(block));
// Remain same semantics as blockbased filter
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("foo",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
@ -292,38 +285,33 @@ TEST_F(FullFilterBlockTest, SingleChunk) {
nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */);
FullFilterBlockReader reader(table_.get(), std::move(block));
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("foo",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("bar",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("box",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("hello",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
ASSERT_TRUE(reader.KeyMayMatch("foo",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
ASSERT_TRUE(!reader.KeyMayMatch("missing",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
ASSERT_TRUE(!reader.KeyMayMatch("other",
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}

@ -7,7 +7,6 @@
#include <memory>
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_filter_block.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/filter_policy_internal.h"

@ -216,58 +216,41 @@ std::unique_ptr<FilterBlockReader> PartitionedFilterBlockReader::Create(
}
bool PartitionedFilterBlockReader::KeyMayMatch(
const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr,
const Slice& key, const bool no_io, const Slice* const const_ikey_ptr,
GetContext* get_context, BlockCacheLookupContext* lookup_context) {
assert(const_ikey_ptr != nullptr);
assert(block_offset == kNotValid);
if (!whole_key_filtering()) {
return true;
}
return MayMatch(key, prefix_extractor, block_offset, no_io, const_ikey_ptr,
get_context, lookup_context,
return MayMatch(key, no_io, const_ikey_ptr, get_context, lookup_context,
&FullFilterBlockReader::KeyMayMatch);
}
void PartitionedFilterBlockReader::KeysMayMatch(
MultiGetRange* range, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
MultiGetRange* range, const bool no_io,
BlockCacheLookupContext* lookup_context) {
assert(block_offset == kNotValid);
if (!whole_key_filtering()) {
return; // Any/all may match
}
MayMatch(range, prefix_extractor, block_offset, no_io, lookup_context,
&FullFilterBlockReader::KeysMayMatch);
MayMatch(range, nullptr, no_io, lookup_context,
&FullFilterBlockReader::KeysMayMatch2);
}
bool PartitionedFilterBlockReader::PrefixMayMatch(
const Slice& prefix, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr,
const Slice& prefix, const bool no_io, const Slice* const const_ikey_ptr,
GetContext* get_context, BlockCacheLookupContext* lookup_context) {
assert(const_ikey_ptr != nullptr);
assert(block_offset == kNotValid);
if (!table_prefix_extractor() && !prefix_extractor) {
return true;
}
return MayMatch(prefix, prefix_extractor, block_offset, no_io, const_ikey_ptr,
get_context, lookup_context,
return MayMatch(prefix, no_io, const_ikey_ptr, get_context, lookup_context,
&FullFilterBlockReader::PrefixMayMatch);
}
void PartitionedFilterBlockReader::PrefixesMayMatch(
MultiGetRange* range, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
BlockCacheLookupContext* lookup_context) {
assert(block_offset == kNotValid);
if (!table_prefix_extractor() && !prefix_extractor) {
return; // Any/all may match
}
MayMatch(range, prefix_extractor, block_offset, no_io, lookup_context,
const bool no_io, BlockCacheLookupContext* lookup_context) {
assert(prefix_extractor);
MayMatch(range, prefix_extractor, no_io, lookup_context,
&FullFilterBlockReader::PrefixesMayMatch);
}
@ -331,8 +314,7 @@ Status PartitionedFilterBlockReader::GetFilterPartitionBlock(
}
bool PartitionedFilterBlockReader::MayMatch(
const Slice& slice, const SliceTransform* prefix_extractor,
uint64_t block_offset, bool no_io, const Slice* const_ikey_ptr,
const Slice& slice, bool no_io, const Slice* const_ikey_ptr,
GetContext* get_context, BlockCacheLookupContext* lookup_context,
FilterFunction filter_function) const {
CachableEntry<Block> filter_block;
@ -364,14 +346,13 @@ bool PartitionedFilterBlockReader::MayMatch(
FullFilterBlockReader filter_partition(table(),
std::move(filter_partition_block));
return (filter_partition.*filter_function)(
slice, prefix_extractor, block_offset, no_io, const_ikey_ptr, get_context,
lookup_context);
return (filter_partition.*filter_function)(slice, no_io, const_ikey_ptr,
get_context, lookup_context);
}
void PartitionedFilterBlockReader::MayMatch(
MultiGetRange* range, const SliceTransform* prefix_extractor,
uint64_t block_offset, bool no_io, BlockCacheLookupContext* lookup_context,
MultiGetRange* range, const SliceTransform* prefix_extractor, bool no_io,
BlockCacheLookupContext* lookup_context,
FilterManyFunction filter_function) const {
CachableEntry<Block> filter_block;
Status s =
@ -399,9 +380,8 @@ void PartitionedFilterBlockReader::MayMatch(
if (!prev_filter_handle.IsNull() &&
this_filter_handle != prev_filter_handle) {
MultiGetRange subrange(*range, start_iter_same_handle, iter);
MayMatchPartition(&subrange, prefix_extractor, block_offset,
prev_filter_handle, no_io, lookup_context,
filter_function);
MayMatchPartition(&subrange, prefix_extractor, prev_filter_handle, no_io,
lookup_context, filter_function);
range->AddSkipsFrom(subrange);
start_iter_same_handle = iter;
}
@ -416,16 +396,15 @@ void PartitionedFilterBlockReader::MayMatch(
}
if (!prev_filter_handle.IsNull()) {
MultiGetRange subrange(*range, start_iter_same_handle, range->end());
MayMatchPartition(&subrange, prefix_extractor, block_offset,
prev_filter_handle, no_io, lookup_context,
filter_function);
MayMatchPartition(&subrange, prefix_extractor, prev_filter_handle, no_io,
lookup_context, filter_function);
range->AddSkipsFrom(subrange);
}
}
void PartitionedFilterBlockReader::MayMatchPartition(
MultiGetRange* range, const SliceTransform* prefix_extractor,
uint64_t block_offset, BlockHandle filter_handle, bool no_io,
BlockHandle filter_handle, bool no_io,
BlockCacheLookupContext* lookup_context,
FilterManyFunction filter_function) const {
CachableEntry<ParsedFullFilterBlock> filter_partition_block;
@ -439,8 +418,8 @@ void PartitionedFilterBlockReader::MayMatchPartition(
FullFilterBlockReader filter_partition(table(),
std::move(filter_partition_block));
(filter_partition.*filter_function)(range, prefix_extractor, block_offset,
no_io, lookup_context);
(filter_partition.*filter_function)(range, prefix_extractor, no_io,
lookup_context);
}
size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const {

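Note: MayMatch()/MayMatchPartition() above dispatch through a pointer-to-member (FilterFunction / FilterManyFunction) onto a per-partition FullFilterBlockReader. A generic, self-contained illustration of that idiom (all names below are assumptions, not RocksDB code):

#include <iostream>

struct ToyPartitionReader {
  bool KeyMayMatch(int key) const { return key % 2 == 0; }
  bool PrefixMayMatch(int key) const { return key % 3 == 0; }
};

using ToyFilterFn = bool (ToyPartitionReader::*)(int) const;

bool Dispatch(const ToyPartitionReader& reader, ToyFilterFn fn, int key) {
  // Same shape as (filter_partition.*filter_function)(...) above.
  return (reader.*fn)(key);
}

int main() {
  ToyPartitionReader reader;
  std::cout << Dispatch(reader, &ToyPartitionReader::KeyMayMatch, 4) << "\n";    // 1
  std::cout << Dispatch(reader, &ToyPartitionReader::PrefixMayMatch, 4) << "\n"; // 0
  return 0;
}

This is also why KeysMayMatch2 exists on FullFilterBlockReader: it keeps a signature compatible with FilterManyFunction while ignoring the unused prefix_extractor parameter.
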
@ -109,25 +109,19 @@ class PartitionedFilterBlockReader : public FilterBlockReaderCommon<Block> {
FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
bool pin, BlockCacheLookupContext* lookup_context);
bool IsBlockBased() override { return false; }
bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
bool KeyMayMatch(const Slice& key, const bool no_io,
const Slice* const const_ikey_ptr, GetContext* get_context,
BlockCacheLookupContext* lookup_context) override;
void KeysMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
void KeysMayMatch(MultiGetRange* range, const bool no_io,
BlockCacheLookupContext* lookup_context) override;
bool PrefixMayMatch(const Slice& prefix,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
bool PrefixMayMatch(const Slice& prefix, const bool no_io,
const Slice* const const_ikey_ptr,
GetContext* get_context,
BlockCacheLookupContext* lookup_context) override;
void PrefixesMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const bool no_io,
BlockCacheLookupContext* lookup_context) override;
size_t ApproximateMemoryUsage() const override;
@ -142,27 +136,22 @@ class PartitionedFilterBlockReader : public FilterBlockReaderCommon<Block> {
CachableEntry<ParsedFullFilterBlock>* filter_block) const;
using FilterFunction = bool (FullFilterBlockReader::*)(
const Slice& slice, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr, GetContext* get_context,
BlockCacheLookupContext* lookup_context);
bool MayMatch(const Slice& slice, const SliceTransform* prefix_extractor,
uint64_t block_offset, bool no_io, const Slice* const_ikey_ptr,
const Slice& slice, const bool no_io, const Slice* const const_ikey_ptr,
GetContext* get_context, BlockCacheLookupContext* lookup_context);
bool MayMatch(const Slice& slice, bool no_io, const Slice* const_ikey_ptr,
GetContext* get_context,
BlockCacheLookupContext* lookup_context,
FilterFunction filter_function) const;
using FilterManyFunction = void (FullFilterBlockReader::*)(
MultiGetRange* range, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
BlockCacheLookupContext* lookup_context);
const bool no_io, BlockCacheLookupContext* lookup_context);
void MayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor,
uint64_t block_offset, bool no_io,
BlockCacheLookupContext* lookup_context,
bool no_io, BlockCacheLookupContext* lookup_context,
FilterManyFunction filter_function) const;
void MayMatchPartition(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, BlockHandle filter_handle,
bool no_io, BlockCacheLookupContext* lookup_context,
BlockHandle filter_handle, bool no_io,
BlockCacheLookupContext* lookup_context,
FilterManyFunction filter_function) const;
Status CacheDependencies(const ReadOptions& ro, bool pin) override;

@ -161,8 +161,7 @@ class PartitionedFilterBlockTest
}
void VerifyReader(PartitionedFilterBlockBuilder* builder,
PartitionedIndexBuilder* pib, bool empty = false,
const SliceTransform* prefix_extractor = nullptr) {
PartitionedIndexBuilder* pib, bool empty = false) {
std::unique_ptr<PartitionedFilterBlockReader> reader(
NewReader(builder, pib));
// Querying added keys
@ -170,31 +169,31 @@ class PartitionedFilterBlockTest
for (auto key : keys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io,
&ikey_slice, /*get_context=*/nullptr,
ASSERT_TRUE(reader->KeyMayMatch(key, !no_io, &ikey_slice,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
{
// querying a key twice
auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->KeyMayMatch(
keys[0], prefix_extractor, kNotValid, !no_io, &ikey_slice,
/*get_context=*/nullptr, /*lookup_context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(keys[0], !no_io, &ikey_slice,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
// querying missing keys
for (auto key : missing_keys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
if (empty) {
ASSERT_TRUE(reader->KeyMayMatch(
key, prefix_extractor, kNotValid, !no_io, &ikey_slice,
/*get_context=*/nullptr, /*lookup_context=*/nullptr));
ASSERT_TRUE(reader->KeyMayMatch(key, !no_io, &ikey_slice,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
} else {
// assuming a good hash function
ASSERT_FALSE(reader->KeyMayMatch(
key, prefix_extractor, kNotValid, !no_io, &ikey_slice,
/*get_context=*/nullptr, /*lookup_context=*/nullptr));
ASSERT_FALSE(reader->KeyMayMatch(key, !no_io, &ikey_slice,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
}
}
@ -343,9 +342,9 @@ TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
for (auto key : pkeys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->PrefixMayMatch(
prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid,
/*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key),
/*no_io=*/false, &ikey_slice,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
// Non-existent keys but with the same prefix
@ -353,9 +352,9 @@ TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
for (auto key : pnonkeys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->PrefixMayMatch(
prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid,
/*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key),
/*no_io=*/false, &ikey_slice,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
}
@ -391,9 +390,9 @@ TEST_P(PartitionedFilterBlockTest, PrefixInWrongPartitionBug) {
auto prefix = prefix_extractor->Transform(key);
auto ikey = InternalKey(prefix, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->PrefixMayMatch(
prefix, prefix_extractor.get(), kNotValid,
/*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
ASSERT_TRUE(reader->PrefixMayMatch(prefix,
/*no_io=*/false, &ikey_slice,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
}

@ -285,7 +285,6 @@ IOStatus BlockFetcher::ReadBlockContents() {
switch (block_type_) {
case BlockType::kFilter:
case BlockType::kFilterPartitionIndex:
case BlockType::kDeprecatedFilter:
PERF_COUNTER_ADD(filter_block_read_count, 1);
break;

@ -3524,11 +3524,10 @@ TEST_P(BlockBasedTableTest, InvalidOptions) {
}
TEST_P(BlockBasedTableTest, BlockReadCountTest) {
// bloom_filter_type = 0 -- block-based filter (not available in public API)
// bloom_filter_type = 1 -- full filter using use_block_based_builder=false
// bloom_filter_type = 2 -- full filter using use_block_based_builder=true
// because of API change to hide block-based filter
for (int bloom_filter_type = 0; bloom_filter_type <= 2; ++bloom_filter_type) {
for (int bloom_filter_type = 1; bloom_filter_type <= 2; ++bloom_filter_type) {
for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2;
++index_and_filter_in_cache) {
Options options;
@ -3537,22 +3536,8 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) {
BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
table_options.block_cache = NewLRUCache(1, 0);
table_options.cache_index_and_filter_blocks = index_and_filter_in_cache;
if (bloom_filter_type == 0) {
#ifndef ROCKSDB_LITE
// Use back-door way of enabling obsolete block-based Bloom
ASSERT_OK(FilterPolicy::CreateFromString(
ConfigOptions(),
"rocksdb.internal.DeprecatedBlockBasedBloomFilter:10",
&table_options.filter_policy));
#else
// Skip this case in LITE build
continue;
#endif
} else {
// Public API
table_options.filter_policy.reset(
NewBloomFilterPolicy(10, bloom_filter_type == 2));
}
options.table_factory.reset(new BlockBasedTableFactory(table_options));
std::vector<std::string> keys;
stl_wrappers::KVMap kvmap;

@ -1605,9 +1605,6 @@ DEFINE_double(cuckoo_hash_ratio, 0.9, "Hash ratio for Cuckoo SST table.");
DEFINE_bool(use_hash_search, false, "if use kHashSearch "
"instead of kBinarySearch. "
"This is valid if only we use BlockTable");
DEFINE_bool(use_block_based_filter, false, "if use kBlockBasedFilter "
"instead of kFullFilter for filter block. "
"This is valid if only we use BlockTable");
DEFINE_string(merge_operator, "", "The merge operator to use with the database."
"If a new merge operator is specified, be sure to use fresh"
" database The possible merge operators are defined in"
@ -4525,19 +4522,6 @@ class Benchmark {
table_options->filter_policy = BlockBasedTableOptions().filter_policy;
} else if (FLAGS_bloom_bits == 0) {
table_options->filter_policy.reset();
} else if (FLAGS_use_block_based_filter) {
// Use back-door way of enabling obsolete block-based Bloom
Status s = FilterPolicy::CreateFromString(
ConfigOptions(),
"rocksdb.internal.DeprecatedBlockBasedBloomFilter:" +
std::to_string(FLAGS_bloom_bits),
&table_options->filter_policy);
if (!s.ok()) {
fprintf(stderr,
"failure creating obsolete block-based filter: %s\n",
s.ToString().c_str());
exit(1);
}
} else {
table_options->filter_policy.reset(
FLAGS_use_ribbon_filter ? NewRibbonFilterPolicy(FLAGS_bloom_bits)

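Note: with the benchmark's back door gone, builtin filters are configured only through the public API. A hedged sketch (function name assumed) of setting up a full or partitioned filter:

#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

rocksdb::Options MakeOptionsWithFilter(double bits_per_key,
                                       bool partition_filters) {
  rocksdb::BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(bits_per_key));
  bbto.partition_filters = partition_filters;
  if (partition_filters) {
    // Partitioned filters require the two-level index format.
    bbto.index_type =
        rocksdb::BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
  }
  rocksdb::Options options;
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbto));
  return options;
}
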
@ -119,7 +119,6 @@ default_params = {
"use_merge": lambda: random.randint(0, 1),
# 999 -> use Bloom API
"ribbon_starting_level": lambda: random.choice([random.randint(-1, 10), 999]),
"use_block_based_filter": lambda: random.randint(0, 1),
"value_size_mult": 32,
"verify_checksum": 1,
"write_buffer_size": 4 * 1024 * 1024,
@ -358,7 +357,6 @@ ts_params = {
"use_blob_db": 0,
"enable_compaction_filter": 0,
"ingest_external_file_one_in": 0,
"use_block_based_filter": 0,
}
multiops_txn_default_params = {
@ -497,10 +495,6 @@ def finalize_and_sanitize(src_params):
if dest_params["partition_filters"] == 1:
if dest_params["index_type"] != 2:
dest_params["partition_filters"] = 0
else:
dest_params["use_block_based_filter"] = 0
if dest_params["ribbon_starting_level"] < 999:
dest_params["use_block_based_filter"] = 0
if dest_params.get("atomic_flush", 0) == 1:
# disable pipelined write when atomic flush is used.
dest_params["enable_pipelined_write"] = 0

@ -69,207 +69,6 @@ static int NextLength(int length) {
return length;
}
class BlockBasedBloomTest : public testing::Test {
private:
std::unique_ptr<const DeprecatedBlockBasedBloomFilterPolicy> policy_;
std::string filter_;
std::vector<std::string> keys_;
public:
BlockBasedBloomTest() { ResetPolicy(); }
void Reset() {
keys_.clear();
filter_.clear();
}
void ResetPolicy(double bits_per_key) {
policy_.reset(new DeprecatedBlockBasedBloomFilterPolicy(bits_per_key));
Reset();
}
void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
void Add(const Slice& s) {
keys_.push_back(s.ToString());
}
void Build() {
std::vector<Slice> key_slices;
for (size_t i = 0; i < keys_.size(); i++) {
key_slices.push_back(Slice(keys_[i]));
}
filter_.clear();
DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(
&key_slices[0], static_cast<int>(key_slices.size()),
policy_->GetWholeBitsPerKey(), &filter_);
keys_.clear();
if (kVerbose >= 2) DumpFilter();
}
size_t FilterSize() const {
return filter_.size();
}
Slice FilterData() const { return Slice(filter_); }
void DumpFilter() {
fprintf(stderr, "F(");
for (size_t i = 0; i+1 < filter_.size(); i++) {
const unsigned int c = static_cast<unsigned int>(filter_[i]);
for (int j = 0; j < 8; j++) {
fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
}
}
fprintf(stderr, ")\n");
}
bool Matches(const Slice& s) {
if (!keys_.empty()) {
Build();
}
return DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(s, filter_);
}
double FalsePositiveRate() {
char buffer[sizeof(int)];
int result = 0;
for (int i = 0; i < 10000; i++) {
if (Matches(Key(i + 1000000000, buffer))) {
result++;
}
}
return result / 10000.0;
}
};
TEST_F(BlockBasedBloomTest, EmptyFilter) {
ASSERT_TRUE(! Matches("hello"));
ASSERT_TRUE(! Matches("world"));
}
TEST_F(BlockBasedBloomTest, Small) {
Add("hello");
Add("world");
ASSERT_TRUE(Matches("hello"));
ASSERT_TRUE(Matches("world"));
ASSERT_TRUE(! Matches("x"));
ASSERT_TRUE(! Matches("foo"));
}
TEST_F(BlockBasedBloomTest, VaryingLengths) {
char buffer[sizeof(int)];
// Count number of filters that significantly exceed the false positive rate
int mediocre_filters = 0;
int good_filters = 0;
for (int length = 1; length <= 10000; length = NextLength(length)) {
Reset();
for (int i = 0; i < length; i++) {
Add(Key(i, buffer));
}
Build();
ASSERT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) + 40))
<< length;
// All added keys must match
for (int i = 0; i < length; i++) {
ASSERT_TRUE(Matches(Key(i, buffer)))
<< "Length " << length << "; key " << i;
}
// Check false positive rate
double rate = FalsePositiveRate();
if (kVerbose >= 1) {
fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
rate*100.0, length, static_cast<int>(FilterSize()));
}
if (FLAGS_bits_per_key == 10) {
ASSERT_LE(rate, 0.02); // Must not be over 2%
if (rate > 0.0125) {
mediocre_filters++; // Allowed, but not too often
} else {
good_filters++;
}
}
}
if (FLAGS_bits_per_key == 10 && kVerbose >= 1) {
fprintf(stderr, "Filters: %d good, %d mediocre\n",
good_filters, mediocre_filters);
}
ASSERT_LE(mediocre_filters, good_filters/5);
}
// Ensure the implementation doesn't accidentally change in an
// incompatible way
TEST_F(BlockBasedBloomTest, Schema) {
char buffer[sizeof(int)];
ResetPolicy(8); // num_probes = 5
for (int key = 0; key < 87; key++) {
Add(Key(key, buffer));
}
Build();
ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
ResetPolicy(9); // num_probes = 6
for (int key = 0; key < 87; key++) {
Add(Key(key, buffer));
}
Build();
ASSERT_EQ(BloomHash(FilterData()), 969445585U);
ResetPolicy(11); // num_probes = 7
for (int key = 0; key < 87; key++) {
Add(Key(key, buffer));
}
Build();
ASSERT_EQ(BloomHash(FilterData()), 1694458207U);
ResetPolicy(10); // num_probes = 6
for (int key = 0; key < 87; key++) {
Add(Key(key, buffer));
}
Build();
ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
ResetPolicy(10);
for (int key = /*CHANGED*/ 1; key < 87; key++) {
Add(Key(key, buffer));
}
Build();
ASSERT_EQ(BloomHash(FilterData()), 1908442116U);
ResetPolicy(10);
for (int key = 1; key < /*CHANGED*/ 88; key++) {
Add(Key(key, buffer));
}
Build();
ASSERT_EQ(BloomHash(FilterData()), 3057004015U);
// With new fractional bits_per_key, check that we are rounding to
// whole bits per key for old Bloom filters.
ResetPolicy(9.5); // Treated as 10
for (int key = 1; key < 88; key++) {
Add(Key(key, buffer));
}
Build();
ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
ResetPolicy(10.499); // Treated as 10
for (int key = 1; key < 88; key++) {
Add(Key(key, buffer));
}
Build();
ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
ResetPolicy();
}
// Different bits-per-byte
class FullBloomTest : public testing::TestWithParam<std::string> {
protected:
BlockBasedTableOptions table_options_;

@ -84,8 +84,8 @@ DEFINE_bool(new_builder, false,
DEFINE_uint32(impl, 0,
"Select filter implementation. Without -use_plain_table_bloom:"
"0 = legacy full Bloom filter, 1 = block-based Bloom filter, "
"2 = format_version 5 Bloom filter, 3 = Ribbon128 filter. With "
"0 = legacy full Bloom filter, "
"1 = format_version 5 Bloom filter, 2 = Ribbon128 filter. With "
"-use_plain_table_bloom: 0 = no locality, 1 = locality.");
DEFINE_bool(net_includes_hashing, false,
@ -358,13 +358,9 @@ void FilterBench::Go() {
"-impl must currently be >= 0 and <= 1 for Plain table");
}
} else {
if (FLAGS_impl == 1) {
if (FLAGS_impl > 2) {
throw std::runtime_error(
"Block-based filter not currently supported by filter_bench");
}
if (FLAGS_impl > 3) {
throw std::runtime_error(
"-impl must currently be 0, 2, or 3 for Block-based table");
"-impl must currently be >= 0 and <= 2 for Block-based table");
}
}
@ -603,7 +599,7 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
auto dry_run_hash_fn = DryRunNoHash;
if (!FLAGS_net_includes_hashing) {
if (FLAGS_impl < 2 || FLAGS_use_plain_table_bloom) {
if (FLAGS_impl == 0 || FLAGS_use_plain_table_bloom) {
dry_run_hash_fn = DryRunHash32;
} else {
dry_run_hash_fn = DryRunHash64;
@ -728,8 +724,6 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
} else {
may_match = info.full_block_reader_->KeyMayMatch(
batch_slices[i],
/*prefix_extractor=*/nullptr,
/*block_offset=*/ROCKSDB_NAMESPACE::kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr);

@ -139,11 +139,6 @@ CacheDumperImpl::DumpOneBlockCallBack() {
block_start = (static_cast<Block*>(value))->data();
block_len = (static_cast<Block*>(value))->size();
break;
case CacheEntryRole::kDeprecatedFilterBlock:
type = CacheDumpUnitType::kDeprecatedFilterBlock;
block_start = (static_cast<BlockContents*>(value))->data.data();
block_len = (static_cast<BlockContents*>(value))->data.size();
break;
case CacheEntryRole::kFilterBlock:
type = CacheDumpUnitType::kFilter;
block_start = (static_cast<ParsedFullFilterBlock*>(value))
@ -163,6 +158,10 @@ CacheDumperImpl::DumpOneBlockCallBack() {
block_start = (static_cast<Block*>(value))->data();
block_len = (static_cast<Block*>(value))->size();
break;
case CacheEntryRole::kDeprecatedFilterBlock:
// Obsolete
filter_out = true;
break;
case CacheEntryRole::kMisc:
filter_out = true;
break;
@ -322,22 +321,6 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() {
// according to the block type, get the helper callback function and create
// the corresponding block
switch (dump_unit.type) {
case CacheDumpUnitType::kDeprecatedFilterBlock: {
helper = BlocklikeTraits<BlockContents>::GetCacheItemHelper(
BlockType::kDeprecatedFilter);
std::unique_ptr<BlockContents> block_holder;
block_holder.reset(BlocklikeTraits<BlockContents>::Create(
std::move(raw_block_contents), 0, statistics, false,
toptions_.filter_policy.get()));
// Insert the block to secondary cache.
// Note that, if we cannot get the correct helper callback, the block
// will not be inserted.
if (helper != nullptr) {
s = secondary_cache_->Insert(dump_unit.key,
(void*)(block_holder.get()), helper);
}
break;
}
case CacheDumpUnitType::kFilter: {
helper = BlocklikeTraits<ParsedFullFilterBlock>::GetCacheItemHelper(
BlockType::kFilter);
@ -390,6 +373,9 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() {
}
case CacheDumpUnitType::kFooter:
break;
case CacheDumpUnitType::kDeprecatedFilterBlock:
// Obsolete
break;
default:
continue;
}

@ -36,7 +36,7 @@ enum CacheDumpUnitType : unsigned char {
kHashIndexMetadata = 9,
kMetaIndex = 10,
kIndex = 11,
kDeprecatedFilterBlock = 12,
kDeprecatedFilterBlock = 12, // OBSOLETE / DEPRECATED
kFilterMetaBlock = 13,
kBlockTypeMax,
};

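Note: the obsolete enum value above is kept (only marked OBSOLETE / DEPRECATED), presumably because these values are part of the persisted dump format, so the number must stay reserved rather than be reused. A toy example of the same pattern (names are assumptions):

enum class ToyDumpUnitType : unsigned char {
  kData = 1,
  kDeprecatedThing = 2,  // OBSOLETE: value stays reserved; never reuse it
  kFilter = 3,
};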