Refactor FilterPolicies toward Customizable (#9567)

Summary:
Some changes to make it easier to make FilterPolicy
customizable. In particular, this creates distinct classes for the different
testing-only and user-facing built-in FilterPolicy modes.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9567

Test Plan:
Tests updated, with no intended difference in the functionality
tested. No difference in test performance was seen as a result of moving to
string-based filter type configuration.

Reviewed By: mrambacher

Differential Revision: D34234694

Pulled By: pdillinger

fbshipit-source-id: 8a94931a9e04c3bcca863a4f524cfd064aaf0122
main
Peter Dillinger 2 years ago committed by Facebook GitHub Bot
parent a0c569ee1d
commit 8c681087c7
  1. 234
      db/db_bloom_filter_test.cc
  2. 28
      microbench/ribbon_bench.cc
  3. 64
      options/options_test.cc
  4. 10
      table/block_based/block_based_filter_block.cc
  5. 3
      table/block_based/block_based_table_builder.cc
  6. 439
      table/block_based/filter_policy.cc
  7. 262
      table/block_based/filter_policy_internal.h
  8. 9
      table/block_based/mock_block_based_table.h
  9. 108
      util/bloom_test.cc
  10. 17
      util/filter_bench.cc

@ -10,6 +10,7 @@
#include <cstring>
#include <iomanip>
#include <sstream>
#include <string>
#include "cache/cache_entry_roles.h"
#include "cache/cache_reservation_manager.h"
@ -27,7 +28,17 @@
namespace ROCKSDB_NAMESPACE {
namespace {
using BFP = BloomFilterPolicy;
// Test helper that builds a filter policy for the implementation called
// `name`, configured with `bits_per_key`, by delegating to
// BloomLikeFilterPolicy::Create. Note that the argument order here
// (bits_per_key, name) is the reverse of the callee's (name, bits_per_key).
std::shared_ptr<const FilterPolicy> Create(double bits_per_key,
const std::string& name) {
return BloomLikeFilterPolicy::Create(name, bits_per_key);
}
// Implementation-name strings for the filter policies exercised by these
// tests. Each one is the kName() of the corresponding policy class, so it can
// be compared against a test parameter or passed to Create() above.
const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kName();
const std::string kDeprecatedBlock =
DeprecatedBlockBasedBloomFilterPolicy::kName();
const std::string kFastLocalBloom = test::FastLocalBloomFilterPolicy::kName();
const std::string kStandard128Ribbon =
test::Standard128RibbonFilterPolicy::kName();
const std::string kAutoBloom = BloomFilterPolicy::kName();
} // namespace
// DB tests related to bloom filter.
@ -38,12 +49,13 @@ class DBBloomFilterTest : public DBTestBase {
: DBTestBase("db_bloom_filter_test", /*env_do_fsync=*/true) {}
};
class DBBloomFilterTestWithParam : public DBTestBase,
public testing::WithParamInterface<
std::tuple<BFP::Mode, bool, uint32_t>> {
class DBBloomFilterTestWithParam
: public DBTestBase,
public testing::WithParamInterface<
std::tuple<std::string, bool, uint32_t>> {
// public testing::WithParamInterface<bool> {
protected:
BFP::Mode bfp_impl_;
std::string bfp_impl_;
bool partition_filters_;
uint32_t format_version_;
@ -90,7 +102,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
ReadOptions ropts;
std::string value;
anon::OptionsOverride options_override;
options_override.filter_policy.reset(new BFP(20, bfp_impl_));
options_override.filter_policy = Create(20, bfp_impl_);
options_override.partition_filters = partition_filters_;
options_override.metadata_block_size = 32;
Options options = CurrentOptions(options_override);
@ -477,7 +489,7 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
// trigger reset of table_factory
BlockBasedTableOptions table_options;
table_options.no_block_cache = true;
table_options.filter_policy.reset(new BFP(10, bfp_impl_));
table_options.filter_policy = Create(10, bfp_impl_);
table_options.partition_filters = partition_filters_;
if (partition_filters_) {
table_options.index_type =
@ -573,11 +585,10 @@ class AlwaysTrueBitsBuilder : public FilterBitsBuilder {
size_t ApproximateNumEntries(size_t) override { return SIZE_MAX; }
};
class AlwaysTrueFilterPolicy : public BloomFilterPolicy {
class AlwaysTrueFilterPolicy : public BloomLikeFilterPolicy {
public:
explicit AlwaysTrueFilterPolicy(bool skip)
: BloomFilterPolicy(/* ignored */ 10, /* ignored */ BFP::kAutoBloom),
skip_(skip) {}
: BloomLikeFilterPolicy(/* ignored */ 10), skip_(skip) {}
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const override {
@ -588,6 +599,10 @@ class AlwaysTrueFilterPolicy : public BloomFilterPolicy {
}
}
// Returns the fixed identifier string for this test-only filter policy.
std::string GetId() const override {
return "rocksdb.test.AlwaysTrueFilterPolicy";
}
private:
bool skip_;
};
@ -636,7 +651,7 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) {
// Test 1: bits per key < 0.5 means skip filters -> no filter
// constructed or read.
table_options.filter_policy.reset(new BFP(0.4, bfp_impl_));
table_options.filter_policy = Create(0.4, bfp_impl_);
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
PutAndGetFn();
@ -724,25 +739,23 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) {
INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestDefFormatVersion,
::testing::Values(
std::make_tuple(BFP::kDeprecatedBlock, false,
test::kDefaultFormatVersion),
std::make_tuple(BFP::kAutoBloom, true, test::kDefaultFormatVersion),
std::make_tuple(BFP::kAutoBloom, false, test::kDefaultFormatVersion)));
std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion),
std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion),
std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion)));
INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestWithParam,
::testing::Values(
std::make_tuple(BFP::kDeprecatedBlock, false,
test::kDefaultFormatVersion),
std::make_tuple(BFP::kAutoBloom, true, test::kDefaultFormatVersion),
std::make_tuple(BFP::kAutoBloom, false, test::kDefaultFormatVersion)));
std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion),
std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion),
std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion)));
INSTANTIATE_TEST_CASE_P(
FormatLatest, DBBloomFilterTestWithParam,
::testing::Values(
std::make_tuple(BFP::kDeprecatedBlock, false, kLatestFormatVersion),
std::make_tuple(BFP::kAutoBloom, true, kLatestFormatVersion),
std::make_tuple(BFP::kAutoBloom, false, kLatestFormatVersion)));
std::make_tuple(kDeprecatedBlock, false, kLatestFormatVersion),
std::make_tuple(kAutoBloom, true, kLatestFormatVersion),
std::make_tuple(kAutoBloom, false, kLatestFormatVersion)));
#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
TEST_F(DBBloomFilterTest, BloomFilterRate) {
@ -941,7 +954,7 @@ using FilterConstructionReserveMemoryHash = uint64_t;
class DBFilterConstructionReserveMemoryTestWithParam
: public DBTestBase,
public testing::WithParamInterface<
std::tuple<bool, BloomFilterPolicy::Mode, bool, bool>> {
std::tuple<bool, std::string, bool, bool>> {
public:
DBFilterConstructionReserveMemoryTestWithParam()
: DBTestBase("db_bloom_filter_tests",
@ -951,9 +964,8 @@ class DBFilterConstructionReserveMemoryTestWithParam
policy_(std::get<1>(GetParam())),
partition_filters_(std::get<2>(GetParam())),
detect_filter_construct_corruption_(std::get<3>(GetParam())) {
if (!reserve_table_builder_memory_ ||
policy_ == BloomFilterPolicy::Mode::kDeprecatedBlock ||
policy_ == BloomFilterPolicy::Mode::kLegacyBloom) {
if (!reserve_table_builder_memory_ || policy_ == kDeprecatedBlock ||
policy_ == kLegacyBloom) {
// For these cases, we are only interested in whether filter construction
// cache reservation happens, not in its accuracy. Therefore we don't
// need many keys.
@ -966,7 +978,7 @@ class DBFilterConstructionReserveMemoryTestWithParam
// two partitions.
num_key_ = 18 * CacheReservationManager::GetDummyEntrySize() /
sizeof(FilterConstructionReserveMemoryHash);
} else if (policy_ == BloomFilterPolicy::Mode::kFastLocalBloom) {
} else if (policy_ == kFastLocalBloom) {
// For Bloom Filter + FullFilter case, since we design the num_key_ to
// make hash entry cache reservation be a multiple of dummy entries, the
// correct behavior of charging final filter on top of it will trigger at
@ -995,7 +1007,7 @@ class DBFilterConstructionReserveMemoryTestWithParam
constexpr std::size_t kCacheCapacity = 100 * 1024 * 1024;
table_options.reserve_table_builder_memory = reserve_table_builder_memory_;
table_options.filter_policy.reset(new BloomFilterPolicy(10, policy_));
table_options.filter_policy = Create(10, policy_);
table_options.partition_filters = partition_filters_;
if (table_options.partition_filters) {
table_options.index_type =
@ -1023,7 +1035,7 @@ class DBFilterConstructionReserveMemoryTestWithParam
bool ReserveTableBuilderMemory() { return reserve_table_builder_memory_; }
BloomFilterPolicy::Mode GetFilterPolicy() { return policy_; }
std::string GetFilterPolicy() { return policy_; }
bool PartitionFilters() { return partition_filters_; }
@ -1035,7 +1047,7 @@ class DBFilterConstructionReserveMemoryTestWithParam
private:
std::size_t num_key_;
bool reserve_table_builder_memory_;
BloomFilterPolicy::Mode policy_;
std::string policy_;
bool partition_filters_;
std::shared_ptr<FilterConstructResPeakTrackingCache> cache_;
bool detect_filter_construct_corruption_;
@ -1043,32 +1055,20 @@ class DBFilterConstructionReserveMemoryTestWithParam
INSTANTIATE_TEST_CASE_P(
BlockBasedTableOptions, DBFilterConstructionReserveMemoryTestWithParam,
::testing::Values(
std::make_tuple(false, BloomFilterPolicy::Mode::kFastLocalBloom, false,
false),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, false,
false),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, false,
true),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, true,
false),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, true,
true),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon,
false, false),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon,
false, true),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, true,
false),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon, true,
true),
std::make_tuple(true, BloomFilterPolicy::Mode::kDeprecatedBlock, false,
false),
std::make_tuple(true, BloomFilterPolicy::Mode::kLegacyBloom, false,
false)));
::testing::Values(std::make_tuple(false, kFastLocalBloom, false, false),
std::make_tuple(true, kFastLocalBloom, false, false),
std::make_tuple(true, kFastLocalBloom, false, true),
std::make_tuple(true, kFastLocalBloom, true, false),
std::make_tuple(true, kFastLocalBloom, true, true),
std::make_tuple(true, kStandard128Ribbon, false, false),
std::make_tuple(true, kStandard128Ribbon, false, true),
std::make_tuple(true, kStandard128Ribbon, true, false),
std::make_tuple(true, kStandard128Ribbon, true, true),
std::make_tuple(true, kDeprecatedBlock, false, false),
std::make_tuple(true, kLegacyBloom, false, false)));
// TODO: Speed up this test.
// The current test inserts many keys (on the scale of dummy entry size)
@ -1126,7 +1126,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
ASSERT_OK(Flush());
bool reserve_table_builder_memory = ReserveTableBuilderMemory();
BloomFilterPolicy::Mode policy = GetFilterPolicy();
std::string policy = GetFilterPolicy();
bool partition_filters = PartitionFilters();
bool detect_filter_construct_corruption =
table_options.detect_filter_construct_corruption;
@ -1141,12 +1141,11 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
return;
}
if (policy == BloomFilterPolicy::Mode::kDeprecatedBlock ||
policy == BloomFilterPolicy::Mode::kLegacyBloom) {
if (policy == kDeprecatedBlock || policy == kLegacyBloom) {
EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0)
<< "There shouldn't be filter construction cache reservation as this "
"feature does not support BloomFilterPolicy::Mode::kDeprecatedBlock "
"nor BloomFilterPolicy::Mode::kLegacyBloom";
"feature does not support kDeprecatedBlock "
"nor kLegacyBloom";
return;
}
@ -1162,17 +1161,17 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
const std::size_t predicted_hash_entries_cache_res_dummy_entry_num =
predicted_hash_entries_cache_res / kDummyEntrySize;
const std::size_t predicted_final_filter_cache_res =
static_cast<std::size_t>(std::ceil(
1.0 * predicted_hash_entries_cache_res_dummy_entry_num / 6 *
(policy == BloomFilterPolicy::Mode::kStandard128Ribbon ? 0.7 : 1))) *
static_cast<std::size_t>(
std::ceil(1.0 * predicted_hash_entries_cache_res_dummy_entry_num / 6 *
(policy == kStandard128Ribbon ? 0.7 : 1))) *
kDummyEntrySize;
const std::size_t predicted_banding_cache_res =
static_cast<std::size_t>(
std::ceil(predicted_hash_entries_cache_res_dummy_entry_num * 2.5)) *
kDummyEntrySize;
if (policy == BloomFilterPolicy::Mode::kFastLocalBloom) {
/* BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter
if (policy == kFastLocalBloom) {
/* kFastLocalBloom + FullFilter
* p0
* / \
* b / \
@ -1186,13 +1185,13 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
* multiple of dummy entries so that reservation for (p0 - b)
* will trigger at least another dummy entry insertion.
*
* BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter +
* kFastLocalBloom + FullFilter +
* detect_filter_construct_corruption
* The peak p0 stays the same as
* (BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter) but just lasts
* (kFastLocalBloom + FullFilter) but just lasts
* longer since we release hash entries reservation later.
*
* BloomFilterPolicy::Mode::kFastLocalBloom + PartitionedFilter
* kFastLocalBloom + PartitionedFilter
* p1
* / \
* p0 b'/ \
@ -1209,17 +1208,17 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
* + partitioned final filter1 + partitioned final filter2
* = hash entries + final filter
*
* BloomFilterPolicy::Mode::kFastLocalBloom + PartitionedFilter +
* kFastLocalBloom + PartitionedFilter +
* detect_filter_construct_corruption
* The peak p0, p1 stay the same as
* (BloomFilterPolicy::Mode::kFastLocalBloom + PartitionedFilter) but just
* (kFastLocalBloom + PartitionedFilter) but just
* last longer since we release hash entries reservation later.
*
*/
if (!partition_filters) {
EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1)
<< "Filter construction cache reservation should have only 1 peak in "
"case: BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter";
"case: kFastLocalBloom + FullFilter";
std::size_t filter_construction_cache_res_peak =
filter_construction_cache_res_peaks[0];
EXPECT_GT(filter_construction_cache_res_peak,
@ -1239,7 +1238,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
} else {
EXPECT_GE(filter_construction_cache_res_peaks.size(), 2)
<< "Filter construction cache reservation should have multiple peaks "
"in case: BloomFilterPolicy::Mode::kFastLocalBloom + "
"in case: kFastLocalBloom + "
"PartitionedFilter";
std::size_t predicted_filter_construction_cache_res_increments_sum =
predicted_hash_entries_cache_res + predicted_final_filter_cache_res;
@ -1251,8 +1250,8 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
}
}
if (policy == BloomFilterPolicy::Mode::kStandard128Ribbon) {
/* BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter
if (policy == kStandard128Ribbon) {
/* kStandard128Ribbon + FullFilter
* p0
* / \ p1
* / \/\
@ -1266,7 +1265,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
* will trigger at least another dummy entry insertion
* (or, equivalently, creating another peak).
*
* BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter +
* kStandard128Ribbon + FullFilter +
* detect_filter_construct_corruption
*
* new p0
@ -1287,7 +1286,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
* entries reservation (like p0 - b' previously) until after final filter
* creation and post-verification
*
* BloomFilterPolicy::Mode::kStandard128Ribbon + PartitionedFilter
* kStandard128Ribbon + PartitionedFilter
* p3
* p0 /\ p4
* / \ p1 / \ /\
@ -1306,7 +1305,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
* + partitioned final filter1 + partitioned final filter2
* = hash entries + banding + final filter
*
* BloomFilterPolicy::Mode::kStandard128Ribbon + PartitionedFilter +
* kStandard128Ribbon + PartitionedFilter +
* detect_filter_construct_corruption
*
* new p3
@ -1347,7 +1346,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
if (!detect_filter_construct_corruption) {
EXPECT_EQ(filter_construction_cache_res_peaks.size(), 2)
<< "Filter construction cache reservation should have 2 peaks in "
"case: BloomFilterPolicy::Mode::kStandard128Ribbon + "
"case: kStandard128Ribbon + "
"FullFilter. "
"The second peak is resulted from charging the final filter "
"after "
@ -1366,7 +1365,7 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
} else {
EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1)
<< "Filter construction cache reservation should have 1 peaks in "
"case: BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter "
"case: kStandard128Ribbon + FullFilter "
"+ detect_filter_construct_corruption. "
"The previous second peak now disappears since we don't "
"decrease the hash entry reservation"
@ -1388,13 +1387,13 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
EXPECT_GE(filter_construction_cache_res_peaks.size(), 3)
<< "Filter construction cache reservation should have more than 3 "
"peaks "
"in case: BloomFilterPolicy::Mode::kStandard128Ribbon + "
"in case: kStandard128Ribbon + "
"PartitionedFilter";
} else {
EXPECT_GE(filter_construction_cache_res_peaks.size(), 2)
<< "Filter construction cache reservation should have more than 2 "
"peaks "
"in case: BloomFilterPolicy::Mode::kStandard128Ribbon + "
"in case: kStandard128Ribbon + "
"PartitionedFilter + detect_filter_construct_corruption";
}
std::size_t predicted_filter_construction_cache_res_increments_sum =
@ -1412,8 +1411,8 @@ TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
class DBFilterConstructionCorruptionTestWithParam
: public DBTestBase,
public testing::WithParamInterface<
std::tuple<bool /* detect_filter_construct_corruption */,
BloomFilterPolicy::Mode, bool /* partition_filters */>> {
std::tuple<bool /* detect_filter_construct_corruption */, std::string,
bool /* partition_filters */>> {
public:
DBFilterConstructionCorruptionTestWithParam()
: DBTestBase("db_bloom_filter_tests",
@ -1422,8 +1421,7 @@ class DBFilterConstructionCorruptionTestWithParam
BlockBasedTableOptions GetBlockBasedTableOptions() {
BlockBasedTableOptions table_options;
table_options.detect_filter_construct_corruption = std::get<0>(GetParam());
table_options.filter_policy.reset(
new BloomFilterPolicy(10, std::get<1>(GetParam())));
table_options.filter_policy = Create(10, std::get<1>(GetParam()));
table_options.partition_filters = std::get<2>(GetParam());
if (table_options.partition_filters) {
table_options.index_type =
@ -1444,14 +1442,11 @@ class DBFilterConstructionCorruptionTestWithParam
INSTANTIATE_TEST_CASE_P(
DBFilterConstructionCorruptionTestWithParam,
DBFilterConstructionCorruptionTestWithParam,
::testing::Values(
std::make_tuple(false, BloomFilterPolicy::Mode::kFastLocalBloom, false),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, false),
std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, true),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon,
false),
std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon,
true)));
::testing::Values(std::make_tuple(false, kFastLocalBloom, false),
std::make_tuple(true, kFastLocalBloom, false),
std::make_tuple(true, kFastLocalBloom, true),
std::make_tuple(true, kStandard128Ribbon, false),
std::make_tuple(true, kStandard128Ribbon, true)));
TEST_P(DBFilterConstructionCorruptionTestWithParam, DetectCorruption) {
Options options = CurrentOptions();
@ -2139,16 +2134,12 @@ INSTANTIATE_TEST_CASE_P(DBBloomFilterTestVaryPrefixAndFormatVer,
#ifndef ROCKSDB_LITE
namespace {
namespace BFP2 {
// Extends BFP::Mode with option to use Plain table
using PseudoMode = int;
static constexpr PseudoMode kPlainTable = -1;
} // namespace BFP2
static const std::string kPlainTable = "test_PlainTableBloom";
} // namespace
class BloomStatsTestWithParam
: public DBBloomFilterTest,
public testing::WithParamInterface<std::tuple<BFP2::PseudoMode, bool>> {
public testing::WithParamInterface<std::tuple<std::string, bool>> {
public:
BloomStatsTestWithParam() {
bfp_impl_ = std::get<0>(GetParam());
@ -2159,7 +2150,7 @@ class BloomStatsTestWithParam
ROCKSDB_NAMESPACE::NewFixedPrefixTransform(4));
options_.memtable_prefix_bloom_size_ratio =
8.0 * 1024.0 / static_cast<double>(options_.write_buffer_size);
if (bfp_impl_ == BFP2::kPlainTable) {
if (bfp_impl_ == kPlainTable) {
assert(!partition_filters_); // not supported in plain table
PlainTableOptions table_options;
options_.table_factory.reset(NewPlainTableFactory(table_options));
@ -2167,13 +2158,12 @@ class BloomStatsTestWithParam
BlockBasedTableOptions table_options;
table_options.hash_index_allow_collision = false;
if (partition_filters_) {
assert(bfp_impl_ != BFP::kDeprecatedBlock);
assert(bfp_impl_ != kDeprecatedBlock);
table_options.partition_filters = partition_filters_;
table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
}
table_options.filter_policy.reset(
new BFP(10, static_cast<BFP::Mode>(bfp_impl_)));
table_options.filter_policy = Create(10, bfp_impl_);
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
}
options_.env = env_;
@ -2191,7 +2181,7 @@ class BloomStatsTestWithParam
static void SetUpTestCase() {}
static void TearDownTestCase() {}
BFP2::PseudoMode bfp_impl_;
std::string bfp_impl_;
bool partition_filters_;
Options options_;
};
@ -2295,7 +2285,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
ASSERT_EQ(value3, iter->value().ToString());
// The seek doesn't check block-based bloom filter because last index key
// starts with the same prefix we're seeking to.
uint64_t expected_hits = bfp_impl_ == BFP::kDeprecatedBlock ? 1 : 2;
uint64_t expected_hits = bfp_impl_ == kDeprecatedBlock ? 1 : 2;
ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count);
iter->Seek(key2);
@ -2307,12 +2297,12 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
INSTANTIATE_TEST_CASE_P(
BloomStatsTestWithParam, BloomStatsTestWithParam,
::testing::Values(std::make_tuple(BFP::kDeprecatedBlock, false),
std::make_tuple(BFP::kLegacyBloom, false),
std::make_tuple(BFP::kLegacyBloom, true),
std::make_tuple(BFP::kFastLocalBloom, false),
std::make_tuple(BFP::kFastLocalBloom, true),
std::make_tuple(BFP2::kPlainTable, false)));
::testing::Values(std::make_tuple(kDeprecatedBlock, false),
std::make_tuple(kLegacyBloom, false),
std::make_tuple(kLegacyBloom, true),
std::make_tuple(kFastLocalBloom, false),
std::make_tuple(kFastLocalBloom, true),
std::make_tuple(kPlainTable, false)));
namespace {
void PrefixScanInit(DBBloomFilterTest* dbtest) {
@ -2620,8 +2610,8 @@ int CountIter(std::unique_ptr<Iterator>& iter, const Slice& key) {
// into the same string, or 2) the transformed seek key is of the same length
// as the upper bound and two keys are adjacent according to the comparator.
TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
for (auto bfp_impl : BFP::kAllFixedImpls) {
int using_full_builder = bfp_impl != BFP::kDeprecatedBlock;
for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
int using_full_builder = bfp_impl != kDeprecatedBlock;
Options options;
options.create_if_missing = true;
options.env = CurrentOptions().env;
@ -2631,7 +2621,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset(new BFP(10, bfp_impl));
table_options.filter_policy = Create(10, bfp_impl);
table_options.index_shortening = BlockBasedTableOptions::
IndexShorteningMode::kShortenSeparatorsAndSuccessor;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
@ -2752,8 +2742,8 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
// Create multiple SST files each with a different prefix_extractor config,
// verify iterators can read all SST files using the latest config.
TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
for (auto bfp_impl : BFP::kAllFixedImpls) {
int using_full_builder = bfp_impl != BFP::kDeprecatedBlock;
for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
int using_full_builder = bfp_impl != kDeprecatedBlock;
Options options;
options.env = CurrentOptions().env;
options.create_if_missing = true;
@ -2762,7 +2752,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
options.statistics = CreateDBStatistics();
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.filter_policy.reset(new BFP(10, bfp_impl));
table_options.filter_policy = Create(10, bfp_impl);
table_options.cache_index_and_filter_blocks = true;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
@ -2888,7 +2878,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
// as expected
TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
int iteration = 0;
for (auto bfp_impl : BFP::kAllFixedImpls) {
for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
Options options = CurrentOptions();
options.create_if_missing = true;
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
@ -2897,7 +2887,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset(new BFP(10, bfp_impl));
table_options.filter_policy = Create(10, bfp_impl);
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
CreateAndReopenWithCF({"pikachu" + std::to_string(iteration)}, options);
ReadOptions read_options;
@ -2944,7 +2934,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) {
// Verify it's possible to change prefix_extractor at runtime and iterators
// behaves as expected
TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) {
for (auto bfp_impl : BFP::kAllFixedImpls) {
for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
Options options;
options.env = CurrentOptions().env;
options.create_if_missing = true;
@ -2954,7 +2944,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) {
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset(new BFP(10, bfp_impl));
table_options.filter_policy = Create(10, bfp_impl);
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);

@ -47,30 +47,28 @@ struct KeyMaker {
};
// benchmark arguments:
// 0. filter mode
// 0. filter impl (like filter_bench -impl)
// 1. filter config bits_per_key
// 2. average data key length
// 3. data entry number
static void CustomArguments(benchmark::internal::Benchmark *b) {
for (int filter_mode :
{BloomFilterPolicy::kLegacyBloom, BloomFilterPolicy::kFastLocalBloom,
BloomFilterPolicy::kStandard128Ribbon}) {
for (int filter_impl : {0, 2, 3}) {
for (int bits_per_key : {10, 20}) {
for (int key_len_avg : {10, 100}) {
for (int64_t entry_num : {1 << 10, 1 << 20}) {
b->Args({filter_mode, bits_per_key, key_len_avg, entry_num});
b->Args({filter_impl, bits_per_key, key_len_avg, entry_num});
}
}
}
}
b->ArgNames({"filter_mode", "bits_per_key", "key_len_avg", "entry_num"});
b->ArgNames({"filter_impl", "bits_per_key", "key_len_avg", "entry_num"});
}
static void FilterBuild(benchmark::State &state) {
// setup data
auto filter = new BloomFilterPolicy(
static_cast<double>(state.range(1)),
static_cast<BloomFilterPolicy::Mode>(state.range(0)));
auto filter = BloomLikeFilterPolicy::Create(
BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
static_cast<double>(state.range(1)));
auto tester = new mock::MockBlockBasedTableTester(filter);
KeyMaker km(state.range(2));
std::unique_ptr<const char[]> owner;
@ -91,9 +89,9 @@ BENCHMARK(FilterBuild)->Apply(CustomArguments);
static void FilterQueryPositive(benchmark::State &state) {
// setup data
auto filter = new BloomFilterPolicy(
static_cast<double>(state.range(1)),
static_cast<BloomFilterPolicy::Mode>(state.range(0)));
auto filter = BloomLikeFilterPolicy::Create(
BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
static_cast<double>(state.range(1)));
auto tester = new mock::MockBlockBasedTableTester(filter);
KeyMaker km(state.range(2));
std::unique_ptr<const char[]> owner;
@ -119,9 +117,9 @@ BENCHMARK(FilterQueryPositive)->Apply(CustomArguments);
static void FilterQueryNegative(benchmark::State &state) {
// setup data
auto filter = new BloomFilterPolicy(
static_cast<double>(state.range(1)),
static_cast<BloomFilterPolicy::Mode>(state.range(0)));
auto filter = BloomLikeFilterPolicy::Create(
BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
static_cast<double>(state.range(1)));
auto tester = new mock::MockBlockBasedTableTester(filter);
KeyMaker km(state.range(2));
std::unique_ptr<const char[]> owner;

@ -882,7 +882,6 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) {
dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 4567);
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 5);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom);
// Verify that only the lower 32bits are stored in
// new_opt.read_amp_bytes_per_bit.
EXPECT_EQ(1U, new_opt.read_amp_bytes_per_bit);
@ -936,7 +935,6 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) {
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000);
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom);
// use_block_based_builder=true now ignored in public API (same as false)
ASSERT_OK(GetBlockBasedTableOptionsFromString(
@ -944,82 +942,67 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) {
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000);
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom);
// Back door way of enabling deprecated block-based Bloom
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt,
"filter_policy=rocksdb.internal.DeprecatedBlockBasedBloomFilter:4",
&new_opt));
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); // Only whole bits used
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kDeprecatedBlock);
auto builtin =
dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(builtin->GetId(),
"rocksdb.internal.DeprecatedBlockBasedBloomFilter:4");
// Test configuring using other internal names
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt,
"filter_policy=rocksdb.internal.LegacyBloomFilter:3", &new_opt));
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 3); // Only whole bits used
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kLegacyBloom);
builtin =
dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(builtin->GetId(), "rocksdb.internal.LegacyBloomFilter:3");
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt,
"filter_policy=rocksdb.internal.FastLocalBloomFilter:1.234", &new_opt));
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 1234);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom);
builtin =
dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(builtin->GetId(), "rocksdb.internal.FastLocalBloomFilter:1.234");
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt,
"filter_policy=rocksdb.internal.Standard128RibbonFilter:1.234",
&new_opt));
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 1234);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon);
builtin =
dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(builtin->GetId(), "rocksdb.internal.Standard128RibbonFilter:1.234");
// Ribbon filter policy (no Bloom hybrid)
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, "filter_policy=ribbonfilter:5.678:-1;",
&new_opt));
ASSERT_TRUE(new_opt.filter_policy != nullptr);
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 5678);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon);
auto rfp =
dynamic_cast<const RibbonFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(rfp->GetMillibitsPerKey(), 5678);
EXPECT_EQ(rfp->GetBloomBeforeLevel(), -1);
// Ribbon filter policy (default Bloom hybrid)
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, "filter_policy=ribbonfilter:6.789;",
&new_opt));
ASSERT_TRUE(new_opt.filter_policy != nullptr);
auto ltfp = dynamic_cast<const LevelThresholdFilterPolicy*>(
new_opt.filter_policy.get());
EXPECT_EQ(ltfp->TEST_GetStartingLevelForB(), 0);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyA());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyB());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon);
rfp = dynamic_cast<const RibbonFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(rfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(rfp->GetBloomBeforeLevel(), 0);
// Ribbon filter policy (custom Bloom hybrid)
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, "filter_policy=ribbonfilter:6.789:5;",
&new_opt));
ASSERT_TRUE(new_opt.filter_policy != nullptr);
ltfp = dynamic_cast<const LevelThresholdFilterPolicy*>(
new_opt.filter_policy.get());
EXPECT_EQ(ltfp->TEST_GetStartingLevelForB(), 5);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyA());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyB());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon);
rfp = dynamic_cast<const RibbonFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(rfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(rfp->GetBloomBeforeLevel(), 5);
// Check block cache options are overwritten when specified
// in new format as a struct.
@ -2877,7 +2860,6 @@ TEST_F(OptionsOldApiTest, GetBlockBasedTableOptionsFromString) {
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000);
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom);
// Check block cache options are overwritten when specified
// in new format as a struct.

@ -15,6 +15,7 @@
#include "monitoring/perf_context_imp.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"
#include "util/cast_util.h"
#include "util/coding.h"
#include "util/string_util.h"
@ -157,9 +158,9 @@ void BlockBasedFilterBlockBuilder::GenerateFilter() {
// Generate filter for current set of keys and append to result_.
filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
BloomFilterPolicy::CreateFilter(tmp_entries_.data(),
static_cast<int>(num_entries), bits_per_key_,
&result_);
DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(
tmp_entries_.data(), static_cast<int>(num_entries), bits_per_key_,
&result_);
tmp_entries_.clear();
entries_.clear();
@ -283,7 +284,8 @@ bool BlockBasedFilterBlockReader::MayMatch(
assert(table());
assert(table()->get_rep());
const bool may_match = BloomFilterPolicy::KeyMayMatch(entry, filter);
const bool may_match =
DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(entry, filter);
if (may_match) {
PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
return true;

@ -82,7 +82,8 @@ FilterBlockBuilder* CreateFilterBlockBuilder(
} else {
// Check for backdoor deprecated block-based bloom config
size_t starting_est = filter_bits_builder->EstimateEntriesAdded();
constexpr auto kSecretStart = BloomFilterPolicy::kSecretBitsPerKeyStart;
constexpr auto kSecretStart =
DeprecatedBlockBasedBloomFilterPolicy::kSecretBitsPerKeyStart;
if (starting_est >= kSecretStart && starting_est < kSecretStart + 100) {
int bits_per_key = static_cast<int>(starting_est - kSecretStart);
delete filter_bits_builder;

@ -10,6 +10,7 @@
#include "rocksdb/filter_policy.h"
#include <array>
#include <climits>
#include <cstring>
#include <deque>
#include <limits>
@ -18,6 +19,8 @@
#include "cache/cache_entry_roles.h"
#include "cache/cache_reservation_manager.h"
#include "logging/logging.h"
#include "port/lang.h"
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/slice.h"
#include "table/block_based/block_based_filter_block.h"
#include "table/block_based/block_based_table_reader.h"
@ -29,6 +32,7 @@
#include "util/hash.h"
#include "util/ribbon_config.h"
#include "util/ribbon_impl.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
@ -1307,21 +1311,8 @@ Status XXPH3FilterBitsBuilder::MaybePostVerify(const Slice& filter_content) {
}
} // namespace
const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllFixedImpls = {
kLegacyBloom,
kDeprecatedBlock,
kFastLocalBloom,
kStandard128Ribbon,
};
const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllUserModes = {
kDeprecatedBlock,
kAutoBloom,
kStandard128Ribbon,
};
BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
: mode_(mode), warned_(false), aggregate_rounding_balance_(0) {
BloomLikeFilterPolicy::BloomLikeFilterPolicy(double bits_per_key)
: warned_(false), aggregate_rounding_balance_(0) {
// Sanitize bits_per_key
if (bits_per_key < 0.5) {
// Round down to no filter
@ -1353,14 +1344,48 @@ BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000;
}
BloomFilterPolicy::~BloomFilterPolicy() {}
BloomLikeFilterPolicy::~BloomLikeFilterPolicy() {}
// Returns the single name reported by every built-in filter policy. The
// name is shared because any built-in policy can read filters written by
// any other built-in policy.
const char* BuiltinFilterPolicy::Name() const {
return "rocksdb.BuiltinBloomFilter";
}
void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, int bits_per_key,
std::string* dst) {
// Internal configuration-string name for the deprecated block-based Bloom
// filter policy. GetId() uses it as the prefix of the policy's id string.
const char* DeprecatedBlockBasedBloomFilterPolicy::kName() {
return "rocksdb.internal.DeprecatedBlockBasedBloomFilter";
}
// Builds the id string for this policy: the internal name followed by the
// ":<bits per key>" suffix, e.g.
// "rocksdb.internal.DeprecatedBlockBasedBloomFilter:4".
std::string DeprecatedBlockBasedBloomFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  return id;
}
// Constructs the policy by forwarding bits_per_key to the
// BloomLikeFilterPolicy base constructor; no additional state is kept here.
DeprecatedBlockBasedBloomFilterPolicy::DeprecatedBlockBasedBloomFilterPolicy(
double bits_per_key)
: BloomLikeFilterPolicy(bits_per_key) {}
// Returns nullptr when the configured whole bits/key is zero ("no filter").
// Otherwise returns a placeholder builder whose only meaningful output is
// EstimateEntriesAdded(), which carries the bits-per-key setting.
FilterBitsBuilder* DeprecatedBlockBasedBloomFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext&) const {
  const int whole_bits = GetWholeBitsPerKey();
  if (whole_bits == 0) {
    // "No filter" special case
    return nullptr;
  }

  // Internal contract: the fake builder below smuggles bits per key out
  // through EstimateEntriesAdded() as kSecretBitsPerKeyStart + bits_per_key.
  // Every other operation is a no-op / empty result.
  struct SecretBitsCarrier : public FilterBitsBuilder {
    explicit SecretBitsCarrier(int bits_per_key)
        : encoded_estimate(kSecretBitsPerKeyStart + bits_per_key) {}

    size_t encoded_estimate;

    size_t EstimateEntriesAdded() override { return encoded_estimate; }

    void AddKey(const Slice&) override {}

    using FilterBitsBuilder::Finish;  // FIXME
    Slice Finish(std::unique_ptr<const char[]>*) override { return Slice(); }

    size_t ApproximateNumEntries(size_t) override { return 0; }
  };

  return new SecretBitsCarrier(whole_bits);
}
void DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(const Slice* keys,
int n,
int bits_per_key,
std::string* dst) {
// Compute bloom filter size (in both bits and bytes)
uint32_t bits = static_cast<uint32_t>(n * bits_per_key);
@ -1383,8 +1408,8 @@ void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, int bits_per_key,
}
}
bool BloomFilterPolicy::KeyMayMatch(const Slice& key,
const Slice& bloom_filter) {
bool DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(
const Slice& key, const Slice& bloom_filter) {
const size_t len = bloom_filter.size();
if (len < 2 || len > 0xffffffffU) {
return false;
@ -1406,13 +1431,31 @@ bool BloomFilterPolicy::KeyMayMatch(const Slice& key,
array);
}
// Constructs the user-facing Bloom policy by forwarding bits_per_key to the
// BloomLikeFilterPolicy base constructor.
BloomFilterPolicy::BloomFilterPolicy(double bits_per_key)
: BloomLikeFilterPolicy(bits_per_key) {}
// Chooses the Bloom implementation used to build a new filter.
//
// Returns nullptr when the policy is configured with zero millibits per key
// ("no filter"). Otherwise the legacy Bloom builder is used for tables with
// format_version < 5 and the fast local Bloom builder for format_version >= 5.
//
// The configured bits are read through GetMillibitsPerKey() because the
// underlying field is private to BloomLikeFilterPolicy.
FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext& context) const {
  if (GetMillibitsPerKey() == 0) {
    // "No filter" special case
    return nullptr;
  } else if (context.table_options.format_version < 5) {
    return GetLegacyBloomBuilderWithContext(context);
  } else {
    return GetFastLocalBloomBuilderWithContext(context);
  }
}
// Configuration-string name of the user-facing Bloom filter policy; it is
// the prefix of the string returned by GetId().
const char* BloomFilterPolicy::kName() { return "bloomfilter"; }
// Builds the id string "bloomfilter:<bits per key>:false".
std::string BloomFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  // The trailing ":false" is kept for better forward-compatibility with 6.29
  // and earlier, which required a boolean `use_block_based_builder` parameter.
  id += ":false";
  return id;
}
FilterBitsBuilder* BloomLikeFilterPolicy::GetFastLocalBloomBuilderWithContext(
const FilterBuildingContext& context) const {
bool offm = context.table_options.optimize_filters_for_memory;
bool reserve_filter_construction_mem =
(context.table_options.reserve_table_builder_memory &&
@ -1422,80 +1465,73 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext(
cache_res_mgr = std::make_shared<CacheReservationManager>(
context.table_options.block_cache);
}
// Unusual code construction so that we can have just
// one exhaustive switch without (risky) recursion
for (int i = 0; i < 2; ++i) {
switch (cur) {
case kAutoBloom:
if (context.table_options.format_version < 5) {
cur = kLegacyBloom;
} else {
cur = kFastLocalBloom;
}
break;
case kDeprecatedBlock: {
if (context.info_log && !warned_.load(std::memory_order_relaxed)) {
warned_ = true;
ROCKS_LOG_WARN(context.info_log,
"Using deprecated block-based Bloom filter is "
"inefficient (%d bits per key).",
whole_bits_per_key_);
}
// Internal contract: returns a new fake builder that encodes bits per
// key into a special value from EstimateEntriesAdded()
struct B : public FilterBitsBuilder {
explicit B(int bits_per_key)
: est(kSecretBitsPerKeyStart + bits_per_key) {}
size_t est;
size_t EstimateEntriesAdded() override { return est; }
void AddKey(const Slice&) override {}
using FilterBitsBuilder::Finish; // FIXME
Slice Finish(std::unique_ptr<const char[]>*) override {
return Slice();
}
size_t ApproximateNumEntries(size_t) override { return 0; }
};
return new B(GetWholeBitsPerKey());
}
case kFastLocalBloom:
return new FastLocalBloomBitsBuilder(
millibits_per_key_, offm ? &aggregate_rounding_balance_ : nullptr,
cache_res_mgr,
context.table_options.detect_filter_construct_corruption);
case kLegacyBloom:
if (whole_bits_per_key_ >= 14 && context.info_log &&
!warned_.load(std::memory_order_relaxed)) {
warned_ = true;
const char* adjective;
if (whole_bits_per_key_ >= 20) {
adjective = "Dramatic";
} else {
adjective = "Significant";
}
// For more details, see
// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
ROCKS_LOG_WARN(
context.info_log,
"Using legacy Bloom filter with high (%d) bits/key. "
"%s filter space and/or accuracy improvement is available "
"with format_version>=5.",
whole_bits_per_key_, adjective);
}
return new LegacyBloomBitsBuilder(whole_bits_per_key_,
context.info_log);
case kStandard128Ribbon:
return new Standard128RibbonBitsBuilder(
desired_one_in_fp_rate_, millibits_per_key_,
offm ? &aggregate_rounding_balance_ : nullptr, cache_res_mgr,
context.table_options.detect_filter_construct_corruption,
context.info_log);
}
// Creates a builder for the legacy Bloom filter implementation.
//
// When the policy is configured with 14 or more whole bits per key and a
// logger is available, a warning recommending format_version>=5 is logged.
// The warned_ flag keeps that warning to once per policy instance.
FilterBitsBuilder* BloomLikeFilterPolicy::GetLegacyBloomBuilderWithContext(
    const FilterBuildingContext& context) const {
  const bool should_warn = whole_bits_per_key_ >= 14 && context.info_log &&
                           !warned_.load(std::memory_order_relaxed);
  if (should_warn) {
    warned_ = true;
    // Wording escalates once the setting reaches 20 bits/key.
    const char* adjective =
        (whole_bits_per_key_ >= 20) ? "Dramatic" : "Significant";
    // For more details, see
    // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
    ROCKS_LOG_WARN(context.info_log,
                   "Using legacy Bloom filter with high (%d) bits/key. "
                   "%s filter space and/or accuracy improvement is available "
                   "with format_version>=5.",
                   whole_bits_per_key_, adjective);
  }
  return new LegacyBloomBitsBuilder(whole_bits_per_key_, context.info_log);
}
// Creates a builder for the Standard128 Ribbon filter implementation, wiring
// in the table options that affect construction: optional memory-optimized
// rounding, optional cache reservation for construction memory, and
// construction corruption detection.
FilterBitsBuilder*
BloomLikeFilterPolicy::GetStandard128RibbonBuilderWithContext(
    const FilterBuildingContext& context) const {
  // FIXME: code duplication with GetFastLocalBloomBuilderWithContext
  const auto& opts = context.table_options;

  // Only reserve construction memory when requested and a block cache exists
  // to charge it against; otherwise the manager stays null.
  std::shared_ptr<CacheReservationManager> cache_res_mgr;
  if (opts.reserve_table_builder_memory && opts.block_cache) {
    cache_res_mgr =
        std::make_shared<CacheReservationManager>(opts.block_cache);
  }

  // The shared rounding balance is only handed over when
  // optimize_filters_for_memory is enabled.
  return new Standard128RibbonBitsBuilder(
      desired_one_in_fp_rate_, millibits_per_key_,
      opts.optimize_filters_for_memory ? &aggregate_rounding_balance_
                                       : nullptr,
      cache_res_mgr, opts.detect_filter_construct_corruption,
      context.info_log);
}
// Formats the configured millibits per key as an id suffix of the form
// ":<whole>[.<fraction>]". The fraction has at most three digits and
// trailing zeros are dropped, e.g. 4000 -> ":4", 1234 -> ":1.234",
// 1050 -> ":1.05".
//
// The function always returns the formatted string; it must not fall into an
// assert(false) / `return nullptr` path, since building a std::string from
// nullptr is undefined behavior.
std::string BloomLikeFilterPolicy::GetBitsPerKeySuffix() const {
  std::string rv = ":" + ROCKSDB_NAMESPACE::ToString(millibits_per_key_ / 1000);
  int frac = millibits_per_key_ % 1000;
  if (frac > 0) {
    rv.push_back('.');
    // Hundreds digit is always emitted once there is any fraction.
    rv.push_back(static_cast<char>('0' + (frac / 100)));
    frac %= 100;
    if (frac > 0) {
      rv.push_back(static_cast<char>('0' + (frac / 10)));
      frac %= 10;
      if (frac > 0) {
        rv.push_back(static_cast<char>('0' + frac));
      }
    }
  }
  return rv;
}
FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext(
FilterBitsBuilder* BuiltinFilterPolicy::GetBuilderFromContext(
const FilterBuildingContext& context) {
if (context.table_options.filter_policy) {
return context.table_options.filter_policy->GetBuilderWithContext(context);
@ -1504,6 +1540,62 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext(
}
}
// For testing only, but always constructable with internal names
namespace test {

// ---- LegacyBloomFilterPolicy: always uses the legacy Bloom builder ----

const char* LegacyBloomFilterPolicy::kName() {
  return "rocksdb.internal.LegacyBloomFilter";
}

// Internal name followed by the ":<bits per key>" suffix.
std::string LegacyBloomFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  return id;
}

FilterBitsBuilder* LegacyBloomFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext& context) const {
  // Zero millibits per key is the "no filter" special case.
  const bool no_filter = (GetMillibitsPerKey() == 0);
  return no_filter ? nullptr : GetLegacyBloomBuilderWithContext(context);
}

// ---- FastLocalBloomFilterPolicy: always uses the fast local Bloom builder

const char* FastLocalBloomFilterPolicy::kName() {
  return "rocksdb.internal.FastLocalBloomFilter";
}

// Internal name followed by the ":<bits per key>" suffix.
std::string FastLocalBloomFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  return id;
}

FilterBitsBuilder* FastLocalBloomFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext& context) const {
  // Zero millibits per key is the "no filter" special case.
  const bool no_filter = (GetMillibitsPerKey() == 0);
  return no_filter ? nullptr : GetFastLocalBloomBuilderWithContext(context);
}

// ---- Standard128RibbonFilterPolicy: always uses the Ribbon builder ----

const char* Standard128RibbonFilterPolicy::kName() {
  return "rocksdb.internal.Standard128RibbonFilter";
}

// Internal name followed by the ":<bits per key>" suffix.
std::string Standard128RibbonFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  return id;
}

FilterBitsBuilder* Standard128RibbonFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext& context) const {
  // Zero millibits per key is the "no filter" special case.
  const bool no_filter = (GetMillibitsPerKey() == 0);
  return no_filter ? nullptr : GetStandard128RibbonBuilderWithContext(context);
}

}  // namespace test
BuiltinFilterBitsReader* BuiltinFilterPolicy::GetBuiltinFilterBitsReader(
const Slice& contents) {
uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
@ -1679,70 +1771,58 @@ const FilterPolicy* NewBloomFilterPolicy(double bits_per_key,
bool /*use_block_based_builder*/) {
// NOTE: use_block_based_builder now ignored so block-based filter is no
// longer accessible in public API.
BloomFilterPolicy::Mode m = BloomFilterPolicy::kAutoBloom;
assert(std::find(BloomFilterPolicy::kAllUserModes.begin(),
BloomFilterPolicy::kAllUserModes.end(),
m) != BloomFilterPolicy::kAllUserModes.end());
return new BloomFilterPolicy(bits_per_key, m);
return new BloomFilterPolicy(bits_per_key);
}
// Chooses between two filter policies based on LSM level, but
// only for Level and Universal compaction styles. Flush is treated
// as level -1. Policy b is considered fallback / primary policy.
LevelThresholdFilterPolicy::LevelThresholdFilterPolicy(
std::unique_ptr<const FilterPolicy>&& a,
std::unique_ptr<const FilterPolicy>&& b, int starting_level_for_b)
: policy_a_(std::move(a)),
policy_b_(std::move(b)),
starting_level_for_b_(starting_level_for_b) {
// Don't use this wrapper class if you were going to set to -1
assert(starting_level_for_b_ >= 0);
}
// Constructs the Ribbon policy. bloom_equivalent_bits_per_key is forwarded
// to the BloomLikeFilterPolicy base constructor, and bloom_before_level is
// stored for use when choosing a builder and when producing GetId().
RibbonFilterPolicy::RibbonFilterPolicy(double bloom_equivalent_bits_per_key,
int bloom_before_level)
: BloomLikeFilterPolicy(bloom_equivalent_bits_per_key),
bloom_before_level_(bloom_before_level) {}
FilterBitsBuilder* LevelThresholdFilterPolicy::GetBuilderWithContext(
// Chooses between the fast local Bloom builder and the Standard128 Ribbon
// builder based on an effective LSM level ("levelish") for the new table:
//   * Level / Universal compaction: a flush counts as level -1, an unknown
//     level (level_at_creation == -1) counts as bottommost, and otherwise
//     level_at_creation is used directly.
//   * FIFO / None compaction: always counts as bottommost.
// Bottommost is represented by INT_MAX. Bloom is used when levelish is below
// bloom_before_level_; Ribbon is used otherwise.
//
// All state comes from this object (bloom_before_level_ and the shared
// builder helpers); there are no wrapped sub-policies.
FilterBitsBuilder* RibbonFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext& context) const {
  // Treat unknown same as bottommost
  int levelish = INT_MAX;

  switch (context.compaction_style) {
    case kCompactionStyleLevel:
    case kCompactionStyleUniversal: {
      if (context.reason == TableFileCreationReason::kFlush) {
        // Treat flush as level -1
        assert(context.level_at_creation == 0);
        levelish = -1;
      } else if (context.level_at_creation == -1) {
        // Unknown level: keep the bottommost default
        assert(levelish == INT_MAX);
      } else {
        levelish = context.level_at_creation;
      }
      break;
    }
    case kCompactionStyleFIFO:
    case kCompactionStyleNone:
      // Treat as bottommost
      assert(levelish == INT_MAX);
      break;
  }

  if (levelish < bloom_before_level_) {
    return GetFastLocalBloomBuilderWithContext(context);
  } else {
    return GetStandard128RibbonBuilderWithContext(context);
  }
}
// Configuration-string name of the user-facing Ribbon filter policy; it is
// the prefix of the string returned by GetId().
const char* RibbonFilterPolicy::kName() { return "ribbonfilter"; }
// Builds the id string "ribbonfilter:<bits per key>:<bloom_before_level>".
std::string RibbonFilterPolicy::GetId() const {
  std::string id(kName());
  id += GetBitsPerKeySuffix();
  id += ":";
  id += ROCKSDB_NAMESPACE::ToString(bloom_before_level_);
  return id;
}
// Creates a Ribbon filter policy.
//
// bloom_equivalent_bits_per_key: bits/key setting passed to the policy.
// bloom_before_level: passed through unchanged; RibbonFilterPolicy itself
//   decides per table whether to build a Bloom or a Ribbon filter, so no
//   wrapper policy is needed here.
//
// Returns a newly allocated policy as a raw pointer (callers in this file
// hand it to a shared_ptr via reset()).
const FilterPolicy* NewRibbonFilterPolicy(double bloom_equivalent_bits_per_key,
                                          int bloom_before_level) {
  return new RibbonFilterPolicy(bloom_equivalent_bits_per_key,
                                bloom_before_level);
}
FilterBuildingContext::FilterBuildingContext(
@ -1751,55 +1831,84 @@ FilterBuildingContext::FilterBuildingContext(
FilterPolicy::~FilterPolicy() { }
std::shared_ptr<const FilterPolicy> BloomLikeFilterPolicy::Create(
const std::string& name, double bits_per_key) {
if (name == test::LegacyBloomFilterPolicy::kName()) {
return std::make_shared<test::LegacyBloomFilterPolicy>(bits_per_key);
} else if (name == test::FastLocalBloomFilterPolicy::kName()) {
return std::make_shared<test::FastLocalBloomFilterPolicy>(bits_per_key);
} else if (name == test::Standard128RibbonFilterPolicy::kName()) {
return std::make_shared<test::Standard128RibbonFilterPolicy>(bits_per_key);
} else if (name == DeprecatedBlockBasedBloomFilterPolicy::kName()) {
return std::make_shared<DeprecatedBlockBasedBloomFilterPolicy>(
bits_per_key);
} else if (name == BloomFilterPolicy::kName()) {
// For testing
return std::make_shared<BloomFilterPolicy>(bits_per_key);
} else if (name == RibbonFilterPolicy::kName()) {
// For testing
return std::make_shared<RibbonFilterPolicy>(bits_per_key,
/*bloom_before_level*/ 0);
} else {
return nullptr;
}
}
// Creates a filter policy from a configuration string.
//
// Accepted values:
//   * kNullptrString                  -> *policy is reset to null, OK.
//   * "rocksdb.BuiltinBloomFilter"    -> read-only built-in policy, OK.
//   * "<name>:<bits_per_key>[:<arg>]" (non-LITE builds only):
//       - "bloomfilter": optional third field is the obsolete
//         use_block_based_builder boolean, parsed and passed through to
//         NewBloomFilterPolicy.
//       - "ribbonfilter": optional third field is bloom_before_level
//         (defaults to 0 when absent).
//       - anything else is looked up via BloomLikeFilterPolicy::Create.
//
// Returns NotFound when the string has fewer than two ':'-separated fields
// or the name is not recognized, and NotSupported in LITE builds for any
// value other than the two special cases above.
Status FilterPolicy::CreateFromString(
    const ConfigOptions& /*options*/, const std::string& value,
    std::shared_ptr<const FilterPolicy>* policy) {
  if (value == kNullptrString) {
    policy->reset();
    return Status::OK();
  } else if (value == "rocksdb.BuiltinBloomFilter") {
    // BuiltinFilterPolicy is abstract (GetId is pure virtual); the concrete
    // read-only subclass is the one to instantiate for this legacy name.
    *policy = std::make_shared<ReadOnlyBuiltinFilterPolicy>();
    return Status::OK();
  }
#ifndef ROCKSDB_LITE
  const std::vector<std::string> vals = StringSplit(value, ':');
  if (vals.size() < 2) {
    return Status::NotFound("Invalid filter policy name ", value);
  }
  const std::string& name = vals[0];
  double bits_per_key = ParseDouble(trim(vals[1]));
  if (name == BloomFilterPolicy::kName()) {
    bool use_block_based_builder = false;
    if (vals.size() > 2) {
      use_block_based_builder =
          ParseBoolean("use_block_based_builder", trim(vals[2]));
    }
    policy->reset(NewBloomFilterPolicy(bits_per_key, use_block_based_builder));
  } else if (name == RibbonFilterPolicy::kName()) {
    int bloom_before_level;
    if (vals.size() < 3) {
      bloom_before_level = 0;
    } else {
      bloom_before_level = ParseInt(trim(vals[2]));
    }
    policy->reset(NewRibbonFilterPolicy(/*bloom_equivalent*/ bits_per_key,
                                        bloom_before_level));
  } else {
    // Internal / fixed-implementation names; yields null if unrecognized.
    *policy = BloomLikeFilterPolicy::Create(name, bits_per_key);
  }
  if (*policy) {
    return Status::OK();
  } else {
    return Status::NotFound("Invalid filter policy name ", value);
  }
#else
  return Status::NotSupported("Cannot load filter policy in LITE mode ", value);
#endif  // ROCKSDB_LITE
}
// Returns the configuration names of all fixed-implementation policies
// (legacy Bloom, deprecated block-based Bloom, fast local Bloom, and
// Standard128 Ribbon), in that order. The list lives in a function-local
// static declared through STATIC_AVOID_DESTRUCTION and is returned by
// reference.
// NOTE(review): presumably the macro keeps the vector from being destroyed
// at process exit, as its name suggests; confirm against port/lang.h.
const std::vector<std::string>& BloomLikeFilterPolicy::GetAllFixedImpls() {
STATIC_AVOID_DESTRUCTION(std::vector<std::string>, impls){
// Match filter_bench -impl=x ordering
test::LegacyBloomFilterPolicy::kName(),
DeprecatedBlockBasedBloomFilterPolicy::kName(),
test::FastLocalBloomFilterPolicy::kName(),
test::Standard128RibbonFilterPolicy::kName(),
};
return impls;
}
} // namespace ROCKSDB_NAMESPACE

@ -46,31 +46,41 @@ class BuiltinFilterBitsReader : public FilterBitsReader {
virtual bool HashMayMatch(const uint64_t /* h */) { return true; }
};
// Base class for RocksDB built-in filter policies. This can read all
// kinds of built-in filters (for backward compatibility with old
// OPTIONS files) but does not build filters, so new SST files generated
// under the policy will get no filters (like nullptr FilterPolicy).
// This class is considered internal API and subject to change.
// Base class for RocksDB built-in filter policies. This provides the
// ability to read all kinds of built-in filters (so that old filters can
// be used even when you change between built-in policies).
class BuiltinFilterPolicy : public FilterPolicy {
public:
static BuiltinFilterBitsReader* GetBuiltinFilterBitsReader(
const Slice& contents);
public: // overrides
// Shared name because any built-in policy can read filters from
// any other
// FIXME when making filter policies Configurable. For now, this
// is still rocksdb.BuiltinBloomFilter
const char* Name() const override;
// Convert to a string understood by FilterPolicy::CreateFromString
virtual std::string GetId() const = 0;
// Read metadata to determine what kind of FilterBitsReader is needed
// and return a new one. This must successfully process any filter data
// generated by a built-in FilterBitsBuilder, regardless of the impl
// chosen for this BloomFilterPolicy. Not compatible with CreateFilter.
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;
// Does not write filters.
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const override {
return nullptr;
}
public: // new
// An internal function for the implementation of
// BuiltinFilterBitsReader::GetFilterBitsReader without requiring an instance
// or working around potential virtual overrides.
static BuiltinFilterBitsReader* GetBuiltinFilterBitsReader(
const Slice& contents);
// Returns a new FilterBitsBuilder from the filter_policy in
// table_options of a context, or nullptr if not applicable.
// (An internal convenience function to save boilerplate.)
static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&);
protected:
// Deprecated block-based filter only (no longer in public API)
bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const;
private:
// For Bloom filter implementation(s) (except deprecated block-based filter)
@ -80,85 +90,58 @@ class BuiltinFilterPolicy : public FilterPolicy {
static BuiltinFilterBitsReader* GetRibbonBitsReader(const Slice& contents);
};
// RocksDB built-in filter policy for Bloom or Bloom-like filters including
// Ribbon filters.
// A "read only" filter policy used for backward compatibility with old
// OPTIONS files, which did not specify a Bloom configuration, just
// "rocksdb.BuiltinBloomFilter". Although this can read existing filters,
// this policy does not build new filters, so new SST files generated
// under the policy will get no filters (like nullptr FilterPolicy).
// This class is considered internal API and subject to change.
// See NewBloomFilterPolicy and NewRibbonFilterPolicy.
class BloomFilterPolicy : public BuiltinFilterPolicy {
class ReadOnlyBuiltinFilterPolicy : public BuiltinFilterPolicy {
public:
// An internal marker for operating modes of BloomFilterPolicy, in terms
// of selecting an implementation. This makes it easier for tests to track
// or to walk over the built-in set of Bloom filter implementations. The
// only variance in BloomFilterPolicy by mode/implementation is in
// GetFilterBitsBuilder(), so an enum is practical here vs. subclasses.
//
// This enum is essentially the union of all the different kinds of return
// value from GetFilterBitsBuilder, or "underlying implementation", and
// higher-level modes that choose an underlying implementation based on
// context information.
enum Mode {
// Legacy implementation of Bloom filter for full and partitioned filters.
// Set to 0 in case of value confusion with bool use_block_based_builder
// NOTE: TESTING ONLY as this mode does not use best compatible
// implementation
kLegacyBloom = 0,
// Deprecated block-based Bloom filter implementation.
// Set to 1 in case of value confusion with bool use_block_based_builder
// NOTE: DEPRECATED but user exposed
kDeprecatedBlock = 1,
// A fast, cache-local Bloom filter implementation. See description in
// FastLocalBloomImpl.
// NOTE: TESTING ONLY as this mode does not check format_version
kFastLocalBloom = 2,
// A Bloom alternative saving about 30% space for ~3-4x construction
// CPU time. See ribbon_alg.h and ribbon_impl.h.
kStandard128Ribbon = 3,
// Automatically choose between kLegacyBloom and kFastLocalBloom based on
// context at build time, including compatibility with format_version.
kAutoBloom = 100,
};
// All the different underlying implementations that a BloomFilterPolicy
// might use, as a mode that says "always use this implementation."
// Only appropriate for unit tests.
static const std::vector<Mode> kAllFixedImpls;
// All the different modes of BloomFilterPolicy that are exposed from
// user APIs. Only appropriate for higher-level unit tests. Integration
// tests should prefer using NewBloomFilterPolicy (user-exposed).
static const std::vector<Mode> kAllUserModes;
explicit BloomFilterPolicy(double bits_per_key, Mode mode);
~BloomFilterPolicy() override;
// For Deprecated block-based filter (no longer customizable in public API)
static void CreateFilter(const Slice* keys, int n, int bits_per_key,
std::string* dst);
static bool KeyMayMatch(const Slice& key, const Slice& bloom_filter);
// Convert to a string understood by FilterPolicy::CreateFromString
virtual std::string GetId() const override { return Name(); }
// To use this function, call GetBuilderFromContext().
//
// Neither the context nor any objects therein should be saved beyond
// the call to this function, unless it's shared_ptr.
// Does not write filters.
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const override;
const FilterBuildingContext&) const override {
return nullptr;
}
};
// Internal contract: for kDeprecatedBlock, GetBuilderWithContext returns
// a new fake builder that encodes bits per key into a special value from
// EstimateEntriesAdded(), using kSecretBitsPerKeyStart + bits_per_key
static constexpr size_t kSecretBitsPerKeyStart = 1234567890U;
// RocksDB built-in filter policy for Bloom or Bloom-like filters including
// Ribbon filters.
// This class is considered internal API and subject to change.
// See NewBloomFilterPolicy and NewRibbonFilterPolicy.
class BloomLikeFilterPolicy : public BuiltinFilterPolicy {
public:
explicit BloomLikeFilterPolicy(double bits_per_key);
// Returns a new FilterBitsBuilder from the filter_policy in
// table_options of a context, or nullptr if not applicable.
// (An internal convenience function to save boilerplate.)
static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&);
~BloomLikeFilterPolicy() override;
// Essentially for testing only: configured millibits/key
int GetMillibitsPerKey() const { return millibits_per_key_; }
// Essentially for testing only: legacy whole bits/key
int GetWholeBitsPerKey() const { return whole_bits_per_key_; }
// Testing only
Mode GetMode() const { return mode_; }
// All the different underlying implementations that a BloomLikeFilterPolicy
// might use, as a configuration string name for a testing mode for
// "always use this implementation." Only appropriate for unit tests.
static const std::vector<std::string>& GetAllFixedImpls();
// Convenience function for creating by name for fixed impls
static std::shared_ptr<const FilterPolicy> Create(const std::string& name,
double bits_per_key);
protected:
// Some implementations used by aggregating policies
FilterBitsBuilder* GetLegacyBloomBuilderWithContext(
const FilterBuildingContext& context) const;
FilterBitsBuilder* GetFastLocalBloomBuilderWithContext(
const FilterBuildingContext& context) const;
FilterBitsBuilder* GetStandard128RibbonBuilderWithContext(
const FilterBuildingContext& context) const;
std::string GetBitsPerKeySuffix() const;
private:
// Bits per key settings are for configuring Bloom filters.
@ -177,10 +160,6 @@ class BloomFilterPolicy : public BuiltinFilterPolicy {
// example, 100 -> 1% fp rate.
double desired_one_in_fp_rate_;
// Selected mode (a specific implementation or way of selecting an
// implementation) for building new SST filters.
Mode mode_;
// Whether relevant warnings have been logged already. (Remember so we
// only report once per BloomFilterPolicy instance, to keep the noise down.)
mutable std::atomic<bool> warned_;
@ -196,28 +175,111 @@ class BloomFilterPolicy : public BuiltinFilterPolicy {
mutable std::atomic<int64_t> aggregate_rounding_balance_;
};
// Chooses between two filter policies based on LSM level, but
// only for Level and Universal compaction styles. Flush is treated
// as level -1. Policy b is considered fallback / primary policy.
class LevelThresholdFilterPolicy : public BuiltinFilterPolicy {
// For NewBloomFilterPolicy
//
// This is a user-facing policy that automatically chooses between
// LegacyBloom and FastLocalBloom based on context at build time,
// including compatibility with format_version.
class BloomFilterPolicy : public BloomLikeFilterPolicy {
public:
LevelThresholdFilterPolicy(std::unique_ptr<const FilterPolicy>&& a,
std::unique_ptr<const FilterPolicy>&& b,
int starting_level_for_b);
explicit BloomFilterPolicy(double bits_per_key);
// To use this function, call BuiltinFilterPolicy::GetBuilderFromContext().
//
// Neither the context nor any objects therein should be saved beyond
// the call to this function, unless it's shared_ptr.
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext& context) const override;
const FilterBuildingContext&) const override;
static const char* kName();
std::string GetId() const override;
};
inline int TEST_GetStartingLevelForB() const { return starting_level_for_b_; }
// For NewRibbonFilterPolicy
//
// This is a user-facing policy that chooses between Standard128Ribbon
// and FastLocalBloom based on context at build time (LSM level and other
// factors in extreme cases).
class RibbonFilterPolicy : public BloomLikeFilterPolicy {
public:
explicit RibbonFilterPolicy(double bloom_equivalent_bits_per_key,
int bloom_before_level);
inline const FilterPolicy* TEST_GetPolicyA() const { return policy_a_.get(); }
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const override;
inline const FilterPolicy* TEST_GetPolicyB() const { return policy_b_.get(); }
int GetBloomBeforeLevel() const { return bloom_before_level_; }
static const char* kName();
std::string GetId() const override;
private:
const std::unique_ptr<const FilterPolicy> policy_a_;
const std::unique_ptr<const FilterPolicy> policy_b_;
int starting_level_for_b_;
const int bloom_before_level_;
};
// Deprecated block-based filter only. We still support reading old
// block-based filters from any BuiltinFilterPolicy, but there is no public
// option to build them. However, this class is used to build them for testing
// and for a public backdoor to building them by constructing this policy from
// a string.
class DeprecatedBlockBasedBloomFilterPolicy : public BloomLikeFilterPolicy {
 public:
  // Starting value of the special range in which the fake builder reports
  // its bits-per-key setting (see GetBuilderWithContext below).
  static constexpr size_t kSecretBitsPerKeyStart = 1234567890U;

  // Name string for this policy; defined out of line.
  static const char* kName();

  explicit DeprecatedBlockBasedBloomFilterPolicy(double bits_per_key);

  // Internal contract: the returned object is a new fake builder whose
  // EstimateEntriesAdded() yields a special value that encodes bits per key,
  // based on kSecretBitsPerKeyStart.
  FilterBitsBuilder* GetBuilderWithContext(
      const FilterBuildingContext&) const override;

  // Identifier string for this policy instance; defined out of line.
  std::string GetId() const override;

  // Static filter helpers. The block-based Bloom unit test calls
  // CreateFilter with an array of n key slices, a whole bits-per-key value
  // and a destination string, and then queries that string through
  // KeyMayMatch.
  static void CreateFilter(const Slice* keys, int n, int bits_per_key,
                           std::string* dst);
  static bool KeyMayMatch(const Slice& key, const Slice& bloom_filter);
};
// For testing only, but always constructable with internal names
namespace test {
// Testing-only policy (namespace test). The constructor forwards
// bits_per_key unchanged to the BloomLikeFilterPolicy base; everything else
// is defined out of line.
class LegacyBloomFilterPolicy : public BloomLikeFilterPolicy {
 public:
  // Name string for this policy (tests use it as their kLegacyBloom
  // configuration name).
  static const char* kName();

  explicit LegacyBloomFilterPolicy(double bits_per_key)
      : BloomLikeFilterPolicy(bits_per_key) {}

  // Builder factory override for this policy.
  FilterBitsBuilder* GetBuilderWithContext(
      const FilterBuildingContext& context) const override;

  // Identifier string for this policy instance.
  std::string GetId() const override;
};
// Testing-only policy (namespace test). The constructor forwards
// bits_per_key unchanged to the BloomLikeFilterPolicy base; everything else
// is defined out of line.
class FastLocalBloomFilterPolicy : public BloomLikeFilterPolicy {
 public:
  // Name string for this policy (tests use it as their kFastLocalBloom
  // configuration name).
  static const char* kName();

  explicit FastLocalBloomFilterPolicy(double bits_per_key)
      : BloomLikeFilterPolicy(bits_per_key) {}

  // Builder factory override for this policy.
  FilterBitsBuilder* GetBuilderWithContext(
      const FilterBuildingContext& context) const override;

  // Identifier string for this policy instance.
  std::string GetId() const override;
};
// Testing-only policy (namespace test). The constructor forwards its
// Bloom-equivalent bits-per-key argument unchanged to the
// BloomLikeFilterPolicy base; everything else is defined out of line.
class Standard128RibbonFilterPolicy : public BloomLikeFilterPolicy {
 public:
  // Name string for this policy (tests use it as their kStandard128Ribbon
  // configuration name).
  static const char* kName();

  explicit Standard128RibbonFilterPolicy(double bloom_equiv_bits_per_key)
      : BloomLikeFilterPolicy(bloom_equiv_bits_per_key) {}

  // Builder factory override for this policy.
  FilterBitsBuilder* GetBuilderWithContext(
      const FilterBuildingContext& context) const override;

  // Identifier string for this policy instance.
  std::string GetId() const override;
};
} // namespace test
} // namespace ROCKSDB_NAMESPACE

@ -4,6 +4,8 @@
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <memory>
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_filter_block.h"
#include "table/block_based/block_based_table_reader.h"
@ -30,10 +32,15 @@ class MockBlockBasedTableTester {
std::unique_ptr<BlockBasedTable> table_;
// Convenience overload for callers that hold a raw pointer. The pointer is
// wrapped in a std::shared_ptr, which takes ownership of it (the policy is
// deleted when the last shared owner goes away), and construction is then
// delegated to the shared_ptr overload.
explicit MockBlockBasedTableTester(const FilterPolicy* filter_policy)
    : MockBlockBasedTableTester(
          std::shared_ptr<const FilterPolicy>(filter_policy)) {}
explicit MockBlockBasedTableTester(
std::shared_ptr<const FilterPolicy> filter_policy)
: ioptions_(options_),
env_options_(options_),
icomp_(options_.comparator) {
table_options_.filter_policy.reset(filter_policy);
table_options_.filter_policy = std::move(filter_policy);
constexpr bool skip_filters = false;
constexpr bool immortal_table = false;

@ -39,6 +39,13 @@ DEFINE_int32(bits_per_key, 10, "");
namespace ROCKSDB_NAMESPACE {
namespace {
const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kName();
const std::string kFastLocalBloom = test::FastLocalBloomFilterPolicy::kName();
const std::string kStandard128Ribbon =
test::Standard128RibbonFilterPolicy::kName();
} // namespace
static const int kVerbose = 1;
static Slice Key(int i, char* buffer) {
@ -63,7 +70,7 @@ static int NextLength(int length) {
class BlockBasedBloomTest : public testing::Test {
private:
int bits_per_key_;
std::unique_ptr<const DeprecatedBlockBasedBloomFilterPolicy> policy_;
std::string filter_;
std::vector<std::string> keys_;
@ -76,9 +83,7 @@ class BlockBasedBloomTest : public testing::Test {
}
void ResetPolicy(double bits_per_key) {
bits_per_key_ =
BloomFilterPolicy(bits_per_key, BloomFilterPolicy::kDeprecatedBlock)
.GetWholeBitsPerKey();
policy_.reset(new DeprecatedBlockBasedBloomFilterPolicy(bits_per_key));
Reset();
}
@ -94,9 +99,9 @@ class BlockBasedBloomTest : public testing::Test {
key_slices.push_back(Slice(keys_[i]));
}
filter_.clear();
BloomFilterPolicy::CreateFilter(key_slices.data(),
static_cast<int>(key_slices.size()),
bits_per_key_, &filter_);
DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(
&key_slices[0], static_cast<int>(key_slices.size()),
policy_->GetWholeBitsPerKey(), &filter_);
keys_.clear();
if (kVerbose >= 2) DumpFilter();
}
@ -122,7 +127,7 @@ class BlockBasedBloomTest : public testing::Test {
if (!keys_.empty()) {
Build();
}
return BloomFilterPolicy::KeyMayMatch(s, filter_);
return DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(s, filter_);
}
double FalsePositiveRate() {
@ -264,7 +269,7 @@ TEST_F(BlockBasedBloomTest, Schema) {
// Different bits-per-byte
class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
class FullBloomTest : public testing::TestWithParam<std::string> {
protected:
BlockBasedTableOptions table_options_;
@ -285,9 +290,9 @@ class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
return dynamic_cast<BuiltinFilterBitsBuilder*>(bits_builder_.get());
}
const BloomFilterPolicy* GetBloomFilterPolicy() {
const BloomLikeFilterPolicy* GetBloomLikeFilterPolicy() {
// Throws on bad cast
return &dynamic_cast<const BloomFilterPolicy&>(*policy_);
return &dynamic_cast<const BloomLikeFilterPolicy&>(*policy_);
}
void Reset() {
@ -299,7 +304,7 @@ class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
}
void ResetPolicy(double bits_per_key) {
policy_.reset(new BloomFilterPolicy(bits_per_key, GetParam()));
policy_ = BloomLikeFilterPolicy::Create(GetParam(), bits_per_key);
Reset();
}
@ -420,7 +425,7 @@ TEST_P(FullBloomTest, FilterSize) {
{INFINITY, 100000},
{NAN, 100000}}) {
ResetPolicy(bpk.first);
auto bfp = GetBloomFilterPolicy();
auto bfp = GetBloomLikeFilterPolicy();
EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
@ -433,7 +438,7 @@ TEST_P(FullBloomTest, FilterSize) {
computed -= 0.5;
some_computed_less_than_denoted |= (computed < bpk.first);
ResetPolicy(computed);
bfp = GetBloomFilterPolicy();
bfp = GetBloomLikeFilterPolicy();
EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
@ -451,7 +456,7 @@ TEST_P(FullBloomTest, FilterSize) {
size_t n2 = bits_builder->ApproximateNumEntries(space);
EXPECT_GE(n2, n);
size_t space2 = bits_builder->CalculateSpace(n2);
if (n > 12000 && GetParam() == BloomFilterPolicy::kStandard128Ribbon) {
if (n > 12000 && GetParam() == kStandard128Ribbon) {
// TODO(peterd): better approximation?
EXPECT_GE(space2, space);
EXPECT_LE(space2 * 0.998, space * 1.0);
@ -568,14 +573,14 @@ TEST_P(FullBloomTest, OptimizeForMemory) {
}
int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) {
if (GetParam() == kStandard128Ribbon) {
// ~ 30% savings vs. Bloom filter
ex_min_total_size = 7 * ex_min_total_size / 10;
}
EXPECT_GE(static_cast<int64_t>(total_size), ex_min_total_size);
int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5);
if (GetParam() == BloomFilterPolicy::kLegacyBloom) {
if (GetParam() == kLegacyBloom) {
// this config can add extra cache line to make odd number
blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE;
}
@ -583,7 +588,7 @@ TEST_P(FullBloomTest, OptimizeForMemory) {
EXPECT_GE(total_mem, total_size);
// optimize_filters_for_memory not implemented with legacy Bloom
if (offm && GetParam() != BloomFilterPolicy::kLegacyBloom) {
if (offm && GetParam() != kLegacyBloom) {
// This value can include a small extra penalty for kExtraPadding
fprintf(stderr, "Internal fragmentation (optimized): %g%%\n",
(total_mem - total_size) * 100.0 / total_size);
@ -629,8 +634,8 @@ TEST(FullBloomFilterConstructionReserveMemTest,
lo.strict_capacity_limit = true;
std::shared_ptr<Cache> cache(NewLRUCache(lo));
table_options.block_cache = cache;
table_options.filter_policy.reset(new BloomFilterPolicy(
FLAGS_bits_per_key, BloomFilterPolicy::Mode::kStandard128Ribbon));
table_options.filter_policy =
BloomLikeFilterPolicy::Create(kStandard128Ribbon, FLAGS_bits_per_key);
FilterBuildingContext ctx(table_options);
std::unique_ptr<FilterBitsBuilder> filter_bits_builder(
table_options.filter_policy->GetBuilderWithContext(ctx));
@ -692,35 +697,35 @@ inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
// ability to read filters generated using other cache line sizes.
// See RawSchema.
TEST_P(FullBloomTest, Schema) {
#define EXPECT_EQ_Bloom(a, b) \
{ \
if (GetParam() != BloomFilterPolicy::kStandard128Ribbon) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_Ribbon(a, b) \
{ \
if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_FastBloom(a, b) \
{ \
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_LegacyBloom(a, b) \
{ \
if (GetParam() == BloomFilterPolicy::kLegacyBloom) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_NotLegacy(a, b) \
{ \
if (GetParam() != BloomFilterPolicy::kLegacyBloom) { \
EXPECT_EQ(a, b); \
} \
#define EXPECT_EQ_Bloom(a, b) \
{ \
if (GetParam() != kStandard128Ribbon) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_Ribbon(a, b) \
{ \
if (GetParam() == kStandard128Ribbon) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_FastBloom(a, b) \
{ \
if (GetParam() == kFastLocalBloom) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_LegacyBloom(a, b) \
{ \
if (GetParam() == kLegacyBloom) { \
EXPECT_EQ(a, b); \
} \
}
#define EXPECT_EQ_NotLegacy(a, b) \
{ \
if (GetParam() != kLegacyBloom) { \
EXPECT_EQ(a, b); \
} \
}
char buffer[sizeof(int)];
@ -1259,9 +1264,8 @@ TEST_P(FullBloomTest, CorruptFilters) {
}
INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
testing::Values(BloomFilterPolicy::kLegacyBloom,
BloomFilterPolicy::kFastLocalBloom,
BloomFilterPolicy::kStandard128Ribbon));
testing::Values(kLegacyBloom, kFastLocalBloom,
kStandard128Ribbon));
static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) {
union {

@ -31,6 +31,7 @@ int main() {
#include "util/random.h"
#include "util/stderr_logger.h"
#include "util/stop_watch.h"
#include "util/string_util.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
using GFLAGS_NAMESPACE::RegisterFlagValidator;
@ -140,6 +141,7 @@ using ROCKSDB_NAMESPACE::Arena;
using ROCKSDB_NAMESPACE::BlockContents;
using ROCKSDB_NAMESPACE::BloomFilterPolicy;
using ROCKSDB_NAMESPACE::BloomHash;
using ROCKSDB_NAMESPACE::BloomLikeFilterPolicy;
using ROCKSDB_NAMESPACE::BuiltinFilterBitsBuilder;
using ROCKSDB_NAMESPACE::CachableEntry;
using ROCKSDB_NAMESPACE::Cache;
@ -147,6 +149,7 @@ using ROCKSDB_NAMESPACE::EncodeFixed32;
using ROCKSDB_NAMESPACE::FastRange32;
using ROCKSDB_NAMESPACE::FilterBitsReader;
using ROCKSDB_NAMESPACE::FilterBuildingContext;
using ROCKSDB_NAMESPACE::FilterPolicy;
using ROCKSDB_NAMESPACE::FullFilterBlockReader;
using ROCKSDB_NAMESPACE::GetSliceHash;
using ROCKSDB_NAMESPACE::GetSliceHash64;
@ -287,6 +290,16 @@ static uint32_t DryRunHash64(Slice &s) {
return Lower32of64(GetSliceHash64(s));
}
// Returns the filter policy used by the benchmark, creating it on demand.
//
// The policy is built from the fixed implementation name found at index
// FLAGS_impl of BloomLikeFilterPolicy::GetAllFixedImpls(), configured with
// FLAGS_bits_per_key. It is stored in a function-local static, so once a
// non-null policy has been created every later call returns that instance.
const std::shared_ptr<const FilterPolicy> &GetPolicy() {
  static std::shared_ptr<const FilterPolicy> cached_policy;
  if (cached_policy) {
    return cached_policy;
  }
  // at() is bounds-checked, so an out-of-range FLAGS_impl is not silently
  // accepted.
  const auto &impl_names = BloomLikeFilterPolicy::GetAllFixedImpls();
  const auto &impl_name = impl_names.at(FLAGS_impl);
  cached_policy =
      BloomLikeFilterPolicy::Create(impl_name, FLAGS_bits_per_key);
  return cached_policy;
}
struct FilterBench : public MockBlockBasedTableTester {
std::vector<KeyMaker> kms_;
std::vector<FilterInfo> infos_;
@ -297,9 +310,7 @@ struct FilterBench : public MockBlockBasedTableTester {
StderrLogger stderr_logger_;
FilterBench()
: MockBlockBasedTableTester(new BloomFilterPolicy(
FLAGS_bits_per_key,
static_cast<BloomFilterPolicy::Mode>(FLAGS_impl))),
: MockBlockBasedTableTester(GetPolicy()),
random_(FLAGS_seed),
m_queries_(0) {
for (uint32_t i = 0; i < FLAGS_batch_size; ++i) {

Loading…
Cancel
Save