From 725833a424edb93e137c95cc9d8af96ceba0ea83 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Thu, 17 Feb 2022 16:33:39 -0800 Subject: [PATCH] Hide FilterBits{Builder,Reader} from public API (#9592) Summary: We don't have any evidence of people using these to build custom filters. The recommended way of customizing filter handling is to defer to various built-in policies based on FilterBuildingContext (e.g. to build Monkey filtering policy). With old API, we have evidence of people modifying keys going into filter, but most cases of that can be handled with prefix_extractor. Having FilterBitsBuilder+Reader in the public API is an ogoing hinderance to code evolution (e.g. recent new Finish and MaybePostVerify), and so this change removes them from the public API for 7.0. Maybe they will come back in some form later, but lacking evidence of them providing value in the public API, we want to take back more freedom to evolve these. With this moved to internal-only, there is no rush to clean up the complex Finish signatures, or add memory allocator support, but doing so is much easier with them out of public API, for example to use CacheAllocationPtr without exposing it in the public API. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9592 Test Plan: cosmetic changes only Reviewed By: hx235 Differential Revision: D34315470 Pulled By: pdillinger fbshipit-source-id: 03e03bb66a72c73df2c464d2dbbbae906dd8f99b --- HISTORY.md | 7 +- include/rocksdb/filter_policy.h | 121 +++--------------- table/block_based/filter_policy_internal.h | 79 +++++++++++- table/block_based/full_filter_block.h | 2 +- table/block_based/parsed_full_filter_block.cc | 3 +- 5 files changed, 104 insertions(+), 108 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 19e903d64..38ce20baa 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -52,13 +52,14 @@ cache_index_and_filter_blocks=1 or partition_filters=1). * bits_per_key >= 0.5 and < 1.0 is still rounded up to 1.0 (for 62% FP rate) + * Remove class definitions for FilterBitsBuilder and FilterBitsReader from + public API, so these can evolve more easily as implementation details. + Custom FilterPolicy can still decide what kind of built-in filter to use + under what conditions. * Also removed deprecated functions - * FilterBitsBuilder::CalculateNumEntry() * FilterPolicy::GetFilterBitsBuilder() * NewExperimentalRibbonFilterPolicy() * Remove default implementations of - * FilterBitsBuilder::EstimateEntriesAdded() - * FilterBitsBuilder::ApproximateNumEntries() * FilterPolicy::GetBuilderWithContext() * Remove default implementation of Name() from FileSystemWrapper. * Rename `SizeApproximationOptions.include_memtabtles` to `SizeApproximationOptions.include_memtables`. diff --git a/include/rocksdb/filter_policy.h b/include/rocksdb/filter_policy.h index 311974482..d8fb33641 100644 --- a/include/rocksdb/filter_policy.h +++ b/include/rocksdb/filter_policy.h @@ -38,83 +38,9 @@ class Slice; struct BlockBasedTableOptions; struct ConfigOptions; -// A class that takes a bunch of keys, then generates filter -class FilterBitsBuilder { - public: - virtual ~FilterBitsBuilder() {} - - // Add a key (or prefix) to the filter. Typically, a builder will keep - // a set of 64-bit key hashes and only build the filter in Finish - // when the final number of keys is known. Keys are added in sorted order - // and duplicated keys are possible, so typically, the builder will - // only add this key if its hash is different from the most recently - // added. - virtual void AddKey(const Slice& key) = 0; - - // Called by RocksDB before Finish to populate - // TableProperties::num_filter_entries, so should represent the - // number of unique keys (and/or prefixes) added, but does not have - // to be exact. `return 0;` may be used to conspicuously indicate "unknown". - virtual size_t EstimateEntriesAdded() = 0; - - // Generate the filter using the keys that are added - // The return value of this function would be the filter bits, - // The ownership of actual data is set to buf - virtual Slice Finish(std::unique_ptr* buf) = 0; - - // Similar to Finish(std::unique_ptr* buf), except that - // for a non-null status pointer argument, it will point to - // Status::Corruption() when there is any corruption during filter - // construction or Status::OK() otherwise. - // - // WARNING: do not use a filter resulted from a corrupted construction - virtual Slice Finish(std::unique_ptr* buf, - Status* /* status */) { - return Finish(buf); - } - - // Verify the filter returned from calling FilterBitsBuilder::Finish. - // The function returns Status::Corruption() if there is any corruption in the - // constructed filter or Status::OK() otherwise. - // - // Implementations should normally consult - // FilterBuildingContext::table_options.detect_filter_construct_corruption - // to determine whether to perform verification or to skip by returning - // Status::OK(). The decision is left to the FilterBitsBuilder so that - // verification prerequisites before PostVerify can be skipped when not - // configured. - // - // RocksDB internal will always call MaybePostVerify() on the filter after - // it is returned from calling FilterBitsBuilder::Finish - // except for FilterBitsBuilder::Finish resulting a corruption - // status, which indicates the filter is already in a corrupted state and - // there is no need to post-verify - virtual Status MaybePostVerify(const Slice& /* filter_content */) { - return Status::OK(); - } - - // Approximate the number of keys that can be added and generate a filter - // <= the specified number of bytes. Callers (including RocksDB) should - // only use this result for optimizing performance and not as a guarantee. - virtual size_t ApproximateNumEntries(size_t bytes) = 0; -}; - -// A class that checks if a key can be in filter -// It should be initialized by Slice generated by BitsBuilder -class FilterBitsReader { - public: - virtual ~FilterBitsReader() {} - - // Check if the entry match the bits in filter - virtual bool MayMatch(const Slice& entry) = 0; - - // Check if an array of entries match the bits in filter - virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) { - for (int i = 0; i < num_keys; ++i) { - may_match[i] = MayMatch(*keys[i]); - } - } -}; +// As of RocksDB 7.0, the details of these classes are internal +class FilterBitsBuilder; +class FilterBitsReader; // Contextual information passed to BloomFilterPolicy at filter building time. // Used in overriding FilterPolicy::GetBuilderWithContext(). References other @@ -154,20 +80,11 @@ struct FilterBuildingContext { TableFileCreationReason reason = TableFileCreationReason::kMisc; }; -// We add a new format of filter block called full filter block -// This new interface gives you more space of customization -// -// For the full filter block, you can plug in your version by implement -// the FilterBitsBuilder and FilterBitsReader -// -// There are two sets of interface in FilterPolicy -// Set 1: CreateFilter, KeyMayMatch: used for blockbased filter -// Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for -// full filter. -// Set 1 MUST be implemented correctly, Set 2 is optional -// RocksDB would first try using functions in Set 2. if they return nullptr, -// it would use Set 1 instead. -// You can choose filter type in NewBloomFilterPolicy +// Determines what kind of filter (if any) to generate in SST files, and under +// which conditions. API users can create custom filter policies that +// defer to other built-in policies (see NewBloomFilterPolicy and +// NewRibbonFilterPolicy) based on the context provided to +// GetBuilderWithContext. class FilterPolicy { public: virtual ~FilterPolicy(); @@ -177,9 +94,9 @@ class FilterPolicy { // policy string. // The value describes the FilterPolicy being created. // For BloomFilters, value may be a ":"-delimited value of the form: - // "bloomfilter:[bits_per_key]:[use_block_based_builder]", - // e.g. ""bloomfilter:4:true" - // The above string is equivalent to calling NewBloomFilterPolicy(4, true). + // "bloomfilter:[bits_per_key]", + // e.g. ""bloomfilter:4" + // The above string is equivalent to calling NewBloomFilterPolicy(4). static Status CreateFromString(const ConfigOptions& config_options, const std::string& value, std::shared_ptr* result); @@ -191,20 +108,20 @@ class FilterPolicy { virtual const char* Name() const = 0; // Return a new FilterBitsBuilder for constructing full or partitioned - // filter blocks. The configuration details can depend on the input - // FilterBuildingContext but must be serialized such that FilterBitsReader - // can operate based on the block contents without knowing a - // FilterBuildingContext. - // Change in 7.0 release: returning nullptr indicates "no filter" + // filter blocks, or return nullptr to indicate "no filter". Custom + // implementations should defer to a built-in FilterPolicy to get a + // new FilterBitsBuilder, but the FilterBuildingContext can be used + // to decide which built-in FilterPolicy to defer to. virtual FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext&) const = 0; // Return a new FilterBitsReader for full or partitioned filter blocks. // Caller retains ownership of any buffer pointed to by the input Slice. + // Custom implementation should defer to GetFilterBitsReader on any + // built-in FilterPolicy, which can read filters generated by any other + // built-in FilterPolicy. virtual FilterBitsReader* GetFilterBitsReader( - const Slice& /*contents*/) const { - return nullptr; - } + const Slice& /*contents*/) const = 0; }; // Return a new filter policy that uses a bloom filter with approximately diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h index 4db0389f6..39fc4104e 100644 --- a/table/block_based/filter_policy_internal.h +++ b/table/block_based/filter_policy_internal.h @@ -18,7 +18,84 @@ namespace ROCKSDB_NAMESPACE { -class Slice; +// A class that takes a bunch of keys, then generates filter +class FilterBitsBuilder { + public: + virtual ~FilterBitsBuilder() {} + + // Add a key (or prefix) to the filter. Typically, a builder will keep + // a set of 64-bit key hashes and only build the filter in Finish + // when the final number of keys is known. Keys are added in sorted order + // and duplicated keys are possible, so typically, the builder will + // only add this key if its hash is different from the most recently + // added. + virtual void AddKey(const Slice& key) = 0; + + // Called by RocksDB before Finish to populate + // TableProperties::num_filter_entries, so should represent the + // number of unique keys (and/or prefixes) added, but does not have + // to be exact. `return 0;` may be used to conspicuously indicate "unknown". + virtual size_t EstimateEntriesAdded() = 0; + + // Generate the filter using the keys that are added + // The return value of this function would be the filter bits, + // The ownership of actual data is set to buf + virtual Slice Finish(std::unique_ptr* buf) = 0; + + // Similar to Finish(std::unique_ptr* buf), except that + // for a non-null status pointer argument, it will point to + // Status::Corruption() when there is any corruption during filter + // construction or Status::OK() otherwise. + // + // WARNING: do not use a filter resulted from a corrupted construction + // TODO: refactor this to have a better signature, consolidate + virtual Slice Finish(std::unique_ptr* buf, + Status* /* status */) { + return Finish(buf); + } + + // Verify the filter returned from calling FilterBitsBuilder::Finish. + // The function returns Status::Corruption() if there is any corruption in the + // constructed filter or Status::OK() otherwise. + // + // Implementations should normally consult + // FilterBuildingContext::table_options.detect_filter_construct_corruption + // to determine whether to perform verification or to skip by returning + // Status::OK(). The decision is left to the FilterBitsBuilder so that + // verification prerequisites before PostVerify can be skipped when not + // configured. + // + // RocksDB internal will always call MaybePostVerify() on the filter after + // it is returned from calling FilterBitsBuilder::Finish + // except for FilterBitsBuilder::Finish resulting a corruption + // status, which indicates the filter is already in a corrupted state and + // there is no need to post-verify + virtual Status MaybePostVerify(const Slice& /* filter_content */) { + return Status::OK(); + } + + // Approximate the number of keys that can be added and generate a filter + // <= the specified number of bytes. Callers (including RocksDB) should + // only use this result for optimizing performance and not as a guarantee. + virtual size_t ApproximateNumEntries(size_t bytes) = 0; +}; + +// A class that checks if a key can be in filter +// It should be initialized by Slice generated by BitsBuilder +class FilterBitsReader { + public: + virtual ~FilterBitsReader() {} + + // Check if the entry match the bits in filter + virtual bool MayMatch(const Slice& entry) = 0; + + // Check if an array of entries match the bits in filter + virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) { + for (int i = 0; i < num_keys; ++i) { + may_match[i] = MayMatch(*keys[i]); + } + } +}; // Exposes any extra information needed for testing built-in // FilterBitsBuilders diff --git a/table/block_based/full_filter_block.h b/table/block_based/full_filter_block.h index 0068e0f4d..63d407dd5 100644 --- a/table/block_based/full_filter_block.h +++ b/table/block_based/full_filter_block.h @@ -12,11 +12,11 @@ #include #include -#include "rocksdb/filter_policy.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "table/block_based/filter_block_reader_common.h" +#include "table/block_based/filter_policy_internal.h" #include "table/block_based/parsed_full_filter_block.h" #include "util/hash.h" diff --git a/table/block_based/parsed_full_filter_block.cc b/table/block_based/parsed_full_filter_block.cc index 3e555387e..9184a48d2 100644 --- a/table/block_based/parsed_full_filter_block.cc +++ b/table/block_based/parsed_full_filter_block.cc @@ -5,7 +5,8 @@ // #include "table/block_based/parsed_full_filter_block.h" -#include "rocksdb/filter_policy.h" + +#include "table/block_based/filter_policy_internal.h" namespace ROCKSDB_NAMESPACE {