diff --git a/HISTORY.md b/HISTORY.md index 19e903d64..38ce20baa 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -52,13 +52,14 @@ cache_index_and_filter_blocks=1 or partition_filters=1). * bits_per_key >= 0.5 and < 1.0 is still rounded up to 1.0 (for 62% FP rate) + * Remove class definitions for FilterBitsBuilder and FilterBitsReader from + public API, so these can evolve more easily as implementation details. + Custom FilterPolicy can still decide what kind of built-in filter to use + under what conditions. * Also removed deprecated functions - * FilterBitsBuilder::CalculateNumEntry() * FilterPolicy::GetFilterBitsBuilder() * NewExperimentalRibbonFilterPolicy() * Remove default implementations of - * FilterBitsBuilder::EstimateEntriesAdded() - * FilterBitsBuilder::ApproximateNumEntries() * FilterPolicy::GetBuilderWithContext() * Remove default implementation of Name() from FileSystemWrapper. * Rename `SizeApproximationOptions.include_memtabtles` to `SizeApproximationOptions.include_memtables`. diff --git a/include/rocksdb/filter_policy.h b/include/rocksdb/filter_policy.h index 311974482..d8fb33641 100644 --- a/include/rocksdb/filter_policy.h +++ b/include/rocksdb/filter_policy.h @@ -38,83 +38,9 @@ class Slice; struct BlockBasedTableOptions; struct ConfigOptions; -// A class that takes a bunch of keys, then generates filter -class FilterBitsBuilder { - public: - virtual ~FilterBitsBuilder() {} - - // Add a key (or prefix) to the filter. Typically, a builder will keep - // a set of 64-bit key hashes and only build the filter in Finish - // when the final number of keys is known. Keys are added in sorted order - // and duplicated keys are possible, so typically, the builder will - // only add this key if its hash is different from the most recently - // added. - virtual void AddKey(const Slice& key) = 0; - - // Called by RocksDB before Finish to populate - // TableProperties::num_filter_entries, so should represent the - // number of unique keys (and/or prefixes) added, but does not have - // to be exact. `return 0;` may be used to conspicuously indicate "unknown". - virtual size_t EstimateEntriesAdded() = 0; - - // Generate the filter using the keys that are added - // The return value of this function would be the filter bits, - // The ownership of actual data is set to buf - virtual Slice Finish(std::unique_ptr* buf) = 0; - - // Similar to Finish(std::unique_ptr* buf), except that - // for a non-null status pointer argument, it will point to - // Status::Corruption() when there is any corruption during filter - // construction or Status::OK() otherwise. - // - // WARNING: do not use a filter resulted from a corrupted construction - virtual Slice Finish(std::unique_ptr* buf, - Status* /* status */) { - return Finish(buf); - } - - // Verify the filter returned from calling FilterBitsBuilder::Finish. - // The function returns Status::Corruption() if there is any corruption in the - // constructed filter or Status::OK() otherwise. - // - // Implementations should normally consult - // FilterBuildingContext::table_options.detect_filter_construct_corruption - // to determine whether to perform verification or to skip by returning - // Status::OK(). The decision is left to the FilterBitsBuilder so that - // verification prerequisites before PostVerify can be skipped when not - // configured. - // - // RocksDB internal will always call MaybePostVerify() on the filter after - // it is returned from calling FilterBitsBuilder::Finish - // except for FilterBitsBuilder::Finish resulting a corruption - // status, which indicates the filter is already in a corrupted state and - // there is no need to post-verify - virtual Status MaybePostVerify(const Slice& /* filter_content */) { - return Status::OK(); - } - - // Approximate the number of keys that can be added and generate a filter - // <= the specified number of bytes. Callers (including RocksDB) should - // only use this result for optimizing performance and not as a guarantee. - virtual size_t ApproximateNumEntries(size_t bytes) = 0; -}; - -// A class that checks if a key can be in filter -// It should be initialized by Slice generated by BitsBuilder -class FilterBitsReader { - public: - virtual ~FilterBitsReader() {} - - // Check if the entry match the bits in filter - virtual bool MayMatch(const Slice& entry) = 0; - - // Check if an array of entries match the bits in filter - virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) { - for (int i = 0; i < num_keys; ++i) { - may_match[i] = MayMatch(*keys[i]); - } - } -}; +// As of RocksDB 7.0, the details of these classes are internal +class FilterBitsBuilder; +class FilterBitsReader; // Contextual information passed to BloomFilterPolicy at filter building time. // Used in overriding FilterPolicy::GetBuilderWithContext(). References other @@ -154,20 +80,11 @@ struct FilterBuildingContext { TableFileCreationReason reason = TableFileCreationReason::kMisc; }; -// We add a new format of filter block called full filter block -// This new interface gives you more space of customization -// -// For the full filter block, you can plug in your version by implement -// the FilterBitsBuilder and FilterBitsReader -// -// There are two sets of interface in FilterPolicy -// Set 1: CreateFilter, KeyMayMatch: used for blockbased filter -// Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for -// full filter. -// Set 1 MUST be implemented correctly, Set 2 is optional -// RocksDB would first try using functions in Set 2. if they return nullptr, -// it would use Set 1 instead. -// You can choose filter type in NewBloomFilterPolicy +// Determines what kind of filter (if any) to generate in SST files, and under +// which conditions. API users can create custom filter policies that +// defer to other built-in policies (see NewBloomFilterPolicy and +// NewRibbonFilterPolicy) based on the context provided to +// GetBuilderWithContext. class FilterPolicy { public: virtual ~FilterPolicy(); @@ -177,9 +94,9 @@ class FilterPolicy { // policy string. // The value describes the FilterPolicy being created. // For BloomFilters, value may be a ":"-delimited value of the form: - // "bloomfilter:[bits_per_key]:[use_block_based_builder]", - // e.g. ""bloomfilter:4:true" - // The above string is equivalent to calling NewBloomFilterPolicy(4, true). + // "bloomfilter:[bits_per_key]", + // e.g. ""bloomfilter:4" + // The above string is equivalent to calling NewBloomFilterPolicy(4). static Status CreateFromString(const ConfigOptions& config_options, const std::string& value, std::shared_ptr* result); @@ -191,20 +108,20 @@ class FilterPolicy { virtual const char* Name() const = 0; // Return a new FilterBitsBuilder for constructing full or partitioned - // filter blocks. The configuration details can depend on the input - // FilterBuildingContext but must be serialized such that FilterBitsReader - // can operate based on the block contents without knowing a - // FilterBuildingContext. - // Change in 7.0 release: returning nullptr indicates "no filter" + // filter blocks, or return nullptr to indicate "no filter". Custom + // implementations should defer to a built-in FilterPolicy to get a + // new FilterBitsBuilder, but the FilterBuildingContext can be used + // to decide which built-in FilterPolicy to defer to. virtual FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext&) const = 0; // Return a new FilterBitsReader for full or partitioned filter blocks. // Caller retains ownership of any buffer pointed to by the input Slice. + // Custom implementation should defer to GetFilterBitsReader on any + // built-in FilterPolicy, which can read filters generated by any other + // built-in FilterPolicy. virtual FilterBitsReader* GetFilterBitsReader( - const Slice& /*contents*/) const { - return nullptr; - } + const Slice& /*contents*/) const = 0; }; // Return a new filter policy that uses a bloom filter with approximately diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h index 4db0389f6..39fc4104e 100644 --- a/table/block_based/filter_policy_internal.h +++ b/table/block_based/filter_policy_internal.h @@ -18,7 +18,84 @@ namespace ROCKSDB_NAMESPACE { -class Slice; +// A class that takes a bunch of keys, then generates filter +class FilterBitsBuilder { + public: + virtual ~FilterBitsBuilder() {} + + // Add a key (or prefix) to the filter. Typically, a builder will keep + // a set of 64-bit key hashes and only build the filter in Finish + // when the final number of keys is known. Keys are added in sorted order + // and duplicated keys are possible, so typically, the builder will + // only add this key if its hash is different from the most recently + // added. + virtual void AddKey(const Slice& key) = 0; + + // Called by RocksDB before Finish to populate + // TableProperties::num_filter_entries, so should represent the + // number of unique keys (and/or prefixes) added, but does not have + // to be exact. `return 0;` may be used to conspicuously indicate "unknown". + virtual size_t EstimateEntriesAdded() = 0; + + // Generate the filter using the keys that are added + // The return value of this function would be the filter bits, + // The ownership of actual data is set to buf + virtual Slice Finish(std::unique_ptr* buf) = 0; + + // Similar to Finish(std::unique_ptr* buf), except that + // for a non-null status pointer argument, it will point to + // Status::Corruption() when there is any corruption during filter + // construction or Status::OK() otherwise. + // + // WARNING: do not use a filter resulted from a corrupted construction + // TODO: refactor this to have a better signature, consolidate + virtual Slice Finish(std::unique_ptr* buf, + Status* /* status */) { + return Finish(buf); + } + + // Verify the filter returned from calling FilterBitsBuilder::Finish. + // The function returns Status::Corruption() if there is any corruption in the + // constructed filter or Status::OK() otherwise. + // + // Implementations should normally consult + // FilterBuildingContext::table_options.detect_filter_construct_corruption + // to determine whether to perform verification or to skip by returning + // Status::OK(). The decision is left to the FilterBitsBuilder so that + // verification prerequisites before PostVerify can be skipped when not + // configured. + // + // RocksDB internal will always call MaybePostVerify() on the filter after + // it is returned from calling FilterBitsBuilder::Finish + // except for FilterBitsBuilder::Finish resulting a corruption + // status, which indicates the filter is already in a corrupted state and + // there is no need to post-verify + virtual Status MaybePostVerify(const Slice& /* filter_content */) { + return Status::OK(); + } + + // Approximate the number of keys that can be added and generate a filter + // <= the specified number of bytes. Callers (including RocksDB) should + // only use this result for optimizing performance and not as a guarantee. + virtual size_t ApproximateNumEntries(size_t bytes) = 0; +}; + +// A class that checks if a key can be in filter +// It should be initialized by Slice generated by BitsBuilder +class FilterBitsReader { + public: + virtual ~FilterBitsReader() {} + + // Check if the entry match the bits in filter + virtual bool MayMatch(const Slice& entry) = 0; + + // Check if an array of entries match the bits in filter + virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) { + for (int i = 0; i < num_keys; ++i) { + may_match[i] = MayMatch(*keys[i]); + } + } +}; // Exposes any extra information needed for testing built-in // FilterBitsBuilders diff --git a/table/block_based/full_filter_block.h b/table/block_based/full_filter_block.h index 0068e0f4d..63d407dd5 100644 --- a/table/block_based/full_filter_block.h +++ b/table/block_based/full_filter_block.h @@ -12,11 +12,11 @@ #include #include -#include "rocksdb/filter_policy.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "table/block_based/filter_block_reader_common.h" +#include "table/block_based/filter_policy_internal.h" #include "table/block_based/parsed_full_filter_block.h" #include "util/hash.h" diff --git a/table/block_based/parsed_full_filter_block.cc b/table/block_based/parsed_full_filter_block.cc index 3e555387e..9184a48d2 100644 --- a/table/block_based/parsed_full_filter_block.cc +++ b/table/block_based/parsed_full_filter_block.cc @@ -5,7 +5,8 @@ // #include "table/block_based/parsed_full_filter_block.h" -#include "rocksdb/filter_policy.h" + +#include "table/block_based/filter_policy_internal.h" namespace ROCKSDB_NAMESPACE {