Hide FilterBits{Builder,Reader} from public API (#9592)

Summary:
We don't have any evidence of people using these to build custom
filters. The recommended way of customizing filter handling is to
defer to various built-in policies based on FilterBuildingContext
(e.g. to build Monkey filtering policy). With old API, we have
evidence of people modifying keys going into filter, but most cases
of that can be handled with prefix_extractor.

Having FilterBitsBuilder+Reader in the public API is an ogoing
hinderance to code evolution (e.g. recent new Finish and
MaybePostVerify), and so this change removes them from the public API
for 7.0. Maybe they will come back in some form later, but lacking
evidence of them providing value in the public API, we want to take back
more freedom to evolve these.

With this moved to internal-only, there is no rush to clean up the
complex Finish signatures, or add memory allocator support, but doing so
is much easier with them out of public API, for example to use
CacheAllocationPtr without exposing it in the public API.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9592

Test Plan: cosmetic changes only

Reviewed By: hx235

Differential Revision: D34315470

Pulled By: pdillinger

fbshipit-source-id: 03e03bb66a72c73df2c464d2dbbbae906dd8f99b
main
Peter Dillinger 2 years ago committed by Facebook GitHub Bot
parent 627deb7ceb
commit 725833a424
  1. 7
      HISTORY.md
  2. 121
      include/rocksdb/filter_policy.h
  3. 79
      table/block_based/filter_policy_internal.h
  4. 2
      table/block_based/full_filter_block.h
  5. 3
      table/block_based/parsed_full_filter_block.cc

@ -52,13 +52,14 @@
cache_index_and_filter_blocks=1 or partition_filters=1).
* bits_per_key >= 0.5 and < 1.0 is still rounded up to 1.0 (for 62% FP
rate)
* Remove class definitions for FilterBitsBuilder and FilterBitsReader from
public API, so these can evolve more easily as implementation details.
Custom FilterPolicy can still decide what kind of built-in filter to use
under what conditions.
* Also removed deprecated functions
* FilterBitsBuilder::CalculateNumEntry()
* FilterPolicy::GetFilterBitsBuilder()
* NewExperimentalRibbonFilterPolicy()
* Remove default implementations of
* FilterBitsBuilder::EstimateEntriesAdded()
* FilterBitsBuilder::ApproximateNumEntries()
* FilterPolicy::GetBuilderWithContext()
* Remove default implementation of Name() from FileSystemWrapper.
* Rename `SizeApproximationOptions.include_memtabtles` to `SizeApproximationOptions.include_memtables`.

@ -38,83 +38,9 @@ class Slice;
struct BlockBasedTableOptions;
struct ConfigOptions;
// A class that takes a bunch of keys, then generates filter
class FilterBitsBuilder {
public:
virtual ~FilterBitsBuilder() {}
// Add a key (or prefix) to the filter. Typically, a builder will keep
// a set of 64-bit key hashes and only build the filter in Finish
// when the final number of keys is known. Keys are added in sorted order
// and duplicated keys are possible, so typically, the builder will
// only add this key if its hash is different from the most recently
// added.
virtual void AddKey(const Slice& key) = 0;
// Called by RocksDB before Finish to populate
// TableProperties::num_filter_entries, so should represent the
// number of unique keys (and/or prefixes) added, but does not have
// to be exact. `return 0;` may be used to conspicuously indicate "unknown".
virtual size_t EstimateEntriesAdded() = 0;
// Generate the filter using the keys that are added
// The return value of this function would be the filter bits,
// The ownership of actual data is set to buf
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
// Similar to Finish(std::unique_ptr<const char[]>* buf), except that
// for a non-null status pointer argument, it will point to
// Status::Corruption() when there is any corruption during filter
// construction or Status::OK() otherwise.
//
// WARNING: do not use a filter resulted from a corrupted construction
virtual Slice Finish(std::unique_ptr<const char[]>* buf,
Status* /* status */) {
return Finish(buf);
}
// Verify the filter returned from calling FilterBitsBuilder::Finish.
// The function returns Status::Corruption() if there is any corruption in the
// constructed filter or Status::OK() otherwise.
//
// Implementations should normally consult
// FilterBuildingContext::table_options.detect_filter_construct_corruption
// to determine whether to perform verification or to skip by returning
// Status::OK(). The decision is left to the FilterBitsBuilder so that
// verification prerequisites before PostVerify can be skipped when not
// configured.
//
// RocksDB internal will always call MaybePostVerify() on the filter after
// it is returned from calling FilterBitsBuilder::Finish
// except for FilterBitsBuilder::Finish resulting a corruption
// status, which indicates the filter is already in a corrupted state and
// there is no need to post-verify
virtual Status MaybePostVerify(const Slice& /* filter_content */) {
return Status::OK();
}
// Approximate the number of keys that can be added and generate a filter
// <= the specified number of bytes. Callers (including RocksDB) should
// only use this result for optimizing performance and not as a guarantee.
virtual size_t ApproximateNumEntries(size_t bytes) = 0;
};
// A class that checks if a key can be in filter
// It should be initialized by Slice generated by BitsBuilder
class FilterBitsReader {
public:
virtual ~FilterBitsReader() {}
// Check if the entry match the bits in filter
virtual bool MayMatch(const Slice& entry) = 0;
// Check if an array of entries match the bits in filter
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) {
for (int i = 0; i < num_keys; ++i) {
may_match[i] = MayMatch(*keys[i]);
}
}
};
// As of RocksDB 7.0, the details of these classes are internal
class FilterBitsBuilder;
class FilterBitsReader;
// Contextual information passed to BloomFilterPolicy at filter building time.
// Used in overriding FilterPolicy::GetBuilderWithContext(). References other
@ -154,20 +80,11 @@ struct FilterBuildingContext {
TableFileCreationReason reason = TableFileCreationReason::kMisc;
};
// We add a new format of filter block called full filter block
// This new interface gives you more space of customization
//
// For the full filter block, you can plug in your version by implement
// the FilterBitsBuilder and FilterBitsReader
//
// There are two sets of interface in FilterPolicy
// Set 1: CreateFilter, KeyMayMatch: used for blockbased filter
// Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for
// full filter.
// Set 1 MUST be implemented correctly, Set 2 is optional
// RocksDB would first try using functions in Set 2. if they return nullptr,
// it would use Set 1 instead.
// You can choose filter type in NewBloomFilterPolicy
// Determines what kind of filter (if any) to generate in SST files, and under
// which conditions. API users can create custom filter policies that
// defer to other built-in policies (see NewBloomFilterPolicy and
// NewRibbonFilterPolicy) based on the context provided to
// GetBuilderWithContext.
class FilterPolicy {
public:
virtual ~FilterPolicy();
@ -177,9 +94,9 @@ class FilterPolicy {
// policy string.
// The value describes the FilterPolicy being created.
// For BloomFilters, value may be a ":"-delimited value of the form:
// "bloomfilter:[bits_per_key]:[use_block_based_builder]",
// e.g. ""bloomfilter:4:true"
// The above string is equivalent to calling NewBloomFilterPolicy(4, true).
// "bloomfilter:[bits_per_key]",
// e.g. ""bloomfilter:4"
// The above string is equivalent to calling NewBloomFilterPolicy(4).
static Status CreateFromString(const ConfigOptions& config_options,
const std::string& value,
std::shared_ptr<const FilterPolicy>* result);
@ -191,20 +108,20 @@ class FilterPolicy {
virtual const char* Name() const = 0;
// Return a new FilterBitsBuilder for constructing full or partitioned
// filter blocks. The configuration details can depend on the input
// FilterBuildingContext but must be serialized such that FilterBitsReader
// can operate based on the block contents without knowing a
// FilterBuildingContext.
// Change in 7.0 release: returning nullptr indicates "no filter"
// filter blocks, or return nullptr to indicate "no filter". Custom
// implementations should defer to a built-in FilterPolicy to get a
// new FilterBitsBuilder, but the FilterBuildingContext can be used
// to decide which built-in FilterPolicy to defer to.
virtual FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const = 0;
// Return a new FilterBitsReader for full or partitioned filter blocks.
// Caller retains ownership of any buffer pointed to by the input Slice.
// Custom implementation should defer to GetFilterBitsReader on any
// built-in FilterPolicy, which can read filters generated by any other
// built-in FilterPolicy.
virtual FilterBitsReader* GetFilterBitsReader(
const Slice& /*contents*/) const {
return nullptr;
}
const Slice& /*contents*/) const = 0;
};
// Return a new filter policy that uses a bloom filter with approximately

@ -18,7 +18,84 @@
namespace ROCKSDB_NAMESPACE {
class Slice;
// A class that takes a bunch of keys, then generates filter
class FilterBitsBuilder {
public:
virtual ~FilterBitsBuilder() {}
// Add a key (or prefix) to the filter. Typically, a builder will keep
// a set of 64-bit key hashes and only build the filter in Finish
// when the final number of keys is known. Keys are added in sorted order
// and duplicated keys are possible, so typically, the builder will
// only add this key if its hash is different from the most recently
// added.
virtual void AddKey(const Slice& key) = 0;
// Called by RocksDB before Finish to populate
// TableProperties::num_filter_entries, so should represent the
// number of unique keys (and/or prefixes) added, but does not have
// to be exact. `return 0;` may be used to conspicuously indicate "unknown".
virtual size_t EstimateEntriesAdded() = 0;
// Generate the filter using the keys that are added
// The return value of this function would be the filter bits,
// The ownership of actual data is set to buf
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
// Similar to Finish(std::unique_ptr<const char[]>* buf), except that
// for a non-null status pointer argument, it will point to
// Status::Corruption() when there is any corruption during filter
// construction or Status::OK() otherwise.
//
// WARNING: do not use a filter resulted from a corrupted construction
// TODO: refactor this to have a better signature, consolidate
virtual Slice Finish(std::unique_ptr<const char[]>* buf,
Status* /* status */) {
return Finish(buf);
}
// Verify the filter returned from calling FilterBitsBuilder::Finish.
// The function returns Status::Corruption() if there is any corruption in the
// constructed filter or Status::OK() otherwise.
//
// Implementations should normally consult
// FilterBuildingContext::table_options.detect_filter_construct_corruption
// to determine whether to perform verification or to skip by returning
// Status::OK(). The decision is left to the FilterBitsBuilder so that
// verification prerequisites before PostVerify can be skipped when not
// configured.
//
// RocksDB internal will always call MaybePostVerify() on the filter after
// it is returned from calling FilterBitsBuilder::Finish
// except for FilterBitsBuilder::Finish resulting a corruption
// status, which indicates the filter is already in a corrupted state and
// there is no need to post-verify
virtual Status MaybePostVerify(const Slice& /* filter_content */) {
return Status::OK();
}
// Approximate the number of keys that can be added and generate a filter
// <= the specified number of bytes. Callers (including RocksDB) should
// only use this result for optimizing performance and not as a guarantee.
virtual size_t ApproximateNumEntries(size_t bytes) = 0;
};
// A class that checks if a key can be in filter
// It should be initialized by Slice generated by BitsBuilder
class FilterBitsReader {
public:
virtual ~FilterBitsReader() {}
// Check if the entry match the bits in filter
virtual bool MayMatch(const Slice& entry) = 0;
// Check if an array of entries match the bits in filter
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) {
for (int i = 0; i < num_keys; ++i) {
may_match[i] = MayMatch(*keys[i]);
}
}
};
// Exposes any extra information needed for testing built-in
// FilterBitsBuilders

@ -12,11 +12,11 @@
#include <string>
#include <vector>
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "table/block_based/filter_block_reader_common.h"
#include "table/block_based/filter_policy_internal.h"
#include "table/block_based/parsed_full_filter_block.h"
#include "util/hash.h"

@ -5,7 +5,8 @@
//
#include "table/block_based/parsed_full_filter_block.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/filter_policy_internal.h"
namespace ROCKSDB_NAMESPACE {

Loading…
Cancel
Save