Hide FilterBits{Builder,Reader} from public API (#9592)

Summary:
We don't have any evidence of people using these to build custom
filters. The recommended way of customizing filter handling is to
defer to various built-in policies based on FilterBuildingContext
(e.g. to build Monkey filtering policy). With old API, we have
evidence of people modifying keys going into filter, but most cases
of that can be handled with prefix_extractor.

Having FilterBitsBuilder+Reader in the public API is an ogoing
hinderance to code evolution (e.g. recent new Finish and
MaybePostVerify), and so this change removes them from the public API
for 7.0. Maybe they will come back in some form later, but lacking
evidence of them providing value in the public API, we want to take back
more freedom to evolve these.

With this moved to internal-only, there is no rush to clean up the
complex Finish signatures, or add memory allocator support, but doing so
is much easier with them out of public API, for example to use
CacheAllocationPtr without exposing it in the public API.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9592

Test Plan: cosmetic changes only

Reviewed By: hx235

Differential Revision: D34315470

Pulled By: pdillinger

fbshipit-source-id: 03e03bb66a72c73df2c464d2dbbbae906dd8f99b
main
Peter Dillinger 3 years ago committed by Facebook GitHub Bot
parent 627deb7ceb
commit 725833a424
  1. 7
      HISTORY.md
  2. 121
      include/rocksdb/filter_policy.h
  3. 79
      table/block_based/filter_policy_internal.h
  4. 2
      table/block_based/full_filter_block.h
  5. 3
      table/block_based/parsed_full_filter_block.cc

@ -52,13 +52,14 @@
cache_index_and_filter_blocks=1 or partition_filters=1). cache_index_and_filter_blocks=1 or partition_filters=1).
* bits_per_key >= 0.5 and < 1.0 is still rounded up to 1.0 (for 62% FP * bits_per_key >= 0.5 and < 1.0 is still rounded up to 1.0 (for 62% FP
rate) rate)
* Remove class definitions for FilterBitsBuilder and FilterBitsReader from
public API, so these can evolve more easily as implementation details.
Custom FilterPolicy can still decide what kind of built-in filter to use
under what conditions.
* Also removed deprecated functions * Also removed deprecated functions
* FilterBitsBuilder::CalculateNumEntry()
* FilterPolicy::GetFilterBitsBuilder() * FilterPolicy::GetFilterBitsBuilder()
* NewExperimentalRibbonFilterPolicy() * NewExperimentalRibbonFilterPolicy()
* Remove default implementations of * Remove default implementations of
* FilterBitsBuilder::EstimateEntriesAdded()
* FilterBitsBuilder::ApproximateNumEntries()
* FilterPolicy::GetBuilderWithContext() * FilterPolicy::GetBuilderWithContext()
* Remove default implementation of Name() from FileSystemWrapper. * Remove default implementation of Name() from FileSystemWrapper.
* Rename `SizeApproximationOptions.include_memtabtles` to `SizeApproximationOptions.include_memtables`. * Rename `SizeApproximationOptions.include_memtabtles` to `SizeApproximationOptions.include_memtables`.

@ -38,83 +38,9 @@ class Slice;
struct BlockBasedTableOptions; struct BlockBasedTableOptions;
struct ConfigOptions; struct ConfigOptions;
// A class that takes a bunch of keys, then generates filter // As of RocksDB 7.0, the details of these classes are internal
class FilterBitsBuilder { class FilterBitsBuilder;
public: class FilterBitsReader;
virtual ~FilterBitsBuilder() {}
// Add a key (or prefix) to the filter. Typically, a builder will keep
// a set of 64-bit key hashes and only build the filter in Finish
// when the final number of keys is known. Keys are added in sorted order
// and duplicated keys are possible, so typically, the builder will
// only add this key if its hash is different from the most recently
// added.
virtual void AddKey(const Slice& key) = 0;
// Called by RocksDB before Finish to populate
// TableProperties::num_filter_entries, so should represent the
// number of unique keys (and/or prefixes) added, but does not have
// to be exact. `return 0;` may be used to conspicuously indicate "unknown".
virtual size_t EstimateEntriesAdded() = 0;
// Generate the filter using the keys that are added
// The return value of this function would be the filter bits,
// The ownership of actual data is set to buf
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
// Similar to Finish(std::unique_ptr<const char[]>* buf), except that
// for a non-null status pointer argument, it will point to
// Status::Corruption() when there is any corruption during filter
// construction or Status::OK() otherwise.
//
// WARNING: do not use a filter resulted from a corrupted construction
virtual Slice Finish(std::unique_ptr<const char[]>* buf,
Status* /* status */) {
return Finish(buf);
}
// Verify the filter returned from calling FilterBitsBuilder::Finish.
// The function returns Status::Corruption() if there is any corruption in the
// constructed filter or Status::OK() otherwise.
//
// Implementations should normally consult
// FilterBuildingContext::table_options.detect_filter_construct_corruption
// to determine whether to perform verification or to skip by returning
// Status::OK(). The decision is left to the FilterBitsBuilder so that
// verification prerequisites before PostVerify can be skipped when not
// configured.
//
// RocksDB internal will always call MaybePostVerify() on the filter after
// it is returned from calling FilterBitsBuilder::Finish
// except for FilterBitsBuilder::Finish resulting a corruption
// status, which indicates the filter is already in a corrupted state and
// there is no need to post-verify
virtual Status MaybePostVerify(const Slice& /* filter_content */) {
return Status::OK();
}
// Approximate the number of keys that can be added and generate a filter
// <= the specified number of bytes. Callers (including RocksDB) should
// only use this result for optimizing performance and not as a guarantee.
virtual size_t ApproximateNumEntries(size_t bytes) = 0;
};
// A class that checks if a key can be in filter
// It should be initialized by Slice generated by BitsBuilder
class FilterBitsReader {
public:
virtual ~FilterBitsReader() {}
// Check if the entry match the bits in filter
virtual bool MayMatch(const Slice& entry) = 0;
// Check if an array of entries match the bits in filter
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) {
for (int i = 0; i < num_keys; ++i) {
may_match[i] = MayMatch(*keys[i]);
}
}
};
// Contextual information passed to BloomFilterPolicy at filter building time. // Contextual information passed to BloomFilterPolicy at filter building time.
// Used in overriding FilterPolicy::GetBuilderWithContext(). References other // Used in overriding FilterPolicy::GetBuilderWithContext(). References other
@ -154,20 +80,11 @@ struct FilterBuildingContext {
TableFileCreationReason reason = TableFileCreationReason::kMisc; TableFileCreationReason reason = TableFileCreationReason::kMisc;
}; };
// We add a new format of filter block called full filter block // Determines what kind of filter (if any) to generate in SST files, and under
// This new interface gives you more space of customization // which conditions. API users can create custom filter policies that
// // defer to other built-in policies (see NewBloomFilterPolicy and
// For the full filter block, you can plug in your version by implement // NewRibbonFilterPolicy) based on the context provided to
// the FilterBitsBuilder and FilterBitsReader // GetBuilderWithContext.
//
// There are two sets of interface in FilterPolicy
// Set 1: CreateFilter, KeyMayMatch: used for blockbased filter
// Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for
// full filter.
// Set 1 MUST be implemented correctly, Set 2 is optional
// RocksDB would first try using functions in Set 2. if they return nullptr,
// it would use Set 1 instead.
// You can choose filter type in NewBloomFilterPolicy
class FilterPolicy { class FilterPolicy {
public: public:
virtual ~FilterPolicy(); virtual ~FilterPolicy();
@ -177,9 +94,9 @@ class FilterPolicy {
// policy string. // policy string.
// The value describes the FilterPolicy being created. // The value describes the FilterPolicy being created.
// For BloomFilters, value may be a ":"-delimited value of the form: // For BloomFilters, value may be a ":"-delimited value of the form:
// "bloomfilter:[bits_per_key]:[use_block_based_builder]", // "bloomfilter:[bits_per_key]",
// e.g. ""bloomfilter:4:true" // e.g. ""bloomfilter:4"
// The above string is equivalent to calling NewBloomFilterPolicy(4, true). // The above string is equivalent to calling NewBloomFilterPolicy(4).
static Status CreateFromString(const ConfigOptions& config_options, static Status CreateFromString(const ConfigOptions& config_options,
const std::string& value, const std::string& value,
std::shared_ptr<const FilterPolicy>* result); std::shared_ptr<const FilterPolicy>* result);
@ -191,20 +108,20 @@ class FilterPolicy {
virtual const char* Name() const = 0; virtual const char* Name() const = 0;
// Return a new FilterBitsBuilder for constructing full or partitioned // Return a new FilterBitsBuilder for constructing full or partitioned
// filter blocks. The configuration details can depend on the input // filter blocks, or return nullptr to indicate "no filter". Custom
// FilterBuildingContext but must be serialized such that FilterBitsReader // implementations should defer to a built-in FilterPolicy to get a
// can operate based on the block contents without knowing a // new FilterBitsBuilder, but the FilterBuildingContext can be used
// FilterBuildingContext. // to decide which built-in FilterPolicy to defer to.
// Change in 7.0 release: returning nullptr indicates "no filter"
virtual FilterBitsBuilder* GetBuilderWithContext( virtual FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const = 0; const FilterBuildingContext&) const = 0;
// Return a new FilterBitsReader for full or partitioned filter blocks. // Return a new FilterBitsReader for full or partitioned filter blocks.
// Caller retains ownership of any buffer pointed to by the input Slice. // Caller retains ownership of any buffer pointed to by the input Slice.
// Custom implementation should defer to GetFilterBitsReader on any
// built-in FilterPolicy, which can read filters generated by any other
// built-in FilterPolicy.
virtual FilterBitsReader* GetFilterBitsReader( virtual FilterBitsReader* GetFilterBitsReader(
const Slice& /*contents*/) const { const Slice& /*contents*/) const = 0;
return nullptr;
}
}; };
// Return a new filter policy that uses a bloom filter with approximately // Return a new filter policy that uses a bloom filter with approximately

@ -18,7 +18,84 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class Slice; // A class that takes a bunch of keys, then generates filter
class FilterBitsBuilder {
public:
virtual ~FilterBitsBuilder() {}
// Add a key (or prefix) to the filter. Typically, a builder will keep
// a set of 64-bit key hashes and only build the filter in Finish
// when the final number of keys is known. Keys are added in sorted order
// and duplicated keys are possible, so typically, the builder will
// only add this key if its hash is different from the most recently
// added.
virtual void AddKey(const Slice& key) = 0;
// Called by RocksDB before Finish to populate
// TableProperties::num_filter_entries, so should represent the
// number of unique keys (and/or prefixes) added, but does not have
// to be exact. `return 0;` may be used to conspicuously indicate "unknown".
virtual size_t EstimateEntriesAdded() = 0;
// Generate the filter using the keys that are added
// The return value of this function would be the filter bits,
// The ownership of actual data is set to buf
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
// Similar to Finish(std::unique_ptr<const char[]>* buf), except that
// for a non-null status pointer argument, it will point to
// Status::Corruption() when there is any corruption during filter
// construction or Status::OK() otherwise.
//
// WARNING: do not use a filter resulted from a corrupted construction
// TODO: refactor this to have a better signature, consolidate
virtual Slice Finish(std::unique_ptr<const char[]>* buf,
Status* /* status */) {
return Finish(buf);
}
// Verify the filter returned from calling FilterBitsBuilder::Finish.
// The function returns Status::Corruption() if there is any corruption in the
// constructed filter or Status::OK() otherwise.
//
// Implementations should normally consult
// FilterBuildingContext::table_options.detect_filter_construct_corruption
// to determine whether to perform verification or to skip by returning
// Status::OK(). The decision is left to the FilterBitsBuilder so that
// verification prerequisites before PostVerify can be skipped when not
// configured.
//
// RocksDB internal will always call MaybePostVerify() on the filter after
// it is returned from calling FilterBitsBuilder::Finish
// except for FilterBitsBuilder::Finish resulting a corruption
// status, which indicates the filter is already in a corrupted state and
// there is no need to post-verify
virtual Status MaybePostVerify(const Slice& /* filter_content */) {
return Status::OK();
}
// Approximate the number of keys that can be added and generate a filter
// <= the specified number of bytes. Callers (including RocksDB) should
// only use this result for optimizing performance and not as a guarantee.
virtual size_t ApproximateNumEntries(size_t bytes) = 0;
};
// A class that checks if a key can be in filter
// It should be initialized by Slice generated by BitsBuilder
class FilterBitsReader {
public:
virtual ~FilterBitsReader() {}
// Check if the entry match the bits in filter
virtual bool MayMatch(const Slice& entry) = 0;
// Check if an array of entries match the bits in filter
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) {
for (int i = 0; i < num_keys; ++i) {
may_match[i] = MayMatch(*keys[i]);
}
}
};
// Exposes any extra information needed for testing built-in // Exposes any extra information needed for testing built-in
// FilterBitsBuilders // FilterBitsBuilders

@ -12,11 +12,11 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h" #include "rocksdb/slice_transform.h"
#include "table/block_based/filter_block_reader_common.h" #include "table/block_based/filter_block_reader_common.h"
#include "table/block_based/filter_policy_internal.h"
#include "table/block_based/parsed_full_filter_block.h" #include "table/block_based/parsed_full_filter_block.h"
#include "util/hash.h" #include "util/hash.h"

@ -5,7 +5,8 @@
// //
#include "table/block_based/parsed_full_filter_block.h" #include "table/block_based/parsed_full_filter_block.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/filter_policy_internal.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {

Loading…
Cancel
Save