fork of https://github.com/rust-rocksdb/rust-rocksdb for nextgraph
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
341 lines
13 KiB
341 lines
13 KiB
1 year ago
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||
|
// This source code is licensed under both the GPLv2 (found in the
|
||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||
|
// (found in the LICENSE.Apache file in the root directory).
|
||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style license that can be
|
||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||
|
|
||
|
#pragma once
|
||
|
|
||
|
#include <atomic>
|
||
|
#include <memory>
|
||
|
#include <string>
|
||
|
#include <vector>
|
||
|
|
||
|
#include "rocksdb/filter_policy.h"
|
||
|
#include "rocksdb/table.h"
|
||
|
|
||
|
namespace ROCKSDB_NAMESPACE {
|
||
|
|
||
|
// A class that takes a bunch of keys, then generates filter
|
||
|
class FilterBitsBuilder {
|
||
|
public:
|
||
|
virtual ~FilterBitsBuilder() {}
|
||
|
|
||
|
// Add a key (or prefix) to the filter. Typically, a builder will keep
|
||
|
// a set of 64-bit key hashes and only build the filter in Finish
|
||
|
// when the final number of keys is known. Keys are added in sorted order
|
||
|
// and duplicated keys are possible, so typically, the builder will
|
||
|
// only add this key if its hash is different from the most recently
|
||
|
// added.
|
||
|
virtual void AddKey(const Slice& key) = 0;
|
||
|
|
||
|
// Called by RocksDB before Finish to populate
|
||
|
// TableProperties::num_filter_entries, so should represent the
|
||
|
// number of unique keys (and/or prefixes) added, but does not have
|
||
|
// to be exact. `return 0;` may be used to conspicuously indicate "unknown".
|
||
|
virtual size_t EstimateEntriesAdded() = 0;
|
||
|
|
||
|
// Generate the filter using the keys that are added
|
||
|
// The return value of this function would be the filter bits,
|
||
|
// The ownership of actual data is set to buf
|
||
|
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
|
||
|
|
||
|
// Similar to Finish(std::unique_ptr<const char[]>* buf), except that
|
||
|
// for a non-null status pointer argument, it will point to
|
||
|
// Status::Corruption() when there is any corruption during filter
|
||
|
// construction or Status::OK() otherwise.
|
||
|
//
|
||
|
// WARNING: do not use a filter resulted from a corrupted construction
|
||
|
// TODO: refactor this to have a better signature, consolidate
|
||
|
virtual Slice Finish(std::unique_ptr<const char[]>* buf,
|
||
|
Status* /* status */) {
|
||
|
return Finish(buf);
|
||
|
}
|
||
|
|
||
|
// Verify the filter returned from calling FilterBitsBuilder::Finish.
|
||
|
// The function returns Status::Corruption() if there is any corruption in the
|
||
|
// constructed filter or Status::OK() otherwise.
|
||
|
//
|
||
|
// Implementations should normally consult
|
||
|
// FilterBuildingContext::table_options.detect_filter_construct_corruption
|
||
|
// to determine whether to perform verification or to skip by returning
|
||
|
// Status::OK(). The decision is left to the FilterBitsBuilder so that
|
||
|
// verification prerequisites before PostVerify can be skipped when not
|
||
|
// configured.
|
||
|
//
|
||
|
// RocksDB internal will always call MaybePostVerify() on the filter after
|
||
|
// it is returned from calling FilterBitsBuilder::Finish
|
||
|
// except for FilterBitsBuilder::Finish resulting a corruption
|
||
|
// status, which indicates the filter is already in a corrupted state and
|
||
|
// there is no need to post-verify
|
||
|
virtual Status MaybePostVerify(const Slice& /* filter_content */) {
|
||
|
return Status::OK();
|
||
|
}
|
||
|
|
||
|
// Approximate the number of keys that can be added and generate a filter
|
||
|
// <= the specified number of bytes. Callers (including RocksDB) should
|
||
|
// only use this result for optimizing performance and not as a guarantee.
|
||
|
virtual size_t ApproximateNumEntries(size_t bytes) = 0;
|
||
|
};
|
||
|
|
||
|
// A class that checks if a key can be in filter
|
||
|
// It should be initialized by Slice generated by BitsBuilder
|
||
|
class FilterBitsReader {
|
||
|
public:
|
||
|
virtual ~FilterBitsReader() {}
|
||
|
|
||
|
// Check if the entry match the bits in filter
|
||
|
virtual bool MayMatch(const Slice& entry) = 0;
|
||
|
|
||
|
// Check if an array of entries match the bits in filter
|
||
|
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) {
|
||
|
for (int i = 0; i < num_keys; ++i) {
|
||
|
may_match[i] = MayMatch(*keys[i]);
|
||
|
}
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// Exposes any extra information needed for testing built-in
|
||
|
// FilterBitsBuilders
|
||
|
class BuiltinFilterBitsBuilder : public FilterBitsBuilder {
|
||
|
public:
|
||
|
// Calculate number of bytes needed for a new filter, including
|
||
|
// metadata. Passing the result to ApproximateNumEntries should
|
||
|
// (ideally, usually) return >= the num_entry passed in.
|
||
|
// When optimize_filters_for_memory is enabled, this function
|
||
|
// is not authoritative but represents a target size that should
|
||
|
// be close to the average size.
|
||
|
virtual size_t CalculateSpace(size_t num_entries) = 0;
|
||
|
|
||
|
// Returns an estimate of the FP rate of the returned filter if
|
||
|
// `num_entries` keys are added and the filter returned by Finish
|
||
|
// is `bytes` bytes.
|
||
|
virtual double EstimatedFpRate(size_t num_entries, size_t bytes) = 0;
|
||
|
};
|
||
|
|
||
|
// Base class for RocksDB built-in filter reader with
|
||
|
// extra useful functionalities for inernal.
|
||
|
class BuiltinFilterBitsReader : public FilterBitsReader {
|
||
|
public:
|
||
|
// Check if the hash of the entry match the bits in filter
|
||
|
virtual bool HashMayMatch(const uint64_t /* h */) { return true; }
|
||
|
};
|
||
|
|
||
|
// Base class for RocksDB built-in filter policies. This provides the
|
||
|
// ability to read all kinds of built-in filters (so that old filters can
|
||
|
// be used even when you change between built-in policies).
|
||
|
class BuiltinFilterPolicy : public FilterPolicy {
|
||
|
public: // overrides
|
||
|
// Read metadata to determine what kind of FilterBitsReader is needed
|
||
|
// and return a new one. This must successfully process any filter data
|
||
|
// generated by a built-in FilterBitsBuilder, regardless of the impl
|
||
|
// chosen for this BloomFilterPolicy.
|
||
|
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;
|
||
|
static const char* kClassName();
|
||
|
bool IsInstanceOf(const std::string& id) const override;
|
||
|
// All variants of BuiltinFilterPolicy can read each others filters.
|
||
|
const char* CompatibilityName() const override;
|
||
|
static const char* kCompatibilityName();
|
||
|
|
||
|
public: // new
|
||
|
// An internal function for the implementation of
|
||
|
// BuiltinFilterBitsReader::GetFilterBitsReader without requiring an instance
|
||
|
// or working around potential virtual overrides.
|
||
|
static BuiltinFilterBitsReader* GetBuiltinFilterBitsReader(
|
||
|
const Slice& contents);
|
||
|
|
||
|
// Returns a new FilterBitsBuilder from the filter_policy in
|
||
|
// table_options of a context, or nullptr if not applicable.
|
||
|
// (An internal convenience function to save boilerplate.)
|
||
|
static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&);
|
||
|
|
||
|
private:
|
||
|
// For Bloom filter implementation(s)
|
||
|
static BuiltinFilterBitsReader* GetBloomBitsReader(const Slice& contents);
|
||
|
|
||
|
// For Ribbon filter implementation(s)
|
||
|
static BuiltinFilterBitsReader* GetRibbonBitsReader(const Slice& contents);
|
||
|
};
|
||
|
|
||
|
// A "read only" filter policy used for backward compatibility with old
|
||
|
// OPTIONS files, which did not specifying a Bloom configuration, just
|
||
|
// "rocksdb.BuiltinBloomFilter". Although this can read existing filters,
|
||
|
// this policy does not build new filters, so new SST files generated
|
||
|
// under the policy will get no filters (like nullptr FilterPolicy).
|
||
|
// This class is considered internal API and subject to change.
|
||
|
class ReadOnlyBuiltinFilterPolicy : public BuiltinFilterPolicy {
|
||
|
public:
|
||
|
const char* Name() const override { return kClassName(); }
|
||
|
static const char* kClassName();
|
||
|
|
||
|
// Does not write filters.
|
||
|
FilterBitsBuilder* GetBuilderWithContext(
|
||
|
const FilterBuildingContext&) const override {
|
||
|
return nullptr;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// RocksDB built-in filter policy for Bloom or Bloom-like filters including
|
||
|
// Ribbon filters.
|
||
|
// This class is considered internal API and subject to change.
|
||
|
// See NewBloomFilterPolicy and NewRibbonFilterPolicy.
|
||
|
class BloomLikeFilterPolicy : public BuiltinFilterPolicy {
|
||
|
public:
|
||
|
explicit BloomLikeFilterPolicy(double bits_per_key);
|
||
|
|
||
|
~BloomLikeFilterPolicy() override;
|
||
|
static const char* kClassName();
|
||
|
bool IsInstanceOf(const std::string& id) const override;
|
||
|
|
||
|
std::string GetId() const override;
|
||
|
|
||
|
// Essentially for testing only: configured millibits/key
|
||
|
int GetMillibitsPerKey() const { return millibits_per_key_; }
|
||
|
// Essentially for testing only: legacy whole bits/key
|
||
|
int GetWholeBitsPerKey() const { return whole_bits_per_key_; }
|
||
|
|
||
|
// All the different underlying implementations that a BloomLikeFilterPolicy
|
||
|
// might use, as a configuration string name for a testing mode for
|
||
|
// "always use this implementation." Only appropriate for unit tests.
|
||
|
static const std::vector<std::string>& GetAllFixedImpls();
|
||
|
|
||
|
// Convenience function for creating by name for fixed impls
|
||
|
static std::shared_ptr<const FilterPolicy> Create(const std::string& name,
|
||
|
double bits_per_key);
|
||
|
|
||
|
protected:
|
||
|
// Some implementations used by aggregating policies
|
||
|
FilterBitsBuilder* GetLegacyBloomBuilderWithContext(
|
||
|
const FilterBuildingContext& context) const;
|
||
|
FilterBitsBuilder* GetFastLocalBloomBuilderWithContext(
|
||
|
const FilterBuildingContext& context) const;
|
||
|
FilterBitsBuilder* GetStandard128RibbonBuilderWithContext(
|
||
|
const FilterBuildingContext& context) const;
|
||
|
|
||
|
std::string GetBitsPerKeySuffix() const;
|
||
|
|
||
|
private:
|
||
|
// Bits per key settings are for configuring Bloom filters.
|
||
|
|
||
|
// Newer filters support fractional bits per key. For predictable behavior
|
||
|
// of 0.001-precision values across floating point implementations, we
|
||
|
// round to thousandths of a bit (on average) per key.
|
||
|
int millibits_per_key_;
|
||
|
|
||
|
// Older filters round to whole number bits per key. (There *should* be no
|
||
|
// compatibility issue with fractional bits per key, but preserving old
|
||
|
// behavior with format_version < 5 just in case.)
|
||
|
int whole_bits_per_key_;
|
||
|
|
||
|
// For configuring Ribbon filter: a desired value for 1/fp_rate. For
|
||
|
// example, 100 -> 1% fp rate.
|
||
|
double desired_one_in_fp_rate_;
|
||
|
|
||
|
// Whether relevant warnings have been logged already. (Remember so we
|
||
|
// only report once per BloomFilterPolicy instance, to keep the noise down.)
|
||
|
mutable std::atomic<bool> warned_;
|
||
|
|
||
|
// State for implementing optimize_filters_for_memory. Essentially, this
|
||
|
// tracks a surplus or deficit in total FP rate of filters generated by
|
||
|
// builders under this policy vs. what would have been generated without
|
||
|
// optimize_filters_for_memory.
|
||
|
//
|
||
|
// To avoid floating point weirdness, the actual value is
|
||
|
// Sum over all generated filters f:
|
||
|
// (predicted_fp_rate(f) - predicted_fp_rate(f|o_f_f_m=false)) * 2^32
|
||
|
mutable std::atomic<int64_t> aggregate_rounding_balance_;
|
||
|
};
|
||
|
|
||
|
// For NewBloomFilterPolicy
|
||
|
//
|
||
|
// This is a user-facing policy that automatically choose between
|
||
|
// LegacyBloom and FastLocalBloom based on context at build time,
|
||
|
// including compatibility with format_version.
|
||
|
class BloomFilterPolicy : public BloomLikeFilterPolicy {
|
||
|
public:
|
||
|
explicit BloomFilterPolicy(double bits_per_key);
|
||
|
|
||
|
// To use this function, call BuiltinFilterPolicy::GetBuilderFromContext().
|
||
|
//
|
||
|
// Neither the context nor any objects therein should be saved beyond
|
||
|
// the call to this function, unless it's shared_ptr.
|
||
|
FilterBitsBuilder* GetBuilderWithContext(
|
||
|
const FilterBuildingContext&) const override;
|
||
|
|
||
|
static const char* kClassName();
|
||
|
const char* Name() const override { return kClassName(); }
|
||
|
static const char* kNickName();
|
||
|
const char* NickName() const override { return kNickName(); }
|
||
|
std::string GetId() const override;
|
||
|
};
|
||
|
|
||
|
// For NewRibbonFilterPolicy
|
||
|
//
|
||
|
// This is a user-facing policy that chooses between Standard128Ribbon
|
||
|
// and FastLocalBloom based on context at build time (LSM level and other
|
||
|
// factors in extreme cases).
|
||
|
class RibbonFilterPolicy : public BloomLikeFilterPolicy {
|
||
|
public:
|
||
|
explicit RibbonFilterPolicy(double bloom_equivalent_bits_per_key,
|
||
|
int bloom_before_level);
|
||
|
|
||
|
FilterBitsBuilder* GetBuilderWithContext(
|
||
|
const FilterBuildingContext&) const override;
|
||
|
|
||
|
int GetBloomBeforeLevel() const { return bloom_before_level_; }
|
||
|
|
||
|
static const char* kClassName();
|
||
|
const char* Name() const override { return kClassName(); }
|
||
|
static const char* kNickName();
|
||
|
const char* NickName() const override { return kNickName(); }
|
||
|
std::string GetId() const override;
|
||
|
|
||
|
private:
|
||
|
const int bloom_before_level_;
|
||
|
};
|
||
|
|
||
|
// For testing only, but always constructable with internal names
|
||
|
namespace test {
|
||
|
|
||
|
class LegacyBloomFilterPolicy : public BloomLikeFilterPolicy {
|
||
|
public:
|
||
|
explicit LegacyBloomFilterPolicy(double bits_per_key)
|
||
|
: BloomLikeFilterPolicy(bits_per_key) {}
|
||
|
|
||
|
FilterBitsBuilder* GetBuilderWithContext(
|
||
|
const FilterBuildingContext& context) const override;
|
||
|
|
||
|
static const char* kClassName();
|
||
|
const char* Name() const override { return kClassName(); }
|
||
|
};
|
||
|
|
||
|
class FastLocalBloomFilterPolicy : public BloomLikeFilterPolicy {
|
||
|
public:
|
||
|
explicit FastLocalBloomFilterPolicy(double bits_per_key)
|
||
|
: BloomLikeFilterPolicy(bits_per_key) {}
|
||
|
|
||
|
FilterBitsBuilder* GetBuilderWithContext(
|
||
|
const FilterBuildingContext& context) const override;
|
||
|
|
||
|
static const char* kClassName();
|
||
|
const char* Name() const override { return kClassName(); }
|
||
|
};
|
||
|
|
||
|
class Standard128RibbonFilterPolicy : public BloomLikeFilterPolicy {
|
||
|
public:
|
||
|
explicit Standard128RibbonFilterPolicy(double bloom_equiv_bits_per_key)
|
||
|
: BloomLikeFilterPolicy(bloom_equiv_bits_per_key) {}
|
||
|
|
||
|
FilterBitsBuilder* GetBuilderWithContext(
|
||
|
const FilterBuildingContext& context) const override;
|
||
|
|
||
|
static const char* kClassName();
|
||
|
const char* Name() const override { return kClassName(); }
|
||
|
};
|
||
|
|
||
|
} // namespace test
|
||
|
|
||
|
} // namespace ROCKSDB_NAMESPACE
|