Clean up some filter tests and comments (#5960)

Summary:
Some filtering tests were unfriendly to new implementations of
FilterBitsBuilder because of dynamic_cast to FullFilterBitsBuilder. Most
of those have now been cleaned up, worked around, or at least changed
from crash on dynamic_cast failure to individual test failure.

Also put some clarifying comments on filter-related APIs.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5960

Test Plan: make check

Differential Revision: D18121223

Pulled By: pdillinger

fbshipit-source-id: e83827d9d5d96315d96f8e25a99cd70f497d802c
main
Peter Dillinger 5 years ago committed by Facebook Github Bot
parent 2309fd63bf
commit 013babc685
  1. 15
      include/rocksdb/filter_policy.h
  2. 8
      table/block_based/filter_policy.cc
  3. 11
      table/block_based/filter_policy_internal.h
  4. 52
      table/block_based/full_filter_block_test.cc
  5. 18
      table/block_based/partitioned_filter_block_test.cc
  6. 1
      util/bloom_test.cc

@ -44,12 +44,13 @@ class FilterBitsBuilder {
// The ownership of actual data is set to buf
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
// Calculate num of entries fit into a space.
// Calculate num of keys that can be added and generate a filter
// <= the specified number of bytes.
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4702) // unreachable code
#endif
virtual int CalculateNumEntry(const uint32_t /*space*/) {
virtual int CalculateNumEntry(const uint32_t /*bytes*/) {
#ifndef ROCKSDB_LITE
throw std::runtime_error("CalculateNumEntry not Implemented");
#else
@ -119,13 +120,13 @@ class FilterPolicy {
// list, but it should aim to return false with a high probability.
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
// Get the FilterBitsBuilder, which is ONLY used for full filter block
// It contains interface to take individual key, then generate filter
// Return a new FilterBitsBuilder for full or partitioned filter blocks, or
// nullptr if using block-based filter.
virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; }
// Get the FilterBitsReader, which is ONLY used for full filter block
// It contains interface to tell if key can be in filter
// The input slice should NOT be deleted by FilterPolicy
// Return a new FilterBitsReader for full or partitioned filter blocks, or
// nullptr if using block-based filter.
// As here, the input slice should NOT be deleted by FilterPolicy.
virtual FilterBitsReader* GetFilterBitsReader(
const Slice& /*contents*/) const {
return nullptr;

@ -103,16 +103,16 @@ char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
return data;
}
int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t space) {
int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t bytes) {
assert(bits_per_key_);
assert(space > 0);
assert(bytes > 0);
uint32_t dont_care1, dont_care2;
int high = static_cast<int>(space * 8 / bits_per_key_ + 1);
int high = static_cast<int>(bytes * 8 / bits_per_key_ + 1);
int low = 1;
int n = high;
for (; n >= low; n--) {
uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2);
if (sz <= space) {
if (sz <= bytes) {
break;
}
}

@ -28,7 +28,7 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
~FullFilterBitsBuilder();
virtual void AddKey(const Slice& key) override;
void AddKey(const Slice& key) override;
// Create a filter that for hashes [0, n-1], the filter is allocated here
// When creating filter, it is ensured that
@ -44,12 +44,13 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
// +----------------------------------------------------------------+
// | ... | num_probes : 1 byte | num_lines : 4 bytes |
// +----------------------------------------------------------------+
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override;
Slice Finish(std::unique_ptr<const char[]>* buf) override;
// Calculate num of entries fit into a space.
virtual int CalculateNumEntry(const uint32_t space) override;
int CalculateNumEntry(const uint32_t bytes) override;
// Calculate space for new filter. This is reverse of CalculateNumEntry.
// Calculate number of bytes needed for a new filter, including
// metadata. Passing the result to CalculateNumEntry should
// return >= the num_entry passed in.
uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits,
uint32_t* num_lines);

@ -3,6 +3,8 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <set>
#include "table/block_based/full_filter_block.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"
@ -205,24 +207,62 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) {
/*lookup_context=*/nullptr));
}
class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder {
std::unique_ptr<FilterBitsBuilder> b_;
std::set<std::string> uniq_;
public:
explicit CountUniqueFilterBitsBuilderWrapper(FilterBitsBuilder* b) : b_(b) {}
~CountUniqueFilterBitsBuilderWrapper() override {}
void AddKey(const Slice& key) override {
b_->AddKey(key);
uniq_.insert(key.ToString());
}
Slice Finish(std::unique_ptr<const char[]>* buf) override {
Slice rv = b_->Finish(buf);
uniq_.clear();
return rv;
}
int CalculateNumEntry(const uint32_t bytes) override {
return b_->CalculateNumEntry(bytes);
}
size_t CountUnique() { return uniq_.size(); }
};
TEST_F(FullFilterBlockTest, DuplicateEntries) {
{ // empty prefixes
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(0));
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(
table_options_.filter_policy->GetFilterBitsBuilder());
const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder);
ASSERT_EQ(0, builder.NumAdded());
builder.Add("key"); // test with empty prefix
ASSERT_EQ(2, bits_builder->hash_entries_.size());
ASSERT_EQ(0, bits_builder->CountUnique());
// adds key and empty prefix; both abstractions count them
builder.Add("key1");
ASSERT_EQ(2, builder.NumAdded());
ASSERT_EQ(2, bits_builder->CountUnique());
// Add different key (unique) and also empty prefix (not unique).
// From here in this test, it's immaterial whether the block builder
// can count unique keys.
builder.Add("key2");
ASSERT_EQ(3, bits_builder->CountUnique());
// Empty key -> nothing unique
builder.Add("");
ASSERT_EQ(3, bits_builder->CountUnique());
}
// mix of empty and non-empty
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(7));
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(
table_options_.filter_policy->GetFilterBitsBuilder());
const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
@ -234,8 +274,8 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
builder.Add("prefix1key2");
builder.Add("prefix1key3");
builder.Add("prefix2key4");
// two prefix adn 4 keys
ASSERT_EQ(1 + 2 + 4, bits_builder->hash_entries_.size());
// 1 empty, 2 non-empty prefixes, and 4 non-empty keys
ASSERT_EQ(1 + 2 + 4, bits_builder->CountUnique());
}
TEST_F(FullFilterBlockTest, SingleChunk) {

@ -65,12 +65,15 @@ class PartitionedFilterBlockTest
InternalKeyComparator icomp_;
std::unique_ptr<BlockBasedTable> table_;
std::shared_ptr<Cache> cache_;
int bits_per_key_;
PartitionedFilterBlockTest()
: ioptions_(options_),
env_options_(options_),
icomp_(options_.comparator) {
table_options_.filter_policy.reset(NewBloomFilterPolicy(10, false));
icomp_(options_.comparator),
bits_per_key_(10) {
table_options_.filter_policy.reset(
NewBloomFilterPolicy(bits_per_key_, false));
table_options_.format_version = GetParam();
table_options_.index_block_restart_interval = 3;
}
@ -91,16 +94,9 @@ class PartitionedFilterBlockTest
}
uint64_t MaxFilterSize() {
uint32_t dont_care1, dont_care2;
int num_keys = sizeof(keys) / sizeof(*keys);
auto filter_bits_reader = dynamic_cast<rocksdb::FullFilterBitsBuilder*>(
table_options_.filter_policy->GetFilterBitsBuilder());
assert(filter_bits_reader);
auto partition_size =
filter_bits_reader->CalculateSpace(num_keys, &dont_care1, &dont_care2);
delete filter_bits_reader;
return partition_size +
partition_size * table_options_.block_size_deviation / 100;
// General, rough over-approximation
return num_keys * bits_per_key_ + (CACHE_LINE_SIZE * 8 + /*metadata*/ 5);
}
uint64_t last_offset = 10;

@ -324,6 +324,7 @@ class FullBloomTest : public testing::Test {
TEST_F(FullBloomTest, FilterSize) {
uint32_t dont_care1, dont_care2;
auto full_bits_builder = GetFullFilterBitsBuilder();
ASSERT_TRUE(full_bits_builder != nullptr);
for (int n = 1; n < 100; n++) {
auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2);
auto n2 = full_bits_builder->CalculateNumEntry(space);

Loading…
Cancel
Save