Fix the bloom filter skipping empty prefixes

Summary:
bc0da4b512 optimized bloom filters by skipping duplicate entires when the whole key and prefixes are both added to the bloom. It however used empty string as the initial value of the last entry added to the bloom. This is incorrect since empty key/prefix are valid entires by themselves. This patch fixes that.
Closes https://github.com/facebook/rocksdb/pull/3776

Differential Revision: D7778803

Pulled By: maysamyabandeh

fbshipit-source-id: d5a065daebee17f9403cac51e9d5626aac87bfbc
main
Maysam Yabandeh 7 years ago committed by Facebook Github Bot
parent e5a4dacf6d
commit 7e4e381495
  1. 12
      table/full_filter_block.cc
  2. 2
      table/full_filter_block.h
  3. 17
      table/full_filter_block_test.cc

@ -17,6 +17,8 @@ FullFilterBlockBuilder::FullFilterBlockBuilder(
FilterBitsBuilder* filter_bits_builder) FilterBitsBuilder* filter_bits_builder)
: prefix_extractor_(prefix_extractor), : prefix_extractor_(prefix_extractor),
whole_key_filtering_(whole_key_filtering), whole_key_filtering_(whole_key_filtering),
last_whole_key_recorded_(false),
last_prefix_recorded_(false),
num_added_(0) { num_added_(0) {
assert(filter_bits_builder != nullptr); assert(filter_bits_builder != nullptr);
filter_bits_builder_.reset(filter_bits_builder); filter_bits_builder_.reset(filter_bits_builder);
@ -33,9 +35,10 @@ void FullFilterBlockBuilder::Add(const Slice& key) {
// bits builder to properly detect the duplicates by comparing with the // bits builder to properly detect the duplicates by comparing with the
// last item. // last item.
Slice last_whole_key = Slice(last_whole_key_str_); Slice last_whole_key = Slice(last_whole_key_str_);
if (last_whole_key.compare(key) != 0) { if (!last_whole_key_recorded_ || last_whole_key.compare(key) != 0) {
AddKey(key); AddKey(key);
last_whole_key_str_ = key.ToString(); last_whole_key_recorded_ = true;
last_whole_key_str_.assign(key.data(), key.size());
} }
} }
} }
@ -59,9 +62,10 @@ inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
// bits builder to properly detect the duplicates by comparing with the last // bits builder to properly detect the duplicates by comparing with the last
// item. // item.
Slice last_prefix = Slice(last_prefix_str_); Slice last_prefix = Slice(last_prefix_str_);
if (last_prefix.compare(prefix) != 0) { if (!last_prefix_recorded_ || last_prefix.compare(prefix) != 0) {
AddKey(prefix); AddKey(prefix);
last_prefix_str_ = prefix.ToString(); last_prefix_recorded_ = true;
last_prefix_str_.assign(prefix.data(), prefix.size());
} }
} else { } else {
AddKey(prefix); AddKey(prefix);

@ -59,7 +59,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
// should NOT dereference them. // should NOT dereference them.
const SliceTransform* prefix_extractor_; const SliceTransform* prefix_extractor_;
bool whole_key_filtering_; bool whole_key_filtering_;
bool last_whole_key_recorded_;
std::string last_whole_key_str_; std::string last_whole_key_str_;
bool last_prefix_recorded_;
std::string last_prefix_str_; std::string last_prefix_str_;
uint32_t num_added_; uint32_t num_added_;

@ -162,6 +162,20 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) {
} }
TEST_F(FullFilterBlockTest, DuplicateEntries) { TEST_F(FullFilterBlockTest, DuplicateEntries) {
{ // empty prefixes
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(0));
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
table_options_.filter_policy->GetFilterBitsBuilder());
const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder);
ASSERT_EQ(0, builder.NumAdded());
builder.Add("key"); // test with empty prefix
ASSERT_EQ(2, bits_builder->hash_entries_.size());
}
// mix of empty and non-empty
std::unique_ptr<const SliceTransform> prefix_extractor( std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(7)); NewFixedPrefixTransform(7));
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>( auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
@ -170,13 +184,14 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY, FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder); bits_builder);
ASSERT_EQ(0, builder.NumAdded()); ASSERT_EQ(0, builder.NumAdded());
builder.Add(""); // test with empty key too
builder.Add("prefix1key1"); builder.Add("prefix1key1");
builder.Add("prefix1key1"); builder.Add("prefix1key1");
builder.Add("prefix1key2"); builder.Add("prefix1key2");
builder.Add("prefix1key3"); builder.Add("prefix1key3");
builder.Add("prefix2key4"); builder.Add("prefix2key4");
// two prefix adn 4 keys // two prefix adn 4 keys
ASSERT_EQ(2 + 4, bits_builder->hash_entries_.size()); ASSERT_EQ(1 + 2 + 4, bits_builder->hash_entries_.size());
} }
TEST_F(FullFilterBlockTest, SingleChunk) { TEST_F(FullFilterBlockTest, SingleChunk) {

Loading…
Cancel
Save