diff --git a/table/full_filter_bits_builder.h b/table/full_filter_bits_builder.h index b3be7e897..851ed1e2a 100644 --- a/table/full_filter_bits_builder.h +++ b/table/full_filter_bits_builder.h @@ -51,6 +51,7 @@ class FullFilterBitsBuilder : public FilterBitsBuilder { uint32_t* num_lines); private: + friend class FullFilterBlockTest_DuplicateEntries_Test; size_t bits_per_key_; size_t num_probes_; std::vector hash_entries_; diff --git a/table/full_filter_block.cc b/table/full_filter_block.cc index feab0b7a4..4af3125ba 100644 --- a/table/full_filter_block.cc +++ b/table/full_filter_block.cc @@ -23,10 +23,23 @@ FullFilterBlockBuilder::FullFilterBlockBuilder( } void FullFilterBlockBuilder::Add(const Slice& key) { + const bool add_prefix = prefix_extractor_ && prefix_extractor_->InDomain(key); if (whole_key_filtering_) { - AddKey(key); + if (!add_prefix) { + AddKey(key); + } else { + // if both whole_key and prefix are added to bloom then we will have whole + // key and prefix addition being interleaved and thus cannot rely on the + // bits builder to properly detect the duplicates by comparing with the + // last item. + Slice last_whole_key = Slice(last_whole_key_str_); + if (last_whole_key.compare(key) != 0) { + AddKey(key); + last_whole_key_str_ = key.ToString(); + } + } } - if (prefix_extractor_ && prefix_extractor_->InDomain(key)) { + if (add_prefix) { AddPrefix(key); } } @@ -40,7 +53,19 @@ inline void FullFilterBlockBuilder::AddKey(const Slice& key) { // Add prefix to filter if needed inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) { Slice prefix = prefix_extractor_->Transform(key); - AddKey(prefix); + if (whole_key_filtering_) { + // if both whole_key and prefix are added to bloom then we will have whole + // key and prefix addition being interleaved and thus cannot rely on the + // bits builder to properly detect the duplicates by comparing with the last + // item. + Slice last_prefix = Slice(last_prefix_str_); + if (last_prefix.compare(prefix) != 0) { + AddKey(prefix); + last_prefix_str_ = prefix.ToString(); + } + } else { + AddKey(prefix); + } } Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/, diff --git a/table/full_filter_block.h b/table/full_filter_block.h index 6aec5530a..eb2703a4d 100644 --- a/table/full_filter_block.h +++ b/table/full_filter_block.h @@ -59,6 +59,8 @@ class FullFilterBlockBuilder : public FilterBlockBuilder { // should NOT dereference them. const SliceTransform* prefix_extractor_; bool whole_key_filtering_; + std::string last_whole_key_str_; + std::string last_prefix_str_; uint32_t num_added_; std::unique_ptr filter_data_; diff --git a/table/full_filter_block_test.cc b/table/full_filter_block_test.cc index 2e73612d1..73541264a 100644 --- a/table/full_filter_block_test.cc +++ b/table/full_filter_block_test.cc @@ -6,6 +6,7 @@ #include "table/full_filter_block.h" #include "rocksdb/filter_policy.h" +#include "table/full_filter_bits_builder.h" #include "util/coding.h" #include "util/hash.h" #include "util/string_util.h" @@ -160,6 +161,24 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) { ASSERT_TRUE(reader.KeyMayMatch("foo")); } +TEST_F(FullFilterBlockTest, DuplicateEntries) { + std::unique_ptr prefix_extractor( + NewFixedPrefixTransform(7)); + auto bits_builder = dynamic_cast( + table_options_.filter_policy->GetFilterBitsBuilder()); + const bool WHOLE_KEY = true; + FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY, + bits_builder); + ASSERT_EQ(0, builder.NumAdded()); + builder.Add("prefix1key1"); + builder.Add("prefix1key1"); + builder.Add("prefix1key2"); + builder.Add("prefix1key3"); + builder.Add("prefix2key4"); + // two prefix adn 4 keys + ASSERT_EQ(2 + 4, bits_builder->hash_entries_.size()); +} + TEST_F(FullFilterBlockTest, SingleChunk) { FullFilterBlockBuilder builder( nullptr, true, table_options_.filter_policy->GetFilterBitsBuilder());