From 5009b5326b851d6d0f308597a87e2b33c670a8a9 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 9 Jun 2016 15:48:45 -0700 Subject: [PATCH] BlockBasedTable::FullFilterKeyMayMatch() Should skip prefix bloom if full key bloom exists Summary: Currently, if users define both of full key bloom and prefix bloom in SST files. During Get(), if full key bloom shows the key may exist, we still go ahead and check prefix bloom. This is wasteful. If bloom filter for full keys exists, we should always ignore prefix bloom in Get(). Test Plan: Run existing tests Reviewers: yhchiang, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D57825 --- db/db_bloom_filter_test.cc | 38 ++++++++++++++++++++++--------- table/block_based_filter_block.cc | 5 ++-- table/block_based_filter_block.h | 1 - table/block_based_table_reader.cc | 4 ++-- table/filter_block.h | 15 +++++++++--- table/full_filter_block.cc | 9 ++++---- table/full_filter_block.h | 1 - 7 files changed, 47 insertions(+), 26 deletions(-) diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index 731d523e1..47915c96a 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -20,15 +20,29 @@ class DBBloomFilterTest : public DBTestBase { DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {} }; +class DBBloomFilterTestWithParam : public DBTestBase, + public testing::WithParamInterface { + protected: + bool use_block_based_filter_; + + public: + DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {} + + ~DBBloomFilterTestWithParam() {} + + void SetUp() override { use_block_based_filter_ = GetParam(); } +}; + // KeyMayExist can lead to a few false positives, but not false negatives. // To make test deterministic, use a much larger number of bits per key-20 than // bits in the key, so that false positives are eliminated -TEST_F(DBBloomFilterTest, KeyMayExist) { +TEST_P(DBBloomFilterTestWithParam, KeyMayExist) { do { ReadOptions ropts; std::string value; anon::OptionsOverride options_override; - options_override.filter_policy.reset(NewBloomFilterPolicy(20)); + options_override.filter_policy.reset( + NewBloomFilterPolicy(20, use_block_based_filter_)); Options options = CurrentOptions(options_override); options.statistics = rocksdb::CreateDBStatistics(); CreateAndReopenWithCF({"pikachu"}, options); @@ -89,10 +103,11 @@ TEST_F(DBBloomFilterTest, KeyMayExist) { // A delete is skipped for key if KeyMayExist(key) returns False // Tests Writebatch consistency and proper delete behaviour -TEST_F(DBBloomFilterTest, FilterDeletes) { +TEST_P(DBBloomFilterTestWithParam, FilterDeletes) { do { anon::OptionsOverride options_override; - options_override.filter_policy.reset(NewBloomFilterPolicy(20)); + options_override.filter_policy.reset( + NewBloomFilterPolicy(20, use_block_based_filter_)); Options options = CurrentOptions(options_override); options.filter_deletes = true; CreateAndReopenWithCF({"pikachu"}, options); @@ -315,7 +330,7 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); } -TEST_F(DBBloomFilterTest, BloomFilter) { +TEST_P(DBBloomFilterTestWithParam, BloomFilter) { do { Options options = CurrentOptions(); env_->count_random_reads_ = true; @@ -324,7 +339,8 @@ TEST_F(DBBloomFilterTest, BloomFilter) { // trigger reset of table_factory BlockBasedTableOptions table_options; table_options.no_block_cache = true; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); + table_options.filter_policy.reset( + NewBloomFilterPolicy(10, use_block_based_filter_)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, options); @@ -367,6 +383,9 @@ TEST_F(DBBloomFilterTest, BloomFilter) { } while (ChangeCompactOptions()); } +INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithParam, DBBloomFilterTestWithParam, + ::testing::Bool()); + TEST_F(DBBloomFilterTest, BloomFilterRate) { while (ChangeFilterOptions()) { Options options = CurrentOptions(); @@ -685,13 +704,10 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTest) { ASSERT_EQ(0, perf_context.bloom_sst_miss_count); // check SST bloom stats - // NOTE: hits per get differs because of code paths differences - // in BlockBasedTable::Get() - int hits_per_get = use_block_table_ && !use_block_based_builder_ ? 2 : 1; ASSERT_EQ(value1, Get(key1)); - ASSERT_EQ(hits_per_get, perf_context.bloom_sst_hit_count); + ASSERT_EQ(1, perf_context.bloom_sst_hit_count); ASSERT_EQ(value3, Get(key3)); - ASSERT_EQ(2 * hits_per_get, perf_context.bloom_sst_hit_count); + ASSERT_EQ(2, perf_context.bloom_sst_hit_count); ASSERT_EQ("NOT_FOUND", Get(key2)); ASSERT_EQ(1, perf_context.bloom_sst_miss_count); diff --git a/table/block_based_filter_block.cc b/table/block_based_filter_block.cc index 0685d54a5..427c9fe9c 100644 --- a/table/block_based_filter_block.cc +++ b/table/block_based_filter_block.cc @@ -160,12 +160,11 @@ void BlockBasedFilterBlockBuilder::GenerateFilter() { BlockBasedFilterBlockReader::BlockBasedFilterBlockReader( const SliceTransform* prefix_extractor, - const BlockBasedTableOptions& table_opt, bool whole_key_filtering, + const BlockBasedTableOptions& table_opt, bool _whole_key_filtering, BlockContents&& contents, Statistics* stats) - : FilterBlockReader(contents.data.size(), stats), + : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering), policy_(table_opt.filter_policy.get()), prefix_extractor_(prefix_extractor), - whole_key_filtering_(whole_key_filtering), data_(nullptr), offset_(nullptr), num_(0), diff --git a/table/block_based_filter_block.h b/table/block_based_filter_block.h index d11786f21..ca3f10e78 100644 --- a/table/block_based_filter_block.h +++ b/table/block_based_filter_block.h @@ -92,7 +92,6 @@ class BlockBasedFilterBlockReader : public FilterBlockReader { private: const FilterPolicy* policy_; const SliceTransform* prefix_extractor_; - bool whole_key_filtering_; const char* data_; // Pointer to filter data (at block-start) const char* offset_; // Pointer to beginning of offset array (at block-end) size_t num_; // Number of entries in offset array diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 080f4a1b1..beb6f74ea 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1347,8 +1347,8 @@ bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options, return true; } Slice user_key = ExtractUserKey(internal_key); - if (!filter->KeyMayMatch(user_key)) { - return false; + if (filter->whole_key_filtering()) { + return filter->KeyMayMatch(user_key); } if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor && rep_->ioptions.prefix_extractor->InDomain(user_key) && diff --git a/table/filter_block.h b/table/filter_block.h index 0612c3cb2..1fe428ec5 100644 --- a/table/filter_block.h +++ b/table/filter_block.h @@ -65,9 +65,13 @@ class FilterBlockBuilder { // BlockBased/Full FilterBlock would be called in the same way. class FilterBlockReader { public: - explicit FilterBlockReader() : size_(0), statistics_(nullptr) {} - explicit FilterBlockReader(size_t s, Statistics* stats) - : size_(s), statistics_(stats) {} + explicit FilterBlockReader() + : whole_key_filtering_(true), size_(0), statistics_(nullptr) {} + explicit FilterBlockReader(size_t s, Statistics* stats, + bool _whole_key_filtering) + : whole_key_filtering_(_whole_key_filtering), + size_(s), + statistics_(stats) {} virtual ~FilterBlockReader() {} virtual bool IsBlockBased() = 0; // If is blockbased filter @@ -79,12 +83,17 @@ class FilterBlockReader { virtual size_t size() const { return size_; } virtual Statistics* statistics() const { return statistics_; } + bool whole_key_filtering() const { return whole_key_filtering_; } + // convert this object to a human readable form virtual std::string ToString() const { std::string error_msg("Unsupported filter \n"); return error_msg; } + protected: + bool whole_key_filtering_; + private: // No copying allowed FilterBlockReader(const FilterBlockReader&); diff --git a/table/full_filter_block.cc b/table/full_filter_block.cc index 1774c2d79..1c89cc1c4 100644 --- a/table/full_filter_block.cc +++ b/table/full_filter_block.cc @@ -53,22 +53,21 @@ Slice FullFilterBlockBuilder::Finish() { } FullFilterBlockReader::FullFilterBlockReader( - const SliceTransform* prefix_extractor, bool whole_key_filtering, + const SliceTransform* prefix_extractor, bool _whole_key_filtering, const Slice& contents, FilterBitsReader* filter_bits_reader, Statistics* stats) - : FilterBlockReader(contents.size(), stats), + : FilterBlockReader(contents.size(), stats, _whole_key_filtering), prefix_extractor_(prefix_extractor), - whole_key_filtering_(whole_key_filtering), contents_(contents) { assert(filter_bits_reader != nullptr); filter_bits_reader_.reset(filter_bits_reader); } FullFilterBlockReader::FullFilterBlockReader( - const SliceTransform* prefix_extractor, bool whole_key_filtering, + const SliceTransform* prefix_extractor, bool _whole_key_filtering, BlockContents&& contents, FilterBitsReader* filter_bits_reader, Statistics* stats) - : FullFilterBlockReader(prefix_extractor, whole_key_filtering, + : FullFilterBlockReader(prefix_extractor, _whole_key_filtering, contents.data, filter_bits_reader, stats) { block_contents_ = std::move(contents); } diff --git a/table/full_filter_block.h b/table/full_filter_block.h index 6634f505a..4aa357f8a 100644 --- a/table/full_filter_block.h +++ b/table/full_filter_block.h @@ -96,7 +96,6 @@ class FullFilterBlockReader : public FilterBlockReader { private: const SliceTransform* prefix_extractor_; - bool whole_key_filtering_; std::unique_ptr filter_bits_reader_; Slice contents_;