From e63140d52baff154a502452aa94baef5bcec87b6 Mon Sep 17 00:00:00 2001 From: sdong Date: Mon, 2 Feb 2015 17:42:57 -0800 Subject: [PATCH] Get() to use prefix bloom filter when filter is not block based Summary: Get() now doesn't make use of bloom filter if it is prefix based. Add the check. Didn't touch block based bloom filter. I can't fully reason whether it is correct to do that. But it's straight-forward to for full bloom filter. Test Plan: make all check Add a test case in DBTest Reviewers: rven, yhchiang, igor Reviewed By: igor Subscribers: MarkCallaghan, leveldb, dhruba, yoshinorim Differential Revision: https://reviews.facebook.net/D31941 --- HISTORY.md | 1 + db/db_test.cc | 35 +++++++++++++++++++++++++++++++ table/block_based_table_reader.cc | 20 ++++++++++++++++-- table/block_based_table_reader.h | 3 +++ 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index c688585e5..3502df3ea 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -13,6 +13,7 @@ * GetThreadStatus() is now able to report compaction activity. * MemEnv (env that stores data in memory) is now available in default library build. You can create it by calling NewMemEnv(). * Add SliceTransform.SameResultWhenAppended() to help users determine it is safe to apply prefix bloom/hash. +* Block based table now makes use of prefix bloom filter if it is a full fulter. ### Public API changes * Deprecated skip_log_error_on_recovery option diff --git a/db/db_test.cc b/db/db_test.cc index 5ae209ad6..4271e3281 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -1976,6 +1976,41 @@ TEST(DBTest, FilterDeletes) { } while (ChangeCompactOptions()); } +TEST(DBTest, GetFilterByPrefixBloom) { + Options options = last_options_; + options.prefix_extractor.reset(NewFixedPrefixTransform(8)); + options.statistics = rocksdb::CreateDBStatistics(); + BlockBasedTableOptions bbto; + bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + bbto.whole_key_filtering = false; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + DestroyAndReopen(options); + + WriteOptions wo; + ReadOptions ro; + FlushOptions fo; + fo.wait = true; + std::string value; + + ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo")); + ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2")); + ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar")); + + dbfull()->Flush(fo); + + ASSERT_EQ("foo", Get("barbarbar")); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + ASSERT_EQ("foo2", Get("barbarbar2")); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); + + ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); + + ASSERT_EQ("NOT_FOUND", Get("foobarbar")); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); +} TEST(DBTest, IterSeekBeforePrev) { ASSERT_OK(Put("a", "b")); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 8747d83d7..f03ab2b4b 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1104,6 +1104,23 @@ Iterator* BlockBasedTable::NewIterator(const ReadOptions& read_options, NewIndexIterator(read_options), arena); } +bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter, + const Slice& internal_key) const { + if (filter == nullptr || filter->IsBlockBased()) { + return true; + } + Slice user_key = ExtractUserKey(internal_key); + if (!filter->KeyMayMatch(user_key)) { + return false; + } + if (rep_->ioptions.prefix_extractor && + !filter->PrefixMayMatch( + rep_->ioptions.prefix_extractor->Transform(user_key))) { + return false; + } + return true; +} + Status BlockBasedTable::Get( const ReadOptions& read_options, const Slice& key, GetContext* get_context) { @@ -1113,8 +1130,7 @@ Status BlockBasedTable::Get( // First check the full filter // If full filter not useful, Then go into each block - if (filter != nullptr && !filter->IsBlockBased() - && !filter->KeyMayMatch(ExtractUserKey(key))) { + if (!FullFilterKeyMayMatch(filter, key)) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); } else { BlockIter iiter; diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 2902aa441..e3594cf7c 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -180,6 +180,9 @@ class BlockBasedTable : public TableReader { Status CreateIndexReader(IndexReader** index_reader, Iterator* preloaded_meta_index_iter = nullptr); + bool FullFilterKeyMayMatch(FilterBlockReader* filter, + const Slice& user_key) const; + // Read the meta block from sst. static Status ReadMetaBlock( Rep* rep,