BlockBasedTable::FullFilterKeyMayMatch() Should skip prefix bloom if full key bloom exists

Summary: Currently, if users define both of full key bloom and prefix bloom in SST files. During Get(), if full key bloom shows the key may exist, we still go ahead and check prefix bloom. This is wasteful. If bloom filter for full keys exists, we should always ignore prefix bloom in Get().

Test Plan: Run existing tests

Reviewers: yhchiang, IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D57825
main
sdong 9 years ago
parent 2d05eaeb28
commit 5009b5326b
  1. 38
      db/db_bloom_filter_test.cc
  2. 5
      table/block_based_filter_block.cc
  3. 1
      table/block_based_filter_block.h
  4. 4
      table/block_based_table_reader.cc
  5. 15
      table/filter_block.h
  6. 9
      table/full_filter_block.cc
  7. 1
      table/full_filter_block.h

@ -20,15 +20,29 @@ class DBBloomFilterTest : public DBTestBase {
DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {} DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {}
}; };
class DBBloomFilterTestWithParam : public DBTestBase,
public testing::WithParamInterface<bool> {
protected:
bool use_block_based_filter_;
public:
DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {}
~DBBloomFilterTestWithParam() {}
void SetUp() override { use_block_based_filter_ = GetParam(); }
};
// KeyMayExist can lead to a few false positives, but not false negatives. // KeyMayExist can lead to a few false positives, but not false negatives.
// To make test deterministic, use a much larger number of bits per key-20 than // To make test deterministic, use a much larger number of bits per key-20 than
// bits in the key, so that false positives are eliminated // bits in the key, so that false positives are eliminated
TEST_F(DBBloomFilterTest, KeyMayExist) { TEST_P(DBBloomFilterTestWithParam, KeyMayExist) {
do { do {
ReadOptions ropts; ReadOptions ropts;
std::string value; std::string value;
anon::OptionsOverride options_override; anon::OptionsOverride options_override;
options_override.filter_policy.reset(NewBloomFilterPolicy(20)); options_override.filter_policy.reset(
NewBloomFilterPolicy(20, use_block_based_filter_));
Options options = CurrentOptions(options_override); Options options = CurrentOptions(options_override);
options.statistics = rocksdb::CreateDBStatistics(); options.statistics = rocksdb::CreateDBStatistics();
CreateAndReopenWithCF({"pikachu"}, options); CreateAndReopenWithCF({"pikachu"}, options);
@ -89,10 +103,11 @@ TEST_F(DBBloomFilterTest, KeyMayExist) {
// A delete is skipped for key if KeyMayExist(key) returns False // A delete is skipped for key if KeyMayExist(key) returns False
// Tests Writebatch consistency and proper delete behaviour // Tests Writebatch consistency and proper delete behaviour
TEST_F(DBBloomFilterTest, FilterDeletes) { TEST_P(DBBloomFilterTestWithParam, FilterDeletes) {
do { do {
anon::OptionsOverride options_override; anon::OptionsOverride options_override;
options_override.filter_policy.reset(NewBloomFilterPolicy(20)); options_override.filter_policy.reset(
NewBloomFilterPolicy(20, use_block_based_filter_));
Options options = CurrentOptions(options_override); Options options = CurrentOptions(options_override);
options.filter_deletes = true; options.filter_deletes = true;
CreateAndReopenWithCF({"pikachu"}, options); CreateAndReopenWithCF({"pikachu"}, options);
@ -315,7 +330,7 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
} }
TEST_F(DBBloomFilterTest, BloomFilter) { TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
do { do {
Options options = CurrentOptions(); Options options = CurrentOptions();
env_->count_random_reads_ = true; env_->count_random_reads_ = true;
@ -324,7 +339,8 @@ TEST_F(DBBloomFilterTest, BloomFilter) {
// trigger reset of table_factory // trigger reset of table_factory
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.no_block_cache = true; table_options.no_block_cache = true;
table_options.filter_policy.reset(NewBloomFilterPolicy(10)); table_options.filter_policy.reset(
NewBloomFilterPolicy(10, use_block_based_filter_));
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
CreateAndReopenWithCF({"pikachu"}, options); CreateAndReopenWithCF({"pikachu"}, options);
@ -367,6 +383,9 @@ TEST_F(DBBloomFilterTest, BloomFilter) {
} while (ChangeCompactOptions()); } while (ChangeCompactOptions());
} }
INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithParam, DBBloomFilterTestWithParam,
::testing::Bool());
TEST_F(DBBloomFilterTest, BloomFilterRate) { TEST_F(DBBloomFilterTest, BloomFilterRate) {
while (ChangeFilterOptions()) { while (ChangeFilterOptions()) {
Options options = CurrentOptions(); Options options = CurrentOptions();
@ -685,13 +704,10 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTest) {
ASSERT_EQ(0, perf_context.bloom_sst_miss_count); ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
// check SST bloom stats // check SST bloom stats
// NOTE: hits per get differs because of code paths differences
// in BlockBasedTable::Get()
int hits_per_get = use_block_table_ && !use_block_based_builder_ ? 2 : 1;
ASSERT_EQ(value1, Get(key1)); ASSERT_EQ(value1, Get(key1));
ASSERT_EQ(hits_per_get, perf_context.bloom_sst_hit_count); ASSERT_EQ(1, perf_context.bloom_sst_hit_count);
ASSERT_EQ(value3, Get(key3)); ASSERT_EQ(value3, Get(key3));
ASSERT_EQ(2 * hits_per_get, perf_context.bloom_sst_hit_count); ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
ASSERT_EQ("NOT_FOUND", Get(key2)); ASSERT_EQ("NOT_FOUND", Get(key2));
ASSERT_EQ(1, perf_context.bloom_sst_miss_count); ASSERT_EQ(1, perf_context.bloom_sst_miss_count);

@ -160,12 +160,11 @@ void BlockBasedFilterBlockBuilder::GenerateFilter() {
BlockBasedFilterBlockReader::BlockBasedFilterBlockReader( BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
const SliceTransform* prefix_extractor, const SliceTransform* prefix_extractor,
const BlockBasedTableOptions& table_opt, bool whole_key_filtering, const BlockBasedTableOptions& table_opt, bool _whole_key_filtering,
BlockContents&& contents, Statistics* stats) BlockContents&& contents, Statistics* stats)
: FilterBlockReader(contents.data.size(), stats), : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
policy_(table_opt.filter_policy.get()), policy_(table_opt.filter_policy.get()),
prefix_extractor_(prefix_extractor), prefix_extractor_(prefix_extractor),
whole_key_filtering_(whole_key_filtering),
data_(nullptr), data_(nullptr),
offset_(nullptr), offset_(nullptr),
num_(0), num_(0),

@ -92,7 +92,6 @@ class BlockBasedFilterBlockReader : public FilterBlockReader {
private: private:
const FilterPolicy* policy_; const FilterPolicy* policy_;
const SliceTransform* prefix_extractor_; const SliceTransform* prefix_extractor_;
bool whole_key_filtering_;
const char* data_; // Pointer to filter data (at block-start) const char* data_; // Pointer to filter data (at block-start)
const char* offset_; // Pointer to beginning of offset array (at block-end) const char* offset_; // Pointer to beginning of offset array (at block-end)
size_t num_; // Number of entries in offset array size_t num_; // Number of entries in offset array

@ -1347,8 +1347,8 @@ bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options,
return true; return true;
} }
Slice user_key = ExtractUserKey(internal_key); Slice user_key = ExtractUserKey(internal_key);
if (!filter->KeyMayMatch(user_key)) { if (filter->whole_key_filtering()) {
return false; return filter->KeyMayMatch(user_key);
} }
if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor && if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor &&
rep_->ioptions.prefix_extractor->InDomain(user_key) && rep_->ioptions.prefix_extractor->InDomain(user_key) &&

@ -65,9 +65,13 @@ class FilterBlockBuilder {
// BlockBased/Full FilterBlock would be called in the same way. // BlockBased/Full FilterBlock would be called in the same way.
class FilterBlockReader { class FilterBlockReader {
public: public:
explicit FilterBlockReader() : size_(0), statistics_(nullptr) {} explicit FilterBlockReader()
explicit FilterBlockReader(size_t s, Statistics* stats) : whole_key_filtering_(true), size_(0), statistics_(nullptr) {}
: size_(s), statistics_(stats) {} explicit FilterBlockReader(size_t s, Statistics* stats,
bool _whole_key_filtering)
: whole_key_filtering_(_whole_key_filtering),
size_(s),
statistics_(stats) {}
virtual ~FilterBlockReader() {} virtual ~FilterBlockReader() {}
virtual bool IsBlockBased() = 0; // If is blockbased filter virtual bool IsBlockBased() = 0; // If is blockbased filter
@ -79,12 +83,17 @@ class FilterBlockReader {
virtual size_t size() const { return size_; } virtual size_t size() const { return size_; }
virtual Statistics* statistics() const { return statistics_; } virtual Statistics* statistics() const { return statistics_; }
bool whole_key_filtering() const { return whole_key_filtering_; }
// convert this object to a human readable form // convert this object to a human readable form
virtual std::string ToString() const { virtual std::string ToString() const {
std::string error_msg("Unsupported filter \n"); std::string error_msg("Unsupported filter \n");
return error_msg; return error_msg;
} }
protected:
bool whole_key_filtering_;
private: private:
// No copying allowed // No copying allowed
FilterBlockReader(const FilterBlockReader&); FilterBlockReader(const FilterBlockReader&);

@ -53,22 +53,21 @@ Slice FullFilterBlockBuilder::Finish() {
} }
FullFilterBlockReader::FullFilterBlockReader( FullFilterBlockReader::FullFilterBlockReader(
const SliceTransform* prefix_extractor, bool whole_key_filtering, const SliceTransform* prefix_extractor, bool _whole_key_filtering,
const Slice& contents, FilterBitsReader* filter_bits_reader, const Slice& contents, FilterBitsReader* filter_bits_reader,
Statistics* stats) Statistics* stats)
: FilterBlockReader(contents.size(), stats), : FilterBlockReader(contents.size(), stats, _whole_key_filtering),
prefix_extractor_(prefix_extractor), prefix_extractor_(prefix_extractor),
whole_key_filtering_(whole_key_filtering),
contents_(contents) { contents_(contents) {
assert(filter_bits_reader != nullptr); assert(filter_bits_reader != nullptr);
filter_bits_reader_.reset(filter_bits_reader); filter_bits_reader_.reset(filter_bits_reader);
} }
FullFilterBlockReader::FullFilterBlockReader( FullFilterBlockReader::FullFilterBlockReader(
const SliceTransform* prefix_extractor, bool whole_key_filtering, const SliceTransform* prefix_extractor, bool _whole_key_filtering,
BlockContents&& contents, FilterBitsReader* filter_bits_reader, BlockContents&& contents, FilterBitsReader* filter_bits_reader,
Statistics* stats) Statistics* stats)
: FullFilterBlockReader(prefix_extractor, whole_key_filtering, : FullFilterBlockReader(prefix_extractor, _whole_key_filtering,
contents.data, filter_bits_reader, stats) { contents.data, filter_bits_reader, stats) {
block_contents_ = std::move(contents); block_contents_ = std::move(contents);
} }

@ -96,7 +96,6 @@ class FullFilterBlockReader : public FilterBlockReader {
private: private:
const SliceTransform* prefix_extractor_; const SliceTransform* prefix_extractor_;
bool whole_key_filtering_;
std::unique_ptr<FilterBitsReader> filter_bits_reader_; std::unique_ptr<FilterBitsReader> filter_bits_reader_;
Slice contents_; Slice contents_;

Loading…
Cancel
Save