Summary:
1. Make filter_block.h a base class, and derive block_based_filter_block and full_filter_block from it. The former is the traditional filter block; the latter is newly added and generates a single filter block that contains all the keys in the SST file.
2. When querying a key, the table first checks whether a full filter is available. If not, it goes to the exact data block and checks using the block-based filter.
3. Users can choose between the full filter and the traditional (block-based) filter. The two are stored in the SST file under different meta index names, "filter.filter_policy" or "full_filter.filter_policy", so the table reader is able to tell the filter block type.
4. Some optimizations have been done for full_filter_block, thus it requires a different interface compared to the original one in filter_policy.h.
5. Actual implementation of filter bits coding/decoding is placed in util/bloom_impl.cc
Benchmark: base commit 1d23b5c470
Command:
db_bench --db=/dev/shm/rocksdb --num_levels=6 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --write_buffer_size=134217728 --max_write_buffer_number=2 --target_file_size_base=33554432 --max_bytes_for_level_base=1073741824 --verify_checksum=false --max_background_compactions=4 --use_plain_table=0 --memtablerep=prefix_hash --open_files=-1 --mmap_read=1 --mmap_write=0 --bloom_bits=10 --bloom_locality=1 --memtable_bloom_bits=500000 --compression_type=lz4 --num=393216000 --use_hash_search=1 --block_size=1024 --block_restart_interval=16 --use_existing_db=1 --threads=1 --benchmarks=readrandom --disable_auto_compactions=1
Read QPS increased by about 30%, from 2230002 to 2991411.
Test Plan:
make all check
valgrind db_test
db_stress --use_block_based_filter=0
./auto_sanity_test.sh
Reviewers: igor, yhchiang, ljin, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D20979
main
parent
9360cc690e
commit
0af157f9bf
@ -0,0 +1,102 @@ |
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// A filter block is stored near the end of a Table file. It contains
|
||||
// filters (e.g., bloom filters) for all data blocks in the table combined
|
||||
// into a single filter block.
|
||||
|
||||
#pragma once |
||||
|
||||
#include <stddef.h> |
||||
#include <stdint.h> |
||||
#include <string> |
||||
#include <memory> |
||||
#include <vector> |
||||
#include "rocksdb/options.h" |
||||
#include "rocksdb/slice.h" |
||||
#include "rocksdb/slice_transform.h" |
||||
#include "table/filter_block.h" |
||||
#include "util/hash.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
|
||||
// A BlockBasedFilterBlockBuilder is used to construct all of the filters for a
|
||||
// particular Table. It generates a single string which is stored as
|
||||
// a special block in the Table.
|
||||
//
|
||||
// The sequence of calls to BlockBasedFilterBlockBuilder must match the regexp:
|
||||
// (StartBlock Add*)* Finish
|
||||
class BlockBasedFilterBlockBuilder : public FilterBlockBuilder { |
||||
public: |
||||
BlockBasedFilterBlockBuilder(const SliceTransform* prefix_extractor, |
||||
const BlockBasedTableOptions& table_opt); |
||||
|
||||
virtual bool IsBlockBased() override { return true; } |
||||
virtual void StartBlock(uint64_t block_offset) override; |
||||
virtual void Add(const Slice& key) override; |
||||
virtual Slice Finish() override; |
||||
|
||||
private: |
||||
void AddKey(const Slice& key); |
||||
void AddPrefix(const Slice& key); |
||||
void GenerateFilter(); |
||||
|
||||
// important: all of these might point to invalid addresses
|
||||
// at the time of destruction of this filter block. destructor
|
||||
// should NOT dereference them.
|
||||
const FilterPolicy* policy_; |
||||
const SliceTransform* prefix_extractor_; |
||||
bool whole_key_filtering_; |
||||
|
||||
std::string entries_; // Flattened entry contents
|
||||
std::vector<size_t> start_; // Starting index in entries_ of each entry
|
||||
uint32_t added_to_start_; // To indicate if key is added
|
||||
std::string result_; // Filter data computed so far
|
||||
std::vector<Slice> tmp_entries_; // policy_->CreateFilter() argument
|
||||
std::vector<uint32_t> filter_offsets_; |
||||
|
||||
// No copying allowed
|
||||
BlockBasedFilterBlockBuilder(const BlockBasedFilterBlockBuilder&); |
||||
void operator=(const BlockBasedFilterBlockBuilder&); |
||||
}; |
||||
|
||||
// A FilterBlockReader is used to parse filter from SST table.
|
||||
// KeyMayMatch and PrefixMayMatch would trigger filter checking
|
||||
class BlockBasedFilterBlockReader : public FilterBlockReader { |
||||
public: |
||||
// REQUIRES: "contents" and *policy must stay live while *this is live.
|
||||
BlockBasedFilterBlockReader(const SliceTransform* prefix_extractor, |
||||
const BlockBasedTableOptions& table_opt, |
||||
const Slice& contents, |
||||
bool delete_contents_after_use = false); |
||||
virtual bool IsBlockBased() override { return true; } |
||||
virtual bool KeyMayMatch(const Slice& key, |
||||
uint64_t block_offset = kNotValid) override; |
||||
virtual bool PrefixMayMatch(const Slice& prefix, |
||||
uint64_t block_offset = kNotValid) override; |
||||
virtual size_t ApproximateMemoryUsage() const override; |
||||
|
||||
private: |
||||
const FilterPolicy* policy_; |
||||
const SliceTransform* prefix_extractor_; |
||||
bool whole_key_filtering_; |
||||
const char* data_; // Pointer to filter data (at block-start)
|
||||
const char* offset_; // Pointer to beginning of offset array (at block-end)
|
||||
size_t num_; // Number of entries in offset array
|
||||
size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file)
|
||||
std::unique_ptr<const char[]> filter_data; |
||||
|
||||
bool MayMatch(const Slice& entry, uint64_t block_offset); |
||||
|
||||
// No copying allowed
|
||||
BlockBasedFilterBlockReader(const BlockBasedFilterBlockReader&); |
||||
void operator=(const BlockBasedFilterBlockReader&); |
||||
}; |
||||
} // namespace rocksdb
|
@ -0,0 +1,242 @@ |
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "table/block_based_filter_block.h" |
||||
|
||||
#include "rocksdb/filter_policy.h" |
||||
#include "util/coding.h" |
||||
#include "util/hash.h" |
||||
#include "util/logging.h" |
||||
#include "util/testharness.h" |
||||
#include "util/testutil.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
// For testing: emit an array with one hash value per key
|
||||
class TestHashFilter : public FilterPolicy { |
||||
public: |
||||
virtual const char* Name() const { |
||||
return "TestHashFilter"; |
||||
} |
||||
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { |
||||
for (int i = 0; i < n; i++) { |
||||
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); |
||||
PutFixed32(dst, h); |
||||
} |
||||
} |
||||
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { |
||||
uint32_t h = Hash(key.data(), key.size(), 1); |
||||
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { |
||||
if (h == DecodeFixed32(filter.data() + i)) { |
||||
return true; |
||||
} |
||||
} |
||||
return false; |
||||
} |
||||
}; |
||||
|
||||
class FilterBlockTest { |
||||
public: |
||||
TestHashFilter policy_; |
||||
BlockBasedTableOptions table_options_; |
||||
|
||||
FilterBlockTest() { |
||||
table_options_.filter_policy.reset(new TestHashFilter()); |
||||
} |
||||
}; |
||||
|
||||
// A builder that never saw a key still produces a well-formed block, and a
// reader over that block reports "may match" at any offset.
TEST(FilterBlockTest, EmptyBuilder) {
  BlockBasedFilterBlockBuilder builder(nullptr, table_options_);
  const Slice block = builder.Finish();
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));

  BlockBasedFilterBlockReader reader(nullptr, table_options_, block);
  const uint64_t probe_offsets[] = {0, 100000};
  for (const uint64_t offset : probe_offsets) {
    ASSERT_TRUE(reader.KeyMayMatch("foo", offset));
  }
}
||||
|
||||
// Keys from three data blocks are added; every added key is then probed at
// offset 100 and must match, while keys never added must not.
TEST(FilterBlockTest, SingleChunk) {
  BlockBasedFilterBlockBuilder builder(nullptr, table_options_);
  builder.StartBlock(100);
  builder.Add("foo");
  builder.Add("bar");
  builder.Add("box");
  builder.StartBlock(200);
  builder.Add("box");
  builder.StartBlock(300);
  builder.Add("hello");
  const Slice block = builder.Finish();

  BlockBasedFilterBlockReader reader(nullptr, table_options_, block);
  const uint64_t kProbeOffset = 100;

  const char* const added_keys[] = {"foo", "bar", "box", "hello", "foo"};
  for (const char* key : added_keys) {
    ASSERT_TRUE(reader.KeyMayMatch(key, kProbeOffset));
  }

  const char* const absent_keys[] = {"missing", "other"};
  for (const char* key : absent_keys) {
    ASSERT_TRUE(!reader.KeyMayMatch(key, kProbeOffset));
  }
}
||||
|
||||
// Builds filters for blocks at offsets 0, 2000, 3100 and 9000 and checks, per
// probed offset, which keys may match. Expectations are listed as a table in
// the same order as the original hand-written assertions.
TEST(FilterBlockTest, MultiChunk) {
  BlockBasedFilterBlockBuilder builder(nullptr, table_options_);

  // First filter
  builder.StartBlock(0);
  builder.Add("foo");
  builder.StartBlock(2000);
  builder.Add("bar");

  // Second filter
  builder.StartBlock(3100);
  builder.Add("box");

  // Third filter is empty

  // Last filter
  builder.StartBlock(9000);
  builder.Add("box");
  builder.Add("hello");

  const Slice block = builder.Finish();
  BlockBasedFilterBlockReader reader(nullptr, table_options_, block);

  struct Probe {
    const char* key;
    uint64_t block_offset;
    bool may_match;
  };
  const Probe probes[] = {
      // First filter
      {"foo", 0, true},
      {"bar", 2000, true},
      {"box", 0, false},
      {"hello", 0, false},
      // Second filter
      {"box", 3100, true},
      {"foo", 3100, false},
      {"bar", 3100, false},
      {"hello", 3100, false},
      // Third filter (empty)
      {"foo", 4100, false},
      {"bar", 4100, false},
      {"box", 4100, false},
      {"hello", 4100, false},
      // Last filter
      {"box", 9000, true},
      {"hello", 9000, true},
      {"foo", 9000, false},
      {"bar", 9000, false},
  };
  for (const Probe& probe : probes) {
    ASSERT_TRUE(reader.KeyMayMatch(probe.key, probe.block_offset) ==
                probe.may_match);
  }
}
||||
|
||||
// Test for block based filter block
|
||||
// use new interface in FilterPolicy to create filter builder/reader
|
||||
class BlockBasedFilterBlockTest { |
||||
public: |
||||
BlockBasedTableOptions table_options_; |
||||
|
||||
BlockBasedFilterBlockTest() { |
||||
table_options_.filter_policy.reset(NewBloomFilterPolicy(10)); |
||||
} |
||||
|
||||
~BlockBasedFilterBlockTest() {} |
||||
}; |
||||
|
||||
// Same scenario as the empty-builder test, driven through base-class
// pointers. The builder and reader are held by unique_ptr so they are
// released on every exit path; the reader (declared last) is destroyed
// before the builder whose buffer `block` points into.
TEST(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) {
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));
  Slice block = builder->Finish();
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));
  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 0));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100000));
}
||||
|
||||
// Single-chunk scenario driven through base-class pointers. The builder and
// reader are held by unique_ptr so they are released on every exit path; the
// reader (declared last) is destroyed before the builder whose buffer
// `block` points into.
TEST(BlockBasedFilterBlockTest, BlockBasedSingleChunk) {
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));
  builder->StartBlock(100);
  builder->Add("foo");
  builder->Add("bar");
  builder->Add("box");
  builder->StartBlock(200);
  builder->Add("box");
  builder->StartBlock(300);
  builder->Add("hello");
  Slice block = builder->Finish();
  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100));
  ASSERT_TRUE(reader->KeyMayMatch("bar", 100));
  ASSERT_TRUE(reader->KeyMayMatch("box", 100));
  ASSERT_TRUE(reader->KeyMayMatch("hello", 100));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100));
  ASSERT_TRUE(!reader->KeyMayMatch("missing", 100));
  ASSERT_TRUE(!reader->KeyMayMatch("other", 100));
}
||||
|
||||
// Multi-chunk scenario driven through base-class pointers. The builder and
// reader are held by unique_ptr so they are released on every exit path; the
// reader (declared last) is destroyed before the builder whose buffer
// `block` points into.
TEST(BlockBasedFilterBlockTest, BlockBasedMultiChunk) {
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));

  // First filter
  builder->StartBlock(0);
  builder->Add("foo");
  builder->StartBlock(2000);
  builder->Add("bar");

  // Second filter
  builder->StartBlock(3100);
  builder->Add("box");

  // Third filter is empty

  // Last filter
  builder->StartBlock(9000);
  builder->Add("box");
  builder->Add("hello");

  Slice block = builder->Finish();
  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));

  // Check first filter
  ASSERT_TRUE(reader->KeyMayMatch("foo", 0));
  ASSERT_TRUE(reader->KeyMayMatch("bar", 2000));
  ASSERT_TRUE(!reader->KeyMayMatch("box", 0));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 0));

  // Check second filter
  ASSERT_TRUE(reader->KeyMayMatch("box", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 3100));

  // Check third filter (empty)
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("box", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 4100));

  // Check last filter
  ASSERT_TRUE(reader->KeyMayMatch("box", 9000));
  ASSERT_TRUE(reader->KeyMayMatch("hello", 9000));
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 9000));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 9000));
}
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
// Test binary entry point: runs every test registered with the harness and
// uses its result as the process exit status.
int main(int argc, char** argv) {
  return rocksdb::test::RunAllTests();
}
@ -1,139 +0,0 @@ |
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "table/filter_block.h" |
||||
|
||||
#include "rocksdb/filter_policy.h" |
||||
#include "util/coding.h" |
||||
#include "util/hash.h" |
||||
#include "util/logging.h" |
||||
#include "util/testharness.h" |
||||
#include "util/testutil.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
// For testing: emit an array with one hash value per key
|
||||
class TestHashFilter : public FilterPolicy { |
||||
public: |
||||
virtual const char* Name() const { |
||||
return "TestHashFilter"; |
||||
} |
||||
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { |
||||
for (int i = 0; i < n; i++) { |
||||
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); |
||||
PutFixed32(dst, h); |
||||
} |
||||
} |
||||
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { |
||||
uint32_t h = Hash(key.data(), key.size(), 1); |
||||
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { |
||||
if (h == DecodeFixed32(filter.data() + i)) { |
||||
return true; |
||||
} |
||||
} |
||||
return false; |
||||
} |
||||
}; |
||||
|
||||
class FilterBlockTest { |
||||
public: |
||||
const Comparator* comparator_; |
||||
BlockBasedTableOptions table_options_; |
||||
|
||||
FilterBlockTest() |
||||
: comparator_(BytewiseComparator()) { |
||||
table_options_.filter_policy.reset(new TestHashFilter()); |
||||
} |
||||
}; |
||||
|
||||
TEST(FilterBlockTest, EmptyBuilder) { |
||||
FilterBlockBuilder builder(nullptr, table_options_, comparator_); |
||||
Slice block = builder.Finish(); |
||||
ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block)); |
||||
FilterBlockReader reader(nullptr, table_options_, block); |
||||
ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); |
||||
ASSERT_TRUE(reader.KeyMayMatch(100000, "foo")); |
||||
} |
||||
|
||||
TEST(FilterBlockTest, SingleChunk) { |
||||
FilterBlockBuilder builder(nullptr, table_options_, comparator_); |
||||
builder.StartBlock(100); |
||||
builder.AddKey("foo"); |
||||
builder.AddKey("bar"); |
||||
builder.AddKey("box"); |
||||
builder.StartBlock(200); |
||||
builder.AddKey("box"); |
||||
builder.StartBlock(300); |
||||
builder.AddKey("hello"); |
||||
Slice block = builder.Finish(); |
||||
FilterBlockReader reader(nullptr, table_options_, block); |
||||
ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); |
||||
ASSERT_TRUE(reader.KeyMayMatch(100, "bar")); |
||||
ASSERT_TRUE(reader.KeyMayMatch(100, "box")); |
||||
ASSERT_TRUE(reader.KeyMayMatch(100, "hello")); |
||||
ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(100, "missing")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(100, "other")); |
||||
} |
||||
|
||||
TEST(FilterBlockTest, MultiChunk) { |
||||
FilterBlockBuilder builder(nullptr, table_options_, comparator_); |
||||
|
||||
// First filter
|
||||
builder.StartBlock(0); |
||||
builder.AddKey("foo"); |
||||
builder.StartBlock(2000); |
||||
builder.AddKey("bar"); |
||||
|
||||
// Second filter
|
||||
builder.StartBlock(3100); |
||||
builder.AddKey("box"); |
||||
|
||||
// Third filter is empty
|
||||
|
||||
// Last filter
|
||||
builder.StartBlock(9000); |
||||
builder.AddKey("box"); |
||||
builder.AddKey("hello"); |
||||
|
||||
Slice block = builder.Finish(); |
||||
FilterBlockReader reader(nullptr, table_options_, block); |
||||
|
||||
// Check first filter
|
||||
ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); |
||||
ASSERT_TRUE(reader.KeyMayMatch(2000, "bar")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(0, "box")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(0, "hello")); |
||||
|
||||
// Check second filter
|
||||
ASSERT_TRUE(reader.KeyMayMatch(3100, "box")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello")); |
||||
|
||||
// Check third filter (empty)
|
||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "box")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello")); |
||||
|
||||
// Check last filter
|
||||
ASSERT_TRUE(reader.KeyMayMatch(9000, "box")); |
||||
ASSERT_TRUE(reader.KeyMayMatch(9000, "hello")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo")); |
||||
ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar")); |
||||
} |
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) { |
||||
return rocksdb::test::RunAllTests(); |
||||
} |
@ -0,0 +1,99 @@ |
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include "table/full_filter_block.h" |
||||
|
||||
#include "rocksdb/filter_policy.h" |
||||
#include "port/port.h" |
||||
#include "util/coding.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
// Stores the prefix extractor and the whole-key flag from table_opt, and
// takes ownership of filter_bits_builder, which must not be null.
FullFilterBlockBuilder::FullFilterBlockBuilder(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt,
    FilterBitsBuilder* filter_bits_builder)
    : prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      num_added_(0),
      filter_bits_builder_(filter_bits_builder) {
  assert(filter_bits_builder_ != nullptr);
}
||||
|
||||
void FullFilterBlockBuilder::Add(const Slice& key) { |
||||
if (whole_key_filtering_) { |
||||
AddKey(key); |
||||
} |
||||
if (prefix_extractor_ && prefix_extractor_->InDomain(key)) { |
||||
AddPrefix(key); |
||||
} |
||||
} |
||||
|
||||
// Add key to filter if needed
|
||||
inline void FullFilterBlockBuilder::AddKey(const Slice& key) { |
||||
filter_bits_builder_->AddKey(key); |
||||
num_added_++; |
||||
} |
||||
|
||||
// Add prefix to filter if needed
|
||||
inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) { |
||||
Slice prefix = prefix_extractor_->Transform(key); |
||||
filter_bits_builder_->AddKey(prefix); |
||||
num_added_++; |
||||
} |
||||
|
||||
// Returns the finished filter. When nothing was added the result is an empty
// Slice; otherwise the entry counter is reset and the bits builder produces
// the filter, storing its backing buffer in filter_data.
Slice FullFilterBlockBuilder::Finish() {
  if (num_added_ == 0) {
    return Slice();
  }
  num_added_ = 0;
  return filter_bits_builder_->Finish(&filter_data);
}
||||
|
||||
// Takes ownership of filter_bits_reader, which must not be null. When
// delete_contents_after_use is set, the bytes behind contents are also
// adopted and released together with this reader.
FullFilterBlockReader::FullFilterBlockReader(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt,
    const Slice& contents,
    FilterBitsReader* filter_bits_reader, bool delete_contents_after_use)
    : prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      filter_bits_reader_(filter_bits_reader),
      contents_(contents),
      filter_data(delete_contents_after_use ? contents.data() : nullptr) {
  assert(filter_bits_reader_ != nullptr);
}
||||
|
||||
bool FullFilterBlockReader::KeyMayMatch(const Slice& key, |
||||
uint64_t block_offset) { |
||||
assert(block_offset == kNotValid); |
||||
if (!whole_key_filtering_) { |
||||
return true; |
||||
} |
||||
return MayMatch(key); |
||||
} |
||||
|
||||
bool FullFilterBlockReader::PrefixMayMatch(const Slice& prefix, |
||||
uint64_t block_offset) { |
||||
assert(block_offset == kNotValid); |
||||
if (!prefix_extractor_) { |
||||
return true; |
||||
} |
||||
return MayMatch(prefix); |
||||
} |
||||
|
||||
bool FullFilterBlockReader::MayMatch(const Slice& entry) { |
||||
if (contents_.size() != 0) { |
||||
return filter_bits_reader_->MayMatch(entry); |
||||
} |
||||
return true; // remain the same with block_based filter
|
||||
} |
||||
|
||||
size_t FullFilterBlockReader::ApproximateMemoryUsage() const { |
||||
return contents_.size(); |
||||
} |
||||
} // namespace rocksdb
|
@ -0,0 +1,107 @@ |
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#pragma once |
||||
|
||||
#include <stddef.h> |
||||
#include <stdint.h> |
||||
#include <memory> |
||||
#include <string> |
||||
#include <vector> |
||||
#include "rocksdb/options.h" |
||||
#include "rocksdb/slice.h" |
||||
#include "rocksdb/slice_transform.h" |
||||
#include "db/dbformat.h" |
||||
#include "util/hash.h" |
||||
#include "table/filter_block.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
class FilterPolicy; |
||||
class FilterBitsBuilder; |
||||
class FilterBitsReader; |
||||
|
||||
// A FullFilterBlockBuilder is used to construct a full filter for a
|
||||
// particular Table. It generates a single string which is stored as
|
||||
// a special block in the Table.
|
||||
// The format of full filter block is:
|
||||
// +----------------------------------------------------------------+
|
||||
// | full filter for all keys in sst file |
|
||||
// +----------------------------------------------------------------+
|
||||
// The full filter can be very large. At the end of it, we put
|
||||
// num_probes: how many hash functions are used in bloom filter
|
||||
//
|
||||
class FullFilterBlockBuilder : public FilterBlockBuilder { |
||||
public: |
||||
explicit FullFilterBlockBuilder(const SliceTransform* prefix_extractor, |
||||
const BlockBasedTableOptions& table_opt, |
||||
FilterBitsBuilder* filter_bits_builder); |
||||
// bits_builder is created in filter_policy, it should be passed in here
|
||||
// directly. and be deleted here
|
||||
~FullFilterBlockBuilder() {} |
||||
|
||||
virtual bool IsBlockBased() override { return false; } |
||||
virtual void StartBlock(uint64_t block_offset) override {} |
||||
virtual void Add(const Slice& key) override; |
||||
virtual Slice Finish() override; |
||||
|
||||
private: |
||||
// important: all of these might point to invalid addresses
|
||||
// at the time of destruction of this filter block. destructor
|
||||
// should NOT dereference them.
|
||||
const SliceTransform* prefix_extractor_; |
||||
bool whole_key_filtering_; |
||||
|
||||
uint32_t num_added_; |
||||
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_; |
||||
std::unique_ptr<const char[]> filter_data; |
||||
|
||||
void AddKey(const Slice& key); |
||||
void AddPrefix(const Slice& key); |
||||
|
||||
// No copying allowed
|
||||
FullFilterBlockBuilder(const FullFilterBlockBuilder&); |
||||
void operator=(const FullFilterBlockBuilder&); |
||||
}; |
||||
|
||||
// A FilterBlockReader is used to parse filter from SST table.
|
||||
// KeyMayMatch and PrefixMayMatch would trigger filter checking
|
||||
class FullFilterBlockReader : public FilterBlockReader { |
||||
public: |
||||
// REQUIRES: "contents" and filter_bits_reader must stay live
|
||||
// while *this is live.
|
||||
explicit FullFilterBlockReader(const SliceTransform* prefix_extractor, |
||||
const BlockBasedTableOptions& table_opt, |
||||
const Slice& contents, |
||||
FilterBitsReader* filter_bits_reader, |
||||
bool delete_contents_after_use = false); |
||||
|
||||
// bits_reader is created in filter_policy, it should be passed in here
|
||||
// directly. and be deleted here
|
||||
~FullFilterBlockReader() {} |
||||
|
||||
virtual bool IsBlockBased() override { return false; } |
||||
virtual bool KeyMayMatch(const Slice& key, |
||||
uint64_t block_offset = kNotValid) override; |
||||
virtual bool PrefixMayMatch(const Slice& prefix, |
||||
uint64_t block_offset = kNotValid) override; |
||||
virtual size_t ApproximateMemoryUsage() const override; |
||||
|
||||
private: |
||||
const SliceTransform* prefix_extractor_; |
||||
bool whole_key_filtering_; |
||||
|
||||
std::unique_ptr<FilterBitsReader> filter_bits_reader_; |
||||
Slice contents_; |
||||
std::unique_ptr<const char[]> filter_data; |
||||
|
||||
bool MayMatch(const Slice& entry); |
||||
|
||||
// No copying allowed
|
||||
FullFilterBlockReader(const FullFilterBlockReader&); |
||||
void operator=(const FullFilterBlockReader&); |
||||
}; |
||||
|
||||
} // namespace rocksdb
|
@ -0,0 +1,181 @@ |
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include "table/full_filter_block.h" |
||||
|
||||
#include "rocksdb/filter_policy.h" |
||||
#include "util/coding.h" |
||||
#include "util/hash.h" |
||||
#include "util/logging.h" |
||||
#include "util/testharness.h" |
||||
#include "util/testutil.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
class TestFilterBitsBuilder : public FilterBitsBuilder { |
||||
public: |
||||
explicit TestFilterBitsBuilder() {} |
||||
|
||||
// Add Key to filter
|
||||
virtual void AddKey(const Slice& key) override { |
||||
hash_entries_.push_back(Hash(key.data(), key.size(), 1)); |
||||
} |
||||
|
||||
// Generate the filter using the keys that are added
|
||||
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override { |
||||
uint32_t len = hash_entries_.size() * 4; |
||||
char* data = new char[len]; |
||||
for (size_t i = 0; i < hash_entries_.size(); i++) { |
||||
EncodeFixed32(data + i * 4, hash_entries_[i]); |
||||
} |
||||
buf->reset(data); |
||||
return Slice(data, len); |
||||
} |
||||
|
||||
private: |
||||
std::vector<uint32_t> hash_entries_; |
||||
}; |
||||
|
||||
class TestFilterBitsReader : public FilterBitsReader { |
||||
public: |
||||
explicit TestFilterBitsReader(const Slice& contents) |
||||
: data_(contents.data()), len_(contents.size()) {} |
||||
|
||||
virtual bool MayMatch(const Slice& entry) override { |
||||
uint32_t h = Hash(entry.data(), entry.size(), 1); |
||||
for (size_t i = 0; i + 4 <= len_; i += 4) { |
||||
if (h == DecodeFixed32(data_ + i)) { |
||||
return true; |
||||
} |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
private: |
||||
const char* data_; |
||||
uint32_t len_; |
||||
}; |
||||
|
||||
|
||||
class TestHashFilter : public FilterPolicy { |
||||
public: |
||||
virtual const char* Name() const { |
||||
return "TestHashFilter"; |
||||
} |
||||
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { |
||||
for (int i = 0; i < n; i++) { |
||||
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); |
||||
PutFixed32(dst, h); |
||||
} |
||||
} |
||||
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { |
||||
uint32_t h = Hash(key.data(), key.size(), 1); |
||||
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { |
||||
if (h == DecodeFixed32(filter.data() + i)) { |
||||
return true; |
||||
} |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
virtual FilterBitsBuilder* GetFilterBitsBuilder() const override { |
||||
return new TestFilterBitsBuilder(); |
||||
} |
||||
|
||||
virtual FilterBitsReader* GetFilterBitsReader(const Slice& contents) |
||||
const override { |
||||
return new TestFilterBitsReader(contents); |
||||
} |
||||
}; |
||||
|
||||
class PluginFullFilterBlockTest { |
||||
public: |
||||
BlockBasedTableOptions table_options_; |
||||
|
||||
PluginFullFilterBlockTest() { |
||||
table_options_.filter_policy.reset(new TestHashFilter()); |
||||
} |
||||
}; |
||||
|
||||
// With no keys added the builder yields an empty block, and a reader over an
// empty block answers "may match".
TEST(PluginFullFilterBlockTest, PluginEmptyBuilder) {
  const auto& policy = table_options_.filter_policy;

  FullFilterBlockBuilder builder(nullptr, table_options_,
                                 policy->GetFilterBitsBuilder());
  const Slice block = builder.Finish();
  ASSERT_EQ("", EscapeString(block));

  FullFilterBlockReader reader(nullptr, table_options_, block,
                               policy->GetFilterBitsReader(block));
  // Keeps the same semantics as the block-based filter
  ASSERT_TRUE(reader.KeyMayMatch("foo"));
}
||||
|
||||
// Every added key (including a duplicate) must be reported as a possible
// match; keys that were never added must not.
TEST(PluginFullFilterBlockTest, PluginSingleChunk) {
  const auto& policy = table_options_.filter_policy;

  FullFilterBlockBuilder builder(nullptr, table_options_,
                                 policy->GetFilterBitsBuilder());
  const char* const inserted[] = {"foo", "bar", "box", "box", "hello"};
  for (const char* key : inserted) {
    builder.Add(key);
  }
  const Slice block = builder.Finish();

  FullFilterBlockReader reader(nullptr, table_options_, block,
                               policy->GetFilterBitsReader(block));

  const char* const expected_hits[] = {"foo", "bar", "box", "hello", "foo"};
  for (const char* key : expected_hits) {
    ASSERT_TRUE(reader.KeyMayMatch(key));
  }

  const char* const expected_misses[] = {"missing", "other"};
  for (const char* key : expected_misses) {
    ASSERT_TRUE(!reader.KeyMayMatch(key));
  }
}
||||
|
||||
class FullFilterBlockTest { |
||||
public: |
||||
BlockBasedTableOptions table_options_; |
||||
|
||||
FullFilterBlockTest() { |
||||
table_options_.filter_policy.reset(NewBloomFilterPolicy(10, false)); |
||||
} |
||||
|
||||
~FullFilterBlockTest() {} |
||||
}; |
||||
|
||||
// With no keys added the builder yields an empty block, and a reader over an
// empty block answers "may match".
TEST(FullFilterBlockTest, EmptyBuilder) {
  const auto& policy = table_options_.filter_policy;

  FullFilterBlockBuilder builder(nullptr, table_options_,
                                 policy->GetFilterBitsBuilder());
  const Slice block = builder.Finish();
  ASSERT_EQ("", EscapeString(block));

  FullFilterBlockReader reader(nullptr, table_options_, block,
                               policy->GetFilterBitsReader(block));
  // Keeps the same semantics as the block-based filter
  ASSERT_TRUE(reader.KeyMayMatch("foo"));
}
||||
|
||||
// Every added key (including a duplicate) must be reported as a possible
// match; keys that were never added must not.
TEST(FullFilterBlockTest, SingleChunk) {
  const auto& policy = table_options_.filter_policy;

  FullFilterBlockBuilder builder(nullptr, table_options_,
                                 policy->GetFilterBitsBuilder());
  const char* const inserted[] = {"foo", "bar", "box", "box", "hello"};
  for (const char* key : inserted) {
    builder.Add(key);
  }
  const Slice block = builder.Finish();

  FullFilterBlockReader reader(nullptr, table_options_, block,
                               policy->GetFilterBitsReader(block));

  const char* const expected_hits[] = {"foo", "bar", "box", "hello", "foo"};
  for (const char* key : expected_hits) {
    ASSERT_TRUE(reader.KeyMayMatch(key));
  }

  const char* const expected_misses[] = {"missing", "other"};
  for (const char* key : expected_misses) {
    ASSERT_TRUE(!reader.KeyMayMatch(key));
  }
}
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
// Test binary entry point: runs every test registered with the harness and
// uses its result as the process exit status.
int main(int argc, char** argv) {
  return rocksdb::test::RunAllTests();
}
Loading…
Reference in new issue