Summary:
1. Make filter_block.h a base class. Derive block_based_filter_block and full_filter_block from it. The former is the traditional filter block. The full_filter_block is newly added; it generates a filter block that contains all the keys in the SST file.
2. When querying a key, table would first check if full_filter is available. If not, it would go to the exact data block and check using block_based filter.
3. Users can choose between full_filter and the traditional block_based_filter. The two are stored in the SST file under different meta index names: "filter.filter_policy" or "full_filter.filter_policy". From that name, the table reader knows the filter block type.
4. Some optimizations have been done for full_filter_block, thus it requires a different interface compared to the original one in filter_policy.h.
5. Actual implementation of filter bits coding/decoding is placed in util/bloom_impl.cc
Benchmark: base commit 1d23b5c470
Command:
db_bench --db=/dev/shm/rocksdb --num_levels=6 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --write_buffer_size=134217728 --max_write_buffer_number=2 --target_file_size_base=33554432 --max_bytes_for_level_base=1073741824 --verify_checksum=false --max_background_compactions=4 --use_plain_table=0 --memtablerep=prefix_hash --open_files=-1 --mmap_read=1 --mmap_write=0 --bloom_bits=10 --bloom_locality=1 --memtable_bloom_bits=500000 --compression_type=lz4 --num=393216000 --use_hash_search=1 --block_size=1024 --block_restart_interval=16 --use_existing_db=1 --threads=1 --benchmarks=readrandom --disable_auto_compactions=1
Read QPS increased by about 30%, from 2230002 to 2991411.
Test Plan:
make all check
valgrind db_test
db_stress --use_block_based_filter=0
./auto_sanity_test.sh
Reviewers: igor, yhchiang, ljin, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D20979
main
parent
9360cc690e
commit
0af157f9bf
@ -0,0 +1,102 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
//
|
||||||
|
// A filter block is stored near the end of a Table file. It contains
|
||||||
|
// filters (e.g., bloom filters) for all data blocks in the table combined
|
||||||
|
// into a single filter block.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <stddef.h> |
||||||
|
#include <stdint.h> |
||||||
|
#include <string> |
||||||
|
#include <memory> |
||||||
|
#include <vector> |
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "rocksdb/slice.h" |
||||||
|
#include "rocksdb/slice_transform.h" |
||||||
|
#include "table/filter_block.h" |
||||||
|
#include "util/hash.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
|
||||||
|
// A BlockBasedFilterBlockBuilder is used to construct all of the filters for a
|
||||||
|
// particular Table. It generates a single string which is stored as
|
||||||
|
// a special block in the Table.
|
||||||
|
//
|
||||||
|
// The sequence of calls to BlockBasedFilterBlockBuilder must match the regexp:
|
||||||
|
// (StartBlock Add*)* Finish
|
||||||
|
class BlockBasedFilterBlockBuilder : public FilterBlockBuilder { |
||||||
|
public: |
||||||
|
BlockBasedFilterBlockBuilder(const SliceTransform* prefix_extractor, |
||||||
|
const BlockBasedTableOptions& table_opt); |
||||||
|
|
||||||
|
virtual bool IsBlockBased() override { return true; } |
||||||
|
virtual void StartBlock(uint64_t block_offset) override; |
||||||
|
virtual void Add(const Slice& key) override; |
||||||
|
virtual Slice Finish() override; |
||||||
|
|
||||||
|
private: |
||||||
|
void AddKey(const Slice& key); |
||||||
|
void AddPrefix(const Slice& key); |
||||||
|
void GenerateFilter(); |
||||||
|
|
||||||
|
// important: all of these might point to invalid addresses
|
||||||
|
// at the time of destruction of this filter block. destructor
|
||||||
|
// should NOT dereference them.
|
||||||
|
const FilterPolicy* policy_; |
||||||
|
const SliceTransform* prefix_extractor_; |
||||||
|
bool whole_key_filtering_; |
||||||
|
|
||||||
|
std::string entries_; // Flattened entry contents
|
||||||
|
std::vector<size_t> start_; // Starting index in entries_ of each entry
|
||||||
|
uint32_t added_to_start_; // To indicate if key is added
|
||||||
|
std::string result_; // Filter data computed so far
|
||||||
|
std::vector<Slice> tmp_entries_; // policy_->CreateFilter() argument
|
||||||
|
std::vector<uint32_t> filter_offsets_; |
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
BlockBasedFilterBlockBuilder(const BlockBasedFilterBlockBuilder&); |
||||||
|
void operator=(const BlockBasedFilterBlockBuilder&); |
||||||
|
}; |
||||||
|
|
||||||
|
// A FilterBlockReader is used to parse filter from SST table.
|
||||||
|
// KeyMayMatch and PrefixMayMatch would trigger filter checking
|
||||||
|
class BlockBasedFilterBlockReader : public FilterBlockReader { |
||||||
|
public: |
||||||
|
// REQUIRES: "contents" and *policy must stay live while *this is live.
|
||||||
|
BlockBasedFilterBlockReader(const SliceTransform* prefix_extractor, |
||||||
|
const BlockBasedTableOptions& table_opt, |
||||||
|
const Slice& contents, |
||||||
|
bool delete_contents_after_use = false); |
||||||
|
virtual bool IsBlockBased() override { return true; } |
||||||
|
virtual bool KeyMayMatch(const Slice& key, |
||||||
|
uint64_t block_offset = kNotValid) override; |
||||||
|
virtual bool PrefixMayMatch(const Slice& prefix, |
||||||
|
uint64_t block_offset = kNotValid) override; |
||||||
|
virtual size_t ApproximateMemoryUsage() const override; |
||||||
|
|
||||||
|
private: |
||||||
|
const FilterPolicy* policy_; |
||||||
|
const SliceTransform* prefix_extractor_; |
||||||
|
bool whole_key_filtering_; |
||||||
|
const char* data_; // Pointer to filter data (at block-start)
|
||||||
|
const char* offset_; // Pointer to beginning of offset array (at block-end)
|
||||||
|
size_t num_; // Number of entries in offset array
|
||||||
|
size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file)
|
||||||
|
std::unique_ptr<const char[]> filter_data; |
||||||
|
|
||||||
|
bool MayMatch(const Slice& entry, uint64_t block_offset); |
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
BlockBasedFilterBlockReader(const BlockBasedFilterBlockReader&); |
||||||
|
void operator=(const BlockBasedFilterBlockReader&); |
||||||
|
}; |
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,242 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "table/block_based_filter_block.h" |
||||||
|
|
||||||
|
#include "rocksdb/filter_policy.h" |
||||||
|
#include "util/coding.h" |
||||||
|
#include "util/hash.h" |
||||||
|
#include "util/logging.h" |
||||||
|
#include "util/testharness.h" |
||||||
|
#include "util/testutil.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// For testing: emit an array with one hash value per key
|
||||||
|
class TestHashFilter : public FilterPolicy { |
||||||
|
public: |
||||||
|
virtual const char* Name() const { |
||||||
|
return "TestHashFilter"; |
||||||
|
} |
||||||
|
|
||||||
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { |
||||||
|
for (int i = 0; i < n; i++) { |
||||||
|
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); |
||||||
|
PutFixed32(dst, h); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { |
||||||
|
uint32_t h = Hash(key.data(), key.size(), 1); |
||||||
|
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { |
||||||
|
if (h == DecodeFixed32(filter.data() + i)) { |
||||||
|
return true; |
||||||
|
} |
||||||
|
} |
||||||
|
return false; |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
class FilterBlockTest { |
||||||
|
public: |
||||||
|
TestHashFilter policy_; |
||||||
|
BlockBasedTableOptions table_options_; |
||||||
|
|
||||||
|
FilterBlockTest() { |
||||||
|
table_options_.filter_policy.reset(new TestHashFilter()); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
// A builder that never saw a key must still produce the expected five-byte
// block, and a reader over that block must answer "may match" for any offset.
TEST(FilterBlockTest, EmptyBuilder) {
  BlockBasedFilterBlockBuilder builder(nullptr, table_options_);
  const Slice block = builder.Finish();
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));

  BlockBasedFilterBlockReader reader(nullptr, table_options_, block);
  const uint64_t kOffsets[] = {0, 100000};
  for (const uint64_t block_offset : kOffsets) {
    ASSERT_TRUE(reader.KeyMayMatch("foo", block_offset));
  }
}
||||||
|
|
||||||
|
// Keys added under block offsets 100, 200 and 300 are all expected to be
// visible when probing offset 100; keys never added must be rejected.
TEST(FilterBlockTest, SingleChunk) {
  BlockBasedFilterBlockBuilder builder(nullptr, table_options_);
  builder.StartBlock(100);
  builder.Add("foo");
  builder.Add("bar");
  builder.Add("box");
  builder.StartBlock(200);
  builder.Add("box");
  builder.StartBlock(300);
  builder.Add("hello");
  const Slice block = builder.Finish();

  BlockBasedFilterBlockReader reader(nullptr, table_options_, block);
  const uint64_t kProbeOffset = 100;

  const char* const kPresent[] = {"foo", "bar", "box", "hello", "foo"};
  for (const char* key : kPresent) {
    ASSERT_TRUE(reader.KeyMayMatch(key, kProbeOffset));
  }

  const char* const kAbsent[] = {"missing", "other"};
  for (const char* key : kAbsent) {
    ASSERT_TRUE(!reader.KeyMayMatch(key, kProbeOffset));
  }
}
||||||
|
|
||||||
|
// Builds several filters (one of them empty) and checks that each key is
// reported only at the offsets whose filter it was added to.
TEST(FilterBlockTest, MultiChunk) {
  BlockBasedFilterBlockBuilder builder(nullptr, table_options_);

  // First filter
  builder.StartBlock(0);
  builder.Add("foo");
  builder.StartBlock(2000);
  builder.Add("bar");

  // Second filter
  builder.StartBlock(3100);
  builder.Add("box");

  // Third filter is empty

  // Last filter
  builder.StartBlock(9000);
  builder.Add("box");
  builder.Add("hello");

  const Slice block = builder.Finish();
  BlockBasedFilterBlockReader reader(nullptr, table_options_, block);

  struct Probe {
    const char* key;
    uint64_t block_offset;
    bool may_match;
  };
  const Probe kProbes[] = {
      // First filter
      {"foo", 0, true},
      {"bar", 2000, true},
      {"box", 0, false},
      {"hello", 0, false},
      // Second filter
      {"box", 3100, true},
      {"foo", 3100, false},
      {"bar", 3100, false},
      {"hello", 3100, false},
      // Third filter (empty)
      {"foo", 4100, false},
      {"bar", 4100, false},
      {"box", 4100, false},
      {"hello", 4100, false},
      // Last filter
      {"box", 9000, true},
      {"hello", 9000, true},
      {"foo", 9000, false},
      {"bar", 9000, false},
  };
  for (const Probe& probe : kProbes) {
    ASSERT_TRUE(reader.KeyMayMatch(probe.key, probe.block_offset) ==
                probe.may_match);
  }
}
||||||
|
|
||||||
|
// Test for block based filter block
|
||||||
|
// use new interface in FilterPolicy to create filter builder/reader
|
||||||
|
class BlockBasedFilterBlockTest { |
||||||
|
public: |
||||||
|
BlockBasedTableOptions table_options_; |
||||||
|
|
||||||
|
BlockBasedFilterBlockTest() { |
||||||
|
table_options_.filter_policy.reset(NewBloomFilterPolicy(10)); |
||||||
|
} |
||||||
|
|
||||||
|
~BlockBasedFilterBlockTest() {} |
||||||
|
}; |
||||||
|
|
||||||
|
// Same scenario as FilterBlockTest.EmptyBuilder, but exercised through the
// base-class interfaces.  The builder and reader are held in unique_ptrs so
// they are released on every exit path without manual delete calls.
TEST(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) {
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));
  Slice block = builder->Finish();
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));

  // Declared after the builder, so the reader is destroyed first.
  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 0));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100000));
}
||||||
|
|
||||||
|
// Same scenario as FilterBlockTest.SingleChunk, but exercised through the
// base-class interfaces.  The builder and reader are held in unique_ptrs so
// they are released on every exit path without manual delete calls.
TEST(BlockBasedFilterBlockTest, BlockBasedSingleChunk) {
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));
  builder->StartBlock(100);
  builder->Add("foo");
  builder->Add("bar");
  builder->Add("box");
  builder->StartBlock(200);
  builder->Add("box");
  builder->StartBlock(300);
  builder->Add("hello");
  Slice block = builder->Finish();

  // Declared after the builder, so the reader is destroyed first.
  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100));
  ASSERT_TRUE(reader->KeyMayMatch("bar", 100));
  ASSERT_TRUE(reader->KeyMayMatch("box", 100));
  ASSERT_TRUE(reader->KeyMayMatch("hello", 100));
  ASSERT_TRUE(reader->KeyMayMatch("foo", 100));
  ASSERT_TRUE(!reader->KeyMayMatch("missing", 100));
  ASSERT_TRUE(!reader->KeyMayMatch("other", 100));
}
||||||
|
|
||||||
|
// Same scenario as FilterBlockTest.MultiChunk, but exercised through the
// base-class interfaces.  The builder and reader are held in unique_ptrs so
// they are released on every exit path without manual delete calls.
TEST(BlockBasedFilterBlockTest, BlockBasedMultiChunk) {
  std::unique_ptr<FilterBlockBuilder> builder(
      new BlockBasedFilterBlockBuilder(nullptr, table_options_));

  // First filter
  builder->StartBlock(0);
  builder->Add("foo");
  builder->StartBlock(2000);
  builder->Add("bar");

  // Second filter
  builder->StartBlock(3100);
  builder->Add("box");

  // Third filter is empty

  // Last filter
  builder->StartBlock(9000);
  builder->Add("box");
  builder->Add("hello");

  Slice block = builder->Finish();
  // Declared after the builder, so the reader is destroyed first.
  std::unique_ptr<FilterBlockReader> reader(
      new BlockBasedFilterBlockReader(nullptr, table_options_, block));

  // Check first filter
  ASSERT_TRUE(reader->KeyMayMatch("foo", 0));
  ASSERT_TRUE(reader->KeyMayMatch("bar", 2000));
  ASSERT_TRUE(!reader->KeyMayMatch("box", 0));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 0));

  // Check second filter
  ASSERT_TRUE(reader->KeyMayMatch("box", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 3100));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 3100));

  // Check third filter (empty)
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("box", 4100));
  ASSERT_TRUE(!reader->KeyMayMatch("hello", 4100));

  // Check last filter
  ASSERT_TRUE(reader->KeyMayMatch("box", 9000));
  ASSERT_TRUE(reader->KeyMayMatch("hello", 9000));
  ASSERT_TRUE(!reader->KeyMayMatch("foo", 9000));
  ASSERT_TRUE(!reader->KeyMayMatch("bar", 9000));
}
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
// Test binary entry point: runs every registered test and returns the
// harness's result as the process exit status.  Command-line arguments are
// not consulted.
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;
  return rocksdb::test::RunAllTests();
}
@ -1,139 +0,0 @@ |
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under the BSD-style license found in the
|
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
|
||||||
//
|
|
||||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style license that can be
|
|
||||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
||||||
|
|
||||||
#include "table/filter_block.h" |
|
||||||
|
|
||||||
#include "rocksdb/filter_policy.h" |
|
||||||
#include "util/coding.h" |
|
||||||
#include "util/hash.h" |
|
||||||
#include "util/logging.h" |
|
||||||
#include "util/testharness.h" |
|
||||||
#include "util/testutil.h" |
|
||||||
|
|
||||||
namespace rocksdb { |
|
||||||
|
|
||||||
// For testing: emit an array with one hash value per key
|
|
||||||
class TestHashFilter : public FilterPolicy { |
|
||||||
public: |
|
||||||
virtual const char* Name() const { |
|
||||||
return "TestHashFilter"; |
|
||||||
} |
|
||||||
|
|
||||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { |
|
||||||
for (int i = 0; i < n; i++) { |
|
||||||
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); |
|
||||||
PutFixed32(dst, h); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { |
|
||||||
uint32_t h = Hash(key.data(), key.size(), 1); |
|
||||||
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { |
|
||||||
if (h == DecodeFixed32(filter.data() + i)) { |
|
||||||
return true; |
|
||||||
} |
|
||||||
} |
|
||||||
return false; |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
class FilterBlockTest { |
|
||||||
public: |
|
||||||
const Comparator* comparator_; |
|
||||||
BlockBasedTableOptions table_options_; |
|
||||||
|
|
||||||
FilterBlockTest() |
|
||||||
: comparator_(BytewiseComparator()) { |
|
||||||
table_options_.filter_policy.reset(new TestHashFilter()); |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
TEST(FilterBlockTest, EmptyBuilder) { |
|
||||||
FilterBlockBuilder builder(nullptr, table_options_, comparator_); |
|
||||||
Slice block = builder.Finish(); |
|
||||||
ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block)); |
|
||||||
FilterBlockReader reader(nullptr, table_options_, block); |
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); |
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(100000, "foo")); |
|
||||||
} |
|
||||||
|
|
||||||
TEST(FilterBlockTest, SingleChunk) { |
|
||||||
FilterBlockBuilder builder(nullptr, table_options_, comparator_); |
|
||||||
builder.StartBlock(100); |
|
||||||
builder.AddKey("foo"); |
|
||||||
builder.AddKey("bar"); |
|
||||||
builder.AddKey("box"); |
|
||||||
builder.StartBlock(200); |
|
||||||
builder.AddKey("box"); |
|
||||||
builder.StartBlock(300); |
|
||||||
builder.AddKey("hello"); |
|
||||||
Slice block = builder.Finish(); |
|
||||||
FilterBlockReader reader(nullptr, table_options_, block); |
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); |
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(100, "bar")); |
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(100, "box")); |
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(100, "hello")); |
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(100, "missing")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(100, "other")); |
|
||||||
} |
|
||||||
|
|
||||||
TEST(FilterBlockTest, MultiChunk) { |
|
||||||
FilterBlockBuilder builder(nullptr, table_options_, comparator_); |
|
||||||
|
|
||||||
// First filter
|
|
||||||
builder.StartBlock(0); |
|
||||||
builder.AddKey("foo"); |
|
||||||
builder.StartBlock(2000); |
|
||||||
builder.AddKey("bar"); |
|
||||||
|
|
||||||
// Second filter
|
|
||||||
builder.StartBlock(3100); |
|
||||||
builder.AddKey("box"); |
|
||||||
|
|
||||||
// Third filter is empty
|
|
||||||
|
|
||||||
// Last filter
|
|
||||||
builder.StartBlock(9000); |
|
||||||
builder.AddKey("box"); |
|
||||||
builder.AddKey("hello"); |
|
||||||
|
|
||||||
Slice block = builder.Finish(); |
|
||||||
FilterBlockReader reader(nullptr, table_options_, block); |
|
||||||
|
|
||||||
// Check first filter
|
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); |
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(2000, "bar")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(0, "box")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(0, "hello")); |
|
||||||
|
|
||||||
// Check second filter
|
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(3100, "box")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello")); |
|
||||||
|
|
||||||
// Check third filter (empty)
|
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "box")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello")); |
|
||||||
|
|
||||||
// Check last filter
|
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(9000, "box")); |
|
||||||
ASSERT_TRUE(reader.KeyMayMatch(9000, "hello")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo")); |
|
||||||
ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar")); |
|
||||||
} |
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
||||||
|
|
||||||
int main(int argc, char** argv) { |
|
||||||
return rocksdb::test::RunAllTests(); |
|
||||||
} |
|
@ -0,0 +1,99 @@ |
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "table/full_filter_block.h" |
||||||
|
|
||||||
|
#include "rocksdb/filter_policy.h" |
||||||
|
#include "port/port.h" |
||||||
|
#include "util/coding.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// Takes ownership of filter_bits_builder, which must not be null.
// prefix_extractor is only borrowed; whole-key filtering is read from
// table_opt.
FullFilterBlockBuilder::FullFilterBlockBuilder(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt,
    FilterBitsBuilder* filter_bits_builder)
    : prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      num_added_(0),
      filter_bits_builder_(filter_bits_builder) {
  assert(filter_bits_builder != nullptr);
}
||||||
|
|
||||||
|
void FullFilterBlockBuilder::Add(const Slice& key) { |
||||||
|
if (whole_key_filtering_) { |
||||||
|
AddKey(key); |
||||||
|
} |
||||||
|
if (prefix_extractor_ && prefix_extractor_->InDomain(key)) { |
||||||
|
AddPrefix(key); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Add key to filter if needed
|
||||||
|
inline void FullFilterBlockBuilder::AddKey(const Slice& key) { |
||||||
|
filter_bits_builder_->AddKey(key); |
||||||
|
num_added_++; |
||||||
|
} |
||||||
|
|
||||||
|
// Add prefix to filter if needed
|
||||||
|
inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) { |
||||||
|
Slice prefix = prefix_extractor_->Transform(key); |
||||||
|
filter_bits_builder_->AddKey(prefix); |
||||||
|
num_added_++; |
||||||
|
} |
||||||
|
|
||||||
|
// Returns the finished filter, or an empty Slice when nothing was added
// since the last Finish().  The bits builder stores the backing buffer in
// filter_data.
Slice FullFilterBlockBuilder::Finish() {
  if (num_added_ == 0) {
    return Slice();
  }
  num_added_ = 0;
  return filter_bits_builder_->Finish(&filter_data);
}
||||||
|
|
||||||
|
// Takes ownership of filter_bits_reader, which must not be null.  When
// delete_contents_after_use is set, the reader also takes ownership of the
// buffer behind contents.
FullFilterBlockReader::FullFilterBlockReader(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt,
    const Slice& contents,
    FilterBitsReader* filter_bits_reader, bool delete_contents_after_use)
    : prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      filter_bits_reader_(filter_bits_reader),
      contents_(contents) {
  assert(filter_bits_reader != nullptr);

  if (delete_contents_after_use) {
    filter_data.reset(contents.data());
  }
}
||||||
|
|
||||||
|
bool FullFilterBlockReader::KeyMayMatch(const Slice& key, |
||||||
|
uint64_t block_offset) { |
||||||
|
assert(block_offset == kNotValid); |
||||||
|
if (!whole_key_filtering_) { |
||||||
|
return true; |
||||||
|
} |
||||||
|
return MayMatch(key); |
||||||
|
} |
||||||
|
|
||||||
|
bool FullFilterBlockReader::PrefixMayMatch(const Slice& prefix, |
||||||
|
uint64_t block_offset) { |
||||||
|
assert(block_offset == kNotValid); |
||||||
|
if (!prefix_extractor_) { |
||||||
|
return true; |
||||||
|
} |
||||||
|
return MayMatch(prefix); |
||||||
|
} |
||||||
|
|
||||||
|
bool FullFilterBlockReader::MayMatch(const Slice& entry) { |
||||||
|
if (contents_.size() != 0) { |
||||||
|
return filter_bits_reader_->MayMatch(entry); |
||||||
|
} |
||||||
|
return true; // remain the same with block_based filter
|
||||||
|
} |
||||||
|
|
||||||
|
size_t FullFilterBlockReader::ApproximateMemoryUsage() const { |
||||||
|
return contents_.size(); |
||||||
|
} |
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,107 @@ |
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <stddef.h> |
||||||
|
#include <stdint.h> |
||||||
|
#include <memory> |
||||||
|
#include <string> |
||||||
|
#include <vector> |
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "rocksdb/slice.h" |
||||||
|
#include "rocksdb/slice_transform.h" |
||||||
|
#include "db/dbformat.h" |
||||||
|
#include "util/hash.h" |
||||||
|
#include "table/filter_block.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class FilterPolicy; |
||||||
|
class FilterBitsBuilder; |
||||||
|
class FilterBitsReader; |
||||||
|
|
||||||
|
// A FullFilterBlockBuilder is used to construct a full filter for a
|
||||||
|
// particular Table. It generates a single string which is stored as
|
||||||
|
// a special block in the Table.
|
||||||
|
// The format of full filter block is:
|
||||||
|
// +----------------------------------------------------------------+
|
||||||
|
// | full filter for all keys in sst file |
|
||||||
|
// +----------------------------------------------------------------+
|
||||||
|
// The full filter can be very large. At the end of it, we put
|
||||||
|
// num_probes: how many hash functions are used in bloom filter
|
||||||
|
//
|
||||||
|
class FullFilterBlockBuilder : public FilterBlockBuilder { |
||||||
|
public: |
||||||
|
explicit FullFilterBlockBuilder(const SliceTransform* prefix_extractor, |
||||||
|
const BlockBasedTableOptions& table_opt, |
||||||
|
FilterBitsBuilder* filter_bits_builder); |
||||||
|
// bits_builder is created in filter_policy, it should be passed in here
|
||||||
|
// directly. and be deleted here
|
||||||
|
~FullFilterBlockBuilder() {} |
||||||
|
|
||||||
|
virtual bool IsBlockBased() override { return false; } |
||||||
|
virtual void StartBlock(uint64_t block_offset) override {} |
||||||
|
virtual void Add(const Slice& key) override; |
||||||
|
virtual Slice Finish() override; |
||||||
|
|
||||||
|
private: |
||||||
|
// important: all of these might point to invalid addresses
|
||||||
|
// at the time of destruction of this filter block. destructor
|
||||||
|
// should NOT dereference them.
|
||||||
|
const SliceTransform* prefix_extractor_; |
||||||
|
bool whole_key_filtering_; |
||||||
|
|
||||||
|
uint32_t num_added_; |
||||||
|
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_; |
||||||
|
std::unique_ptr<const char[]> filter_data; |
||||||
|
|
||||||
|
void AddKey(const Slice& key); |
||||||
|
void AddPrefix(const Slice& key); |
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
FullFilterBlockBuilder(const FullFilterBlockBuilder&); |
||||||
|
void operator=(const FullFilterBlockBuilder&); |
||||||
|
}; |
||||||
|
|
||||||
|
// A FilterBlockReader is used to parse filter from SST table.
|
||||||
|
// KeyMayMatch and PrefixMayMatch would trigger filter checking
|
||||||
|
class FullFilterBlockReader : public FilterBlockReader { |
||||||
|
public: |
||||||
|
// REQUIRES: "contents" and filter_bits_reader must stay live
|
||||||
|
// while *this is live.
|
||||||
|
explicit FullFilterBlockReader(const SliceTransform* prefix_extractor, |
||||||
|
const BlockBasedTableOptions& table_opt, |
||||||
|
const Slice& contents, |
||||||
|
FilterBitsReader* filter_bits_reader, |
||||||
|
bool delete_contents_after_use = false); |
||||||
|
|
||||||
|
// bits_reader is created in filter_policy, it should be passed in here
|
||||||
|
// directly. and be deleted here
|
||||||
|
~FullFilterBlockReader() {} |
||||||
|
|
||||||
|
virtual bool IsBlockBased() override { return false; } |
||||||
|
virtual bool KeyMayMatch(const Slice& key, |
||||||
|
uint64_t block_offset = kNotValid) override; |
||||||
|
virtual bool PrefixMayMatch(const Slice& prefix, |
||||||
|
uint64_t block_offset = kNotValid) override; |
||||||
|
virtual size_t ApproximateMemoryUsage() const override; |
||||||
|
|
||||||
|
private: |
||||||
|
const SliceTransform* prefix_extractor_; |
||||||
|
bool whole_key_filtering_; |
||||||
|
|
||||||
|
std::unique_ptr<FilterBitsReader> filter_bits_reader_; |
||||||
|
Slice contents_; |
||||||
|
std::unique_ptr<const char[]> filter_data; |
||||||
|
|
||||||
|
bool MayMatch(const Slice& entry); |
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
FullFilterBlockReader(const FullFilterBlockReader&); |
||||||
|
void operator=(const FullFilterBlockReader&); |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,181 @@ |
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "table/full_filter_block.h" |
||||||
|
|
||||||
|
#include "rocksdb/filter_policy.h" |
||||||
|
#include "util/coding.h" |
||||||
|
#include "util/hash.h" |
||||||
|
#include "util/logging.h" |
||||||
|
#include "util/testharness.h" |
||||||
|
#include "util/testutil.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class TestFilterBitsBuilder : public FilterBitsBuilder { |
||||||
|
public: |
||||||
|
explicit TestFilterBitsBuilder() {} |
||||||
|
|
||||||
|
// Add Key to filter
|
||||||
|
virtual void AddKey(const Slice& key) override { |
||||||
|
hash_entries_.push_back(Hash(key.data(), key.size(), 1)); |
||||||
|
} |
||||||
|
|
||||||
|
// Generate the filter using the keys that are added
|
||||||
|
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override { |
||||||
|
uint32_t len = hash_entries_.size() * 4; |
||||||
|
char* data = new char[len]; |
||||||
|
for (size_t i = 0; i < hash_entries_.size(); i++) { |
||||||
|
EncodeFixed32(data + i * 4, hash_entries_[i]); |
||||||
|
} |
||||||
|
buf->reset(data); |
||||||
|
return Slice(data, len); |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
std::vector<uint32_t> hash_entries_; |
||||||
|
}; |
||||||
|
|
||||||
|
class TestFilterBitsReader : public FilterBitsReader { |
||||||
|
public: |
||||||
|
explicit TestFilterBitsReader(const Slice& contents) |
||||||
|
: data_(contents.data()), len_(contents.size()) {} |
||||||
|
|
||||||
|
virtual bool MayMatch(const Slice& entry) override { |
||||||
|
uint32_t h = Hash(entry.data(), entry.size(), 1); |
||||||
|
for (size_t i = 0; i + 4 <= len_; i += 4) { |
||||||
|
if (h == DecodeFixed32(data_ + i)) { |
||||||
|
return true; |
||||||
|
} |
||||||
|
} |
||||||
|
return false; |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
const char* data_; |
||||||
|
uint32_t len_; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
class TestHashFilter : public FilterPolicy { |
||||||
|
public: |
||||||
|
virtual const char* Name() const { |
||||||
|
return "TestHashFilter"; |
||||||
|
} |
||||||
|
|
||||||
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { |
||||||
|
for (int i = 0; i < n; i++) { |
||||||
|
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); |
||||||
|
PutFixed32(dst, h); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { |
||||||
|
uint32_t h = Hash(key.data(), key.size(), 1); |
||||||
|
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { |
||||||
|
if (h == DecodeFixed32(filter.data() + i)) { |
||||||
|
return true; |
||||||
|
} |
||||||
|
} |
||||||
|
return false; |
||||||
|
} |
||||||
|
|
||||||
|
virtual FilterBitsBuilder* GetFilterBitsBuilder() const override { |
||||||
|
return new TestFilterBitsBuilder(); |
||||||
|
} |
||||||
|
|
||||||
|
virtual FilterBitsReader* GetFilterBitsReader(const Slice& contents) |
||||||
|
const override { |
||||||
|
return new TestFilterBitsReader(contents); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
class PluginFullFilterBlockTest { |
||||||
|
public: |
||||||
|
BlockBasedTableOptions table_options_; |
||||||
|
|
||||||
|
PluginFullFilterBlockTest() { |
||||||
|
table_options_.filter_policy.reset(new TestHashFilter()); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
// With no keys added, the builder must produce an empty block, and a reader
// over that empty block must still answer "may match".
TEST(PluginFullFilterBlockTest, PluginEmptyBuilder) {
  const auto& policy = table_options_.filter_policy;

  FullFilterBlockBuilder builder(nullptr, table_options_,
                                 policy->GetFilterBitsBuilder());
  Slice block = builder.Finish();
  ASSERT_EQ("", EscapeString(block));

  FullFilterBlockReader reader(nullptr, table_options_, block,
                               policy->GetFilterBitsReader(block));
  // Keep the same semantics as the block-based filter: an empty filter
  // does not exclude any key.
  ASSERT_TRUE(reader.KeyMayMatch("foo"));
}
||||||
|
|
||||||
|
// Builds one filter from a handful of keys (including a duplicate) using the
// plugin policy, then checks that added keys match and unrelated keys do not.
TEST(PluginFullFilterBlockTest, PluginSingleChunk) {
  const auto& policy = table_options_.filter_policy;

  FullFilterBlockBuilder builder(nullptr, table_options_,
                                 policy->GetFilterBitsBuilder());
  // "box" is intentionally added twice.
  const char* const added_keys[] = {"foo", "bar", "box", "box", "hello"};
  for (const char* key : added_keys) {
    builder.Add(key);
  }
  Slice block = builder.Finish();

  FullFilterBlockReader reader(nullptr, table_options_, block,
                               policy->GetFilterBitsReader(block));

  // Every added key must be reported; "foo" is probed a second time.
  const char* const expected_hits[] = {"foo", "bar", "box", "hello", "foo"};
  for (const char* key : expected_hits) {
    ASSERT_TRUE(reader.KeyMayMatch(key));
  }

  // Keys that were never added must be rejected by this filter.
  const char* const expected_misses[] = {"missing", "other"};
  for (const char* key : expected_misses) {
    ASSERT_TRUE(!reader.KeyMayMatch(key));
  }
}
||||||
|
|
||||||
|
class FullFilterBlockTest { |
||||||
|
public: |
||||||
|
BlockBasedTableOptions table_options_; |
||||||
|
|
||||||
|
FullFilterBlockTest() { |
||||||
|
table_options_.filter_policy.reset(NewBloomFilterPolicy(10, false)); |
||||||
|
} |
||||||
|
|
||||||
|
~FullFilterBlockTest() {} |
||||||
|
}; |
||||||
|
|
||||||
|
// With no keys added, the builder must produce an empty block, and a reader
// over that empty block must still answer "may match".
TEST(FullFilterBlockTest, EmptyBuilder) {
  const auto& policy = table_options_.filter_policy;

  FullFilterBlockBuilder builder(nullptr, table_options_,
                                 policy->GetFilterBitsBuilder());
  Slice block = builder.Finish();
  ASSERT_EQ("", EscapeString(block));

  FullFilterBlockReader reader(nullptr, table_options_, block,
                               policy->GetFilterBitsReader(block));
  // Keep the same semantics as the block-based filter: an empty filter
  // does not exclude any key.
  ASSERT_TRUE(reader.KeyMayMatch("foo"));
}
||||||
|
|
||||||
|
// Builds one filter from a handful of keys (including a duplicate) using the
// fixture's policy, then checks that added keys match and that the two
// probed absent keys are rejected.
TEST(FullFilterBlockTest, SingleChunk) {
  const auto& policy = table_options_.filter_policy;

  FullFilterBlockBuilder builder(nullptr, table_options_,
                                 policy->GetFilterBitsBuilder());
  // "box" is intentionally added twice.
  const char* const added_keys[] = {"foo", "bar", "box", "box", "hello"};
  for (const char* key : added_keys) {
    builder.Add(key);
  }
  Slice block = builder.Finish();

  FullFilterBlockReader reader(nullptr, table_options_, block,
                               policy->GetFilterBitsReader(block));

  // Every added key must be reported; "foo" is probed a second time.
  const char* const expected_hits[] = {"foo", "bar", "box", "hello", "foo"};
  for (const char* key : expected_hits) {
    ASSERT_TRUE(reader.KeyMayMatch(key));
  }

  // These two keys were never added and are expected to be filtered out.
  const char* const expected_misses[] = {"missing", "other"};
  for (const char* key : expected_misses) {
    ASSERT_TRUE(!reader.KeyMayMatch(key));
  }
}
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
// Test binary entry point: runs every registered test and uses the result
// as the process exit status. The command-line arguments are not consulted.
int main(int argc, char** argv) {
  const int status = rocksdb::test::RunAllTests();
  return status;
}
Loading…
Reference in new issue