In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters.

Other changes:
- Updated version number to 1.4.
- Some build tweaks.
- C binding for CompactRange.
- A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom.
- Minor .gitignore update.
parent bc1ee4d25e
commit 85584d497e
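A usage sketch of the feature described in the commit message above. This is illustrative and not part of the diff: it assumes the suppressed Options change exposes the policy as a field named filter_policy, and the database path and key are made up.

// Sketch only: assumes Options gains a filter_policy member as the
// commit message describes; field name, path, and key are illustrative.
#include <cassert>
#include <string>
#include "leveldb/db.h"
#include "leveldb/filter_policy.h"
#include "leveldb/options.h"

int main() {
  leveldb::Options options;
  options.create_if_missing = true;
  // ~10 bits per key gives roughly a 1% false positive rate
  // (see NewBloomFilterPolicy() in include/leveldb/filter_policy.h).
  options.filter_policy = leveldb::NewBloomFilterPolicy(10);

  leveldb::DB* db = NULL;
  leveldb::Status s = leveldb::DB::Open(options, "/tmp/filterdb", &db);
  assert(s.ok());

  // Reads consult the per-sstable filters, so blocks that definitely
  // do not contain the key can be skipped without a disk read.
  std::string value;
  s = db->Get(leveldb::ReadOptions(), "some-key", &value);

  delete db;
  // The policy must outlive the database that uses it.
  delete options.filter_policy;
  return 0;
}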
.gitignore
@@ -1,5 +1,8 @@
 build_config.mk
 *.a
 *.o
+*.dylib*
+*.so
+*.so.*
 *_test
 db_bench
(File diff suppressed because it is too large.)
include/leveldb/filter_policy.h
@@ -0,0 +1,70 @@
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A database can be configured with a custom FilterPolicy object.
// This object is responsible for creating a small filter from a set
// of keys. These filters are stored in leveldb and are consulted
// automatically by leveldb to decide whether or not to read some
// information from disk. In many cases, a filter can cut down the
// number of disk seeks from a handful to a single disk seek per
// DB::Get() call.
//
// Most people will want to use the builtin bloom filter support (see
// NewBloomFilterPolicy() below).

#ifndef STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
#define STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_

#include <string>

namespace leveldb {

class Slice;

class FilterPolicy {
 public:
  virtual ~FilterPolicy();

  // Return the name of this policy. Note that if the filter encoding
  // changes in an incompatible way, the name returned by this method
  // must be changed. Otherwise, old incompatible filters may be
  // passed to methods of this type.
  virtual const char* Name() const = 0;

  // keys[0,n-1] contains a list of keys (potentially with duplicates)
  // that are ordered according to the user supplied comparator.
  // Append a filter that summarizes keys[0,n-1] to *dst.
  //
  // Warning: do not change the initial contents of *dst. Instead,
  // append the newly constructed filter to *dst.
  virtual void CreateFilter(const Slice* keys, int n, std::string* dst)
      const = 0;

  // "filter" contains the data appended by a preceding call to
  // CreateFilter() on this class. This method must return true if
  // the key was in the list of keys passed to CreateFilter().
  // This method may return true or false if the key was not on the
  // list, but it should aim to return false with a high probability.
  virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
};

// Return a new filter policy that uses a bloom filter with approximately
// the specified number of bits per key. A good value for bits_per_key
// is 10, which yields a filter with ~ 1% false positive rate.
//
// Callers must delete the result after any database that is using the
// result has been closed.
//
// Note: if you are using a custom comparator that ignores some parts
// of the keys being compared, you must not use NewBloomFilterPolicy()
// and must provide your own FilterPolicy that also ignores the
// corresponding parts of the keys. For example, if the comparator
// ignores trailing spaces, it would be incorrect to use a
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
// trailing spaces in keys.
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);

}

#endif  // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
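The trailing-spaces caveat above is easiest to see with a sketch. The wrapper below is illustrative only and is not part of this change: it strips trailing spaces before delegating to the builtin bloom policy, so that keys the custom comparator treats as equal also produce identical filter probes. The class and helper names are invented for the example.

// Illustrative wrapper, not part of this commit: pairs with a comparator
// that ignores trailing spaces by stripping them before filtering.
#include <string>
#include <vector>
#include "leveldb/filter_policy.h"
#include "leveldb/slice.h"

namespace {

leveldb::Slice StripTrailingSpaces(const leveldb::Slice& s) {
  size_t n = s.size();
  while (n > 0 && s.data()[n - 1] == ' ') n--;
  return leveldb::Slice(s.data(), n);
}

class TrailingSpaceIgnoringPolicy : public leveldb::FilterPolicy {
 public:
  TrailingSpaceIgnoringPolicy() : rep_(leveldb::NewBloomFilterPolicy(10)) { }
  virtual ~TrailingSpaceIgnoringPolicy() { delete rep_; }

  virtual const char* Name() const {
    // Must change if the filter encoding ever changes incompatibly.
    return "example.TrailingSpaceIgnoringBloom";
  }

  virtual void CreateFilter(const leveldb::Slice* keys, int n,
                            std::string* dst) const {
    // leveldb only calls CreateFilter with n > 0 (see
    // FilterBlockBuilder::GenerateFilter below), so &trimmed[0] is valid.
    std::vector<leveldb::Slice> trimmed(n);
    for (int i = 0; i < n; i++) {
      trimmed[i] = StripTrailingSpaces(keys[i]);
    }
    rep_->CreateFilter(&trimmed[0], n, dst);
  }

  virtual bool KeyMayMatch(const leveldb::Slice& key,
                           const leveldb::Slice& filter) const {
    return rep_->KeyMayMatch(StripTrailingSpaces(key), filter);
  }

 private:
  const leveldb::FilterPolicy* rep_;
};

}  // anonymous namespace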
table/filter_block.cc
@@ -0,0 +1,111 @@
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "table/filter_block.h"

#include "leveldb/filter_policy.h"
#include "util/coding.h"

namespace leveldb {

// See doc/table_format.txt for an explanation of the filter block format.

// Generate new filter every 2KB of data
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;

FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy)
    : policy_(policy) {
}

void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
  uint64_t filter_index = (block_offset / kFilterBase);
  assert(filter_index >= filter_offsets_.size());
  while (filter_index > filter_offsets_.size()) {
    GenerateFilter();
  }
}

void FilterBlockBuilder::AddKey(const Slice& key) {
  Slice k = key;
  start_.push_back(keys_.size());
  keys_.append(k.data(), k.size());
}

Slice FilterBlockBuilder::Finish() {
  if (!start_.empty()) {
    GenerateFilter();
  }

  // Append array of per-filter offsets
  const uint32_t array_offset = result_.size();
  for (size_t i = 0; i < filter_offsets_.size(); i++) {
    PutFixed32(&result_, filter_offsets_[i]);
  }

  PutFixed32(&result_, array_offset);
  result_.push_back(kFilterBaseLg);  // Save encoding parameter in result
  return Slice(result_);
}

void FilterBlockBuilder::GenerateFilter() {
  const size_t num_keys = start_.size();
  if (num_keys == 0) {
    // Fast path if there are no keys for this filter
    filter_offsets_.push_back(result_.size());
    return;
  }

  // Make list of keys from flattened key structure
  start_.push_back(keys_.size());  // Simplify length computation
  tmp_keys_.resize(num_keys);
  for (size_t i = 0; i < num_keys; i++) {
    const char* base = keys_.data() + start_[i];
    size_t length = start_[i+1] - start_[i];
    tmp_keys_[i] = Slice(base, length);
  }

  // Generate filter for current set of keys and append to result_.
  filter_offsets_.push_back(result_.size());
  policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_);

  tmp_keys_.clear();
  keys_.clear();
  start_.clear();
}

FilterBlockReader::FilterBlockReader(const FilterPolicy* policy,
                                     const Slice& contents)
    : policy_(policy),
      data_(NULL),
      offset_(NULL),
      num_(0),
      base_lg_(0) {
  size_t n = contents.size();
  if (n < 5) return;  // 1 byte for base_lg_ and 4 for start of offset array
  base_lg_ = contents[n-1];
  uint32_t last_word = DecodeFixed32(contents.data() + n - 5);
  if (last_word > n - 5) return;
  data_ = contents.data();
  offset_ = data_ + last_word;
  num_ = (n - 5 - last_word) / 4;
}

bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) {
  uint64_t index = block_offset >> base_lg_;
  if (index < num_) {
    uint32_t start = DecodeFixed32(offset_ + index*4);
    uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
    if (start <= limit && limit <= (offset_ - data_)) {
      Slice filter = Slice(data_ + start, limit - start);
      return policy_->KeyMayMatch(key, filter);
    } else if (start == limit) {
      // Empty filters do not match any keys
      return false;
    }
  }
  return true;  // Errors are treated as potential matches
}

}
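As a worked example of the trailer that Finish() emits, derived from the code above and the EmptyBuilder expectation in the test file further below: with no keys added, GenerateFilter() never runs, so both result_ and filter_offsets_ are empty; array_offset is therefore 0, and Finish() appends the fixed32 value 0 followed by kFilterBaseLg = 11 (0x0b), producing the five-byte block 00 00 00 00 0b, which is exactly the string the EmptyBuilder test asserts. More generally, a lookup for block_offset B consults filter number B >> base_lg_, i.e. B >> 11 here; for instance B = 9000 maps to filter index 4, which is why StartBlock(9000) in the MultiChunk test first forces filters 1 through 3 to be generated (2 and 3 empty) before keys for filter 4 are accumulated.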
table/filter_block.h
@@ -0,0 +1,68 @@
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A filter block is stored near the end of a Table file. It contains
// filters (e.g., bloom filters) for all data blocks in the table combined
// into a single filter block.

#ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
#define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_

#include <stddef.h>
#include <stdint.h>
#include <string>
#include <vector>
#include "leveldb/slice.h"
#include "util/hash.h"

namespace leveldb {

class FilterPolicy;

// A FilterBlockBuilder is used to construct all of the filters for a
// particular Table. It generates a single string which is stored as
// a special block in the Table.
//
// The sequence of calls to FilterBlockBuilder must match the regexp:
//      (StartBlock AddKey*)* Finish
class FilterBlockBuilder {
 public:
  explicit FilterBlockBuilder(const FilterPolicy*);

  void StartBlock(uint64_t block_offset);
  void AddKey(const Slice& key);
  Slice Finish();

 private:
  void GenerateFilter();

  const FilterPolicy* policy_;
  std::string keys_;              // Flattened key contents
  std::vector<size_t> start_;     // Starting index in keys_ of each key
  std::string result_;            // Filter data computed so far
  std::vector<Slice> tmp_keys_;   // policy_->CreateFilter() argument
  std::vector<uint32_t> filter_offsets_;

  // No copying allowed
  FilterBlockBuilder(const FilterBlockBuilder&);
  void operator=(const FilterBlockBuilder&);
};

class FilterBlockReader {
 public:
  // REQUIRES: "contents" and *policy must stay live while *this is live.
  FilterBlockReader(const FilterPolicy* policy, const Slice& contents);
  bool KeyMayMatch(uint64_t block_offset, const Slice& key);

 private:
  const FilterPolicy* policy_;
  const char* data_;    // Pointer to filter data (at block-start)
  const char* offset_;  // Pointer to beginning of offset array (at block-end)
  size_t num_;          // Number of entries in offset array
  size_t base_lg_;      // Encoding parameter (see kFilterBaseLg in .cc file)
};

}

#endif  // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
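A minimal sketch of the call sequence the regexp above describes, (StartBlock AddKey*)* Finish. This is not part of the commit: the block offsets and keys are invented, and any FilterPolicy (here the builtin bloom policy) will do.

// Illustrative only: offsets and keys are made up; any FilterPolicy works.
#include "leveldb/filter_policy.h"
#include "table/filter_block.h"

void BuildFilterBlockExample() {
  const leveldb::FilterPolicy* policy = leveldb::NewBloomFilterPolicy(10);
  leveldb::FilterBlockBuilder builder(policy);

  builder.StartBlock(0);      // first data block starts at file offset 0
  builder.AddKey("apple");
  builder.AddKey("banana");

  builder.StartBlock(3000);   // next data block starts at offset 3000
  builder.AddKey("cherry");

  // The returned slice is what a table writer would store as the table's
  // filter block; it stays valid while `builder` is alive.
  leveldb::Slice contents = builder.Finish();

  // A reader built over the same bytes answers per-block point queries.
  leveldb::FilterBlockReader reader(policy, contents);
  bool maybe = reader.KeyMayMatch(3000, "cherry");    // expected: true
  bool unlikely = reader.KeyMayMatch(3000, "apple");  // almost surely false
  (void)maybe; (void)unlikely;

  delete policy;
}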
table/filter_block_test.cc
@@ -0,0 +1,128 @@
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "table/filter_block.h"

#include "leveldb/filter_policy.h"
#include "util/coding.h"
#include "util/hash.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"

namespace leveldb {

// For testing: emit an array with one hash value per key
class TestHashFilter : public FilterPolicy {
 public:
  virtual const char* Name() const {
    return "TestHashFilter";
  }

  virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
    for (int i = 0; i < n; i++) {
      uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
      PutFixed32(dst, h);
    }
  }

  virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
    uint32_t h = Hash(key.data(), key.size(), 1);
    for (int i = 0; i + 4 <= filter.size(); i += 4) {
      if (h == DecodeFixed32(filter.data() + i)) {
        return true;
      }
    }
    return false;
  }
};

class FilterBlockTest {
 public:
  TestHashFilter policy_;
};

TEST(FilterBlockTest, EmptyBuilder) {
  FilterBlockBuilder builder(&policy_);
  Slice block = builder.Finish();
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block));
  FilterBlockReader reader(&policy_, block);
  ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
  ASSERT_TRUE(reader.KeyMayMatch(100000, "foo"));
}

TEST(FilterBlockTest, SingleChunk) {
  FilterBlockBuilder builder(&policy_);
  builder.StartBlock(100);
  builder.AddKey("foo");
  builder.AddKey("bar");
  builder.AddKey("box");
  builder.StartBlock(200);
  builder.AddKey("box");
  builder.StartBlock(300);
  builder.AddKey("hello");
  Slice block = builder.Finish();
  FilterBlockReader reader(&policy_, block);
  ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));
  ASSERT_TRUE(reader.KeyMayMatch(100, "bar"));
  ASSERT_TRUE(reader.KeyMayMatch(100, "box"));
  ASSERT_TRUE(reader.KeyMayMatch(100, "hello"));
  ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));
  ASSERT_TRUE(! reader.KeyMayMatch(100, "missing"));
  ASSERT_TRUE(! reader.KeyMayMatch(100, "other"));
}

TEST(FilterBlockTest, MultiChunk) {
  FilterBlockBuilder builder(&policy_);

  // First filter
  builder.StartBlock(0);
  builder.AddKey("foo");
  builder.StartBlock(2000);
  builder.AddKey("bar");

  // Second filter
  builder.StartBlock(3100);
  builder.AddKey("box");

  // Third filter is empty

  // Last filter
  builder.StartBlock(9000);
  builder.AddKey("box");
  builder.AddKey("hello");

  Slice block = builder.Finish();
  FilterBlockReader reader(&policy_, block);

  // Check first filter
  ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
  ASSERT_TRUE(reader.KeyMayMatch(2000, "bar"));
  ASSERT_TRUE(! reader.KeyMayMatch(0, "box"));
  ASSERT_TRUE(! reader.KeyMayMatch(0, "hello"));

  // Check second filter
  ASSERT_TRUE(reader.KeyMayMatch(3100, "box"));
  ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo"));
  ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar"));
  ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello"));

  // Check third filter (empty)
  ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo"));
  ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar"));
  ASSERT_TRUE(! reader.KeyMayMatch(4100, "box"));
  ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello"));

  // Check last filter
  ASSERT_TRUE(reader.KeyMayMatch(9000, "box"));
  ASSERT_TRUE(reader.KeyMayMatch(9000, "hello"));
  ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo"));
  ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar"));
}

}  // namespace leveldb

int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
util/bloom.cc
@@ -0,0 +1,95 @@
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "leveldb/filter_policy.h"

#include "leveldb/slice.h"
#include "util/hash.h"

namespace leveldb {

namespace {
static uint32_t BloomHash(const Slice& key) {
  return Hash(key.data(), key.size(), 0xbc9f1d34);
}

class BloomFilterPolicy : public FilterPolicy {
 private:
  size_t bits_per_key_;
  size_t k_;

 public:
  explicit BloomFilterPolicy(int bits_per_key)
      : bits_per_key_(bits_per_key) {
    // We intentionally round down to reduce probing cost a little bit
    k_ = static_cast<size_t>(bits_per_key * 0.69);  // 0.69 =~ ln(2)
    if (k_ < 1) k_ = 1;
    if (k_ > 30) k_ = 30;
  }

  virtual const char* Name() const {
    return "leveldb.BuiltinBloomFilter";
  }

  virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
    // Compute bloom filter size (in both bits and bytes)
    size_t bits = n * bits_per_key_;

    // For small n, we can see a very high false positive rate. Fix it
    // by enforcing a minimum bloom filter length.
    if (bits < 64) bits = 64;

    size_t bytes = (bits + 7) / 8;
    bits = bytes * 8;

    const size_t init_size = dst->size();
    dst->resize(init_size + bytes, 0);
    dst->push_back(static_cast<char>(k_));  // Remember # of probes in filter
    char* array = &(*dst)[init_size];
    for (size_t i = 0; i < n; i++) {
      // Use double-hashing to generate a sequence of hash values.
      // See analysis in [Kirsch,Mitzenmacher 2006].
      uint32_t h = BloomHash(keys[i]);
      const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
      for (size_t j = 0; j < k_; j++) {
        const uint32_t bitpos = h % bits;
        array[bitpos/8] |= (1 << (bitpos % 8));
        h += delta;
      }
    }
  }

  virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
    const size_t len = bloom_filter.size();
    if (len < 2) return false;

    const char* array = bloom_filter.data();
    const size_t bits = (len - 1) * 8;

    // Use the encoded k so that we can read filters generated by
    // bloom filters created using different parameters.
    const size_t k = array[len-1];
    if (k > 30) {
      // Reserved for potentially new encodings for short bloom filters.
      // Consider it a match.
      return true;
    }

    uint32_t h = BloomHash(key);
    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
    for (size_t j = 0; j < k; j++) {
      const uint32_t bitpos = h % bits;
      if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
      h += delta;
    }
    return true;
  }
};
}

const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
  return new BloomFilterPolicy(bits_per_key);
}

}  // namespace leveldb
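A small worked example of the sizing and probing arithmetic above, derived entirely from the code (nothing here is new behaviour): with bits_per_key = 10, k_ = floor(10 * 0.69) = 6 probes. For n = 2 keys, bits = 2 * 10 = 20, which is raised to the 64-bit minimum, rounded to bytes = 8 and back to bits = 64; the filter is therefore 8 bytes of bit array plus one trailing byte recording k_, i.e. 9 bytes in total. Each key then sets at most 6 of the 64 bits at positions h % bits, (h + delta) % bits, (h + 2*delta) % bits, and so on, where delta is h rotated right by 17 bits.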
util/bloom_test.cc
@@ -0,0 +1,159 @@
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "leveldb/filter_policy.h"

#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"

namespace leveldb {

static const int kVerbose = 1;

static Slice Key(int i, char* buffer) {
  memcpy(buffer, &i, sizeof(i));
  return Slice(buffer, sizeof(i));
}

class BloomTest {
 private:
  const FilterPolicy* policy_;
  std::string filter_;
  std::vector<std::string> keys_;

 public:
  BloomTest() : policy_(NewBloomFilterPolicy(10)) { }

  ~BloomTest() {
    delete policy_;
  }

  void Reset() {
    keys_.clear();
    filter_.clear();
  }

  void Add(const Slice& s) {
    keys_.push_back(s.ToString());
  }

  void Build() {
    std::vector<Slice> key_slices;
    for (size_t i = 0; i < keys_.size(); i++) {
      key_slices.push_back(Slice(keys_[i]));
    }
    filter_.clear();
    policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
    keys_.clear();
    if (kVerbose >= 2) DumpFilter();
  }

  size_t FilterSize() const {
    return filter_.size();
  }

  void DumpFilter() {
    fprintf(stderr, "F(");
    for (size_t i = 0; i+1 < filter_.size(); i++) {
      const unsigned int c = static_cast<unsigned int>(filter_[i]);
      for (int j = 0; j < 8; j++) {
        fprintf(stderr, "%c", (c & (1 << j)) ? '1' : '.');
      }
    }
    fprintf(stderr, ")\n");
  }

  bool Matches(const Slice& s) {
    if (!keys_.empty()) {
      Build();
    }
    return policy_->KeyMayMatch(s, filter_);
  }

  double FalsePositiveRate() {
    char buffer[sizeof(int)];
    int result = 0;
    for (int i = 0; i < 10000; i++) {
      if (Matches(Key(i + 1000000000, buffer))) {
        result++;
      }
    }
    return result / 10000.0;
  }
};

TEST(BloomTest, EmptyFilter) {
  ASSERT_TRUE(! Matches("hello"));
  ASSERT_TRUE(! Matches("world"));
}

TEST(BloomTest, Small) {
  Add("hello");
  Add("world");
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));
  ASSERT_TRUE(! Matches("x"));
  ASSERT_TRUE(! Matches("foo"));
}

static int NextLength(int length) {
  if (length < 10) {
    length += 1;
  } else if (length < 100) {
    length += 10;
  } else if (length < 1000) {
    length += 100;
  } else {
    length += 1000;
  }
  return length;
}

TEST(BloomTest, VaryingLengths) {
  char buffer[sizeof(int)];

  // Count number of filters that significantly exceed the false positive rate
  int mediocre_filters = 0;
  int good_filters = 0;

  for (int length = 1; length <= 10000; length = NextLength(length)) {
    Reset();
    for (int i = 0; i < length; i++) {
      Add(Key(i, buffer));
    }
    Build();

    ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length;

    // All added keys must match
    for (int i = 0; i < length; i++) {
      ASSERT_TRUE(Matches(Key(i, buffer)))
          << "Length " << length << "; key " << i;
    }

    // Check false positive rate
    double rate = FalsePositiveRate();
    if (kVerbose >= 1) {
      fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
              rate*100.0, length, static_cast<int>(FilterSize()));
    }
    ASSERT_LE(rate, 0.02);   // Must not be over 2%
    if (rate > 0.0125) mediocre_filters++;  // Allowed, but not too often
    else good_filters++;
  }
  if (kVerbose >= 1) {
    fprintf(stderr, "Filters: %d good, %d mediocre\n",
            good_filters, mediocre_filters);
  }
  ASSERT_LE(mediocre_filters, good_filters/5);
}

// Different bits-per-byte

}  // namespace leveldb

int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
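For context on the 2% ceiling and the 1.25% "mediocre" threshold in the test above (a standard approximation, not something the code computes): with m/n = 10 bits per key and k = 6 probes, the classical bloom filter estimate p ≈ (1 − e^(−kn/m))^k gives (1 − e^(−0.6))^6 ≈ 0.45^6 ≈ 0.8%, which matches the "~ 1%" figure quoted for NewBloomFilterPolicy(10) in filter_policy.h; the test allows roughly twice that before failing.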
util/filter_policy.cc
@@ -0,0 +1,11 @@
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "leveldb/filter_policy.h"

namespace leveldb {

FilterPolicy::~FilterPolicy() { }

}  // namespace leveldb