Copy/split PlainTableBloomV1 from DynamicBloom (refactor) (#5767)

Summary:
DynamicBloom was being used both for memory-only and for on-disk filters, as part of the PlainTable format. To set up enhancements to the memtable Bloom filter, this splits the code into two copies and removes unused features from each copy. Adds test PlainTableDBTest.BloomSchema to ensure no accidental change to that format.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5767

Differential Revision: D17206963

Pulled By: pdillinger

fbshipit-source-id: 6cce8d55305ed0df051b4c58bdc98c8ad81d0553
main
Peter Dillinger 5 years ago committed by Facebook Github Bot
parent 3f2723a81b
commit 20dec1401f
  1. 2
      CMakeLists.txt
  2. 2
      TARGETS
  3. 50
      db/plain_table_db_test.cc
  4. 2
      src.mk
  5. 23
      table/bloom_block.cc
  6. 37
      table/bloom_block.h
  7. 78
      table/plain/plain_table_bloom.cc
  8. 161
      table/plain/plain_table_bloom.h
  9. 2
      table/plain/plain_table_builder.cc
  10. 2
      table/plain/plain_table_builder.h
  11. 2
      table/plain/plain_table_reader.cc
  12. 4
      table/plain/plain_table_reader.h
  13. 19
      util/dynamic_bloom.cc
  14. 24
      util/dynamic_bloom.h

@ -597,7 +597,6 @@ set(SOURCES
table/block_based/partitioned_filter_block.cc
table/block_based/uncompression_dict_reader.cc
table/block_fetcher.cc
table/bloom_block.cc
table/cuckoo/cuckoo_table_builder.cc
table/cuckoo/cuckoo_table_factory.cc
table/cuckoo/cuckoo_table_reader.cc
@ -607,6 +606,7 @@ set(SOURCES
table/merging_iterator.cc
table/meta_blocks.cc
table/persistent_cache_helper.cc
table/plain/plain_table_bloom.cc
table/plain/plain_table_builder.cc
table/plain/plain_table_factory.cc
table/plain/plain_table_index.cc

@ -229,7 +229,6 @@ cpp_library(
"table/block_based/partitioned_filter_block.cc",
"table/block_based/uncompression_dict_reader.cc",
"table/block_fetcher.cc",
"table/bloom_block.cc",
"table/cuckoo/cuckoo_table_builder.cc",
"table/cuckoo/cuckoo_table_factory.cc",
"table/cuckoo/cuckoo_table_reader.cc",
@ -239,6 +238,7 @@ cpp_library(
"table/merging_iterator.cc",
"table/meta_blocks.cc",
"table/persistent_cache_helper.cc",
"table/plain/plain_table_bloom.cc",
"table/plain/plain_table_builder.cc",
"table/plain/plain_table_factory.cc",
"table/plain/plain_table_index.cc",

@ -24,8 +24,8 @@
#include "rocksdb/filter_policy.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
#include "table/bloom_block.h"
#include "table/meta_blocks.h"
#include "table/plain/plain_table_bloom.h"
#include "table/plain/plain_table_factory.h"
#include "table/plain/plain_table_key_coding.h"
#include "table/plain/plain_table_reader.h"
@ -730,6 +730,54 @@ TEST_P(PlainTableDBTest, Iterator) {
}
}
namespace {
std::string NthKey(size_t n, char filler) {
std::string rv(16, filler);
rv[0] = n % 10;
rv[1] = (n / 10) % 10;
rv[2] = (n / 100) % 10;
rv[3] = (n / 1000) % 10;
return rv;
}
} // anonymous namespace
TEST_P(PlainTableDBTest, BloomSchema) {
Options options = CurrentOptions();
options.create_if_missing = true;
for (int bloom_locality = 0; bloom_locality <= 1; bloom_locality++) {
options.bloom_locality = bloom_locality;
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = 3; // high FP rate for test
plain_table_options.hash_table_ratio = 0.75;
plain_table_options.index_sparseness = 16;
plain_table_options.huge_page_tlb_size = 0;
plain_table_options.encoding_type = kPlain;
bool expect_bloom_not_match = false;
options.table_factory.reset(new TestPlainTableFactory(
&expect_bloom_not_match, plain_table_options,
0 /* column_family_id */, kDefaultColumnFamilyName));
DestroyAndReopen(&options);
for (unsigned i = 0; i < 2345; ++i) {
ASSERT_OK(Put(NthKey(i, 'y'), "added"));
}
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("added", Get(NthKey(42, 'y')));
for (unsigned i = 0; i < 32; ++i) {
// Known pattern of Bloom filter false positives can detect schema change
// with high probability. Known FPs stuffed into bits:
bool expect_fp = (bloom_locality ? 2421694657UL : 1785868347UL)
& (1UL << i);
expect_bloom_not_match = !expect_fp;
ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n')));
}
}
}
namespace {
std::string MakeLongKey(size_t length, char c) {
return std::string(length, c);

@ -124,7 +124,6 @@ LIB_SOURCES = \
table/block_based/partitioned_filter_block.cc \
table/block_based/uncompression_dict_reader.cc \
table/block_fetcher.cc \
table/bloom_block.cc \
table/cuckoo/cuckoo_table_builder.cc \
table/cuckoo/cuckoo_table_factory.cc \
table/cuckoo/cuckoo_table_reader.cc \
@ -134,6 +133,7 @@ LIB_SOURCES = \
table/merging_iterator.cc \
table/meta_blocks.cc \
table/persistent_cache_helper.cc \
table/plain/plain_table_bloom.cc \
table/plain/plain_table_builder.cc \
table/plain/plain_table_factory.cc \
table/plain/plain_table_index.cc \

@ -1,23 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "table/bloom_block.h"
#include <string>
#include "rocksdb/slice.h"
#include "util/dynamic_bloom.h"
namespace rocksdb {
void BloomBlockBuilder::AddKeysHashes(const std::vector<uint32_t>& keys_hashes) {
for (auto hash : keys_hashes) {
bloom_.AddHash(hash);
}
}
Slice BloomBlockBuilder::Finish() { return bloom_.GetRawData(); }
const std::string BloomBlockBuilder::kBloomBlock = "kBloomBlock";
} // namespace rocksdb

@ -1,37 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <vector>
#include <string>
#include "util/dynamic_bloom.h"
namespace rocksdb {
class Logger;
class BloomBlockBuilder {
public:
static const std::string kBloomBlock;
explicit BloomBlockBuilder(uint32_t num_probes = 6) : bloom_(num_probes) {}
void SetTotalBits(Allocator* allocator, uint32_t total_bits,
uint32_t locality, size_t huge_page_tlb_size,
Logger* logger) {
bloom_.SetTotalBits(allocator, total_bits, locality, huge_page_tlb_size,
logger);
}
uint32_t GetNumBlocks() const { return bloom_.GetNumBlocks(); }
void AddKeysHashes(const std::vector<uint32_t>& keys_hashes);
Slice Finish();
private:
DynamicBloom bloom_;
};
}; // namespace rocksdb

@ -0,0 +1,78 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "table/plain/plain_table_bloom.h"
#include <string>
#include <algorithm>
#include "util/dynamic_bloom.h"
#include "memory/allocator.h"
namespace rocksdb {
namespace {
uint32_t GetTotalBitsForLocality(uint32_t total_bits) {
uint32_t num_blocks =
(total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8);
// Make num_blocks an odd number to make sure more bits are involved
// when determining which block.
if (num_blocks % 2 == 0) {
num_blocks++;
}
return num_blocks * (CACHE_LINE_SIZE * 8);
}
}
PlainTableBloomV1::PlainTableBloomV1(uint32_t num_probes)
: kTotalBits(0), kNumBlocks(0), kNumProbes(num_probes), data_(nullptr) {}
void PlainTableBloomV1::SetRawData(unsigned char* raw_data, uint32_t total_bits,
uint32_t num_blocks) {
data_ = reinterpret_cast<uint8_t*>(raw_data);
kTotalBits = total_bits;
kNumBlocks = num_blocks;
}
void PlainTableBloomV1::SetTotalBits(Allocator* allocator,
uint32_t total_bits, uint32_t locality,
size_t huge_page_tlb_size,
Logger* logger) {
kTotalBits = (locality > 0) ? GetTotalBitsForLocality(total_bits)
: (total_bits + 7) / 8 * 8;
kNumBlocks = (locality > 0) ? (kTotalBits / (CACHE_LINE_SIZE * 8)) : 0;
assert(kNumBlocks > 0 || kTotalBits > 0);
assert(kNumProbes > 0);
uint32_t sz = kTotalBits / 8;
if (kNumBlocks > 0) {
sz += CACHE_LINE_SIZE - 1;
}
assert(allocator);
char* raw = allocator->AllocateAligned(sz, huge_page_tlb_size, logger);
memset(raw, 0, sz);
auto cache_line_offset = reinterpret_cast<uintptr_t>(raw) % CACHE_LINE_SIZE;
if (kNumBlocks > 0 && cache_line_offset > 0) {
raw += CACHE_LINE_SIZE - cache_line_offset;
}
data_ = reinterpret_cast<uint8_t*>(raw);
}
void BloomBlockBuilder::AddKeysHashes(const std::vector<uint32_t>& keys_hashes) {
for (auto hash : keys_hashes) {
bloom_.AddHash(hash);
}
}
Slice BloomBlockBuilder::Finish() { return bloom_.GetRawData(); }
const std::string BloomBlockBuilder::kBloomBlock = "kBloomBlock";
} // namespace rocksdb

@ -0,0 +1,161 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <vector>
#include <string>
#include "rocksdb/slice.h"
#include "port/port.h"
#include "util/hash.h"
#include <memory>
namespace rocksdb {
class Slice;
class Allocator;
class Logger;
class PlainTableBloomV1 {
public:
// allocator: pass allocator to bloom filter, hence trace the usage of memory
// total_bits: fixed total bits for the bloom
// num_probes: number of hash probes for a single key
// locality: If positive, optimize for cache line locality, 0 otherwise.
// hash_func: customized hash function
// huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB
// within this page size. Need to reserve huge pages for
// it to be allocated, like:
// sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt
explicit PlainTableBloomV1(uint32_t num_probes = 6);
void SetTotalBits(Allocator* allocator, uint32_t total_bits,
uint32_t locality, size_t huge_page_tlb_size,
Logger* logger);
~PlainTableBloomV1() {}
// Assuming single threaded access to this function.
void AddHash(uint32_t hash);
// Multithreaded access to this function is OK
bool MayContainHash(uint32_t hash) const;
void Prefetch(uint32_t hash);
uint32_t GetNumBlocks() const { return kNumBlocks; }
Slice GetRawData() const {
return Slice(reinterpret_cast<char*>(data_), GetTotalBits() / 8);
}
void SetRawData(unsigned char* raw_data, uint32_t total_bits,
uint32_t num_blocks = 0);
uint32_t GetTotalBits() const { return kTotalBits; }
bool IsInitialized() const { return kNumBlocks > 0 || kTotalBits > 0; }
private:
uint32_t kTotalBits;
uint32_t kNumBlocks;
const uint32_t kNumProbes;
uint8_t* data_;
};
#if defined(_MSC_VER)
#pragma warning(push)
// local variable is initialized but not referenced
#pragma warning(disable : 4189)
#endif
inline void PlainTableBloomV1::Prefetch(uint32_t h) {
if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
PREFETCH(&(data_[b / 8]), 0, 3);
}
}
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
inline bool PlainTableBloomV1::MayContainHash(uint32_t h) const {
assert(IsInitialized());
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
for (uint32_t i = 0; i < kNumProbes; ++i) {
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple and operation by compiler.
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
if ((data_[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
return false;
}
// Rotate h so that we don't reuse the same bytes.
h = h / (CACHE_LINE_SIZE * 8) +
(h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
h += delta;
}
} else {
for (uint32_t i = 0; i < kNumProbes; ++i) {
const uint32_t bitpos = h % kTotalBits;
if ((data_[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
return false;
}
h += delta;
}
}
return true;
}
inline void PlainTableBloomV1::AddHash(uint32_t h) {
assert(IsInitialized());
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
for (uint32_t i = 0; i < kNumProbes; ++i) {
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple and operation by compiler.
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
data_[bitpos / 8] |= (1 << (bitpos % 8));
// Rotate h so that we don't reuse the same bytes.
h = h / (CACHE_LINE_SIZE * 8) +
(h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
h += delta;
}
} else {
for (uint32_t i = 0; i < kNumProbes; ++i) {
const uint32_t bitpos = h % kTotalBits;
data_[bitpos / 8] |= (1 << (bitpos % 8));
h += delta;
}
}
}
class BloomBlockBuilder {
public:
static const std::string kBloomBlock;
explicit BloomBlockBuilder(uint32_t num_probes = 6) : bloom_(num_probes) {}
void SetTotalBits(Allocator* allocator, uint32_t total_bits,
uint32_t locality, size_t huge_page_tlb_size,
Logger* logger) {
bloom_.SetTotalBits(allocator, total_bits, locality, huge_page_tlb_size,
logger);
}
uint32_t GetNumBlocks() const { return bloom_.GetNumBlocks(); }
void AddKeysHashes(const std::vector<uint32_t>& keys_hashes);
Slice Finish();
private:
PlainTableBloomV1 bloom_;
};
}; // namespace rocksdb

@ -19,9 +19,9 @@
#include "rocksdb/options.h"
#include "rocksdb/table.h"
#include "table/block_based/block_builder.h"
#include "table/bloom_block.h"
#include "table/format.h"
#include "table/meta_blocks.h"
#include "table/plain/plain_table_bloom.h"
#include "table/plain/plain_table_factory.h"
#include "table/plain/plain_table_index.h"
#include "util/coding.h"

@ -13,7 +13,7 @@
#include "rocksdb/status.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "table/bloom_block.h"
#include "table/plain/plain_table_bloom.h"
#include "table/plain/plain_table_index.h"
#include "table/plain/plain_table_key_coding.h"
#include "table/table_builder.h"

@ -21,11 +21,11 @@
#include "table/block_based/block.h"
#include "table/block_based/filter_block.h"
#include "table/bloom_block.h"
#include "table/format.h"
#include "table/get_context.h"
#include "table/internal_iterator.h"
#include "table/meta_blocks.h"
#include "table/plain/plain_table_bloom.h"
#include "table/plain/plain_table_factory.h"
#include "table/plain/plain_table_key_coding.h"
#include "table/two_level_iterator.h"

@ -19,10 +19,10 @@
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "table/plain/plain_table_bloom.h"
#include "table/plain/plain_table_factory.h"
#include "table/plain/plain_table_index.h"
#include "table/table_reader.h"
#include "util/dynamic_bloom.h"
#include "util/file_reader_writer.h"
namespace rocksdb {
@ -155,7 +155,7 @@ class PlainTableReader: public TableReader {
// Bloom filter is used to rule out non-existent key
bool enable_bloom_;
DynamicBloom bloom_;
PlainTableBloomV1 bloom_;
PlainTableReaderFileInfo file_info_;
Arena arena_;
CacheAllocationPtr index_block_alloc_;

@ -33,24 +33,7 @@ uint32_t GetTotalBitsForLocality(uint32_t total_bits) {
DynamicBloom::DynamicBloom(Allocator* allocator, uint32_t total_bits,
uint32_t locality, uint32_t num_probes,
size_t huge_page_tlb_size, Logger* logger)
: DynamicBloom(num_probes) {
SetTotalBits(allocator, total_bits, locality, huge_page_tlb_size, logger);
}
DynamicBloom::DynamicBloom(uint32_t num_probes)
: kTotalBits(0), kNumBlocks(0), kNumProbes(num_probes), data_(nullptr) {}
void DynamicBloom::SetRawData(unsigned char* raw_data, uint32_t total_bits,
uint32_t num_blocks) {
data_ = reinterpret_cast<std::atomic<uint8_t>*>(raw_data);
kTotalBits = total_bits;
kNumBlocks = num_blocks;
}
void DynamicBloom::SetTotalBits(Allocator* allocator,
uint32_t total_bits, uint32_t locality,
size_t huge_page_tlb_size,
Logger* logger) {
: kNumProbes(num_probes) {
kTotalBits = (locality > 0) ? GetTotalBitsForLocality(total_bits)
: (total_bits + 7) / 8 * 8;
kNumBlocks = (locality > 0) ? (kTotalBits / (CACHE_LINE_SIZE * 8)) : 0;

@ -21,6 +21,9 @@ class Slice;
class Allocator;
class Logger;
// A Bloom filter intended only to be used in memory, never serialized in a way
// that could lead to schema incompatibility. Supports opt-in lock-free
// concurrent access.
class DynamicBloom {
public:
// allocator: pass allocator to bloom filter, hence trace the usage of memory
@ -39,12 +42,6 @@ class DynamicBloom {
size_t huge_page_tlb_size = 0,
Logger* logger = nullptr);
explicit DynamicBloom(uint32_t num_probes = 6);
void SetTotalBits(Allocator* allocator, uint32_t total_bits,
uint32_t locality, size_t huge_page_tlb_size,
Logger* logger);
~DynamicBloom() {}
// Assuming single threaded access to this function.
@ -69,17 +66,6 @@ class DynamicBloom {
uint32_t GetNumBlocks() const { return kNumBlocks; }
Slice GetRawData() const {
return Slice(reinterpret_cast<char*>(data_), GetTotalBits() / 8);
}
void SetRawData(unsigned char* raw_data, uint32_t total_bits,
uint32_t num_blocks = 0);
uint32_t GetTotalBits() const { return kTotalBits; }
bool IsInitialized() const { return kNumBlocks > 0 || kTotalBits > 0; }
private:
uint32_t kTotalBits;
uint32_t kNumBlocks;
@ -126,7 +112,7 @@ inline bool DynamicBloom::MayContain(const Slice& key) const {
#if defined(_MSC_VER)
#pragma warning(push)
// local variable is initialized but not referenced
#pragma warning(disable : 4189)
#pragma warning(disable : 4189)
#endif
inline void DynamicBloom::Prefetch(uint32_t h) {
if (kNumBlocks != 0) {
@ -139,7 +125,6 @@ inline void DynamicBloom::Prefetch(uint32_t h) {
#endif
inline bool DynamicBloom::MayContainHash(uint32_t h) const {
assert(IsInitialized());
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
@ -171,7 +156,6 @@ inline bool DynamicBloom::MayContainHash(uint32_t h) const {
template <typename OrFunc>
inline void DynamicBloom::AddHash(uint32_t h, const OrFunc& or_func) {
assert(IsInitialized());
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);

Loading…
Cancel
Save