|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
|
|
|
|
#include <set>
|
|
|
|
|
|
|
|
#include "table/block_based/full_filter_block.h"
|
|
|
|
#include "rocksdb/filter_policy.h"
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
#include "table/block_based/block_based_table_reader.h"
|
|
|
|
#include "table/block_based/mock_block_based_table.h"
|
|
|
|
#include "table/block_based/filter_policy_internal.h"
|
|
|
|
#include "test_util/testharness.h"
|
|
|
|
#include "test_util/testutil.h"
|
|
|
|
#include "util/coding.h"
|
|
|
|
#include "util/hash.h"
|
|
|
|
#include "util/string_util.h"
|
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
|
|
|
|
class TestFilterBitsBuilder : public FilterBitsBuilder {
|
|
|
|
public:
|
|
|
|
explicit TestFilterBitsBuilder() {}
|
|
|
|
|
|
|
|
// Add Key to filter
|
|
|
|
void AddKey(const Slice& key) override {
|
|
|
|
hash_entries_.push_back(Hash(key.data(), key.size(), 1));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate the filter using the keys that are added
|
|
|
|
Slice Finish(std::unique_ptr<const char[]>* buf) override {
|
|
|
|
uint32_t len = static_cast<uint32_t>(hash_entries_.size()) * 4;
|
|
|
|
char* data = new char[len];
|
|
|
|
for (size_t i = 0; i < hash_entries_.size(); i++) {
|
|
|
|
EncodeFixed32(data + i * 4, hash_entries_[i]);
|
|
|
|
}
|
|
|
|
const char* const_data = data;
|
|
|
|
buf->reset(const_data);
|
|
|
|
return Slice(data, len);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::vector<uint32_t> hash_entries_;
|
|
|
|
};
|
|
|
|
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
class MockBlockBasedTable : public BlockBasedTable {
|
|
|
|
public:
|
|
|
|
explicit MockBlockBasedTable(Rep* rep)
|
|
|
|
: BlockBasedTable(rep, nullptr /* block_cache_tracer */) {}
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
};
|
|
|
|
|
|
|
|
class TestFilterBitsReader : public FilterBitsReader {
|
|
|
|
public:
|
|
|
|
explicit TestFilterBitsReader(const Slice& contents)
|
|
|
|
: data_(contents.data()), len_(static_cast<uint32_t>(contents.size())) {}
|
|
|
|
|
Introduce a new MultiGet batching implementation (#5011)
Summary:
This PR introduces a new MultiGet() API, with the underlying implementation grouping keys based on SST file and batching lookups in a file. The reason for the new API is twofold - the definition allows callers to allocate storage for status and values on stack instead of std::vector, as well as return values as PinnableSlices in order to avoid copying, and it keeps the original MultiGet() implementation intact while we experiment with batching.
Batching is useful when there is some spatial locality to the keys being queries, as well as larger batch sizes. The main benefits are due to -
1. Fewer function calls, especially to BlockBasedTableReader::MultiGet() and FullFilterBlockReader::KeysMayMatch()
2. Bloom filter cachelines can be prefetched, hiding the cache miss latency
The next step is to optimize the binary searches in the level_storage_info, index blocks and data blocks, since we could reduce the number of key comparisons if the keys are relatively close to each other. The batching optimizations also need to be extended to other formats, such as PlainTable and filter formats. This also needs to be added to db_stress.
Benchmark results from db_bench for various batch size/locality of reference combinations are given below. Locality was simulated by offsetting the keys in a batch by a stride length. Each SST file is about 8.6MB uncompressed and key/value size is 16/100 uncompressed. To focus on the cpu benefit of batching, the runs were single threaded and bound to the same cpu to eliminate interference from other system events. The results show a 10-25% improvement in micros/op from smaller to larger batch sizes (4 - 32).
Batch Sizes
1 | 2 | 4 | 8 | 16 | 32
Random pattern (Stride length 0)
4.158 | 4.109 | 4.026 | 4.05 | 4.1 | 4.074 - Get
4.438 | 4.302 | 4.165 | 4.122 | 4.096 | 4.075 - MultiGet (no batching)
4.461 | 4.256 | 4.277 | 4.11 | 4.182 | 4.14 - MultiGet (w/ batching)
Good locality (Stride length 16)
4.048 | 3.659 | 3.248 | 2.99 | 2.84 | 2.753
4.429 | 3.728 | 3.406 | 3.053 | 2.911 | 2.781
4.452 | 3.45 | 2.833 | 2.451 | 2.233 | 2.135
Good locality (Stride length 256)
4.066 | 3.786 | 3.581 | 3.447 | 3.415 | 3.232
4.406 | 4.005 | 3.644 | 3.49 | 3.381 | 3.268
4.393 | 3.649 | 3.186 | 2.882 | 2.676 | 2.62
Medium locality (Stride length 4096)
4.012 | 3.922 | 3.768 | 3.61 | 3.582 | 3.555
4.364 | 4.057 | 3.791 | 3.65 | 3.57 | 3.465
4.479 | 3.758 | 3.316 | 3.077 | 2.959 | 2.891
dbbench command used (on a DB with 4 levels, 12 million keys)-
TEST_TMPDIR=/dev/shm numactl -C 10 ./db_bench.tmp -use_existing_db=true -benchmarks="readseq,multireadrandom" -write_buffer_size=4194304 -target_file_size_base=4194304 -max_bytes_for_level_base=16777216 -num=12000000 -reads=12000000 -duration=90 -threads=1 -compression_type=none -cache_size=4194304000 -batch_size=32 -disable_auto_compactions=true -bloom_bits=10 -cache_index_and_filter_blocks=true -pin_l0_filter_and_index_blocks_in_cache=true -multiread_batched=true -multiread_stride=4
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5011
Differential Revision: D14348703
Pulled By: anand1976
fbshipit-source-id: 774406dab3776d979c809522a67bedac6c17f84b
6 years ago
|
|
|
// Silence compiler warning about overloaded virtual
|
|
|
|
using FilterBitsReader::MayMatch;
|
|
|
|
bool MayMatch(const Slice& entry) override {
|
|
|
|
uint32_t h = Hash(entry.data(), entry.size(), 1);
|
|
|
|
for (size_t i = 0; i + 4 <= len_; i += 4) {
|
|
|
|
if (h == DecodeFixed32(data_ + i)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
const char* data_;
|
|
|
|
uint32_t len_;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
class TestHashFilter : public FilterPolicy {
|
|
|
|
public:
|
|
|
|
const char* Name() const override { return "TestHashFilter"; }
|
|
|
|
|
|
|
|
void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
|
|
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
|
|
|
|
PutFixed32(dst, h);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool KeyMayMatch(const Slice& key, const Slice& filter) const override {
|
|
|
|
uint32_t h = Hash(key.data(), key.size(), 1);
|
|
|
|
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) {
|
|
|
|
if (h == DecodeFixed32(filter.data() + i)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
FilterBitsBuilder* GetFilterBitsBuilder() const override {
|
|
|
|
return new TestFilterBitsBuilder();
|
|
|
|
}
|
|
|
|
|
|
|
|
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override {
|
|
|
|
return new TestFilterBitsReader(contents);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class PluginFullFilterBlockTest : public mock::MockBlockBasedTableTester,
|
|
|
|
public testing::Test {
|
|
|
|
public:
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
PluginFullFilterBlockTest()
|
|
|
|
: mock::MockBlockBasedTableTester(new TestHashFilter) {}
|
|
|
|
};
|
|
|
|
|
|
|
|
// A builder that never saw a key must emit an empty filter, and a reader over
// that empty filter must still report that a key may match.
TEST_F(PluginFullFilterBlockTest, PluginEmptyBuilder) {
  FullFilterBlockBuilder empty_builder(nullptr, true, GetBuilder());
  Slice filter_data = empty_builder.Finish();
  ASSERT_EQ("", EscapeString(filter_data));

  // Wrap the (empty) filter bytes in a parsed block that the entry owns.
  ParsedFullFilterBlock* parsed = new ParsedFullFilterBlock(
      table_options_.filter_policy.get(), BlockContents(filter_data));
  CachableEntry<ParsedFullFilterBlock> filter_block(
      parsed, /*cache=*/nullptr, /*cache_handle=*/nullptr,
      /*own_value=*/true);

  FullFilterBlockReader filter_reader(table_.get(), std::move(filter_block));

  // Keep the same semantics as the block-based filter.
  ASSERT_TRUE(filter_reader.KeyMayMatch(
      "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
      /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
      /*lookup_context=*/nullptr));
}
|
|
|
|
|
|
|
|
// Builds one filter from a handful of keys (including a repeated one) and
// checks that every added key may match while two never-added keys do not.
TEST_F(PluginFullFilterBlockTest, PluginSingleChunk) {
  FullFilterBlockBuilder chunk_builder(nullptr, true, GetBuilder());
  // "box" is deliberately added twice, in the same order as before.
  for (const char* added : {"foo", "bar", "box", "box", "hello"}) {
    chunk_builder.Add(added);
  }
  Slice filter_data = chunk_builder.Finish();

  // Wrap the filter bytes in a parsed block that the entry owns.
  ParsedFullFilterBlock* parsed = new ParsedFullFilterBlock(
      table_options_.filter_policy.get(), BlockContents(filter_data));
  CachableEntry<ParsedFullFilterBlock> filter_block(
      parsed, /*cache=*/nullptr, /*cache_handle=*/nullptr,
      /*own_value=*/true);

  FullFilterBlockReader filter_reader(table_.get(), std::move(filter_block));

  // Keys that were added ("foo" is probed a second time at the end).
  for (const char* present : {"foo", "bar", "box", "hello", "foo"}) {
    ASSERT_TRUE(filter_reader.KeyMayMatch(
        present, /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
        /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
        /*lookup_context=*/nullptr))
        << "key: " << present;
  }

  // Keys that were never added.
  for (const char* absent : {"missing", "other"}) {
    ASSERT_FALSE(filter_reader.KeyMayMatch(
        absent, /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
        /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
        /*lookup_context=*/nullptr))
        << "key: " << absent;
  }
}
|
|
|
|
|
|
|
|
class FullFilterBlockTest : public mock::MockBlockBasedTableTester,
|
|
|
|
public testing::Test {
|
|
|
|
public:
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
FullFilterBlockTest()
|
|
|
|
: mock::MockBlockBasedTableTester(NewBloomFilterPolicy(10, false)) {}
|
|
|
|
};
|
|
|
|
|
|
|
|
TEST_F(FullFilterBlockTest, EmptyBuilder) {
|
|
|
|
FullFilterBlockBuilder builder(nullptr, true, GetBuilder());
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
Slice slice = builder.Finish();
|
|
|
|
ASSERT_EQ("", EscapeString(slice));
|
|
|
|
|
|
|
|
CachableEntry<ParsedFullFilterBlock> block(
|
|
|
|
new ParsedFullFilterBlock(table_options_.filter_policy.get(),
|
|
|
|
BlockContents(slice)),
|
|
|
|
nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */);
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
|
|
|
|
FullFilterBlockReader reader(table_.get(), std::move(block));
|
|
|
|
// Keep the same semantics as the block-based filter
|
Move the filter readers out of the block cache (#5504)
Summary:
Currently, when the block cache is used for the filter block, it is not
really the block itself that is stored in the cache but a FilterBlockReader
object. Since this object is not pure data (it has, for instance, pointers that
might dangle, including in one case a back pointer to the TableReader), it's not
really sharable. To avoid the issues around this, the current code erases the
cache entries when the TableReader is closed (which, BTW, is not sufficient
since a concurrent TableReader might have picked up the object in the meantime).
Instead of doing this, the patch moves the FilterBlockReader out of the cache
altogether, and decouples the filter reader object from the filter block.
In particular, instead of the TableReader owning, or caching/pinning the
FilterBlockReader (based on the customer's settings), with the change the
TableReader unconditionally owns the FilterBlockReader, which in turn
owns/caches/pins the filter block. This change also enables us to reuse the code
paths historically used for data blocks for filters as well.
Note:
Eviction statistics for filter blocks are temporarily broken. We plan to fix this in a
separate phase.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5504
Test Plan: make asan_check
Differential Revision: D16036974
Pulled By: ltamasi
fbshipit-source-id: 770f543c5fb4ed126fd1e04bfd3809cf4ff9c091
5 years ago
|
|
|
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
|
|
|
|
/*block_offset=*/kNotValid,
|
|
|
|
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
|
|
|
|
/*get_context=*/nullptr,
|
|
|
|
/*lookup_context=*/nullptr));
|
|
|
|
}
|
|
|
|
|
|
|
|
class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder {
|
|
|
|
std::unique_ptr<FilterBitsBuilder> b_;
|
|
|
|
std::set<std::string> uniq_;
|
|
|
|
|
|
|
|
public:
|
|
|
|
explicit CountUniqueFilterBitsBuilderWrapper(FilterBitsBuilder* b) : b_(b) {}
|
|
|
|
|
|
|
|
~CountUniqueFilterBitsBuilderWrapper() override {}
|
|
|
|
|
|
|
|
void AddKey(const Slice& key) override {
|
|
|
|
b_->AddKey(key);
|
|
|
|
uniq_.insert(key.ToString());
|
|
|
|
}
|
|
|
|
|
|
|
|
Slice Finish(std::unique_ptr<const char[]>* buf) override {
|
|
|
|
Slice rv = b_->Finish(buf);
|
|
|
|
uniq_.clear();
|
|
|
|
return rv;
|
|
|
|
}
|
|
|
|
|
|
|
|
int CalculateNumEntry(const uint32_t bytes) override {
|
|
|
|
return b_->CalculateNumEntry(bytes);
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t CountUnique() { return uniq_.size(); }
|
|
|
|
};
|
|
|
|
|
|
|
|
// Checks how many distinct entries reach the filter bits builder when a
// FullFilterBlockBuilder is fed duplicate keys, empty keys, and keys that
// share a prefix. Whole-key filtering is enabled in both scenarios.
TEST_F(FullFilterBlockTest, DuplicateEntries) {
  const bool kWholeKeyFiltering = true;

  // Scenario 1: a fixed prefix length of 0, i.e. every prefix is empty.
  {
    std::unique_ptr<const SliceTransform> zero_len_extractor(
        NewFixedPrefixTransform(0));
    // Raw pointer is kept so the test can query CountUnique() below.
    auto* counting_bits =
        new CountUniqueFilterBitsBuilderWrapper(GetBuilder());
    FullFilterBlockBuilder block_builder(zero_len_extractor.get(),
                                         kWholeKeyFiltering, counting_bits);
    ASSERT_EQ(0, block_builder.NumAdded());
    ASSERT_EQ(0, counting_bits->CountUnique());

    // One key contributes the key itself plus the empty prefix; both the
    // block builder and the bits builder count the two entries.
    block_builder.Add("key1");
    ASSERT_EQ(2, block_builder.NumAdded());
    ASSERT_EQ(2, counting_bits->CountUnique());

    // A second, different key is new, but its empty prefix is a repeat.
    // From this point on the test no longer depends on whether the block
    // builder itself can count unique keys.
    block_builder.Add("key2");
    ASSERT_EQ(3, counting_bits->CountUnique());

    // An empty key adds nothing that has not been seen already.
    block_builder.Add("");
    ASSERT_EQ(3, counting_bits->CountUnique());
  }

  // Scenario 2: a fixed prefix length of 7, mixing empty and non-empty keys.
  {
    std::unique_ptr<const SliceTransform> seven_len_extractor(
        NewFixedPrefixTransform(7));
    auto* counting_bits =
        new CountUniqueFilterBitsBuilderWrapper(GetBuilder());
    FullFilterBlockBuilder block_builder(seven_len_extractor.get(),
                                         kWholeKeyFiltering, counting_bits);
    ASSERT_EQ(0, block_builder.NumAdded());

    block_builder.Add("");  // the empty key is exercised here as well
    block_builder.Add("prefix1key1");
    block_builder.Add("prefix1key1");
    block_builder.Add("prefix1key2");
    block_builder.Add("prefix1key3");
    block_builder.Add("prefix2key4");

    // Expected distinct entries: 1 empty, 2 non-empty prefixes, and 4
    // non-empty keys.
    ASSERT_EQ(1 + 2 + 4, counting_bits->CountUnique());
  }
}
|
|
|
|
|
|
|
|
// Builds one full filter block from a handful of keys (one of them added
// twice), wraps the finished block in a reader, and checks that every added
// key may match while two keys that were never added do not.
TEST_F(FullFilterBlockTest, SingleChunk) {
  FullFilterBlockBuilder builder(nullptr, true, GetBuilder());
  ASSERT_EQ(0, builder.NumAdded());

  // "box" is added twice on purpose; NumAdded() still reports all five calls.
  for (const char* added_key : {"foo", "bar", "box", "box", "hello"}) {
    builder.Add(added_key);
  }
  ASSERT_EQ(5, builder.NumAdded());

  Slice filter_contents = builder.Finish();

  // The entry is constructed with own_value = true and no cache or handle.
  CachableEntry<ParsedFullFilterBlock> filter_entry(
      new ParsedFullFilterBlock(table_options_.filter_policy.get(),
                                BlockContents(filter_contents)),
      nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */);

  FullFilterBlockReader reader(table_.get(), std::move(filter_entry));

  // Every query below uses the same arguments apart from the key.
  const auto may_match = [&](const Slice& key) {
    return reader.KeyMayMatch(key, /*prefix_extractor=*/nullptr,
                              /*block_offset=*/kNotValid,
                              /*no_io=*/false, /*const_ikey_ptr=*/nullptr,
                              /*get_context=*/nullptr,
                              /*lookup_context=*/nullptr);
  };

  // Keys that were added must be reported as possible matches.
  ASSERT_TRUE(may_match("foo"));
  ASSERT_TRUE(may_match("bar"));
  ASSERT_TRUE(may_match("box"));
  ASSERT_TRUE(may_match("hello"));
  ASSERT_TRUE(may_match("foo"));

  // Keys that were never added are expected to be rejected.
  ASSERT_FALSE(may_match("missing"));
  ASSERT_FALSE(may_match("other"));
}
|
|
|
|
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|