Fix the bug with duplicate prefix in partition filters (#4024)

Summary:
https://github.com/facebook/rocksdb/pull/3764 introduced an optimization feature to skip duplicate prefix entires in full bloom filters. Unfortunately it also introduces a bug in partitioned full filters, where the duplicate prefix should still be inserted if it is in a new partition. The patch fixes the bug by resetting the duplicate detection logic each time a partition is cut.
This bug could result into false negatives, which means that DB could skip an existing key.
Closes https://github.com/facebook/rocksdb/pull/4024

Differential Revision: D8518866

Pulled By: maysamyabandeh

fbshipit-source-id: 044f4d988e606a330ecafd8c79daceb68b8796bf
main
Maysam Yabandeh 6 years ago committed by Facebook Github Bot
parent 92ee3350e0
commit 28a9d8910b
  1. 1
      HISTORY.md
  2. 2
      table/block_based_table_reader.h
  3. 6
      table/full_filter_block.cc
  4. 1
      table/full_filter_block.h
  5. 1
      table/partitioned_filter_block.cc
  6. 82
      table/partitioned_filter_block_test.cc

@ -10,6 +10,7 @@
### Bug Fixes
* fix deadlock with enable_pipelined_write=true and max_successive_merges > 0
* Fix corruption in non-iterator reads when mmap is used for file reads
* Fix bug with prefix search in partition filters where a shared prefix would be ignored from the later partitions. The bug could report an eixstent key as missing. The bug could be triggered if prefix_extractor is set and partition filters is enabled.
## 5.14.0 (5/16/2018)
### Public API Change

@ -347,7 +347,7 @@ class BlockBasedTable : public TableReader {
Status VerifyChecksumInBlocks(InternalIterator* index_iter);
// Create the filter from the filter block.
FilterBlockReader* ReadFilter(
virtual FilterBlockReader* ReadFilter(
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_handle,
const bool is_a_filter_partition,
const SliceTransform* prefix_extractor = nullptr) const;

@ -72,8 +72,14 @@ inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
}
}
void FullFilterBlockBuilder::Reset() {
last_whole_key_recorded_ = false;
last_prefix_recorded_ = false;
}
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
Status* status) {
Reset();
// In this impl we ignore BlockHandle
*status = Status::OK();
if (num_added_ != 0) {

@ -52,6 +52,7 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
protected:
virtual void AddKey(const Slice& key);
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
virtual void Reset();
private:
// important: all of these might point to invalid addresses

@ -49,6 +49,7 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock() {
std::string& index_key = p_index_builder_->GetPartitionKey();
filters.push_back({index_key, filter});
filters_in_partition_ = 0;
Reset();
}
void PartitionedFilterBlockBuilder::AddKey(const Slice& key) {

@ -30,13 +30,24 @@ class MockedBlockBasedTable : public BlockBasedTable {
virtual CachableEntry<FilterBlockReader> GetFilter(
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
const bool /* unused */, bool /* unused */, GetContext* /* unused */,
const SliceTransform* /* unused */) const override {
const SliceTransform* prefix_extractor) const override {
Slice slice = slices[filter_blk_handle.offset()];
auto obj = new FullFilterBlockReader(
nullptr, true, BlockContents(slice, false, kNoCompression),
prefix_extractor, true, BlockContents(slice, false, kNoCompression),
rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr);
return {obj, nullptr};
}
virtual FilterBlockReader* ReadFilter(
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
const bool /* unused */,
const SliceTransform* prefix_extractor) const override {
Slice slice = slices[filter_blk_handle.offset()];
auto obj = new FullFilterBlockReader(
prefix_extractor, true, BlockContents(slice, false, kNoCompression),
rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr);
return obj;
}
};
class PartitionedFilterBlockTest : public testing::Test {
@ -93,7 +104,8 @@ class PartitionedFilterBlockTest : public testing::Test {
}
PartitionedFilterBlockBuilder* NewBuilder(
PartitionedIndexBuilder* const p_index_builder) {
PartitionedIndexBuilder* const p_index_builder,
const SliceTransform* prefix_extractor = nullptr) {
assert(table_options_.block_size_deviation <= 100);
auto partition_size = static_cast<uint32_t>(
((table_options_.metadata_block_size *
@ -102,7 +114,7 @@ class PartitionedFilterBlockTest : public testing::Test {
100);
partition_size = std::max(partition_size, static_cast<uint32_t>(1));
return new PartitionedFilterBlockBuilder(
nullptr, table_options_.whole_key_filtering,
prefix_extractor, table_options_.whole_key_filtering,
table_options_.filter_policy->GetFilterBitsBuilder(),
table_options_.index_block_restart_interval, p_index_builder,
partition_size);
@ -111,7 +123,8 @@ class PartitionedFilterBlockTest : public testing::Test {
std::unique_ptr<MockedBlockBasedTable> table;
PartitionedFilterBlockReader* NewReader(
PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib) {
PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib,
const SliceTransform* prefix_extractor) {
BlockHandle bh;
Status status;
Slice slice;
@ -126,41 +139,42 @@ class PartitionedFilterBlockTest : public testing::Test {
table.reset(new MockedBlockBasedTable(new BlockBasedTable::Rep(
ioptions, env_options, table_options_, icomp, false)));
auto reader = new PartitionedFilterBlockReader(
nullptr, true, BlockContents(slice, false, kNoCompression), nullptr,
nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq());
prefix_extractor, true, BlockContents(slice, false, kNoCompression),
nullptr, nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq());
return reader;
}
void VerifyReader(PartitionedFilterBlockBuilder* builder,
PartitionedIndexBuilder* pib, bool empty = false) {
PartitionedIndexBuilder* pib, bool empty = false,
const SliceTransform* prefix_extractor = nullptr) {
std::unique_ptr<PartitionedFilterBlockReader> reader(
NewReader(builder, pib));
NewReader(builder, pib, prefix_extractor));
// Querying added keys
const bool no_io = true;
for (auto key : keys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice));
ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io,
&ikey_slice));
}
{
// querying a key twice
auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->KeyMayMatch(keys[0], nullptr, kNotValid, !no_io,
&ikey_slice));
ASSERT_TRUE(reader->KeyMayMatch(keys[0], prefix_extractor, kNotValid,
!no_io, &ikey_slice));
}
// querying missing keys
for (auto key : missing_keys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
if (empty) {
ASSERT_TRUE(
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice));
ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid,
!no_io, &ikey_slice));
} else {
// assuming a good hash function
ASSERT_FALSE(
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice));
ASSERT_FALSE(reader->KeyMayMatch(key, prefix_extractor, kNotValid,
!no_io, &ikey_slice));
}
}
}
@ -187,10 +201,10 @@ class PartitionedFilterBlockTest : public testing::Test {
return CountNumOfIndexPartitions(pib.get());
}
void TestBlockPerTwoKeys() {
void TestBlockPerTwoKeys(const SliceTransform* prefix_extractor = nullptr) {
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
NewBuilder(pib.get()));
NewBuilder(pib.get(), prefix_extractor));
int i = 0;
builder->Add(keys[i]);
i++;
@ -203,7 +217,7 @@ class PartitionedFilterBlockTest : public testing::Test {
builder->Add(keys[i]);
CutABlock(pib.get(), keys[i]);
VerifyReader(builder.get(), pib.get());
VerifyReader(builder.get(), pib.get(), prefix_extractor);
}
void TestBlockPerAllKeys() {
@ -281,6 +295,34 @@ TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
}
}
// This reproduces the bug that a prefix is the same among multiple consecutive
// blocks but the bug would add it only to the first block.
TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
// some small number to cause partition cuts
table_options_.metadata_block_size = 1;
std::unique_ptr<const SliceTransform> prefix_extractor
(rocksdb::NewFixedPrefixTransform(1));
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
NewBuilder(pib.get(), prefix_extractor.get()));
const std::string pkeys[3] = {"p-key1", "p-key2", "p-key3"};
builder->Add(pkeys[0]);
CutABlock(pib.get(), pkeys[0], pkeys[1]);
builder->Add(pkeys[1]);
CutABlock(pib.get(), pkeys[1], pkeys[2]);
builder->Add(pkeys[2]);
CutABlock(pib.get(), pkeys[2]);
std::unique_ptr<PartitionedFilterBlockReader> reader(
NewReader(builder.get(), pib.get(), prefix_extractor.get()));
for (auto key : pkeys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key),
prefix_extractor.get(), kNotValid,
false /*no_io*/, &ikey_slice));
}
}
TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) {

Loading…
Cancel
Save