Fix bug in partition filters with format_version=4 (#4381)

Summary:
Value delta encoding in format_version 4 requires the differences between the size of two consecutive handles to be sent to BlockBuilder::Add. This applies not only to indexes on blocks but also the indexes on indexes and filters in partitioned indexes and filters respectively. The patch fixes a bug where the partitioned filters would encode the entire size of the handle rather than the difference of the size with the last size.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4381

Differential Revision: D9879505

Pulled By: maysamyabandeh

fbshipit-source-id: 27a22e49b482b927fbd5629dc310c46d63d4b6d1
main
Maysam Yabandeh 6 years ago committed by Facebook Github Bot
parent 1626f6ab6b
commit 65ac72edd9
  1. 37
      db/db_bloom_filter_test.cc
  2. 5
      db/db_test_util.cc
  3. 2
      db/db_test_util.h
  4. 7
      table/format.cc
  5. 1
      table/format.h
  6. 5
      table/partitioned_filter_block.cc
  7. 1
      table/partitioned_filter_block.h
  8. 23
      table/partitioned_filter_block_test.cc

@ -22,11 +22,12 @@ class DBBloomFilterTest : public DBTestBase {
class DBBloomFilterTestWithParam
: public DBTestBase,
public testing::WithParamInterface<std::tuple<bool, bool>> {
public testing::WithParamInterface<std::tuple<bool, bool, uint32_t>> {
// public testing::WithParamInterface<bool> {
protected:
bool use_block_based_filter_;
bool partition_filters_;
uint32_t format_version_;
public:
DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {}
@ -36,9 +37,12 @@ class DBBloomFilterTestWithParam
void SetUp() override {
use_block_based_filter_ = std::get<0>(GetParam());
partition_filters_ = std::get<1>(GetParam());
format_version_ = std::get<2>(GetParam());
}
};
class DBBloomFilterTestDefFormatVersion : public DBBloomFilterTestWithParam {};
class SliceTransformLimitedDomainGeneric : public SliceTransform {
const char* Name() const override {
return "SliceTransformLimitedDomainGeneric";
@ -62,7 +66,7 @@ class SliceTransformLimitedDomainGeneric : public SliceTransform {
// KeyMayExist can lead to a few false positives, but not false negatives.
// To make test deterministic, use a much larger number of bits per key-20 than
// bits in the key, so that false positives are eliminated
TEST_P(DBBloomFilterTestWithParam, KeyMayExist) {
TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
do {
ReadOptions ropts;
std::string value;
@ -401,6 +405,11 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
}
table_options.format_version = format_version_;
if (format_version_ >= 4) {
// value delta encoding challenged more with index interval > 1
table_options.index_block_restart_interval = 8;
}
table_options.metadata_block_size = 32;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
@ -456,10 +465,26 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
} while (ChangeCompactOptions());
}
INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithParam, DBBloomFilterTestWithParam,
::testing::Values(std::make_tuple(true, false),
std::make_tuple(false, true),
std::make_tuple(false, false)));
INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestDefFormatVersion,
::testing::Values(std::make_tuple(true, false, test::kDefaultFormatVersion),
std::make_tuple(false, true, test::kDefaultFormatVersion),
std::make_tuple(false, false,
test::kDefaultFormatVersion)));
INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestWithParam,
::testing::Values(std::make_tuple(true, false, test::kDefaultFormatVersion),
std::make_tuple(false, true, test::kDefaultFormatVersion),
std::make_tuple(false, false,
test::kDefaultFormatVersion)));
INSTANTIATE_TEST_CASE_P(
FormatLatest, DBBloomFilterTestWithParam,
::testing::Values(std::make_tuple(true, false, test::kLatestFormatVersion),
std::make_tuple(false, true, test::kLatestFormatVersion),
std::make_tuple(false, false,
test::kLatestFormatVersion)));
TEST_F(DBBloomFilterTest, BloomFilterRate) {
while (ChangeFilterOptions()) {

@ -451,13 +451,14 @@ Options DBTestBase::GetOptions(
}
case kBlockBasedTableWithPartitionedIndexFormat4: {
table_options.format_version = 4;
// Format 3 changes the binary index format. Since partitioned index is a
// Format 4 changes the binary index format. Since partitioned index is a
// super-set of simple indexes, we are also using kTwoLevelIndexSearch to
// test this format.
table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
// The top-level index in partition filters are also affected by format 3.
// The top-level index in partition filters are also affected by format 4.
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
table_options.partition_filters = true;
table_options.index_block_restart_interval = 8;
break;
}
case kBlockBasedTableWithIndexRestartInterval: {

@ -109,8 +109,6 @@ struct OptionsOverride {
// These will be used only if filter_policy is set
bool partition_filters = false;
uint64_t metadata_block_size = 1024;
BlockBasedTableOptions::IndexType index_type =
BlockBasedTableOptions::IndexType::kBinarySearch;
// Used as a bit mask of individual enums in which to skip an XF test point
int skip_policy = 0;

@ -54,13 +54,6 @@ void BlockHandle::EncodeTo(std::string* dst) const {
PutVarint64Varint64(dst, offset_, size_);
}
void BlockHandle::EncodeSizeTo(std::string* dst) const {
// Sanity check that all fields have been set
assert(offset_ != ~static_cast<uint64_t>(0));
assert(size_ != ~static_cast<uint64_t>(0));
PutVarint64(dst, size_);
}
Status BlockHandle::DecodeFrom(Slice* input) {
if (GetVarint64(input, &offset_) &&
GetVarint64(input, &size_)) {

@ -55,7 +55,6 @@ class BlockHandle {
void EncodeTo(std::string* dst) const;
Status DecodeFrom(Slice* input);
Status DecodeSizeFrom(uint64_t offset, Slice* input);
void EncodeSizeTo(std::string* dst) const;
// Return a string that contains the copy of handle.
std::string ToString(bool hex = true) const;

@ -79,7 +79,10 @@ Slice PartitionedFilterBlockBuilder::Finish(
std::string handle_encoding;
last_partition_block_handle.EncodeTo(&handle_encoding);
std::string handle_delta_encoding;
last_partition_block_handle.EncodeSizeTo(&handle_delta_encoding);
PutVarsignedint64(
&handle_delta_encoding,
last_partition_block_handle.size() - last_encoded_handle_.size());
last_encoded_handle_ = last_partition_block_handle;
const Slice handle_delta_encoding_slice(handle_delta_encoding);
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding,
&handle_delta_encoding_slice);

@ -66,6 +66,7 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
uint32_t filters_in_partition_;
// Number of keys added
size_t num_added_;
BlockHandle last_encoded_handle_;
};
class PartitionedFilterBlockReader : public FilterBlockReader,

@ -50,7 +50,9 @@ class MockedBlockBasedTable : public BlockBasedTable {
}
};
class PartitionedFilterBlockTest : public testing::Test {
class PartitionedFilterBlockTest
: public testing::Test,
virtual public ::testing::WithParamInterface<uint32_t> {
public:
BlockBasedTableOptions table_options_;
InternalKeyComparator icomp = InternalKeyComparator(BytewiseComparator());
@ -60,6 +62,8 @@ class PartitionedFilterBlockTest : public testing::Test {
table_options_.no_block_cache = true; // Otherwise BlockBasedTable::Close
// will access variable that are not
// initialized in our mocked version
table_options_.format_version = GetParam();
table_options_.index_block_restart_interval = 3;
}
std::shared_ptr<Cache> cache_;
@ -279,14 +283,19 @@ class PartitionedFilterBlockTest : public testing::Test {
}
};
TEST_F(PartitionedFilterBlockTest, EmptyBuilder) {
INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest,
testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest,
testing::Values(test::kLatestFormatVersion));
TEST_P(PartitionedFilterBlockTest, EmptyBuilder) {
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get()));
const bool empty = true;
VerifyReader(builder.get(), pib.get(), empty);
}
TEST_F(PartitionedFilterBlockTest, OneBlock) {
TEST_P(PartitionedFilterBlockTest, OneBlock) {
uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) {
table_options_.metadata_block_size = i;
@ -294,7 +303,7 @@ TEST_F(PartitionedFilterBlockTest, OneBlock) {
}
}
TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
TEST_P(PartitionedFilterBlockTest, TwoBlocksPerKey) {
uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) {
table_options_.metadata_block_size = i;
@ -304,7 +313,7 @@ TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
// This reproduces the bug that a prefix is the same among multiple consecutive
// blocks but the bug would add it only to the first block.
TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
// some small number to cause partition cuts
table_options_.metadata_block_size = 1;
std::unique_ptr<const SliceTransform> prefix_extractor
@ -330,7 +339,7 @@ TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
}
}
TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
TEST_P(PartitionedFilterBlockTest, OneBlockPerKey) {
uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) {
table_options_.metadata_block_size = i;
@ -338,7 +347,7 @@ TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
}
}
TEST_F(PartitionedFilterBlockTest, PartitionCount) {
TEST_P(PartitionedFilterBlockTest, PartitionCount) {
int num_keys = sizeof(keys) / sizeof(*keys);
table_options_.metadata_block_size =
std::max(MaxIndexSize(), MaxFilterSize());

Loading…
Cancel
Save