Fix bug in partition filters with format_version=4 (#4381)

Summary:
Value delta encoding in format_version 4 requires the differences between the size of two consecutive handles to be sent to BlockBuilder::Add. This applies not only to indexes on blocks but also the indexes on indexes and filters in partitioned indexes and filters respectively. The patch fixes a bug where the partitioned filters would encode the entire size of the handle rather than the difference of the size with the last size.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4381

Differential Revision: D9879505

Pulled By: maysamyabandeh

fbshipit-source-id: 27a22e49b482b927fbd5629dc310c46d63d4b6d1
main
Maysam Yabandeh 6 years ago committed by Facebook Github Bot
parent 1626f6ab6b
commit 65ac72edd9
  1. 37
      db/db_bloom_filter_test.cc
  2. 5
      db/db_test_util.cc
  3. 2
      db/db_test_util.h
  4. 7
      table/format.cc
  5. 1
      table/format.h
  6. 5
      table/partitioned_filter_block.cc
  7. 1
      table/partitioned_filter_block.h
  8. 23
      table/partitioned_filter_block_test.cc

@ -22,11 +22,12 @@ class DBBloomFilterTest : public DBTestBase {
class DBBloomFilterTestWithParam class DBBloomFilterTestWithParam
: public DBTestBase, : public DBTestBase,
public testing::WithParamInterface<std::tuple<bool, bool>> { public testing::WithParamInterface<std::tuple<bool, bool, uint32_t>> {
// public testing::WithParamInterface<bool> { // public testing::WithParamInterface<bool> {
protected: protected:
bool use_block_based_filter_; bool use_block_based_filter_;
bool partition_filters_; bool partition_filters_;
uint32_t format_version_;
public: public:
DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {} DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {}
@ -36,9 +37,12 @@ class DBBloomFilterTestWithParam
void SetUp() override { void SetUp() override {
use_block_based_filter_ = std::get<0>(GetParam()); use_block_based_filter_ = std::get<0>(GetParam());
partition_filters_ = std::get<1>(GetParam()); partition_filters_ = std::get<1>(GetParam());
format_version_ = std::get<2>(GetParam());
} }
}; };
class DBBloomFilterTestDefFormatVersion : public DBBloomFilterTestWithParam {};
class SliceTransformLimitedDomainGeneric : public SliceTransform { class SliceTransformLimitedDomainGeneric : public SliceTransform {
const char* Name() const override { const char* Name() const override {
return "SliceTransformLimitedDomainGeneric"; return "SliceTransformLimitedDomainGeneric";
@ -62,7 +66,7 @@ class SliceTransformLimitedDomainGeneric : public SliceTransform {
// KeyMayExist can lead to a few false positives, but not false negatives. // KeyMayExist can lead to a few false positives, but not false negatives.
// To make test deterministic, use a much larger number of bits per key-20 than // To make test deterministic, use a much larger number of bits per key-20 than
// bits in the key, so that false positives are eliminated // bits in the key, so that false positives are eliminated
TEST_P(DBBloomFilterTestWithParam, KeyMayExist) { TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
do { do {
ReadOptions ropts; ReadOptions ropts;
std::string value; std::string value;
@ -401,6 +405,11 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
table_options.index_type = table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
} }
table_options.format_version = format_version_;
if (format_version_ >= 4) {
// value delta encoding challenged more with index interval > 1
table_options.index_block_restart_interval = 8;
}
table_options.metadata_block_size = 32; table_options.metadata_block_size = 32;
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));
@ -456,10 +465,26 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
} while (ChangeCompactOptions()); } while (ChangeCompactOptions());
} }
INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithParam, DBBloomFilterTestWithParam, INSTANTIATE_TEST_CASE_P(
::testing::Values(std::make_tuple(true, false), FormatDef, DBBloomFilterTestDefFormatVersion,
std::make_tuple(false, true), ::testing::Values(std::make_tuple(true, false, test::kDefaultFormatVersion),
std::make_tuple(false, false))); std::make_tuple(false, true, test::kDefaultFormatVersion),
std::make_tuple(false, false,
test::kDefaultFormatVersion)));
INSTANTIATE_TEST_CASE_P(
FormatDef, DBBloomFilterTestWithParam,
::testing::Values(std::make_tuple(true, false, test::kDefaultFormatVersion),
std::make_tuple(false, true, test::kDefaultFormatVersion),
std::make_tuple(false, false,
test::kDefaultFormatVersion)));
INSTANTIATE_TEST_CASE_P(
FormatLatest, DBBloomFilterTestWithParam,
::testing::Values(std::make_tuple(true, false, test::kLatestFormatVersion),
std::make_tuple(false, true, test::kLatestFormatVersion),
std::make_tuple(false, false,
test::kLatestFormatVersion)));
TEST_F(DBBloomFilterTest, BloomFilterRate) { TEST_F(DBBloomFilterTest, BloomFilterRate) {
while (ChangeFilterOptions()) { while (ChangeFilterOptions()) {

@ -451,13 +451,14 @@ Options DBTestBase::GetOptions(
} }
case kBlockBasedTableWithPartitionedIndexFormat4: { case kBlockBasedTableWithPartitionedIndexFormat4: {
table_options.format_version = 4; table_options.format_version = 4;
// Format 3 changes the binary index format. Since partitioned index is a // Format 4 changes the binary index format. Since partitioned index is a
// super-set of simple indexes, we are also using kTwoLevelIndexSearch to // super-set of simple indexes, we are also using kTwoLevelIndexSearch to
// test this format. // test this format.
table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
// The top-level index in partition filters are also affected by format 3. // The top-level index in partition filters are also affected by format 4.
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
table_options.partition_filters = true; table_options.partition_filters = true;
table_options.index_block_restart_interval = 8;
break; break;
} }
case kBlockBasedTableWithIndexRestartInterval: { case kBlockBasedTableWithIndexRestartInterval: {

@ -109,8 +109,6 @@ struct OptionsOverride {
// These will be used only if filter_policy is set // These will be used only if filter_policy is set
bool partition_filters = false; bool partition_filters = false;
uint64_t metadata_block_size = 1024; uint64_t metadata_block_size = 1024;
BlockBasedTableOptions::IndexType index_type =
BlockBasedTableOptions::IndexType::kBinarySearch;
// Used as a bit mask of individual enums in which to skip an XF test point // Used as a bit mask of individual enums in which to skip an XF test point
int skip_policy = 0; int skip_policy = 0;

@ -54,13 +54,6 @@ void BlockHandle::EncodeTo(std::string* dst) const {
PutVarint64Varint64(dst, offset_, size_); PutVarint64Varint64(dst, offset_, size_);
} }
void BlockHandle::EncodeSizeTo(std::string* dst) const {
// Sanity check that all fields have been set
assert(offset_ != ~static_cast<uint64_t>(0));
assert(size_ != ~static_cast<uint64_t>(0));
PutVarint64(dst, size_);
}
Status BlockHandle::DecodeFrom(Slice* input) { Status BlockHandle::DecodeFrom(Slice* input) {
if (GetVarint64(input, &offset_) && if (GetVarint64(input, &offset_) &&
GetVarint64(input, &size_)) { GetVarint64(input, &size_)) {

@ -55,7 +55,6 @@ class BlockHandle {
void EncodeTo(std::string* dst) const; void EncodeTo(std::string* dst) const;
Status DecodeFrom(Slice* input); Status DecodeFrom(Slice* input);
Status DecodeSizeFrom(uint64_t offset, Slice* input); Status DecodeSizeFrom(uint64_t offset, Slice* input);
void EncodeSizeTo(std::string* dst) const;
// Return a string that contains the copy of handle. // Return a string that contains the copy of handle.
std::string ToString(bool hex = true) const; std::string ToString(bool hex = true) const;

@ -79,7 +79,10 @@ Slice PartitionedFilterBlockBuilder::Finish(
std::string handle_encoding; std::string handle_encoding;
last_partition_block_handle.EncodeTo(&handle_encoding); last_partition_block_handle.EncodeTo(&handle_encoding);
std::string handle_delta_encoding; std::string handle_delta_encoding;
last_partition_block_handle.EncodeSizeTo(&handle_delta_encoding); PutVarsignedint64(
&handle_delta_encoding,
last_partition_block_handle.size() - last_encoded_handle_.size());
last_encoded_handle_ = last_partition_block_handle;
const Slice handle_delta_encoding_slice(handle_delta_encoding); const Slice handle_delta_encoding_slice(handle_delta_encoding);
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding, index_on_filter_block_builder_.Add(last_entry.key, handle_encoding,
&handle_delta_encoding_slice); &handle_delta_encoding_slice);

@ -66,6 +66,7 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
uint32_t filters_in_partition_; uint32_t filters_in_partition_;
// Number of keys added // Number of keys added
size_t num_added_; size_t num_added_;
BlockHandle last_encoded_handle_;
}; };
class PartitionedFilterBlockReader : public FilterBlockReader, class PartitionedFilterBlockReader : public FilterBlockReader,

@ -50,7 +50,9 @@ class MockedBlockBasedTable : public BlockBasedTable {
} }
}; };
class PartitionedFilterBlockTest : public testing::Test { class PartitionedFilterBlockTest
: public testing::Test,
virtual public ::testing::WithParamInterface<uint32_t> {
public: public:
BlockBasedTableOptions table_options_; BlockBasedTableOptions table_options_;
InternalKeyComparator icomp = InternalKeyComparator(BytewiseComparator()); InternalKeyComparator icomp = InternalKeyComparator(BytewiseComparator());
@ -60,6 +62,8 @@ class PartitionedFilterBlockTest : public testing::Test {
table_options_.no_block_cache = true; // Otherwise BlockBasedTable::Close table_options_.no_block_cache = true; // Otherwise BlockBasedTable::Close
// will access variable that are not // will access variable that are not
// initialized in our mocked version // initialized in our mocked version
table_options_.format_version = GetParam();
table_options_.index_block_restart_interval = 3;
} }
std::shared_ptr<Cache> cache_; std::shared_ptr<Cache> cache_;
@ -279,14 +283,19 @@ class PartitionedFilterBlockTest : public testing::Test {
} }
}; };
TEST_F(PartitionedFilterBlockTest, EmptyBuilder) { INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest,
testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest,
testing::Values(test::kLatestFormatVersion));
TEST_P(PartitionedFilterBlockTest, EmptyBuilder) {
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder()); std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get())); std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get()));
const bool empty = true; const bool empty = true;
VerifyReader(builder.get(), pib.get(), empty); VerifyReader(builder.get(), pib.get(), empty);
} }
TEST_F(PartitionedFilterBlockTest, OneBlock) { TEST_P(PartitionedFilterBlockTest, OneBlock) {
uint64_t max_index_size = MaxIndexSize(); uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) { for (uint64_t i = 1; i < max_index_size + 1; i++) {
table_options_.metadata_block_size = i; table_options_.metadata_block_size = i;
@ -294,7 +303,7 @@ TEST_F(PartitionedFilterBlockTest, OneBlock) {
} }
} }
TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) { TEST_P(PartitionedFilterBlockTest, TwoBlocksPerKey) {
uint64_t max_index_size = MaxIndexSize(); uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) { for (uint64_t i = 1; i < max_index_size + 1; i++) {
table_options_.metadata_block_size = i; table_options_.metadata_block_size = i;
@ -304,7 +313,7 @@ TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
// This reproduces the bug that a prefix is the same among multiple consecutive // This reproduces the bug that a prefix is the same among multiple consecutive
// blocks but the bug would add it only to the first block. // blocks but the bug would add it only to the first block.
TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
// some small number to cause partition cuts // some small number to cause partition cuts
table_options_.metadata_block_size = 1; table_options_.metadata_block_size = 1;
std::unique_ptr<const SliceTransform> prefix_extractor std::unique_ptr<const SliceTransform> prefix_extractor
@ -330,7 +339,7 @@ TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
} }
} }
TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) { TEST_P(PartitionedFilterBlockTest, OneBlockPerKey) {
uint64_t max_index_size = MaxIndexSize(); uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) { for (uint64_t i = 1; i < max_index_size + 1; i++) {
table_options_.metadata_block_size = i; table_options_.metadata_block_size = i;
@ -338,7 +347,7 @@ TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
} }
} }
TEST_F(PartitionedFilterBlockTest, PartitionCount) { TEST_P(PartitionedFilterBlockTest, PartitionCount) {
int num_keys = sizeof(keys) / sizeof(*keys); int num_keys = sizeof(keys) / sizeof(*keys);
table_options_.metadata_block_size = table_options_.metadata_block_size =
std::max(MaxIndexSize(), MaxFilterSize()); std::max(MaxIndexSize(), MaxFilterSize());

Loading…
Cancel
Save