diff --git a/HISTORY.md b/HISTORY.md index 740210e4f..a87d06f90 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,7 @@ ### Bug Fixes * Fixed a number of data races in BlobDB. * Fix a bug where the compaction snapshot refresh feature is not disabled as advertised when `snap_refresh_nanos` is set to 0.. +* Fix bloom filter lookups by the MultiGet batching API when BlockBasedTableOptions::whole_key_filtering is false, by checking that a key is in the prefix_extractor domain and extracting the prefix before looking up. ### New Features * VerifyChecksum() by default will issue readahead. Allow ReadOptions to be passed in to those functions to override the readhead size. For checksum verifying before external SST file ingestion, a new option IngestExternalFileOptions.verify_checksums_readahead_size, is added for this readahead setting. diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 6104b142a..828eaaaeb 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -1287,6 +1287,53 @@ TEST_F(DBBasicTest, MultiGetBatchedMultiLevel) { } } +// Test class for batched MultiGet with prefix extractor +// Param bool - If true, use partitioned filters +// If false, use full filter block +class MultiGetPrefixExtractorTest + : public DBBasicTest, + public ::testing::WithParamInterface<bool> {}; + +TEST_P(MultiGetPrefixExtractorTest, Batched) { + Options options = CurrentOptions(); + options.prefix_extractor.reset(NewFixedPrefixTransform(2)); + BlockBasedTableOptions bbto; + if (GetParam()) { + bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + bbto.partition_filters = true; + } + bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + bbto.whole_key_filtering = false; + bbto.cache_index_and_filter_blocks = false; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + Reopen(options); + + // First key is not in the prefix_extractor domain + ASSERT_OK(Put("k", "v0")); + ASSERT_OK(Put("kk1", "v1")); + ASSERT_OK(Put("kk2", "v2")); + 
ASSERT_OK(Put("kk3", "v3")); + ASSERT_OK(Put("kk4", "v4")); + ASSERT_OK(Flush()); + + std::vector<std::string> keys({"k", "kk1", "kk2", "kk3", "kk4"}); + std::vector<std::string> values; + SetPerfLevel(kEnableCount); + get_perf_context()->Reset(); + values = MultiGet(keys, nullptr); + ASSERT_EQ(values[0], "v0"); + ASSERT_EQ(values[1], "v1"); + ASSERT_EQ(values[2], "v2"); + ASSERT_EQ(values[3], "v3"); + ASSERT_EQ(values[4], "v4"); + // Filter hits for 4 in-domain keys + ASSERT_EQ(get_perf_context()->bloom_sst_hit_count, 4); +} + +INSTANTIATE_TEST_CASE_P( + MultiGetPrefix, MultiGetPrefixExtractorTest, + ::testing::Bool()); + #ifndef ROCKSDB_LITE TEST_F(DBBasicTest, GetAllKeyVersions) { Options options = CurrentOptions(); diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index ef3758740..bf0bed846 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -3155,13 +3155,6 @@ void BlockBasedTable::FullFilterKeysMayMatch( } else if (!read_options.total_order_seek && prefix_extractor && rep_->table_properties->prefix_extractor_name.compare( prefix_extractor->Name()) == 0) { - for (auto iter = range->begin(); iter != range->end(); ++iter) { - Slice user_key = iter->lkey->user_key(); - - if (!prefix_extractor->InDomain(user_key)) { - range->SkipKey(iter); - } - } filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false, lookup_context); } diff --git a/table/block_based/filter_block.h b/table/block_based/filter_block.h index 936281bde..a2871e6c8 100644 --- a/table/block_based/filter_block.h +++ b/table/block_based/filter_block.h @@ -137,7 +137,8 @@ class FilterBlockReader { const Slice ukey = iter->ukey; const Slice ikey = iter->ikey; GetContext* const get_context = iter->get_context; - if (!KeyMayMatch(prefix_extractor->Transform(ukey), prefix_extractor, + if (prefix_extractor->InDomain(ukey) && + !PrefixMayMatch(prefix_extractor->Transform(ukey), prefix_extractor, block_offset, 
no_io, &ikey, get_context, lookup_context)) { range->SkipKey(iter); diff --git a/table/block_based/full_filter_block.cc b/table/block_based/full_filter_block.cc index 29decc35b..cf1afb5d3 100644 --- a/table/block_based/full_filter_block.cc +++ b/table/block_based/full_filter_block.cc @@ -3,6 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). +#include <array> #include "table/block_based/full_filter_block.h" #ifdef ROCKSDB_MALLOC_USABLE_SIZE @@ -208,11 +209,11 @@ void FullFilterBlockReader::KeysMayMatch( // present return; } - MayMatch(range, no_io, lookup_context); + MayMatch(range, no_io, nullptr, lookup_context); } void FullFilterBlockReader::PrefixesMayMatch( - MultiGetRange* range, const SliceTransform* /* prefix_extractor */, + MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) { #ifdef NDEBUG @@ -220,11 +221,12 @@ void FullFilterBlockReader::PrefixesMayMatch( (void)block_offset; #endif assert(block_offset == kNotValid); - MayMatch(range, no_io, lookup_context); + MayMatch(range, no_io, prefix_extractor, lookup_context); } void FullFilterBlockReader::MayMatch( MultiGetRange* range, bool no_io, + const SliceTransform* prefix_extractor, BlockCacheLookupContext* lookup_context) const { CachableEntry<BlockContents> filter_block; @@ -252,18 +254,36 @@ void FullFilterBlockReader::MayMatch( // &may_match[0] doesn't work for autovector (compiler error). 
So // declare both keys and may_match as arrays, which is also slightly less // expensive compared to autovector - Slice* keys[MultiGetContext::MAX_BATCH_SIZE]; - bool may_match[MultiGetContext::MAX_BATCH_SIZE] = {false}; + std::array<Slice*, MultiGetContext::MAX_BATCH_SIZE> keys; + std::array<bool, MultiGetContext::MAX_BATCH_SIZE> may_match = {{true}}; + autovector<Slice, MultiGetContext::MAX_BATCH_SIZE> prefixes; int num_keys = 0; - for (auto iter = range->begin(); iter != range->end(); ++iter) { - keys[num_keys++] = &iter->ukey; + MultiGetRange filter_range(*range, range->begin(), range->end()); + for (auto iter = filter_range.begin(); + iter != filter_range.end(); ++iter) { + if (!prefix_extractor) { + keys[num_keys++] = &iter->ukey; + } else if (prefix_extractor->InDomain(iter->ukey)) { + prefixes.emplace_back(prefix_extractor->Transform(iter->ukey)); + keys[num_keys++] = &prefixes.back(); + } else { + filter_range.SkipKey(iter); + } } filter_bits_reader->MayMatch(num_keys, &keys[0], &may_match[0]); int i = 0; - for (auto iter = range->begin(); iter != range->end(); ++iter) { + for (auto iter = filter_range.begin(); + iter != filter_range.end(); ++iter) { if (!may_match[i]) { + // Update original MultiGet range to skip this key. 
The filter_range + // was temporarily used just to skip keys not in prefix_extractor domain range->SkipKey(iter); + PERF_COUNTER_ADD(bloom_sst_miss_count, 1); + } else { + //PERF_COUNTER_ADD(bloom_sst_hit_count, 1); + PerfContext* perf_ctx = get_perf_context(); + perf_ctx->bloom_sst_hit_count++; } ++i; } diff --git a/table/block_based/full_filter_block.h b/table/block_based/full_filter_block.h index 08a41706e..4d10a5a33 100644 --- a/table/block_based/full_filter_block.h +++ b/table/block_based/full_filter_block.h @@ -124,6 +124,7 @@ class FullFilterBlockReader : public FilterBlockReaderCommon<BlockContents> { bool MayMatch(const Slice& entry, bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context) const; void MayMatch(MultiGetRange* range, bool no_io, + const SliceTransform* prefix_extractor, BlockCacheLookupContext* lookup_context) const; bool IsFilterCompatible(const Slice* iterate_upper_bound, const Slice& prefix, const Comparator* comparator) const;