From 96d0773a111a5283ffd4d46d948971e1db25f883 Mon Sep 17 00:00:00 2001 From: Akanksha Mahajan Date: Wed, 15 Dec 2021 13:19:34 -0800 Subject: [PATCH] Update prepopulate_block_cache logic to support block-based filter (#9300) Summary: Update prepopulate_block_cache logic to support block-based filter during insertion in block cache Pull Request resolved: https://github.com/facebook/rocksdb/pull/9300 Test Plan: CircleCI tests, make crash_test -j64 Reviewed By: pdillinger Differential Revision: D33132018 Pulled By: akankshamahajan15 fbshipit-source-id: 241deabab8645bda704728e572d6de6354df18b2 --- HISTORY.md | 1 + db/db_block_cache_test.cc | 32 ++++++++++++------- .../block_based/block_based_table_builder.cc | 11 ++++++- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 96eb2ebc8..04635b339 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ * Fixed a bug in TableOptions.prepopulate_block_cache which causes segmentation fault when used with TableOptions.partition_filters = true and TableOptions.cache_index_and_filter_blocks = true. * Fixed a bug affecting custom memtable factories which are not registered with the `ObjectRegistry`. The bug could result in failure to save the OPTIONS file. * Fixed a bug causing two duplicate entries to be appended to a file opened in non-direct mode and tracked by `FaultInjectionTestFS`. +* Fixed a bug in TableOptions.prepopulate_block_cache to support block-based filters also. ### Behavior Changes * MemTableList::TrimHistory now use allocated bytes when max_write_buffer_size_to_maintain > 0(default in TrasactionDB, introduced in PR#5022) Fix #8371. diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index cb571ebe8..b2814bf14 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -652,7 +652,7 @@ TEST_F(DBBlockCacheTest, WarmCacheWithDataBlocksDuringFlush) { // This test cache data, index and filter blocks during flush. class DBBlockCacheTest1 : public DBTestBase, - public ::testing::WithParamInterface { + public ::testing::WithParamInterface { public: const size_t kNumBlocks = 10; const size_t kValueSize = 100; @@ -660,7 +660,7 @@ class DBBlockCacheTest1 : public DBTestBase, }; INSTANTIATE_TEST_CASE_P(DBBlockCacheTest1, DBBlockCacheTest1, - ::testing::Bool()); + ::testing::Values(1, 2, 3)); TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { Options options = CurrentOptions(); @@ -670,17 +670,27 @@ TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { BlockBasedTableOptions table_options; table_options.block_cache = NewLRUCache(1 << 25, 0, false); - bool use_partition = GetParam(); - if (use_partition) { - table_options.partition_filters = true; - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + uint32_t filter_type = GetParam(); + switch (filter_type) { + case 1: // partition_filter + table_options.partition_filters = true; + table_options.index_type = + BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + break; + case 2: // block-based filter + table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); + break; + case 3: // full filter + table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + break; + default: + assert(false); } table_options.cache_index_and_filter_blocks = true; table_options.prepopulate_block_cache = BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); @@ -689,7 +699,7 @@ TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { ASSERT_OK(Put(ToString(i), value)); ASSERT_OK(Flush()); ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); - if (use_partition) { + if (filter_type == 1) { ASSERT_EQ(2 * i, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); ASSERT_EQ(2 * i, @@ -705,7 +715,7 @@ TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(i * 3, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT)); - if (use_partition) { + if (filter_type == 1) { ASSERT_EQ(i * 3, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT)); } else { @@ -723,7 +733,7 @@ TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { // Index and filter blocks are automatically warmed when the new table file // is automatically opened at the end of compaction. This is not easily // disabled so results in the new index and filter blocks being warmed. - if (use_partition) { + if (filter_type == 1) { EXPECT_EQ(2 * (1 + kNumBlocks), options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); EXPECT_EQ(2 * (1 + kNumBlocks), diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 06cb006ad..40f5de94b 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -1480,9 +1480,14 @@ Status BlockBasedTableBuilder::InsertBlockInCacheHelper( if (block_type == BlockType::kData || block_type == BlockType::kIndex) { s = InsertBlockInCache(block_contents, handle, block_type); } else if (block_type == BlockType::kFilter) { - if (rep_->filter_builder->IsBlockBased() || is_top_level_filter_block) { + if (rep_->filter_builder->IsBlockBased()) { + // for block-based filter which is deprecated. + s = InsertBlockInCache(block_contents, handle, block_type); + } else if (is_top_level_filter_block) { + // for top level filter block in partitioned filter. s = InsertBlockInCache(block_contents, handle, block_type); } else { + // for second level partitioned filters and full filters. s = InsertBlockInCache(block_contents, handle, block_type); } @@ -1567,6 +1572,10 @@ void BlockBasedTableBuilder::WriteFilterBlock( rep_->filter_builder->Finish(filter_block_handle, &s, &filter_data); assert(s.ok() || s.IsIncomplete()); rep_->props.filter_size += filter_content.size(); + + // TODO: Refactor code so that BlockType can determine both the C++ type + // of a block cache entry (TBlocklike) and the CacheEntryRole while + // inserting blocks in cache. bool top_level_filter_block = false; if (s.ok() && rep_->table_options.partition_filters && !rep_->filter_builder->IsBlockBased()) {