From 9e4d56f2c98dc4788c2cec9c42abb6dbd5e1c8d6 Mon Sep 17 00:00:00 2001
From: Akanksha Mahajan
Date: Wed, 8 Dec 2021 12:43:09 -0800
Subject: [PATCH] Fix segmentation fault in table_options.prepopulate_block_cache when used with partition_filters (#9263)

Summary:
When table_options.prepopulate_block_cache is set to
BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly and
table_options.partition_filters is also set to true, fetching the
top-level filter block causes a segmentation fault because the block
was inserted into the block cache with the wrong type.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9263

Test Plan:
Updated unit tests.
Ran db_stress: make crash_test -j32

Reviewed By: pdillinger

Differential Revision: D32936566

Pulled By: akankshamahajan15

fbshipit-source-id: 8bd79e53830d3e3c1bb79787e1ffbc3cb46d4426
---
 HISTORY.md                                    |  1 +
 db/db_block_cache_test.cc                     | 59 +++++++++++++++----
 db_stress_tool/db_stress_common.h             |  2 +
 db_stress_tool/db_stress_gflags.cc            |  5 ++
 db_stress_tool/db_stress_test_base.cc         |  3 +
 .../block_based/block_based_table_builder.cc  | 21 +++++--
 table/block_based/block_based_table_builder.h |  6 +-
 tools/db_crashtest.py                         |  1 +
 8 files changed, 79 insertions(+), 19 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index 8acc96d53..c795b62e9 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -3,6 +3,7 @@
 ### New Features
 ### Bug Fixes
 * Fixed a bug in rocksdb automatic implicit prefetching which got broken because of new feature adaptive_readahead and internal prefetching got disabled when iterator moves from one file to next.
+* Fixed a bug in TableOptions.prepopulate_block_cache that caused a segmentation fault when used with TableOptions.partition_filters = true and TableOptions.cache_index_and_filter_blocks = true.
 
 ### Behavior Changes
 * MemTableList::TrimHistory now use allocated bytes when max_write_buffer_size_to_maintain > 0(default in TrasactionDB, introduced in PR#5022) Fix #8371.
diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc
index 22b1003f9..c34e59260 100644
--- a/db/db_block_cache_test.cc
+++ b/db/db_block_cache_test.cc
@@ -650,13 +650,32 @@ TEST_F(DBBlockCacheTest, WarmCacheWithDataBlocksDuringFlush) {
 }
 
 // This test cache data, index and filter blocks during flush.
-TEST_F(DBBlockCacheTest, WarmCacheWithBlocksDuringFlush) {
+class DBBlockCacheTest1 : public DBTestBase,
+                          public ::testing::WithParamInterface<bool> {
+ public:
+  const size_t kNumBlocks = 10;
+  const size_t kValueSize = 100;
+  DBBlockCacheTest1() : DBTestBase("db_block_cache_test1", true) {}
+};
+
+INSTANTIATE_TEST_CASE_P(DBBlockCacheTest1, DBBlockCacheTest1,
+                        ::testing::Bool());
+
+TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) {
   Options options = CurrentOptions();
   options.create_if_missing = true;
   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
 
   BlockBasedTableOptions table_options;
   table_options.block_cache = NewLRUCache(1 << 25, 0, false);
+
+  bool use_partition = GetParam();
+  if (use_partition) {
+    table_options.partition_filters = true;
+    table_options.index_type =
+        BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+  }
+
   table_options.cache_index_and_filter_blocks = true;
   table_options.prepopulate_block_cache =
       BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
@@ -669,9 +688,15 @@ TEST_F(DBBlockCacheTest, WarmCacheWithBlocksDuringFlush) {
     ASSERT_OK(Put(ToString(i), value));
     ASSERT_OK(Flush());
     ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
-    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
-    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
-
+    if (use_partition) {
+      ASSERT_EQ(2 * i,
+                options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
+      ASSERT_EQ(2 * i,
+                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
+    } else {
+      ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
+      ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
+    }
     ASSERT_EQ(value, Get(ToString(i)));
 
     ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
@@ -679,11 +704,16 @@ TEST_F(DBBlockCacheTest, WarmCacheWithBlocksDuringFlush) {
     ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS));
     ASSERT_EQ(i * 3,
               options.statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT));
-
+    if (use_partition) {
+      ASSERT_EQ(i * 3,
+                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT));
+    } else {
+      ASSERT_EQ(i * 2,
+                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT));
+    }
     ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS));
-    ASSERT_EQ(i * 2,
-              options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT));
   }
+
   // Verify compaction not counted
   ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
                               /*end=*/nullptr));
@@ -692,10 +722,17 @@ TEST_F(DBBlockCacheTest, WarmCacheWithBlocksDuringFlush) {
   // Index and filter blocks are automatically warmed when the new table file
   // is automatically opened at the end of compaction. This is not easily
   // disabled so results in the new index and filter blocks being warmed.
-  EXPECT_EQ(1 + kNumBlocks,
-            options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
-  EXPECT_EQ(1 + kNumBlocks,
-            options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
+  if (use_partition) {
+    EXPECT_EQ(2 * (1 + kNumBlocks),
+              options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
+    EXPECT_EQ(2 * (1 + kNumBlocks),
+              options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
+  } else {
+    EXPECT_EQ(1 + kNumBlocks,
+              options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
+    EXPECT_EQ(1 + kNumBlocks,
+              options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
+  }
 }
 
 TEST_F(DBBlockCacheTest, DynamicallyWarmCacheDuringFlush) {
diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h
index 387aa0204..7cc0a9110 100644
--- a/db_stress_tool/db_stress_common.h
+++ b/db_stress_tool/db_stress_common.h
@@ -269,6 +269,8 @@ DECLARE_uint64(user_timestamp_size);
 DECLARE_string(secondary_cache_uri);
 DECLARE_int32(secondary_cache_fault_one_in);
 
+DECLARE_int32(prepopulate_block_cache);
+
 constexpr long KB = 1024;
 constexpr int kRandomValueMaxFactor = 3;
 constexpr int kValueMaxLen = 100;
diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc
index dc5d06cd3..4b8da962d 100644
--- a/db_stress_tool/db_stress_gflags.cc
+++ b/db_stress_tool/db_stress_gflags.cc
@@ -864,5 +864,10 @@ DEFINE_int32(injest_error_severity, 1,
              "The severity of the injested IO Error. 1 is soft error (e.g. "
              "retryable error), 2 is fatal error, and the default is "
              "retryable error.");
 
+DEFINE_int32(prepopulate_block_cache,
+             static_cast<int>(ROCKSDB_NAMESPACE::BlockBasedTableOptions::
+                                  PrepopulateBlockCache::kDisable),
+             "Options related to cache warming (see `enum "
+             "PrepopulateBlockCache` in table.h)");
 #endif  // GFLAGS
diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc
index b5cc7265e..7fc4fc000 100644
--- a/db_stress_tool/db_stress_test_base.cc
+++ b/db_stress_tool/db_stress_test_base.cc
@@ -2236,6 +2236,9 @@ void StressTest::Open() {
         FLAGS_optimize_filters_for_memory;
     block_based_options.index_type =
         static_cast<BlockBasedTableOptions::IndexType>(FLAGS_index_type);
+    block_based_options.prepopulate_block_cache =
+        static_cast<BlockBasedTableOptions::PrepopulateBlockCache>(
+            FLAGS_prepopulate_block_cache);
     options_.table_factory.reset(
         NewBlockBasedTableFactory(block_based_options));
     options_.db_write_buffer_size = FLAGS_db_write_buffer_size;
diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc
index 2624b091a..44f3bf3d0 100644
--- a/table/block_based/block_based_table_builder.cc
+++ b/table/block_based/block_based_table_builder.cc
@@ -1221,7 +1221,8 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
                                            CompressionType type,
                                            BlockHandle* handle,
                                            BlockType block_type,
-                                           const Slice* raw_block_contents) {
+                                           const Slice* raw_block_contents,
+                                           bool is_top_level_filter_block) {
   Rep* r = rep_;
   bool is_data_block = block_type == BlockType::kData;
   Status s = Status::OK();
@@ -1262,9 +1263,11 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
     }
     if (warm_cache) {
       if (type == kNoCompression) {
-        s = InsertBlockInCacheHelper(block_contents, handle, block_type);
+        s = InsertBlockInCacheHelper(block_contents, handle, block_type,
+                                     is_top_level_filter_block);
       } else if (raw_block_contents != nullptr) {
-        s = InsertBlockInCacheHelper(*raw_block_contents, handle, block_type);
+        s = InsertBlockInCacheHelper(*raw_block_contents, handle, block_type,
+                                     is_top_level_filter_block);
       }
       if (!s.ok()) {
         r->SetStatus(s);
@@ -1472,12 +1475,12 @@ Status BlockBasedTableBuilder::InsertBlockInCompressedCache(
 
 Status BlockBasedTableBuilder::InsertBlockInCacheHelper(
     const Slice& block_contents, const BlockHandle* handle,
-    BlockType block_type) {
+    BlockType block_type, bool is_top_level_filter_block) {
   Status s;
   if (block_type == BlockType::kData || block_type == BlockType::kIndex) {
     s = InsertBlockInCache<Block>(block_contents, handle, block_type);
   } else if (block_type == BlockType::kFilter) {
-    if (rep_->filter_builder->IsBlockBased()) {
+    if (rep_->filter_builder->IsBlockBased() || is_top_level_filter_block) {
      s = InsertBlockInCache<Block>(block_contents, handle, block_type);
     } else {
       s = InsertBlockInCache<ParsedFullFilterBlock>(block_contents, handle,
@@ -1564,8 +1567,14 @@ void BlockBasedTableBuilder::WriteFilterBlock(
       rep_->filter_builder->Finish(filter_block_handle, &s, &filter_data);
       assert(s.ok() || s.IsIncomplete());
       rep_->props.filter_size += filter_content.size();
+      bool top_level_filter_block = false;
+      if (s.ok() && rep_->table_options.partition_filters &&
+          !rep_->filter_builder->IsBlockBased()) {
+        top_level_filter_block = true;
+      }
       WriteRawBlock(filter_content, kNoCompression, &filter_block_handle,
-                    BlockType::kFilter);
+                    BlockType::kFilter, nullptr /*raw_contents*/,
+                    top_level_filter_block);
     }
     rep_->filter_builder->ResetFilterBitsBuilder();
   }
diff --git a/table/block_based/block_based_table_builder.h b/table/block_based/block_based_table_builder.h
index 690ae46d7..c5498eebe 100644
--- a/table/block_based/block_based_table_builder.h
+++ b/table/block_based/block_based_table_builder.h
@@ -119,7 +119,8 @@ class BlockBasedTableBuilder : public TableBuilder {
                       BlockType block_type);
   // Directly write data to the file.
   void WriteRawBlock(const Slice& data, CompressionType, BlockHandle* handle,
-                     BlockType block_type, const Slice* raw_data = nullptr);
+                     BlockType block_type, const Slice* raw_data = nullptr,
+                     bool is_top_level_filter_block = false);
 
   void SetupCacheKeyPrefix(const TableBuilderOptions& tbo);
 
@@ -129,7 +130,8 @@ class BlockBasedTableBuilder : public TableBuilder {
 
   Status InsertBlockInCacheHelper(const Slice& block_contents,
                                   const BlockHandle* handle,
-                                  BlockType block_type);
+                                  BlockType block_type,
+                                  bool is_top_level_filter_block);
 
   Status InsertBlockInCompressedCache(const Slice& block_contents,
                                       const CompressionType type,
diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py
index 4498fac76..c34aa805e 100644
--- a/tools/db_crashtest.py
+++ b/tools/db_crashtest.py
@@ -154,6 +154,7 @@ default_params = {
         [0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024]),
     "user_timestamp_size": 0,
     "secondary_cache_fault_one_in" : lambda: random.choice([0, 0, 32]),
+    "prepopulate_block_cache" : lambda: random.choice([0, 1]),
 }
 
 _TEST_DIR_ENV_VAR = 'TEST_TMPDIR'
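Note (not part of the patch): the option combination exercised by the new DBBlockCacheTest1 parameterization can be reproduced with the public RocksDB API. The sketch below is illustrative only; the database path, key, and value are arbitrary choices, and it assumes a RocksDB build that has BlockBasedTableOptions::PrepopulateBlockCache. Before this fix, flushing with kFlushOnly warming and partitioned filters inserted the top-level filter block into the block cache under the wrong type, and the subsequent read could segfault.

// Standalone sketch of the previously crashing configuration
// (illustrative: path and data below are arbitrary).
#include <cassert>
#include <string>

#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;

  rocksdb::BlockBasedTableOptions table_options;
  // Warm the block cache at flush time ...
  table_options.block_cache = rocksdb::NewLRUCache(32 << 20);
  table_options.prepopulate_block_cache =
      rocksdb::BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  // ... together with partitioned filters, which need a filter policy and
  // the two-level index, and with index/filter blocks kept in the cache.
  table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10));
  table_options.partition_filters = true;
  table_options.index_type =
      rocksdb::BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
  table_options.cache_index_and_filter_blocks = true;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/prepopulate_block_cache_demo", &db);
  assert(s.ok());

  // Flush writes the top-level filter block and, with kFlushOnly, inserts it
  // into the block cache; the read below then consults the warmed filter.
  s = db->Put(rocksdb::WriteOptions(), "key1", std::string(100, 'v'));
  assert(s.ok());
  s = db->Flush(rocksdb::FlushOptions());
  assert(s.ok());

  std::string value;
  s = db->Get(rocksdb::ReadOptions(), "key1", &value);
  assert(s.ok());

  delete db;
  return 0;
}

In db_stress, the new prepopulate_block_cache flag added above enables the same warming path (0 = kDisable, 1 = kFlushOnly), which is what the db_crashtest.py change randomizes.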