diff --git a/HISTORY.md b/HISTORY.md
index daac871ab..a38c1fbc7 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -9,6 +9,7 @@
 * Fix a race in BackupEngine if RateLimiter is reconfigured during concurrent Restore operations.
 * Fix a bug on POSIX in which failure to create a lock file (e.g. out of space) can prevent future LockFile attempts in the same process on the same file from succeeding.
 * Fix a bug that backup_rate_limiter and restore_rate_limiter in BackupEngine could not limit read rates.
+* Fix the implementation of `prepopulate_block_cache = kFlushOnly` to only apply to flushes rather than to all generated files.
 * Fix WAL log data corruption when using DBOptions.manual_wal_flush(true) and WriteOptions.sync(true) together. The sync WAL should work with locked log_write_mutex_.
 * Add checks for validity of the IO uring completion queue entries, and fail the BlockBasedTableReader MultiGet sub-batch if there's an invalid completion
diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc
index 232e49270..5c43882ac 100644
--- a/db/db_block_cache_test.cc
+++ b/db/db_block_cache_test.cc
@@ -505,6 +505,11 @@ TEST_F(DBBlockCacheTest, WarmCacheWithDataBlocksDuringFlush) {
     ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
     ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));
   }
+  // Verify compaction not counted
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+                              /*end=*/nullptr));
+  EXPECT_EQ(kNumBlocks,
+            options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
 }
 
 // This test cache data, index and filter blocks during flush.
@@ -542,6 +547,18 @@ TEST_F(DBBlockCacheTest, WarmCacheWithBlocksDuringFlush) {
     ASSERT_EQ(i * 2,
               options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT));
   }
+  // Verify compaction not counted
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+                              /*end=*/nullptr));
+  EXPECT_EQ(kNumBlocks,
+            options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
+  // Index and filter blocks are automatically warmed when the new table file
+  // is automatically opened at the end of compaction. This is not easily
+  // disabled so results in the new index and filter blocks being warmed.
+  EXPECT_EQ(1 + kNumBlocks,
+            options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
+  EXPECT_EQ(1 + kNumBlocks,
+            options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
 }
 
 TEST_F(DBBlockCacheTest, DynamicallyWarmCacheDuringFlush) {
diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc
index d56530965..afac09fc7 100644
--- a/table/block_based/block_based_table_builder.cc
+++ b/table/block_based/block_based_table_builder.cc
@@ -33,6 +33,7 @@
 #include "rocksdb/flush_block_policy.h"
 #include "rocksdb/merge_operator.h"
 #include "rocksdb/table.h"
+#include "rocksdb/types.h"
 #include "table/block_based/block.h"
 #include "table/block_based/block_based_filter_block.h"
 #include "table/block_based/block_based_table_factory.h"
@@ -321,6 +322,7 @@ struct BlockBasedTableBuilder::Rep {
   size_t cache_key_prefix_size;
   char compressed_cache_key_prefix[BlockBasedTable::kMaxCacheKeyPrefixSize];
   size_t compressed_cache_key_prefix_size;
+  const TableFileCreationReason reason;
 
   BlockHandle pending_handle;  // Handle to add to index block
 
@@ -433,6 +435,7 @@ struct BlockBasedTableBuilder::Rep {
                                             !table_opt.block_align),
         cache_key_prefix_size(0),
         compressed_cache_key_prefix_size(0),
+        reason(tbo.reason),
         flush_block_policy(
             table_options.flush_block_policy_factory->NewFlushBlockPolicy(
                 table_options, data_block)),
@@ -482,11 +485,11 @@ struct BlockBasedTableBuilder::Rep {
 
     filter_context.info_log = ioptions.logger;
     filter_context.column_family_name = tbo.column_family_name;
-    filter_context.reason = tbo.reason;
+    filter_context.reason = reason;
 
     // Only populate other fields if known to be in LSM rather than
     // generating external SST file
-    if (tbo.reason != TableFileCreationReason::kMisc) {
+    if (reason != TableFileCreationReason::kMisc) {
       filter_context.compaction_style = ioptions.compaction_style;
       filter_context.num_levels = ioptions.num_levels;
       filter_context.level_at_creation = tbo.level_at_creation;
@@ -1263,8 +1266,20 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
     io_s = r->file->Append(Slice(trailer, kBlockTrailerSize));
     if (io_s.ok()) {
       assert(s.ok());
-      if (r->table_options.prepopulate_block_cache ==
-          BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly) {
+      bool warm_cache;
+      switch (r->table_options.prepopulate_block_cache) {
+        case BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly:
+          warm_cache = (r->reason == TableFileCreationReason::kFlush);
+          break;
+        case BlockBasedTableOptions::PrepopulateBlockCache::kDisable:
+          warm_cache = false;
+          break;
+        default:
+          // missing case
+          assert(false);
+          warm_cache = false;
+      }
+      if (warm_cache) {
         if (type == kNoCompression) {
           s = InsertBlockInCacheHelper(block_contents, handle, block_type);
         } else if (raw_block_contents != nullptr) {
diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index d0d3679c8..277c35a7d 100644
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc
@@ -992,6 +992,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
                              ? pin_top_level_index
                              : pin_unpartitioned;
   // prefetch the first level of index
+  // WART: this might be redundant (unnecessary cache hit) if !pin_index,
+  // depending on prepopulate_block_cache option
   const bool prefetch_index = prefetch_all || pin_index;
 
   std::unique_ptr<IndexReader> index_reader;
@@ -1020,6 +1022,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
                               ? pin_top_level_index
                               : pin_unpartitioned;
   // prefetch the first level of filter
+  // WART: this might be redundant (unnecessary cache hit) if !pin_filter,
+  // depending on prepopulate_block_cache option
   const bool prefetch_filter = prefetch_all || pin_filter;
 
   if (rep_->filter_policy) {
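
The option exercised by this patch is configured through BlockBasedTableOptions. For reference, a minimal usage sketch (not part of the patch; the database path, cache size, and demo flow are illustrative assumptions) showing how `prepopulate_block_cache = kFlushOnly` is typically enabled, so that with this fix only blocks written by memtable flushes are warmed in the block cache, while compaction outputs and externally generated SST files are not:

// Illustrative sketch only; not part of the patch.
#include <cassert>
#include <string>

#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/statistics.h"
#include "rocksdb/table.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Statistics make the warming visible via the BLOCK_CACHE_*_ADD tickers,
  // as the updated tests above do.
  options.statistics = rocksdb::CreateDBStatistics();

  rocksdb::BlockBasedTableOptions table_options;
  table_options.block_cache = rocksdb::NewLRUCache(64 << 20);  // 64 MiB
  // With this fix, kFlushOnly warms the block cache only for files created
  // by flush, not for compaction outputs or external SST files.
  table_options.prepopulate_block_cache =
      rocksdb::BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/prepopulate_block_cache_demo", &db);
  assert(s.ok());

  // A flushed write leaves its new data blocks in the block cache, so an
  // immediate read back is served from the cache.
  s = db->Put(rocksdb::WriteOptions(), "key", "value");
  assert(s.ok());
  s = db->Flush(rocksdb::FlushOptions());
  assert(s.ok());

  std::string value;
  s = db->Get(rocksdb::ReadOptions(), "key", &value);
  assert(s.ok() && value == "value");

  delete db;
  return 0;
}

Checking BLOCK_CACHE_DATA_ADD on the statistics object after a CompactRange, as the new test assertions do, is one way to confirm that compaction no longer inserts data blocks under kFlushOnly.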