Add block checksum mismatch ticker stat (#11438)

Summary:
Added a ticker stat, `BLOCK_CHECKSUM_MISMATCH_COUNT`, to count how many block checksum verifications detected a mismatch.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11438

Test Plan: new unit test

Reviewed By: pdillinger

Differential Revision: D45788179

Pulled By: ajkr

fbshipit-source-id: e2b44eba7c23b3e110ebe69eaa78a710dec2590f
oxigraph-8.3.2
Andrew Kryczka 2 years ago committed by Facebook GitHub Bot
parent 47235dda9e
commit 113f3250f1
  1. 1
      HISTORY.md
  2. 40
      db/db_statistics_test.cc
  3. 6
      include/rocksdb/statistics.h
  4. 4
      java/rocksjni/portal.h
  5. 10
      java/src/main/java/org/rocksdb/TickerType.java
  6. 1
      monitoring/statistics.cc
  7. 5
      table/block_based/block_based_table_reader_test.cc
  8. 3
      table/block_based/reader_common.h
  9. 4
      table/block_fetcher.cc

@ -5,6 +5,7 @@
* Added `JemallocAllocatorOptions::num_arenas`. Setting `num_arenas > 1` may mitigate mutex contention in the allocator, particularly in scenarios where block allocations commonly bypass jemalloc tcache.
* Improve the operational safety of publishing a DB or SST files to many hosts by using different block cache hash seeds on different hosts. The exact behavior is controlled by new option `ShardedCacheOptions::hash_seed`, which also documents the solved problem in more detail.
* Introduced a new option `CompactionOptionsFIFO::file_temperature_age_thresholds` that allows FIFO compaction to compact files to different temperatures based on key age (#11428).
* Added a new ticker stat to count how many times RocksDB detected a corruption while verifying a block checksum: `BLOCK_CHECKSUM_MISMATCH_COUNT`.
### Public API Changes
* Add `MakeSharedCache()` construction functions to various cache Options objects, and deprecated the `NewWhateverCache()` functions with long parameter lists.

@ -242,6 +242,46 @@ TEST_F(DBStatisticsTest, VerifyChecksumReadStat) {
} }
} }
// Checks how `DB::VerifyChecksum()` moves the two block checksum tickers,
// `BLOCK_CHECKSUM_COMPUTE_COUNT` and `BLOCK_CHECKSUM_MISMATCH_COUNT`, across
// three states of the DB: WAL-only data, one healthy table file, and one
// corrupted table file.
TEST_F(DBStatisticsTest, BlockChecksumStats) {
  Options opts = CurrentOptions();
  opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  Reopen(opts);

  // Reads the current value of a single ticker from the statistics object.
  const auto ticker_count = [&opts](Tickers ticker) {
    return opts.statistics->getTickerCount(ticker);
  };

  // Scenario 0: the only data lives in the WAL. Nothing is verified, so both
  // tickers are required to stay at zero.
  ASSERT_OK(Put("foo", "value"));
  ASSERT_OK(db_->VerifyChecksum());
  ASSERT_EQ(0, ticker_count(BLOCK_CHECKSUM_COMPUTE_COUNT));
  ASSERT_EQ(0, ticker_count(BLOCK_CHECKSUM_MISMATCH_COUNT));

  // Scenario 1: after a flush, `VerifyChecksum()` opens a `TableReader` and
  // verifies each of the table's four blocks (meta-index, table properties,
  // index, and data block). All four verifications are expected to pass.
  ASSERT_OK(Flush());
  ASSERT_OK(opts.statistics->Reset());
  ASSERT_OK(db_->VerifyChecksum());
  ASSERT_EQ(4, ticker_count(BLOCK_CHECKSUM_COMPUTE_COUNT));
  ASSERT_EQ(0, ticker_count(BLOCK_CHECKSUM_MISMATCH_COUNT));

  // Scenario 2: the same table, now corrupted. The damaged block is the data
  // block, which is the fourth and last one verified, so all four
  // verifications still run and exactly one of them reports a mismatch.
  std::unordered_map<std::string, uint64_t> sst_files;
  ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files));
  ASSERT_EQ(1, sst_files.size());
  std::string sst_path = sst_files.begin()->first;
  // Relies on the data block being located at offset zero of the file.
  ASSERT_OK(test::CorruptFile(opts.env, sst_path, 0 /* offset */,
                              3 /* bytes_to_corrupt */));
  ASSERT_OK(opts.statistics->Reset());
  ASSERT_NOK(db_->VerifyChecksum());
  ASSERT_EQ(4, ticker_count(BLOCK_CHECKSUM_COMPUTE_COUNT));
  ASSERT_EQ(1, ticker_count(BLOCK_CHECKSUM_MISMATCH_COUNT));
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -394,7 +394,13 @@ enum Tickers : uint32_t {
NON_LAST_LEVEL_READ_BYTES, NON_LAST_LEVEL_READ_BYTES,
NON_LAST_LEVEL_READ_COUNT, NON_LAST_LEVEL_READ_COUNT,
// Number of block checksum verifications
BLOCK_CHECKSUM_COMPUTE_COUNT, BLOCK_CHECKSUM_COMPUTE_COUNT,
// Number of times RocksDB detected a corruption while verifying a block
// checksum. RocksDB does not remember corruptions that happened during user
// reads so the same block corruption may be detected multiple times.
BLOCK_CHECKSUM_MISMATCH_COUNT,
MULTIGET_COROUTINE_COUNT, MULTIGET_COROUTINE_COUNT,
// Integrated BlobDB specific stats // Integrated BlobDB specific stats

@ -5129,6 +5129,8 @@ class TickerTypeJni {
return -0x3A; return -0x3A;
case ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT: case ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT:
return -0x3B; return -0x3B;
case ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_MISMATCH_COUNT:
return -0x3C;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// 0x5F was the max value in the initial copy of tickers to Java. // 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep // Since these values are exposed directly to Java clients, we keep
@ -5490,6 +5492,8 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_MISS; return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_MISS;
case -0x3B: case -0x3B:
return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT; return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT;
case -0x3C:
return ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_MISMATCH_COUNT;
case 0x5F: case 0x5F:
// 0x5F was the max value in the initial copy of tickers to Java. // 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep // Since these values are exposed directly to Java clients, we keep

@ -708,6 +708,9 @@ public enum TickerType {
NON_LAST_LEVEL_READ_BYTES((byte) -0x2C), NON_LAST_LEVEL_READ_BYTES((byte) -0x2C),
NON_LAST_LEVEL_READ_COUNT((byte) -0x2D), NON_LAST_LEVEL_READ_COUNT((byte) -0x2D),
/**
* Number of block checksum verifications
*/
BLOCK_CHECKSUM_COMPUTE_COUNT((byte) -0x2E), BLOCK_CHECKSUM_COMPUTE_COUNT((byte) -0x2E),
/** /**
@ -754,6 +757,13 @@ public enum TickerType {
*/ */
TABLE_OPEN_PREFETCH_TAIL_HIT((byte) -0x3B), TABLE_OPEN_PREFETCH_TAIL_HIT((byte) -0x3B),
/**
* Number of times RocksDB detected a corruption while verifying a block
* checksum. RocksDB does not remember corruptions that happened during user
* reads so the same block corruption may be detected multiple times.
*/
BLOCK_CHECKSUM_MISMATCH_COUNT((byte) -0x3C),
TICKER_ENUM_MAX((byte) 0x5F); TICKER_ENUM_MAX((byte) 0x5F);
private final byte value; private final byte value;

@ -205,6 +205,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"}, {NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"},
{NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"}, {NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"},
{BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"}, {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"},
{BLOCK_CHECKSUM_MISMATCH_COUNT, "rocksdb.block.checksum.mismatch.count"},
{MULTIGET_COROUTINE_COUNT, "rocksdb.multiget.coroutine.count"}, {MULTIGET_COROUTINE_COUNT, "rocksdb.multiget.coroutine.count"},
{BLOB_DB_CACHE_MISS, "rocksdb.blobdb.cache.miss"}, {BLOB_DB_CACHE_MISS, "rocksdb.blobdb.cache.miss"},
{BLOB_DB_CACHE_HIT, "rocksdb.blobdb.cache.hit"}, {BLOB_DB_CACHE_HIT, "rocksdb.blobdb.cache.hit"},

@ -500,6 +500,7 @@ TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) {
std::unique_ptr<BlockBasedTable> table; std::unique_ptr<BlockBasedTable> table;
Options options; Options options;
options.statistics = CreateDBStatistics();
ImmutableOptions ioptions(options); ImmutableOptions ioptions(options);
FileOptions foptions; FileOptions foptions;
foptions.use_direct_reads = use_direct_reads_; foptions.use_direct_reads = use_direct_reads_;
@ -529,8 +530,12 @@ TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) {
static_cast<int>(handle.offset()), 128)); static_cast<int>(handle.offset()), 128));
NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table); NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table);
ASSERT_EQ(0,
options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT));
Status s = table->VerifyChecksum(ReadOptions(), Status s = table->VerifyChecksum(ReadOptions(),
TableReaderCaller::kUserVerifyChecksum); TableReaderCaller::kUserVerifyChecksum);
ASSERT_EQ(1,
options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT));
ASSERT_EQ(s.code(), Status::kCorruption); ASSERT_EQ(s.code(), Status::kCorruption);
} }

@ -24,6 +24,9 @@ inline MemoryAllocator* GetMemoryAllocator(
// Assumes block has a trailer as in format.h. file_name and offset provided // Assumes block has a trailer as in format.h. file_name and offset provided
// for generating a diagnostic message in returned status. // for generating a diagnostic message in returned status.
//
// Returns Status::OK() on checksum match, or Status::Corruption() on checksum
// mismatch.
extern Status VerifyBlockChecksum(ChecksumType type, const char* data, extern Status VerifyBlockChecksum(ChecksumType type, const char* data,
size_t block_size, size_t block_size,
const std::string& file_name, const std::string& file_name,

@ -37,6 +37,10 @@ inline void BlockFetcher::ProcessTrailerIfPresent() {
footer_.checksum_type(), slice_.data(), block_size_, footer_.checksum_type(), slice_.data(), block_size_,
file_->file_name(), handle_.offset())); file_->file_name(), handle_.offset()));
RecordTick(ioptions_.stats, BLOCK_CHECKSUM_COMPUTE_COUNT); RecordTick(ioptions_.stats, BLOCK_CHECKSUM_COMPUTE_COUNT);
if (!io_status_.ok()) {
assert(io_status_.IsCorruption());
RecordTick(ioptions_.stats, BLOCK_CHECKSUM_MISMATCH_COUNT);
}
} }
compression_type_ = compression_type_ =
BlockBasedTable::GetBlockCompressionType(slice_.data(), block_size_); BlockBasedTable::GetBlockCompressionType(slice_.data(), block_size_);

Loading…
Cancel
Save