diff --git a/HISTORY.md b/HISTORY.md index 13c8aa6a5..db00424d0 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,6 +10,7 @@ ### New Features * RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions. +* Added a ticker statistic, "rocksdb.verify_checksum.read.bytes", reporting how many bytes were read from file to serve `VerifyChecksum()` and `VerifyFileChecksums()` queries. ### Public API change * Remove obsolete implementation details FullKey and ParseFullKey from public API diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 40df71a75..c05bde857 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -4936,6 +4936,11 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) { Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, bool use_file_checksum) { + // `bytes_read` stat is enabled based on compile-time support and cannot + // be dynamically toggled. So we do not need to worry about `PerfLevel` + // here, unlike many other `IOStatsContext` / `PerfContext` stats. + uint64_t prev_bytes_read = IOSTATS(bytes_read); + Status s; if (use_file_checksum) { @@ -4990,6 +4995,9 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_, read_options, fname); } + RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES, + IOSTATS(bytes_read) - prev_bytes_read); + prev_bytes_read = IOSTATS(bytes_read); } } @@ -5004,6 +5012,9 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, s = VerifyFullFileChecksum(meta->GetChecksumValue(), meta->GetChecksumMethod(), blob_file_name, read_options); + RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES, + IOSTATS(bytes_read) - prev_bytes_read); + prev_bytes_read = IOSTATS(bytes_read); if (!s.ok()) { break; } @@ -5035,6 +5046,8 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, cfd->UnrefAndTryDelete(); } } + RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES, + IOSTATS(bytes_read) - prev_bytes_read); return s; } diff --git a/db/db_statistics_test.cc b/db/db_statistics_test.cc index 6cd0440f4..91ae972cb 100644 --- a/db/db_statistics_test.cc +++ b/db/db_statistics_test.cc @@ -155,6 +155,55 @@ TEST_F(DBStatisticsTest, ExcludeTickers) { ASSERT_GT(options.statistics->getTickerCount(BYTES_READ), 0); } +#ifndef ROCKSDB_LITE + +TEST_F(DBStatisticsTest, VerifyChecksumReadStat) { + Options options = CurrentOptions(); + options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + Reopen(options); + + // Expected to be populated regardless of `PerfLevel` in user thread + SetPerfLevel(kDisable); + + { + // Scenario 0: only WAL data. Not verified so require ticker to be zero. + ASSERT_OK(Put("foo", "value")); + ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); + ASSERT_OK(db_->VerifyChecksum()); + ASSERT_EQ(0, + options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES)); + } + + // Create one SST. + ASSERT_OK(Flush()); + std::unordered_map table_files; + uint64_t table_files_size = 0; + GetAllDataFiles(kTableFile, &table_files, &table_files_size); + + { + // Scenario 1: Table verified in `VerifyFileChecksums()`. This should read + // the whole file so we require the ticker stat exactly matches the file + // size. + ASSERT_OK(options.statistics->Reset()); + ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); + ASSERT_EQ(table_files_size, + options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES)); + } + + { + // Scenario 2: Table verified in `VerifyChecksum()`. This opens a + // `TableReader` to verify each block. It can involve duplicate reads of the + // same data so we set a lower-bound only. + ASSERT_OK(options.statistics->Reset()); + ASSERT_OK(db_->VerifyChecksum()); + ASSERT_GE(options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES), + table_files_size); + } +} + +#endif // !ROCKSDB_LITE + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 45da88121..83f6ddee0 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -18,12 +18,13 @@ namespace ROCKSDB_NAMESPACE { /** - * Keep adding ticker's here. - * 1. Any ticker should be added before TICKER_ENUM_MAX. + * Keep adding tickers here. + * 1. Any ticker should be added immediately before TICKER_ENUM_MAX, taking + * over its old value. * 2. Add a readable string in TickersNameMap below for the newly added ticker. * 3. Add a corresponding enum value to TickerType.java in the java API * 4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType - * and toCppTickers + * and toCppTickers */ enum Tickers : uint32_t { // total block cache misses @@ -404,6 +405,9 @@ enum Tickers : uint32_t { // Secondary cache statistics SECONDARY_CACHE_HITS, + // Bytes read by `VerifyChecksum()` and `VerifyFileChecksums()` APIs. + VERIFY_CHECKSUM_READ_BYTES, + TICKER_ENUM_MAX }; diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 5c9be330e..47a7714cd 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -4876,7 +4876,7 @@ class TickerTypeJni { case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND: return 0x5E; case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED: - // -0x01 to fixate the new value that incorrectly changed TICKER_ENUM_MAX. + // -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX). return -0x01; case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED: return 0x60; @@ -5002,8 +5002,17 @@ class TickerTypeJni { return -0x1D; case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS: return -0x1E; + case ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES: + return -0x1F; case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: - // 0x5F for backwards compatibility on current minor version. + // 0x5F was the max value in the initial copy of tickers to Java. + // Since these values are exposed directly to Java clients, we keep + // the value the same forever. + // + // TODO: This particular case seems confusing and unnecessary to pin the + // value since it's meant to be the number of tickers, not an actual + // ticker value. But we aren't yet in a position to fix it since the + // number of tickers doesn't fit in the Java representation (jbyte). return 0x5F; default: // undefined/default @@ -5206,7 +5215,7 @@ class TickerTypeJni { case 0x5E: return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND; case -0x01: - // -0x01 to fixate the new value that incorrectly changed TICKER_ENUM_MAX. + // -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX). return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED; case 0x60: return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED; @@ -5334,8 +5343,17 @@ class TickerTypeJni { return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_GARBAGE_BYTES_AT_FLUSH; case -0x1E: return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS; + case -0x1F: + return ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES; case 0x5F: - // 0x5F for backwards compatibility on current minor version. + // 0x5F was the max value in the initial copy of tickers to Java. + // Since these values are exposed directly to Java clients, we keep + // the value the same forever. + // + // TODO: This particular case seems confusing and unnecessary to pin the + // value since it's meant to be the number of tickers, not an actual + // ticker value. But we aren't yet in a position to fix it since the + // number of tickers doesn't fit in the Java representation (jbyte). return ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX; default: diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index 7b30c7fcc..d2e81ad96 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -206,6 +206,7 @@ const std::vector> TickersNameMap = { {MEMTABLE_GARBAGE_BYTES_AT_FLUSH, "rocksdb.memtable.garbage.bytes.at.flush"}, {SECONDARY_CACHE_HITS, "rocksdb.secondary.cache.hits"}, + {VERIFY_CHECKSUM_READ_BYTES, "rocksdb.verify_checksum.read.bytes"}, }; const std::vector> HistogramsNameMap = {