Bytes read stat for `VerifyChecksum()` and `VerifyFileChecksums()` APIs (#8741)

Summary:
- Clarified some comments on compatibility for adding new ticker stats
- Added read I/O stats for `VerifyChecksum()` and `VerifyFileChecksums()` APIs

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8741

Test Plan: new unit test

Reviewed By: zhichao-cao

Differential Revision: D30708578

Pulled By: ajkr

fbshipit-source-id: d06b961f7e199ae92c266b683e39870aa8f63449
main
Andrew Kryczka 3 years ago committed by Facebook GitHub Bot
parent 0ef88538c6
commit 941543721d
  1. 1
      HISTORY.md
  2. 13
      db/db_impl/db_impl.cc
  3. 49
      db/db_statistics_test.cc
  4. 8
      include/rocksdb/statistics.h
  5. 26
      java/rocksjni/portal.h
  6. 1
      monitoring/statistics.cc

@ -10,6 +10,7 @@
### New Features ### New Features
* RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions. * RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions.
* Added a ticker statistic, "rocksdb.verify_checksum.read.bytes", reporting how many bytes were read from file to serve `VerifyChecksum()` and `VerifyFileChecksums()` queries.
### Public API change ### Public API change
* Remove obsolete implementation details FullKey and ParseFullKey from public API * Remove obsolete implementation details FullKey and ParseFullKey from public API

@ -4936,6 +4936,11 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) {
Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
bool use_file_checksum) { bool use_file_checksum) {
// `bytes_read` stat is enabled based on compile-time support and cannot
// be dynamically toggled. So we do not need to worry about `PerfLevel`
// here, unlike many other `IOStatsContext` / `PerfContext` stats.
uint64_t prev_bytes_read = IOSTATS(bytes_read);
Status s; Status s;
if (use_file_checksum) { if (use_file_checksum) {
@ -4990,6 +4995,9 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_, s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_,
read_options, fname); read_options, fname);
} }
RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
IOSTATS(bytes_read) - prev_bytes_read);
prev_bytes_read = IOSTATS(bytes_read);
} }
} }
@ -5004,6 +5012,9 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
s = VerifyFullFileChecksum(meta->GetChecksumValue(), s = VerifyFullFileChecksum(meta->GetChecksumValue(),
meta->GetChecksumMethod(), blob_file_name, meta->GetChecksumMethod(), blob_file_name,
read_options); read_options);
RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
IOSTATS(bytes_read) - prev_bytes_read);
prev_bytes_read = IOSTATS(bytes_read);
if (!s.ok()) { if (!s.ok()) {
break; break;
} }
@ -5035,6 +5046,8 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options,
cfd->UnrefAndTryDelete(); cfd->UnrefAndTryDelete();
} }
} }
RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
IOSTATS(bytes_read) - prev_bytes_read);
return s; return s;
} }

@ -155,6 +155,55 @@ TEST_F(DBStatisticsTest, ExcludeTickers) {
ASSERT_GT(options.statistics->getTickerCount(BYTES_READ), 0); ASSERT_GT(options.statistics->getTickerCount(BYTES_READ), 0);
} }
#ifndef ROCKSDB_LITE
TEST_F(DBStatisticsTest, VerifyChecksumReadStat) {
Options options = CurrentOptions();
options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
Reopen(options);
// Expected to be populated regardless of `PerfLevel` in user thread
SetPerfLevel(kDisable);
{
// Scenario 0: only WAL data. Not verified so require ticker to be zero.
ASSERT_OK(Put("foo", "value"));
ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
ASSERT_OK(db_->VerifyChecksum());
ASSERT_EQ(0,
options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES));
}
// Create one SST.
ASSERT_OK(Flush());
std::unordered_map<std::string, uint64_t> table_files;
uint64_t table_files_size = 0;
GetAllDataFiles(kTableFile, &table_files, &table_files_size);
{
// Scenario 1: Table verified in `VerifyFileChecksums()`. This should read
// the whole file so we require the ticker stat exactly matches the file
// size.
ASSERT_OK(options.statistics->Reset());
ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
ASSERT_EQ(table_files_size,
options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES));
}
{
// Scenario 2: Table verified in `VerifyChecksum()`. This opens a
// `TableReader` to verify each block. It can involve duplicate reads of the
// same data so we set a lower-bound only.
ASSERT_OK(options.statistics->Reset());
ASSERT_OK(db_->VerifyChecksum());
ASSERT_GE(options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES),
table_files_size);
}
}
#endif // !ROCKSDB_LITE
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) { int main(int argc, char** argv) {

@ -18,8 +18,9 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
/** /**
* Keep adding ticker's here. * Keep adding tickers here.
* 1. Any ticker should be added before TICKER_ENUM_MAX. * 1. Any ticker should be added immediately before TICKER_ENUM_MAX, taking
* over its old value.
* 2. Add a readable string in TickersNameMap below for the newly added ticker. * 2. Add a readable string in TickersNameMap below for the newly added ticker.
* 3. Add a corresponding enum value to TickerType.java in the java API * 3. Add a corresponding enum value to TickerType.java in the java API
* 4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType * 4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType
@ -404,6 +405,9 @@ enum Tickers : uint32_t {
// Secondary cache statistics // Secondary cache statistics
SECONDARY_CACHE_HITS, SECONDARY_CACHE_HITS,
// Bytes read by `VerifyChecksum()` and `VerifyFileChecksums()` APIs.
VERIFY_CHECKSUM_READ_BYTES,
TICKER_ENUM_MAX TICKER_ENUM_MAX
}; };

@ -4876,7 +4876,7 @@ class TickerTypeJni {
case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND: case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND:
return 0x5E; return 0x5E;
case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED: case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED:
// -0x01 to fixate the new value that incorrectly changed TICKER_ENUM_MAX. // -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX).
return -0x01; return -0x01;
case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED: case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED:
return 0x60; return 0x60;
@ -5002,8 +5002,17 @@ class TickerTypeJni {
return -0x1D; return -0x1D;
case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS: case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS:
return -0x1E; return -0x1E;
case ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES:
return -0x1F;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// 0x5F for backwards compatibility on current minor version. // 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep
// the value the same forever.
//
// TODO: This particular case seems confusing and unnecessary to pin the
// value since it's meant to be the number of tickers, not an actual
// ticker value. But we aren't yet in a position to fix it since the
// number of tickers doesn't fit in the Java representation (jbyte).
return 0x5F; return 0x5F;
default: default:
// undefined/default // undefined/default
@ -5206,7 +5215,7 @@ class TickerTypeJni {
case 0x5E: case 0x5E:
return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND; return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND;
case -0x01: case -0x01:
// -0x01 to fixate the new value that incorrectly changed TICKER_ENUM_MAX. // -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX).
return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED; return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED;
case 0x60: case 0x60:
return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED; return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED;
@ -5334,8 +5343,17 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_GARBAGE_BYTES_AT_FLUSH; return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_GARBAGE_BYTES_AT_FLUSH;
case -0x1E: case -0x1E:
return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS; return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS;
case -0x1F:
return ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES;
case 0x5F: case 0x5F:
// 0x5F for backwards compatibility on current minor version. // 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep
// the value the same forever.
//
// TODO: This particular case seems confusing and unnecessary to pin the
// value since it's meant to be the number of tickers, not an actual
// ticker value. But we aren't yet in a position to fix it since the
// number of tickers doesn't fit in the Java representation (jbyte).
return ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX; return ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX;
default: default:

@ -206,6 +206,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{MEMTABLE_GARBAGE_BYTES_AT_FLUSH, {MEMTABLE_GARBAGE_BYTES_AT_FLUSH,
"rocksdb.memtable.garbage.bytes.at.flush"}, "rocksdb.memtable.garbage.bytes.at.flush"},
{SECONDARY_CACHE_HITS, "rocksdb.secondary.cache.hits"}, {SECONDARY_CACHE_HITS, "rocksdb.secondary.cache.hits"},
{VERIFY_CHECKSUM_READ_BYTES, "rocksdb.verify_checksum.read.bytes"},
}; };
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = { const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {

Loading…
Cancel
Save