diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 9d3e89009..51cc8079b 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -431,6 +431,8 @@ enum Tickers : uint32_t { NON_LAST_LEVEL_READ_BYTES, NON_LAST_LEVEL_READ_COUNT, + BLOCK_CHECKSUM_COMPUTE_COUNT, + TICKER_ENUM_MAX }; diff --git a/java/Makefile b/java/Makefile index 8bbd661e9..f580fda27 100644 --- a/java/Makefile +++ b/java/Makefile @@ -187,6 +187,7 @@ JAVA_TESTS = \ org.rocksdb.TtlDBTest\ org.rocksdb.StatisticsTest\ org.rocksdb.StatisticsCollectorTest\ + org.rocksdb.VerifyChecksumsTest\ org.rocksdb.WalFilterTest\ org.rocksdb.WALRecoveryModeTest\ org.rocksdb.WriteBatchHandlerTest\ diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index f030c947e..204370248 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -5086,6 +5086,8 @@ class TickerTypeJni { return -0x2C; case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT: return -0x2D; + case ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT: + return -0x2E; case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: // 0x5F was the max value in the initial copy of tickers to Java. // Since these values are exposed directly to Java clients, we keep @@ -5455,6 +5457,8 @@ class TickerTypeJni { return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES; case -0x2D: return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT; + case -0x2E: + return ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT; case 0x5F: // 0x5F was the max value in the initial copy of tickers to Java. // Since these values are exposed directly to Java clients, we keep diff --git a/java/src/main/java/org/rocksdb/TickerType.java b/java/src/main/java/org/rocksdb/TickerType.java index 4f8ad1e40..a6ad31154 100644 --- a/java/src/main/java/org/rocksdb/TickerType.java +++ b/java/src/main/java/org/rocksdb/TickerType.java @@ -804,6 +804,8 @@ public enum TickerType { NON_LAST_LEVEL_READ_BYTES((byte) -0x2C), NON_LAST_LEVEL_READ_COUNT((byte) -0x2D), + BLOCK_CHECKSUM_COMPUTE_COUNT((byte) -0x2E), + TICKER_ENUM_MAX((byte) 0x5F); private final byte value; diff --git a/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java b/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java new file mode 100644 index 000000000..ddc2a456f --- /dev/null +++ b/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java @@ -0,0 +1,213 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class VerifyChecksumsTest { + @ClassRule + public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = + new RocksNativeLibraryResource(); + + @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); + + /** + * Class to factor out the specific DB operations within the test + */ + abstract static class Operations { + final int kv_count; + final List elements = new ArrayList<>(); + final List sortedElements = new ArrayList<>(); + + Operations(final int kv_count) { + this.kv_count = kv_count; + for (int i = 0; i < kv_count; i++) elements.add(MessageFormat.format("{0,number,#}", i)); + sortedElements.addAll(elements); + Collections.sort(sortedElements); + } + + void fill(final RocksDB db) throws RocksDBException { + for (int i = 0; i < kv_count; i++) { + final String key = MessageFormat.format("key{0}", elements.get(i)); + final String value = MessageFormat.format("value{0}", elements.get(i)); + // noinspection ObjectAllocationInLoop + db.put(key.getBytes(), value.getBytes()); + } + db.flush(new FlushOptions()); + } + + @SuppressWarnings("ObjectAllocationInLoop") + void get(final RocksDB db, final boolean verifyFlag) throws RocksDBException { + try (final ReadOptions readOptions = new ReadOptions()) { + readOptions.setReadaheadSize(32 * 1024); + readOptions.setFillCache(false); + readOptions.setVerifyChecksums(verifyFlag); + + for (int i = 0; i < kv_count / 10; i++) { + @SuppressWarnings("UnsecureRandomNumberGeneration") + final int index = Double.valueOf(Math.random() * kv_count).intValue(); + final String key = MessageFormat.format("key{0}", sortedElements.get(index)); + final String expectedValue = MessageFormat.format("value{0}", sortedElements.get(index)); + + final byte[] value = db.get(readOptions, key.getBytes()); + assertThat(value).isEqualTo(expectedValue.getBytes()); + } + } + } + + @SuppressWarnings("ObjectAllocationInLoop") + void multiGet(final RocksDB db, final boolean verifyFlag) throws RocksDBException { + try (final ReadOptions readOptions = new ReadOptions()) { + readOptions.setReadaheadSize(32 * 1024); + readOptions.setFillCache(false); + readOptions.setVerifyChecksums(verifyFlag); + + final List keys = new ArrayList<>(); + final List expectedValues = new ArrayList<>(); + + for (int i = 0; i < kv_count / 10; i++) { + @SuppressWarnings("UnsecureRandomNumberGeneration") + final int index = Double.valueOf(Math.random() * kv_count).intValue(); + keys.add(MessageFormat.format("key{0}", sortedElements.get(index)).getBytes()); + + expectedValues.add(MessageFormat.format("value{0}", sortedElements.get(index))); + } + + final List values = db.multiGetAsList(readOptions, keys); + for (int i = 0; i < keys.size(); i++) { + assertThat(values.get(i)).isEqualTo(expectedValues.get(i).getBytes()); + } + } + } + + void iterate(final RocksDB db, final boolean verifyFlag) throws RocksDBException { + final ReadOptions readOptions = new ReadOptions(); + readOptions.setReadaheadSize(32 * 1024); + readOptions.setFillCache(false); + readOptions.setVerifyChecksums(verifyFlag); + int i = 0; + try (final RocksIterator rocksIterator = db.newIterator(readOptions)) { + rocksIterator.seekToFirst(); + rocksIterator.status(); + while (rocksIterator.isValid()) { + final byte[] key = rocksIterator.key(); + final byte[] value = rocksIterator.value(); + // noinspection ObjectAllocationInLoop + assertThat(key).isEqualTo( + (MessageFormat.format("key{0}", sortedElements.get(i))).getBytes()); + // noinspection ObjectAllocationInLoop + assertThat(value).isEqualTo( + (MessageFormat.format("value{0}", sortedElements.get(i))).getBytes()); + rocksIterator.next(); + rocksIterator.status(); + i++; + } + } + assertThat(i).isEqualTo(kv_count); + } + + abstract void performOperations(final RocksDB db, final boolean verifyFlag) + throws RocksDBException; + } + + private static final int KV_COUNT = 10000; + + /** + * Run some operations and count the TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT before and after + * It should GO UP when the read options have checksum verification turned on. + * It shoulld REMAIN UNCHANGED when the read options have checksum verification turned off. + * As the read options refer only to the read operations, there are still a few checksums + * performed outside this (blocks are getting loaded for lots of reasons, not aways directly due + * to reads) but this test provides a good enough proxy for whether the flag is being noticed. + * + * @param operations the DB reading operations to perform which affect the checksum stats + * + * @throws RocksDBException + */ + private void verifyChecksums(final Operations operations) throws RocksDBException { + final String dbPath = dbFolder.getRoot().getAbsolutePath(); + + // noinspection SingleStatementInBlock + try (final Statistics statistics = new Statistics(); + final Options options = new Options().setCreateIfMissing(true).setStatistics(statistics)) { + try (final RocksDB db = RocksDB.open(options, dbPath)) { + // 0 + System.out.println(MessageFormat.format( + "newly open {0}", statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT))); + operations.fill(db); + // + System.out.println(MessageFormat.format( + "flushed {0}", statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT))); + } + + // 2 + System.out.println(MessageFormat.format("closed-after-write {0}", + statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT))); + + for (final boolean verifyFlag : new boolean[] {false, true, false, true}) { + try (final RocksDB db = RocksDB.open(options, dbPath)) { + final long beforeOperationsCount = + statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT); + System.out.println(MessageFormat.format("re-opened {0}", beforeOperationsCount)); + operations.performOperations(db, verifyFlag); + final long afterOperationsCount = + statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT); + if (verifyFlag) { + // We don't need to be exact - we are checking that the checksums happen + // exactly how many depends on block size etc etc, so may not be entirely stable + System.out.println(MessageFormat.format("verify=true {0}", afterOperationsCount)); + assertThat(afterOperationsCount).isGreaterThan(beforeOperationsCount + 20); + } else { + System.out.println(MessageFormat.format("verify=false {0}", afterOperationsCount)); + assertThat(afterOperationsCount).isEqualTo(beforeOperationsCount); + } + } + } + } + } + + @Test + public void verifyChecksumsInIteration() throws RocksDBException { + // noinspection AnonymousInnerClassMayBeStatic + verifyChecksums(new Operations(KV_COUNT) { + @Override + void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException { + iterate(db, verifyFlag); + } + }); + } + + @Test + public void verifyChecksumsGet() throws RocksDBException { + // noinspection AnonymousInnerClassMayBeStatic + verifyChecksums(new Operations(KV_COUNT) { + @Override + void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException { + get(db, verifyFlag); + } + }); + } + + @Test + public void verifyChecksumsMultiGet() throws RocksDBException { + // noinspection AnonymousInnerClassMayBeStatic + verifyChecksums(new Operations(KV_COUNT) { + @Override + void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException { + multiGet(db, verifyFlag); + } + }); + } +} diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index 4f361a739..99f0a3e36 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -226,7 +226,7 @@ const std::vector> TickersNameMap = { {LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"}, {NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"}, {NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"}, -}; + {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"}}; const std::vector> HistogramsNameMap = { {DB_GET, "rocksdb.db.get.micros"}, diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 9c1ef31dc..a1186343e 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -2597,6 +2597,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options, uncompression_dict_status = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( nullptr /* prefetch_buffer */, no_io, + read_options.verify_checksums, sst_file_range.begin()->get_context, &lookup_context, &uncompression_dict); uncompression_dict_inited = true; @@ -3442,6 +3443,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) { CachableEntry uncompression_dict; s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( nullptr /* prefetch_buffer */, false /* no_io */, + false, /* verify_checksums */ nullptr /* get_context */, nullptr /* lookup_context */, &uncompression_dict); if (!s.ok()) { diff --git a/table/block_based/block_based_table_reader_impl.h b/table/block_based/block_based_table_reader_impl.h index 39930e719..6bb384e52 100644 --- a/table/block_based/block_based_table_reader_impl.h +++ b/table/block_based/block_based_table_reader_impl.h @@ -39,8 +39,8 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( if (rep_->uncompression_dict_reader) { const bool no_io = (ro.read_tier == kBlockCacheTier); s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( - prefetch_buffer, no_io, get_context, lookup_context, - &uncompression_dict); + prefetch_buffer, no_io, ro.verify_checksums, get_context, + lookup_context, &uncompression_dict); if (!s.ok()) { iter->Invalidate(s); return iter; diff --git a/table/block_based/uncompression_dict_reader.cc b/table/block_based/uncompression_dict_reader.cc index e2ef575cc..756422af0 100644 --- a/table/block_based/uncompression_dict_reader.cc +++ b/table/block_based/uncompression_dict_reader.cc @@ -76,8 +76,8 @@ Status UncompressionDictReader::ReadUncompressionDictionary( } Status UncompressionDictReader::GetOrReadUncompressionDictionary( - FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context, - BlockCacheLookupContext* lookup_context, + FilePrefetchBuffer* prefetch_buffer, bool no_io, bool verify_checksums, + GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* uncompression_dict) const { assert(uncompression_dict); @@ -90,6 +90,7 @@ Status UncompressionDictReader::GetOrReadUncompressionDictionary( if (no_io) { read_options.read_tier = kBlockCacheTier; } + read_options.verify_checksums = verify_checksums; return ReadUncompressionDictionary(table_, prefetch_buffer, read_options, cache_dictionary_blocks(), get_context, diff --git a/table/block_based/uncompression_dict_reader.h b/table/block_based/uncompression_dict_reader.h index e8801e815..daac82cfb 100644 --- a/table/block_based/uncompression_dict_reader.h +++ b/table/block_based/uncompression_dict_reader.h @@ -31,8 +31,8 @@ class UncompressionDictReader { std::unique_ptr* uncompression_dict_reader); Status GetOrReadUncompressionDictionary( - FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context, - BlockCacheLookupContext* lookup_context, + FilePrefetchBuffer* prefetch_buffer, bool no_io, bool verify_checksums, + GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* uncompression_dict) const; size_t ApproximateMemoryUsage() const; diff --git a/table/block_fetcher.cc b/table/block_fetcher.cc index dbcdef74c..889ec8115 100644 --- a/table/block_fetcher.cc +++ b/table/block_fetcher.cc @@ -35,6 +35,7 @@ inline void BlockFetcher::ProcessTrailerIfPresent() { io_status_ = status_to_io_status(VerifyBlockChecksum( footer_.checksum_type(), slice_.data(), block_size_, file_->file_name(), handle_.offset())); + RecordTick(ioptions_.stats, BLOCK_CHECKSUM_COMPUTE_COUNT); } compression_type_ = BlockBasedTable::GetBlockCompressionType(slice_.data(), block_size_);