From 27c9705ac46e9f1f9855510bc7ec19eb167eb43d Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 21 Oct 2022 18:09:12 -0700 Subject: [PATCH] Use kXXH3 as default checksum (CPU efficiency) (#10778) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Since this has been supported for about a year, I think it's time to make it the default. This should improve CPU efficiency slightly on most hardware. A current DB performance comparison using buck+clang build: ``` TEST_TMPDIR=/dev/shm ./db_bench -checksum_type={1,4} -benchmarks=fillseq[-X1000] -num=3000000 -disable_wal ``` kXXH3 (+0.2% DB write throughput): `fillseq [AVG 1000 runs] : 822149 (± 1004) ops/sec; 91.0 (± 0.1) MB/sec` kCRC32c: `fillseq [AVG 1000 runs] : 820484 (± 1203) ops/sec; 90.8 (± 0.1) MB/sec` Micro benchmark comparison: ``` ./db_bench --benchmarks=xxh3[-X20],crc32c[-X20] ``` Machine 1, buck+clang build: `xxh3 [AVG 20 runs] : 3358616 (± 19091) ops/sec; 13119.6 (± 74.6) MB/sec` `crc32c [AVG 20 runs] : 2578725 (± 7742) ops/sec; 10073.1 (± 30.2) MB/sec` Machine 2, make+gcc build, DEBUG_LEVEL=0 PORTABLE=0: `xxh3 [AVG 20 runs] : 6182084 (± 137223) ops/sec; 24148.8 (± 536.0) MB/sec` `crc32c [AVG 20 runs] : 5032465 (± 42454) ops/sec; 19658.1 (± 165.8) MB/sec` Pull Request resolved: https://github.com/facebook/rocksdb/pull/10778 Test Plan: make check, unit tests updated Reviewed By: ajkr Differential Revision: D40112510 Pulled By: pdillinger fbshipit-source-id: e59a8d50a60346137732f8668ba7cfac93be2b37 --- HISTORY.md | 1 + db/db_test_util.cc | 6 ++++-- db/db_test_util.h | 2 +- include/rocksdb/table.h | 2 +- table/table_test.cc | 3 ++- tools/check_format_compatible.sh | 4 ++-- 6 files changed, 11 insertions(+), 7 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index d6b107594..338571fe1 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -34,6 +34,7 @@ * Sanitize min_write_buffer_number_to_merge to 1 if atomic flush is enabled to prevent unexpected data loss when WAL is disabled in a multi-column-family setting (#10773). ### Public API changes +* Make kXXH3 checksum the new default, because it is faster on common hardware, especially with kCRC32c affected by a performance bug in some versions of clang (https://github.com/facebook/rocksdb/issues/9891). DBs written with this new setting can be read by RocksDB 6.27 and newer. * Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Introduced an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. More details in rocksdb/includb/block_cache_trace_writer.h. ## 7.7.0 (09/18/2022) diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 9ade373e5..a6e36ad4c 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -487,8 +487,10 @@ Options DBTestBase::GetOptions( case kInfiniteMaxOpenFiles: options.max_open_files = -1; break; - case kXXH3Checksum: { - table_options.checksum = kXXH3; + case kCRC32cChecksum: { + // Old default was CRC32c, but XXH3 (new default) is faster on common + // hardware + table_options.checksum = kCRC32c; // Thrown in here for basic coverage: options.DisableExtraChecks(); break; diff --git a/db/db_test_util.h b/db/db_test_util.h index 73051d032..de3541a3c 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -1048,7 +1048,7 @@ class DBTestBase : public testing::Test { kUniversalCompactionMultiLevel = 20, kCompressedBlockCache = 21, kInfiniteMaxOpenFiles = 22, - kXXH3Checksum = 23, + kCRC32cChecksum = 23, kFIFOCompaction = 24, kOptimizeFiltersForHits = 25, kRowCache = 26, diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index ab6632045..c5a545eef 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -251,7 +251,7 @@ struct BlockBasedTableOptions { // Use the specified checksum type. Newly created table files will be // protected with this checksum type. Old table files will still be readable, // even though they have different checksum type. - ChecksumType checksum = kCRC32c; + ChecksumType checksum = kXXH3; // Disable block cache. If this is set to true, // then no block cache should be used, and the block_cache should diff --git a/table/table_test.cc b/table/table_test.cc index 5759a4515..65b98ffba 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -2256,7 +2256,8 @@ TEST_P(BlockBasedTableTest, BadChecksumType) { // Corrupt checksum type (123 is invalid) auto& sink = *c.TEST_GetSink(); size_t len = sink.contents_.size(); - ASSERT_EQ(sink.contents_[len - Footer::kNewVersionsEncodedLength], kCRC32c); + ASSERT_EQ(sink.contents_[len - Footer::kNewVersionsEncodedLength], + table_options.checksum); sink.contents_[len - Footer::kNewVersionsEncodedLength] = char{123}; // (Re-)Open table file with bad checksum type diff --git a/tools/check_format_compatible.sh b/tools/check_format_compatible.sh index 0d6694042..aaf21a0d7 100755 --- a/tools/check_format_compatible.sh +++ b/tools/check_format_compatible.sh @@ -125,7 +125,7 @@ EOF # To check for DB forward compatibility with loading options (old version # reading data from new), as well as backward compatibility -declare -a db_forward_with_options_refs=("6.6.fb" "6.7.fb" "6.8.fb" "6.9.fb" "6.10.fb" "6.11.fb" "6.12.fb" "6.13.fb" "6.14.fb" "6.15.fb" "6.16.fb" "6.17.fb" "6.18.fb" "6.19.fb" "6.20.fb" "6.21.fb" "6.22.fb" "6.23.fb" "6.24.fb" "6.25.fb" "6.26.fb" "6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb") +declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb") # To check for DB forward compatibility without loading options (in addition # to the "with loading options" set), as well as backward compatibility declare -a db_forward_no_options_refs=() # N/A at the moment @@ -133,7 +133,7 @@ declare -a db_forward_no_options_refs=() # N/A at the moment # To check for SST ingestion backward compatibility (new version reading # data from old) (ldb ingest_extern_sst added in 5.16.x, back-ported to # 5.14.x, 5.15.x) -declare -a ext_backward_only_refs=("5.14.fb" "5.15.fb" "5.16.fb" "5.17.fb" "5.18.fb" "6.0.fb" "6.1.fb" "6.2.fb" "6.3.fb" "6.4.fb" "6.5.fb") +declare -a ext_backward_only_refs=("5.14.fb" "5.15.fb" "5.16.fb" "5.17.fb" "5.18.fb" "6.0.fb" "6.1.fb" "6.2.fb" "6.3.fb" "6.4.fb" "6.5.fb" "6.6.fb" "6.7.fb" "6.8.fb" "6.9.fb" "6.10.fb" "6.11.fb" "6.12.fb" "6.13.fb" "6.14.fb" "6.15.fb" "6.16.fb" "6.17.fb" "6.18.fb" "6.19.fb" "6.20.fb" "6.21.fb" "6.22.fb" "6.23.fb" "6.24.fb" "6.25.fb" "6.26.fb") # To check for SST ingestion forward compatibility (old version reading # data from new) as well as backward compatibility declare -a ext_forward_refs=("${db_forward_no_options_refs[@]}" "${db_forward_with_options_refs[@]}")