diff --git a/HISTORY.md b/HISTORY.md
index c4e9f43a0..79f4baf3e 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,8 @@
 # Rocksdb Change Log
 ## Unreleased
+### New Features
+* Added new ChecksumType kXXH3, which is faster than kCRC32c on almost all x86\_64 hardware.
+
 ### Bug Fixes
 * Prevent a `CompactRange()` with `CompactRangeOptions::change_level == true` from possibly causing corruption to the LSM state (overlapping files within a level) when run in parallel with another manual compaction. Note that setting `force_consistency_checks == true` (the default) would cause the DB to enter read-only mode in this scenario and return `Status::Corruption`, rather than committing any corruption.
diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc
index 5d1e12c6a..3571ce0ec 100644
--- a/db/db_basic_test.cc
+++ b/db/db_basic_test.cc
@@ -10,6 +10,7 @@
 #include
 
 #include "db/db_test_util.h"
+#include "options/options_helper.h"
 #include "port/stack_trace.h"
 #include "rocksdb/flush_block_policy.h"
 #include "rocksdb/merge_operator.h"
@@ -974,13 +975,14 @@ TEST_F(DBBasicTest, MultiGetEmpty) {
 TEST_F(DBBasicTest, ChecksumTest) {
   BlockBasedTableOptions table_options;
   Options options = CurrentOptions();
-  // change when new checksum type added
-  int max_checksum = static_cast<int>(kxxHash64);
   const int kNumPerFile = 2;
+  const auto algs = GetSupportedChecksums();
+  const int algs_size = static_cast<int>(algs.size());
+
   // generate one table with each type of checksum
-  for (int i = 0; i <= max_checksum; ++i) {
-    table_options.checksum = static_cast<ChecksumType>(i);
+  for (int i = 0; i < algs_size; ++i) {
+    table_options.checksum = algs[i];
     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
     Reopen(options);
     for (int j = 0; j < kNumPerFile; ++j) {
@@ -990,15 +992,20 @@ TEST_F(DBBasicTest, ChecksumTest) {
   }
 
   // with each valid checksum type setting...
-  for (int i = 0; i <= max_checksum; ++i) {
-    table_options.checksum = static_cast<ChecksumType>(i);
+  for (int i = 0; i < algs_size; ++i) {
+    table_options.checksum = algs[i];
     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
     Reopen(options);
     // verify every type of checksum (should be regardless of that setting)
-    for (int j = 0; j < (max_checksum + 1) * kNumPerFile; ++j) {
+    for (int j = 0; j < algs_size * kNumPerFile; ++j) {
       ASSERT_EQ(Key(j), Get(Key(j)));
     }
   }
+
+  // Now test invalid checksum type
+  table_options.checksum = static_cast<ChecksumType>(123);
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  ASSERT_TRUE(TryReopen(options).IsInvalidArgument());
 }
 
 // On Windows you can have either memory mapped file or a file
diff --git a/db/db_test_util.cc b/db/db_test_util.cc
index 1cc3c8737..61daaa446 100644
--- a/db/db_test_util.cc
+++ b/db/db_test_util.cc
@@ -475,12 +475,8 @@ Options DBTestBase::GetOptions(
     case kInfiniteMaxOpenFiles:
       options.max_open_files = -1;
       break;
-    case kxxHashChecksum: {
-      table_options.checksum = kxxHash;
-      break;
-    }
-    case kxxHash64Checksum: {
-      table_options.checksum = kxxHash64;
+    case kXXH3Checksum: {
+      table_options.checksum = kXXH3;
       break;
     }
     case kFIFOCompaction: {
diff --git a/db/db_test_util.h b/db/db_test_util.h
index 6693fa2f8..a552ea355 100644
--- a/db/db_test_util.h
+++ b/db/db_test_util.h
@@ -854,7 +854,7 @@ class DBTestBase : public testing::Test {
     kUniversalCompactionMultiLevel = 20,
     kCompressedBlockCache = 21,
     kInfiniteMaxOpenFiles = 22,
-    kxxHashChecksum = 23,
+    kXXH3Checksum = 23,
     kFIFOCompaction = 24,
     kOptimizeFiltersForHits = 25,
     kRowCache = 26,
@@ -869,7 +869,6 @@ class DBTestBase : public testing::Test {
     kBlockBasedTableWithPartitionedIndexFormat4,
     kPartitionedFilterWithNewTableReaderForCompactions,
     kUniversalSubcompactions,
-    kxxHash64Checksum,
     kUnorderedWrite,
     // This must be the last line
     kEnd,
diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc
index ab5064ead..38beb32d2 100644
--- a/db/external_sst_file_test.cc
+++ b/db/external_sst_file_test.cc
@@ -10,6 +10,7 @@
 #include "db/db_test_util.h"
 #include "db/dbformat.h"
 #include "file/filename.h"
+#include "options/options_helper.h"
 #include "port/port.h"
 #include "port/stack_trace.h"
 #include "rocksdb/sst_file_reader.h"
@@ -2383,10 +2384,9 @@ TEST_F(ExternalSSTFileTest, IngestFileWrittenWithCompressionDictionary) {
 
 // Very slow, not worth the cost to run regularly
 TEST_F(ExternalSSTFileTest, DISABLED_HugeBlockChecksum) {
-  int max_checksum = static_cast<int>(kxxHash64);
-  for (int i = 0; i <= max_checksum; ++i) {
+  for (auto t : GetSupportedChecksums()) {
     BlockBasedTableOptions table_options;
-    table_options.checksum = static_cast<ChecksumType>(i);
+    table_options.checksum = t;
     Options options = CurrentOptions();
     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h
index 0090b7e15..498365256 100644
--- a/include/rocksdb/table.h
+++ b/include/rocksdb/table.h
@@ -49,6 +49,7 @@ enum ChecksumType : char {
   kCRC32c = 0x1,
   kxxHash = 0x2,
   kxxHash64 = 0x3,
+  kXXH3 = 0x4,  // Supported since RocksDB 6.27
 };
 
 // `PinningTier` is used to specify which tier of block-based tables should
diff --git a/options/options_helper.cc b/options/options_helper.cc
index 9a59b0810..be70463f7 100644
--- a/options/options_helper.cc
+++ b/options/options_helper.cc
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include <set>
 #include
 #include
 
@@ -329,7 +330,8 @@ std::unordered_map<std::string, ChecksumType>
     OptionsHelper::checksum_type_string_map = {{"kNoChecksum", kNoChecksum},
                                                {"kCRC32c", kCRC32c},
                                                {"kxxHash", kxxHash},
-                                               {"kxxHash64", kxxHash64}};
+                                               {"kxxHash64", kxxHash64},
+                                               {"kXXH3", kXXH3}};
 
 std::unordered_map<std::string, CompressionType>
     OptionsHelper::compression_type_string_map = {
@@ -345,25 +347,37 @@ std::unordered_map<std::string, CompressionType>
     {"kDisableCompressionOption", kDisableCompressionOption}};
 
 std::vector<CompressionType> GetSupportedCompressions() {
-  std::vector<CompressionType> supported_compressions;
+  // std::set internally to deduplicate potential name aliases
+  std::set<CompressionType> supported_compressions;
   for (const auto& comp_to_name : OptionsHelper::compression_type_string_map) {
     CompressionType t = comp_to_name.second;
     if (t != kDisableCompressionOption && CompressionTypeSupported(t)) {
-      supported_compressions.push_back(t);
+      supported_compressions.insert(t);
     }
   }
-  return supported_compressions;
+  return std::vector<CompressionType>(supported_compressions.begin(),
+                                      supported_compressions.end());
 }
 
 std::vector<CompressionType> GetSupportedDictCompressions() {
-  std::vector<CompressionType> dict_compression_types;
+  std::set<CompressionType> dict_compression_types;
   for (const auto& comp_to_name : OptionsHelper::compression_type_string_map) {
     CompressionType t = comp_to_name.second;
     if (t != kDisableCompressionOption && DictCompressionTypeSupported(t)) {
-      dict_compression_types.push_back(t);
+      dict_compression_types.insert(t);
     }
   }
-  return dict_compression_types;
+  return std::vector<CompressionType>(dict_compression_types.begin(),
+                                      dict_compression_types.end());
+}
+
+std::vector<ChecksumType> GetSupportedChecksums() {
+  std::set<ChecksumType> checksum_types;
+  for (const auto& e : OptionsHelper::checksum_type_string_map) {
+    checksum_types.insert(e.second);
+  }
+  return std::vector<ChecksumType>(checksum_types.begin(),
+                                   checksum_types.end());
 }
 
 #ifndef ROCKSDB_LITE
diff --git a/options/options_helper.h b/options/options_helper.h
index e3e9cea1f..488ca9e23 100644
--- a/options/options_helper.h
+++ b/options/options_helper.h
@@ -28,6 +28,8 @@ std::vector<CompressionType> GetSupportedCompressions();
 
 std::vector<CompressionType> GetSupportedDictCompressions();
 
+std::vector<ChecksumType> GetSupportedChecksums();
+
 // Checks that the combination of DBOptions and ColumnFamilyOptions are valid
 Status ValidateOptions(const DBOptions& db_opts,
                        const ColumnFamilyOptions& cf_opts);
diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc
index 8fb51e17c..9dec5c910 100644
--- a/table/block_based/block_based_table_builder.cc
+++ b/table/block_based/block_based_table_builder.cc
@@ -1207,6 +1207,60 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
   }
 }
 
+void BlockBasedTableBuilder::ComputeBlockTrailer(
+    const Slice& block_contents, CompressionType compression_type,
+    ChecksumType checksum_type, std::array<char, kBlockTrailerSize>* trailer) {
+  (*trailer)[0] = compression_type;
+  uint32_t checksum = 0;
+  switch (checksum_type) {
+    case kNoChecksum:
+      break;
+    case kCRC32c: {
+      uint32_t crc =
+          crc32c::Value(block_contents.data(), block_contents.size());
+      // Extend to cover compression type
+      crc = crc32c::Extend(crc, trailer->data(), 1);
+      checksum = crc32c::Mask(crc);
+      break;
+    }
+    case kxxHash: {
+      XXH32_state_t* const state = XXH32_createState();
+      XXH32_reset(state, 0);
+      XXH32_update(state, block_contents.data(), block_contents.size());
+      // Extend to cover compression type
+      XXH32_update(state, trailer->data(), 1);
+      checksum = XXH32_digest(state);
+      XXH32_freeState(state);
+      break;
+    }
+    case kxxHash64: {
+      XXH64_state_t* const state = XXH64_createState();
+      XXH64_reset(state, 0);
+      XXH64_update(state, block_contents.data(), block_contents.size());
+      // Extend to cover compression type
+      XXH64_update(state, trailer->data(), 1);
+      checksum = Lower32of64(XXH64_digest(state));
+      XXH64_freeState(state);
+      break;
+    }
+    case kXXH3: {
+      // XXH3 is a complicated hash function that is extremely fast on
+      // contiguous input, but that makes its streaming support rather
+      // complex. It is worth custom handling of the last byte (`type`)
+      // in order to avoid allocating a large state object and bringing
+      // that code complexity into CPU working set.
+      checksum = Lower32of64(
+          XXH3_64bits(block_contents.data(), block_contents.size()));
+      checksum = ModifyChecksumForCompressionType(checksum, compression_type);
+      break;
+    }
+    default:
+      assert(false);
+      break;
+  }
+  EncodeFixed32(trailer->data() + 1, checksum);
+}
+
 void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
                                            CompressionType type,
                                            BlockHandle* handle,
@@ -1223,50 +1277,14 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
   assert(io_status().ok());
   io_s = r->file->Append(block_contents);
   if (io_s.ok()) {
-    char trailer[kBlockTrailerSize];
-    trailer[0] = type;
-    uint32_t checksum = 0;
-    switch (r->table_options.checksum) {
-      case kNoChecksum:
-        break;
-      case kCRC32c: {
-        uint32_t crc =
-            crc32c::Value(block_contents.data(), block_contents.size());
-        // Extend to cover compression type
-        crc = crc32c::Extend(crc, trailer, 1);
-        checksum = crc32c::Mask(crc);
-        break;
-      }
-      case kxxHash: {
-        XXH32_state_t* const state = XXH32_createState();
-        XXH32_reset(state, 0);
-        XXH32_update(state, block_contents.data(), block_contents.size());
-        // Extend to cover compression type
-        XXH32_update(state, trailer, 1);
-        checksum = XXH32_digest(state);
-        XXH32_freeState(state);
-        break;
-      }
-      case kxxHash64: {
-        XXH64_state_t* const state = XXH64_createState();
-        XXH64_reset(state, 0);
-        XXH64_update(state, block_contents.data(), block_contents.size());
-        // Extend to cover compression type
-        XXH64_update(state, trailer, 1);
-        checksum = Lower32of64(XXH64_digest(state));
-        XXH64_freeState(state);
-        break;
-      }
-      default:
-        assert(false);
-        break;
-    }
-    EncodeFixed32(trailer + 1, checksum);
+    std::array<char, kBlockTrailerSize> trailer;
+    ComputeBlockTrailer(block_contents, type, r->table_options.checksum,
+                        &trailer);
     assert(io_s.ok());
     TEST_SYNC_POINT_CALLBACK(
         "BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum",
-        static_cast<char*>(trailer));
-    io_s = r->file->Append(Slice(trailer, kBlockTrailerSize));
+        trailer.data());
+    io_s = r->file->Append(Slice(trailer.data(), trailer.size()));
     if (io_s.ok()) {
       assert(s.ok());
       bool warm_cache;
diff --git a/table/block_based/block_based_table_builder.h b/table/block_based/block_based_table_builder.h
index ed91dbf32..e34d644a4 100644
--- a/table/block_based/block_based_table_builder.h
+++ b/table/block_based/block_based_table_builder.h
@@ -10,6 +10,7 @@
 #pragma once
 #include
+#include <array>
 #include
 #include
 #include
@@ -20,6 +21,7 @@
 #include "rocksdb/listener.h"
 #include "rocksdb/options.h"
 #include "rocksdb/status.h"
+#include "rocksdb/table.h"
 #include "table/meta_blocks.h"
 #include "table/table_builder.h"
 #include "util/compression.h"
@@ -98,6 +100,12 @@ class BlockBasedTableBuilder : public TableBuilder {
   // Get file checksum function name
   const char* GetFileChecksumFuncName() const override;
 
+  // Computes and populates block trailer for a block
+  static void ComputeBlockTrailer(const Slice& block_contents,
+                                  CompressionType compression_type,
+                                  ChecksumType checksum_type,
+                                  std::array<char, kBlockTrailerSize>* trailer);
+
  private:
   bool ok() const { return status().ok(); }
@@ -117,7 +125,6 @@
                           BlockType block_type);
   // Directly write data to the file.
   void WriteRawBlock(const Slice& data, CompressionType, BlockHandle* handle,
-                     BlockType block_type,
                      const Slice* raw_data = nullptr);
   void SetupCacheKeyPrefix(const TableBuilderOptions& tbo);
diff --git a/table/block_based/block_based_table_factory.cc b/table/block_based/block_based_table_factory.cc
index 3df8ea77c..c8ef7a97c 100644
--- a/table/block_based/block_based_table_factory.cc
+++ b/table/block_based/block_based_table_factory.cc
@@ -16,11 +16,13 @@
 #include
 
 #include "logging/logging.h"
+#include "options/options_helper.h"
 #include "port/port.h"
 #include "rocksdb/cache.h"
 #include "rocksdb/convenience.h"
 #include "rocksdb/filter_policy.h"
 #include "rocksdb/flush_block_policy.h"
+#include "rocksdb/rocksdb_namespace.h"
 #include "rocksdb/utilities/options_type.h"
 #include "table/block_based/block_based_table_builder.h"
 #include "table/block_based/block_based_table_reader.h"
@@ -564,6 +566,14 @@ Status BlockBasedTableFactory::ValidateOptions(
         "max_successive_merges larger than 0 is currently inconsistent with "
         "unordered_write");
   }
+  std::string garbage;
+  if (!SerializeEnum(checksum_type_string_map, table_options_.checksum, &garbage)) {
+    return Status::InvalidArgument(
+        "Unrecognized ChecksumType for checksum: " +
+        ROCKSDB_NAMESPACE::ToString(
+            static_cast<uint32_t>(
+                table_options_.checksum)));
+  }
   return TableFactory::ValidateOptions(db_opts, cf_opts);
 }
diff --git a/table/block_based/reader_common.cc b/table/block_based/reader_common.cc
index bb2376afa..b64b6e425 100644
--- a/table/block_based/reader_common.cc
+++ b/table/block_based/reader_common.cc
@@ -9,6 +9,7 @@
 #include "table/block_based/reader_common.h"
 
 #include "monitoring/perf_context_imp.h"
+#include "table/format.h"
 #include "util/coding.h"
 #include "util/crc32c.h"
 #include "util/hash.h"
@@ -47,6 +48,11 @@ Status VerifyBlockChecksum(ChecksumType type, const char* data,
     case kxxHash64:
       computed = Lower32of64(XXH64(data, len, 0));
      break;
+    case kXXH3:
+      computed = Lower32of64(XXH3_64bits(data, block_size));
+      // Treat compression type separately for speed in building table files
+      computed = ModifyChecksumForCompressionType(computed, data[block_size]);
+      break;
     default:
       s = Status::Corruption(
           "unknown checksum type " + ToString(type) + " from footer of " +
@@ -56,8 +62,9 @@ Status VerifyBlockChecksum(ChecksumType type, const char* data,
   if (s.ok() && stored != computed) {
     s = Status::Corruption(
         "block checksum mismatch: stored = " + ToString(stored) +
-        ", computed = " + ToString(computed) + " in " + file_name +
-        " offset " + ToString(offset) + " size " + ToString(block_size));
+        ", computed = " + ToString(computed) + ", type = " + ToString(type) +
+        " in " + file_name + " offset " + ToString(offset) + " size " +
+        ToString(block_size));
   }
   return s;
 }
diff --git a/table/format.h b/table/format.h
index 7d7962a4d..c6b6e3bb7 100644
--- a/table/format.h
+++ b/table/format.h
@@ -134,6 +134,7 @@ class Footer {
   // Use this constructor when you plan to write out the footer using
   // EncodeTo(). Never use this constructor with DecodeFrom().
+  // `version` is same as `format_version` for block-based table.
   Footer(uint64_t table_magic_number, uint32_t version);
 
   // The version of the footer in this file
@@ -225,6 +226,18 @@ inline CompressionType get_block_compression_type(const char* block_data,
   return static_cast<CompressionType>(block_data[block_size]);
 }
 
+// Custom handling for the last byte of a block, to avoid invoking streaming
+// API to get an effective block checksum. This function is its own inverse
+// because it uses xor.
+inline uint32_t ModifyChecksumForCompressionType(uint32_t checksum,
+                                                 char compression_type) {
+  // This strategy bears some resemblance to extending a CRC checksum by one
+  // more byte, except we don't need to re-mix the input checksum as long as
+  // we do this step only once (per checksum).
+  const uint32_t kRandomPrime = 0x6b9083d9;
+  return checksum ^ static_cast<uint32_t>(compression_type) * kRandomPrime;
+}
+
 // Represents the contents of a block read from an SST file. Depending on how
 // it's created, it may or may not own the actual block bytes. As an example,
 // BlockContents objects representing data read from mmapped files only point
diff --git a/table/table_test.cc b/table/table_test.cc
index 9827c95d5..45f9d950a 100644
--- a/table/table_test.cc
+++ b/table/table_test.cc
@@ -7,6 +7,9 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 
+#include "rocksdb/table.h"
+
+#include <array>
 #include
 #include
 
@@ -26,8 +29,10 @@
 #include "memtable/stl_wrappers.h"
 #include "meta_blocks.h"
 #include "monitoring/statistics.h"
+#include "options/options_helper.h"
 #include "port/port.h"
 #include "rocksdb/cache.h"
+#include "rocksdb/compression_type.h"
 #include "rocksdb/db.h"
 #include "rocksdb/env.h"
 #include "rocksdb/file_checksum.h"
@@ -2193,6 +2198,115 @@ TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) {
   }
 }
 
+TEST_P(BlockBasedTableTest, BadChecksumType) {
+  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
+
+  Options options;
+  options.comparator = BytewiseComparator();
+  options.table_factory.reset(new BlockBasedTableFactory(table_options));
+
+  TableConstructor c(options.comparator);
+  InternalKey key("abc", 1, kTypeValue);
+  c.Add(key.Encode().ToString(), "test");
+  std::vector<std::string> keys;
+  stl_wrappers::KVMap kvmap;
+  const ImmutableOptions ioptions(options);
+  const MutableCFOptions moptions(options);
+  const InternalKeyComparator internal_comparator(options.comparator);
+  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
+           &keys, &kvmap);
+
+  // Corrupt checksum type (123 is invalid)
+  auto& sink = *c.TEST_GetSink();
+  size_t len = sink.contents_.size();
+  ASSERT_EQ(sink.contents_[len - Footer::kNewVersionsEncodedLength], kCRC32c);
+  sink.contents_[len - Footer::kNewVersionsEncodedLength] = char{123};
+
+  // (Re-)Open table file with bad checksum type
+  const ImmutableOptions new_ioptions(options);
+  const MutableCFOptions new_moptions(options);
+  Status s = c.Reopen(new_ioptions, new_moptions);
+  ASSERT_NOK(s);
+  ASSERT_MATCHES_REGEX(s.ToString(), "Corruption: unknown checksum type 123.*");
+}
+
+namespace {
+std::string TrailerAsString(const std::string& contents,
+                            CompressionType compression_type,
+                            ChecksumType checksum_type) {
+  std::array<char, kBlockTrailerSize> trailer;
+  BlockBasedTableBuilder::ComputeBlockTrailer(contents, compression_type,
+                                              checksum_type, &trailer);
+  return Slice(trailer.data(), trailer.size()).ToString(/*hex*/ true);
+}
+}  // namespace
+
+// Make sure that checksum values don't change in later versions, even if
+// consistent within current version. (Other tests check for consistency
+// between written checksums and read-time validation, so here we only
+// have to verify the writer side.)
+TEST_P(BlockBasedTableTest, ChecksumSchemas) {
+  std::string b1 = "This is a short block!";
+  std::string b2;
+  for (int i = 0; i < 100; ++i) {
+    b2.append("This is a long block!");
+  }
+  CompressionType ct1 = kNoCompression;
+  CompressionType ct2 = kSnappyCompression;
+  CompressionType ct3 = kZSTD;
+
+  // Note: first byte of trailer is compression type, last 4 are checksum
+
+  for (ChecksumType t : GetSupportedChecksums()) {
+    switch (t) {
+      case kNoChecksum:
+        EXPECT_EQ(TrailerAsString(b1, ct1, t), "0000000000");
+        EXPECT_EQ(TrailerAsString(b1, ct2, t), "0100000000");
+        EXPECT_EQ(TrailerAsString(b1, ct3, t), "0700000000");
+        EXPECT_EQ(TrailerAsString(b2, ct1, t), "0000000000");
+        EXPECT_EQ(TrailerAsString(b2, ct2, t), "0100000000");
+        EXPECT_EQ(TrailerAsString(b2, ct3, t), "0700000000");
+        break;
+      case kCRC32c:
+        EXPECT_EQ(TrailerAsString(b1, ct1, t), "00583F0355");
+        EXPECT_EQ(TrailerAsString(b1, ct2, t), "012F9B0A57");
+        EXPECT_EQ(TrailerAsString(b1, ct3, t), "07ECE7DA1D");
+        EXPECT_EQ(TrailerAsString(b2, ct1, t), "00943EF0AB");
+        EXPECT_EQ(TrailerAsString(b2, ct2, t), "0143A2EDB1");
+        EXPECT_EQ(TrailerAsString(b2, ct3, t), "0700E53D63");
+        break;
+      case kxxHash:
+        EXPECT_EQ(TrailerAsString(b1, ct1, t), "004A2E5FB0");
+        EXPECT_EQ(TrailerAsString(b1, ct2, t), "010BD9F652");
+        EXPECT_EQ(TrailerAsString(b1, ct3, t), "07B4107E50");
+        EXPECT_EQ(TrailerAsString(b2, ct1, t), "0020F4D4BA");
+        EXPECT_EQ(TrailerAsString(b2, ct2, t), "018F1A1F99");
+        EXPECT_EQ(TrailerAsString(b2, ct3, t), "07A191A338");
+        break;
+      case kxxHash64:
+        EXPECT_EQ(TrailerAsString(b1, ct1, t), "00B74655EF");
+        EXPECT_EQ(TrailerAsString(b1, ct2, t), "01B6C8BBBE");
+        EXPECT_EQ(TrailerAsString(b1, ct3, t), "07AED9E3B4");
+        EXPECT_EQ(TrailerAsString(b2, ct1, t), "000D4999FE");
+        EXPECT_EQ(TrailerAsString(b2, ct2, t), "01F5932423");
+        EXPECT_EQ(TrailerAsString(b2, ct3, t), "076B31BAB1");
+        break;
+      case kXXH3:
+        EXPECT_EQ(TrailerAsString(b1, ct1, t), "00B37FB5E6");
+        EXPECT_EQ(TrailerAsString(b1, ct2, t), "016AFC258D");
+        EXPECT_EQ(TrailerAsString(b1, ct3, t), "075CE54616");
+        EXPECT_EQ(TrailerAsString(b2, ct1, t), "00FA2D482E");
+        EXPECT_EQ(TrailerAsString(b2, ct2, t), "0123AED845");
+        EXPECT_EQ(TrailerAsString(b2, ct3, t), "0715B7BBDE");
+        break;
+      default:
+        // Force this test to be updated on new ChecksumTypes
+        assert(false);
+        break;
+    }
+  }
+}
+
 void AddInternalKey(TableConstructor* c, const std::string& prefix,
                     std::string value = "v", int /*suffix_len*/ = 800) {
   static Random rnd(1023);
@@ -4036,40 +4150,20 @@ TEST(TableTest, FooterTests) {
     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
     ASSERT_EQ(decoded_footer.version(), 0U);
   }
-  {
-    // xxhash block based
-    std::string encoded;
-    Footer footer(kBlockBasedTableMagicNumber, 1);
-    BlockHandle meta_index(10, 5), index(20, 15);
-    footer.set_metaindex_handle(meta_index);
-    footer.set_index_handle(index);
-    footer.set_checksum(kxxHash);
-    footer.EncodeTo(&encoded);
-    Footer decoded_footer;
-    Slice encoded_slice(encoded);
-    ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
-    ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
-    ASSERT_EQ(decoded_footer.checksum(), kxxHash);
-    ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
-    ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
-    ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
-    ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
-    ASSERT_EQ(decoded_footer.version(), 1U);
-  }
-  {
-    // xxhash64 block based
+  for (auto t : GetSupportedChecksums()) {
+    // block based, various checksums
     std::string encoded;
     Footer footer(kBlockBasedTableMagicNumber, 1);
     BlockHandle meta_index(10, 5), index(20, 15);
     footer.set_metaindex_handle(meta_index);
     footer.set_index_handle(index);
-    footer.set_checksum(kxxHash64);
+    footer.set_checksum(t);
     footer.EncodeTo(&encoded);
     Footer decoded_footer;
     Slice encoded_slice(encoded);
     ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
     ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
-    ASSERT_EQ(decoded_footer.checksum(), kxxHash64);
+    ASSERT_EQ(decoded_footer.checksum(), t);
     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
@@ -4098,7 +4192,7 @@ TEST(TableTest, FooterTests) {
     ASSERT_EQ(decoded_footer.version(), 0U);
   }
   {
-    // xxhash block based
+    // xxhash plain table (not currently used)
     std::string encoded;
     Footer footer(kPlainTableMagicNumber, 1);
     BlockHandle meta_index(10, 5), index(20, 15);
diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc
index 9aebf7dd7..c15e8dc5d 100644
--- a/tools/db_bench_tool.cc
+++ b/tools/db_bench_tool.cc
@@ -59,6 +59,7 @@
 #include "rocksdb/slice.h"
 #include "rocksdb/slice_transform.h"
 #include "rocksdb/stats_history.h"
+#include "rocksdb/table.h"
 #include "rocksdb/utilities/object_registry.h"
 #include "rocksdb/utilities/optimistic_transaction_db.h"
 #include "rocksdb/utilities/options_type.h"
@@ -147,6 +148,8 @@ IF_ROCKSDB_LITE("",
     "fill100K,"
     "crc32c,"
     "xxhash,"
+    "xxhash64,"
+    "xxh3,"
     "compress,"
     "uncompress,"
     "acquireload,"
@@ -205,8 +208,10 @@ IF_ROCKSDB_LITE("",
     "overwrite\n"
     "\tseekrandomwhilemerging -- seekrandom and 1 thread doing "
     "merge\n"
-    "\tcrc32c -- repeated crc32c of 4K of data\n"
-    "\txxhash -- repeated xxHash of 4K of data\n"
+    "\tcrc32c -- repeated crc32c of data\n"
+    "\txxhash -- repeated xxHash of data\n"
+    "\txxhash64 -- repeated xxHash64 of data\n"
+    "\txxh3 -- repeated XXH3 of data\n"
     "\tacquireload -- load N*1000 times\n"
     "\tfillseekseq -- write N values in sequential key, then read "
     "them by seeking to each key\n"
@@ -733,6 +738,10 @@ DEFINE_bool(verify_checksum, true,
            "Verify checksum for every block read"
            " from storage");
 
+DEFINE_int32(checksum_type,
+             ROCKSDB_NAMESPACE::BlockBasedTableOptions().checksum,
+             "ChecksumType as an int");
+
 DEFINE_bool(statistics, false, "Database statistics");
 DEFINE_int32(stats_level, ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers,
             "stats level for statistics");
@@ -3434,6 +3443,10 @@ class Benchmark {
         method = &Benchmark::Crc32c;
       } else if (name == "xxhash") {
         method = &Benchmark::xxHash;
+      } else if (name == "xxhash64") {
+        method = &Benchmark::xxHash64;
+      } else if (name == "xxh3") {
+        method = &Benchmark::xxh3;
      } else if (name == "acquireload") {
        method = &Benchmark::AcquireLoad;
      } else if (name == "compress") {
@@ -3778,44 +3791,42 @@ class Benchmark {
     return merge_stats;
   }
 
-  void Crc32c(ThreadState* thread) {
-    // Checksum about 500MB of data total
+  template <OperationType kOpType, typename FnType, typename... Args>
+  static inline void ChecksumBenchmark(FnType fn, ThreadState* thread,
+                                       Args... args) {
     const int size = FLAGS_block_size;  // use --block_size option for db_bench
     std::string labels = "(" + ToString(FLAGS_block_size) + " per op)";
     const char* label = labels.c_str();
     std::string data(size, 'x');
-    int64_t bytes = 0;
-    uint32_t crc = 0;
-    while (bytes < 500 * 1048576) {
-      crc = crc32c::Value(data.data(), size);
-      thread->stats.FinishedOps(nullptr, nullptr, 1, kCrc);
+    uint64_t bytes = 0;
+    uint32_t val = 0;
+    while (bytes < 5000U * uint64_t{1048576}) {  // ~5GB
+      val += static_cast<uint32_t>(fn(data.data(), size, args...));
+      thread->stats.FinishedOps(nullptr, nullptr, 1, kOpType);
       bytes += size;
     }
     // Print so result is not dead
-    fprintf(stderr, "... crc=0x%x\r", static_cast<unsigned int>(crc));
+    fprintf(stderr, "... val=0x%x\r", static_cast<unsigned int>(val));
 
     thread->stats.AddBytes(bytes);
     thread->stats.AddMessage(label);
   }
 
+  void Crc32c(ThreadState* thread) {
+    ChecksumBenchmark<kCrc>(crc32c::Value, thread);
+  }
+
   void xxHash(ThreadState* thread) {
-    // Checksum about 500MB of data total
-    const int size = 4096;
-    const char* label = "(4K per op)";
-    std::string data(size, 'x');
-    int64_t bytes = 0;
-    unsigned int xxh32 = 0;
-    while (bytes < 500 * 1048576) {
-      xxh32 = XXH32(data.data(), size, 0);
-      thread->stats.FinishedOps(nullptr, nullptr, 1, kHash);
-      bytes += size;
-    }
-    // Print so result is not dead
-    fprintf(stderr, "... xxh32=0x%x\r", static_cast<unsigned int>(xxh32));
+    ChecksumBenchmark<kHash>(XXH32, thread, /*seed*/ 0);
+  }
 
-    thread->stats.AddBytes(bytes);
-    thread->stats.AddMessage(label);
+  void xxHash64(ThreadState* thread) {
+    ChecksumBenchmark<kHash>(XXH64, thread, /*seed*/ 0);
+  }
+
+  void xxh3(ThreadState* thread) {
+    ChecksumBenchmark<kHash>(XXH3_64bits, thread);
   }
 
   void AcquireLoad(ThreadState* thread) {
@@ -4067,6 +4078,8 @@ class Benchmark {
 #endif  // ROCKSDB_LITE
     } else {
       BlockBasedTableOptions block_based_options;
+      block_based_options.checksum =
+          static_cast<ChecksumType>(FLAGS_checksum_type);
       if (FLAGS_use_hash_search) {
         if (FLAGS_prefix_size == 0) {
           fprintf(stderr,
diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py
index 5838304f0..8ba3b2298 100644
--- a/tools/db_crashtest.py
+++ b/tools/db_crashtest.py
@@ -45,7 +45,7 @@ default_params = {
         random.choice(
             ["none", "snappy", "zlib", "bzip2", "lz4", "lz4hc", "xpress", "zstd"]),
-    "checksum_type" : lambda: random.choice(["kCRC32c", "kxxHash", "kxxHash64"]),
+    "checksum_type" : lambda: random.choice(["kCRC32c", "kxxHash", "kxxHash64", "kXXH3"]),
     "compression_max_dict_bytes": lambda: 16384 * random.randint(0, 1),
     "compression_zstd_max_train_bytes": lambda: 65536 * random.randint(0, 1),
     # Disabled compression_parallel_threads as the feature is not stable
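
Reviewer note (illustrative, not part of the patch): the sketch below shows how an application would opt into the new checksum once this change ships, assuming a vanilla RocksDB build. `BlockBasedTableOptions::checksum`, `NewBlockBasedTableFactory`, and `kXXH3` come from the changes above; the database path and the remaining option values are hypothetical.

    // Minimal usage sketch for the new ChecksumType (assumes RocksDB >= 6.27).
    #include "rocksdb/db.h"
    #include "rocksdb/options.h"
    #include "rocksdb/table.h"

    int main() {
      ROCKSDB_NAMESPACE::BlockBasedTableOptions table_options;
      // Select the XXH3-based block checksum added by this change.
      table_options.checksum = ROCKSDB_NAMESPACE::kXXH3;

      ROCKSDB_NAMESPACE::Options options;
      options.create_if_missing = true;
      options.table_factory.reset(
          ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(table_options));

      ROCKSDB_NAMESPACE::DB* db = nullptr;
      // With the ValidateOptions() change above, an unrecognized checksum enum
      // value would instead be rejected here as Status::InvalidArgument.
      ROCKSDB_NAMESPACE::Status s =
          ROCKSDB_NAMESPACE::DB::Open(options, "/tmp/xxh3_demo", &db);
      delete db;
      return s.ok() ? 0 : 1;
    }

The same switch is exercised in the stress tests through db_crashtest.py's "checksum_type" parameter and in db_bench through --checksum_type (an int matching the ChecksumType enum).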