From 7fbb9eccafb2421a020a60b26e0413d3cff1cdf0 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 23 Aug 2017 19:31:40 -0700 Subject: [PATCH] support disabling checksum in block-based table Summary: store a zero as the checksum when disabled since it's easier to keep block trailer a fixed length. Closes https://github.com/facebook/rocksdb/pull/2781 Differential Revision: D5694702 Pulled By: ajkr fbshipit-source-id: 69cea9da415778ba2b600dfd9d0dfc8cb5188ecd --- HISTORY.md | 1 + db/db_basic_test.cc | 52 +++++++++++++----------------- include/rocksdb/table.h | 2 +- table/block_based_table_builder.cc | 5 ++- table/format.cc | 4 ++- tools/db_stress.cc | 30 ++++++++++++++++- 6 files changed, 59 insertions(+), 35 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index a63d9d628..581e6b3ad 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,6 +10,7 @@ * Universal compactions including the bottom level can be executed in a dedicated thread pool. This alleviates head-of-line blocking in the compaction queue, which cause write stalling, particularly in multi-instance use cases. Users can enable this feature via `Env::SetBackgroundThreads(N, Env::Priority::BOTTOM)`, where `N > 0`. * Allow merge operator to be called even with a single merge operand during compactions, by appropriately overriding `MergeOperator::AllowSingleOperand`. * Add `DB::VerifyChecksum()`, which verifies the checksums in all SST files in a running DB. +* Block-based table support for disabling checksums by setting `BlockBasedTableOptions::checksum = kNoChecksum`. ### Bug Fixes * Fix wrong latencies in `rocksdb.db.get.micros`, `rocksdb.db.write.micros`, and `rocksdb.sst.read.micros`. 
diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index a2604081b..654a457ef 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -792,36 +792,30 @@ TEST_F(DBBasicTest, MultiGetEmpty) { TEST_F(DBBasicTest, ChecksumTest) { BlockBasedTableOptions table_options; Options options = CurrentOptions(); + // change when new checksum type added + int max_checksum = static_cast<int>(kxxHash); + const int kNumPerFile = 2; + + // generate one table with each type of checksum + for (int i = 0; i <= max_checksum; ++i) { + table_options.checksum = static_cast<ChecksumType>(i); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + Reopen(options); + for (int j = 0; j < kNumPerFile; ++j) { + ASSERT_OK(Put(Key(i * kNumPerFile + j), Key(i * kNumPerFile + j))); + } + ASSERT_OK(Flush()); + } - table_options.checksum = kCRC32c; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - ASSERT_OK(Flush()); // table with crc checksum - - table_options.checksum = kxxHash; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_OK(Put("e", "f")); - ASSERT_OK(Put("g", "h")); - ASSERT_OK(Flush()); // table with xxhash checksum - - table_options.checksum = kCRC32c; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_EQ("b", Get("a")); - ASSERT_EQ("d", Get("c")); - ASSERT_EQ("f", Get("e")); - ASSERT_EQ("h", Get("g")); - - table_options.checksum = kCRC32c; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_EQ("b", Get("a")); - ASSERT_EQ("d", Get("c")); - ASSERT_EQ("f", Get("e")); - ASSERT_EQ("h", Get("g")); + // verify data with each type of checksum + for (int i = 0; i <= kxxHash; ++i) { + table_options.checksum = static_cast<ChecksumType>(i); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + Reopen(options); + for (int j = 0; j < 
(max_checksum + 1) * kNumPerFile; ++j) { + ASSERT_EQ(Key(j), Get(Key(j))); + } + } } // On Windows you can have either memory mapped file or a file diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 04e532e16..1b4c0ced9 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -43,7 +43,7 @@ struct Options; using std::unique_ptr; enum ChecksumType : char { - kNoChecksum = 0x0, // not yet supported. Will fail + kNoChecksum = 0x0, kCRC32c = 0x1, kxxHash = 0x2, }; diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index e87def73e..d42e0f8b7 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -551,9 +551,8 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents, char* trailer_without_type = trailer + 1; switch (r->table_options.checksum) { case kNoChecksum: - // we don't support no checksum yet - assert(false); - // intentional fallthrough + EncodeFixed32(trailer_without_type, 0); + break; case kCRC32c: { auto crc = crc32c::Value(block_contents.data(), block_contents.size()); crc = crc32c::Extend(crc, trailer, 1); // Extend to cover block type diff --git a/table/format.cc b/table/format.cc index e5f2df007..364766e9a 100644 --- a/table/format.cc +++ b/table/format.cc @@ -102,7 +102,7 @@ inline uint64_t UpconvertLegacyFooterFormat(uint64_t magic_number) { // to make the total size 2 * BlockHandle::kMaxEncodedLength // table_magic_number (8 bytes) // new footer format: -// checksum (char, 1 byte) +// checksum type (char, 1 byte) // metaindex handle (varint64 offset, varint64 size) // index handle (varint64 offset, varint64 size) // to make the total size 2 * BlockHandle::kMaxEncodedLength + 1 @@ -278,6 +278,8 @@ Status CheckBlockChecksum(const ReadOptions& options, const Footer& footer, uint32_t value = DecodeFixed32(data + block_size + 1); uint32_t actual = 0; switch (footer.checksum()) { + case kNoChecksum: + break; case kCRC32c: value = 
crc32c::Unmask(value); actual = crc32c::Value(data, block_size + 1); diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 86776cf97..d18eeab0c 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -44,6 +44,7 @@ int main() { #include "db/version_set.h" #include "hdfs/env_hdfs.h" #include "monitoring/histogram.h" +#include "options/options_helper.h" #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/env.h" @@ -426,10 +427,30 @@ enum rocksdb::CompressionType StringToCompressionType(const char* ctype) { else if (!strcasecmp(ctype, "zstd")) return rocksdb::kZSTD; - fprintf(stdout, "Cannot parse compression type '%s'\n", ctype); + fprintf(stderr, "Cannot parse compression type '%s'\n", ctype); return rocksdb::kSnappyCompression; //default value } +enum rocksdb::ChecksumType StringToChecksumType(const char* ctype) { + assert(ctype); + auto iter = rocksdb::checksum_type_string_map.find(ctype); + if (iter != rocksdb::checksum_type_string_map.end()) { + return iter->second; + } + fprintf(stderr, "Cannot parse checksum type '%s'\n", ctype); + return rocksdb::kCRC32c; +} + +std::string ChecksumTypeToString(rocksdb::ChecksumType ctype) { + auto iter = std::find_if( + rocksdb::checksum_type_string_map.begin(), + rocksdb::checksum_type_string_map.end(), + [&](const std::pair<std::string, rocksdb::ChecksumType>& + name_and_enum_val) { return name_and_enum_val.second == ctype; }); + assert(iter != rocksdb::checksum_type_string_map.end()); + return iter->first; +} + std::vector<std::string> SplitString(std::string src) { std::vector<std::string> ret; if (src.empty()) { @@ -451,6 +472,9 @@ DEFINE_string(compression_type, "snappy", static enum rocksdb::CompressionType FLAGS_compression_type_e = rocksdb::kSnappyCompression; +DEFINE_string(checksum_type, "kCRC32c", "Algorithm to use to checksum blocks"); +static enum rocksdb::ChecksumType FLAGS_checksum_type_e = rocksdb::kCRC32c; + DEFINE_string(hdfs, "", "Name of hdfs environment"); // posix or hdfs environment static rocksdb::Env* FLAGS_env = 
rocksdb::Env::Default(); @@ -2128,6 +2152,8 @@ class StressTest { 1 << FLAGS_log2_keys_per_lock); std::string compression = CompressionTypeToString(FLAGS_compression_type_e); fprintf(stdout, "Compression : %s\n", compression.c_str()); + std::string checksum = ChecksumTypeToString(FLAGS_checksum_type_e); + fprintf(stdout, "Checksum type : %s\n", checksum.c_str()); fprintf(stdout, "Max subcompactions : %" PRIu64 "\n", FLAGS_subcompactions); @@ -2162,6 +2188,7 @@ class StressTest { BlockBasedTableOptions block_based_options; block_based_options.block_cache = cache_; block_based_options.block_cache_compressed = compressed_cache_; + block_based_options.checksum = FLAGS_checksum_type_e; block_based_options.block_size = FLAGS_block_size; block_based_options.format_version = 2; block_based_options.filter_policy = filter_policy_; @@ -2412,6 +2439,7 @@ int main(int argc, char** argv) { } FLAGS_compression_type_e = StringToCompressionType(FLAGS_compression_type.c_str()); + FLAGS_checksum_type_e = StringToChecksumType(FLAGS_checksum_type.c_str()); if (!FLAGS_hdfs.empty()) { FLAGS_env = new rocksdb::HdfsEnv(FLAGS_hdfs); }