From e8d332d97e2c3dcb9d85ff6be033875781da9ed4 Mon Sep 17 00:00:00 2001 From: Zhichao Cao Date: Sun, 29 Mar 2020 15:57:02 -0700 Subject: [PATCH] Use FileChecksumGenFactory for SST file checksum (#6600) Summary: In the current implementation, sst file checksum is calculated by a shared checksum function object, which may make some checksum function hard to be applied here such as SHA1. In this implementation, each sst file will have its own checksum generator obejct, created by FileChecksumGenFactory. User needs to implement its own FilechecksumGenerator and Factory to plugin the in checksum calculation method. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6600 Test Plan: tested with make asan_check Reviewed By: riversand963 Differential Revision: D20717670 Pulled By: zhichao-cao fbshipit-source-id: 2a74c1c280ac11a07a1980185b43b671acaa71c6 --- HISTORY.md | 2 + db/builder.cc | 11 ++-- db/compaction/compaction_job.cc | 13 ++-- file/writable_file_writer.cc | 27 +++++--- file/writable_file_writer.h | 24 ++++--- include/rocksdb/file_checksum.h | 44 ++++++++----- include/rocksdb/options.h | 10 +-- options/cf_options.cc | 2 +- options/cf_options.h | 2 +- options/db_options.cc | 8 +-- options/db_options.h | 2 +- options/options_helper.cc | 3 +- options/options_settable_test.cc | 4 +- .../block_based/block_based_table_builder.cc | 11 +++- table/block_based/block_based_table_builder.h | 5 +- table/cuckoo/cuckoo_table_builder.cc | 12 ++-- table/cuckoo/cuckoo_table_builder.h | 5 +- table/mock_table.h | 3 +- table/plain/plain_table_builder.cc | 12 ++-- table/plain/plain_table_builder.h | 5 +- table/table_builder.h | 2 +- table/table_test.cc | 65 ++++++++++++------- tools/ldb_cmd.cc | 3 +- tools/ldb_cmd_test.cc | 29 ++++----- util/file_checksum_helper.cc | 5 -- util/file_checksum_helper.h | 36 ++++++---- 26 files changed, 203 insertions(+), 142 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 85dce2cc6..b276a9e7d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,8 @@ * Updated default format_version in BlockBasedTableOptions from 2 to 4. SST files generated with the new default can be read by RocksDB versions 5.16 and newer, and use more efficient encoding of keys in index blocks. * `Cache::Insert` now expects clients to pass in function objects implementing the `Cache::Deleter` interface as deleters instead of plain function pointers. * A new parameter `CreateBackupOptions` is added to both `BackupEngine::CreateNewBackup` and `BackupEngine::CreateNewBackupWithMetadata`, you can decrease CPU priority of `BackupEngine`'s background threads by setting `decrease_background_thread_cpu_priority` and `background_thread_cpu_priority` in `CreateBackupOptions`. +* Updated the public API of SST file checksum. Introduce the FileChecksumGenFactory to create the FileChecksumGenerator for each SST file, such that the FileChecksumGenerator is not shared and it can be more general for checksum implementations. Changed the FileChecksumGenerator interface from Value, Extend, and GetChecksum to Update, Finalize, and GetChecksum. Temproal data should be maintained by the FileChecksumGenerator object itself and finally it can return the checksum string. +* Updated the public API of SST file checksum. Introduce the FileChecksumGenFactory to create the FileChecksumGenerator for each SST file, such that the FileChecksumGenerator is not shared and it can be more general for checksum implementations. Changed the FileChecksumGenerator interface from Value, Extend, and GetChecksum to Update, Finalize, and GetChecksum. Finalize should be only called once after all data is processed to generate the final checksum. Temproal data should be maintained by the FileChecksumGenerator object itself and finally it can return the checksum string. ### Bug Fixes * Fix a bug where range tombstone blocks in ingested files were cached incorrectly during ingestion. If range tombstones were read from those incorrectly cached blocks, the keys they covered would be exposed. diff --git a/db/builder.cc b/db/builder.cc index c78b3b618..0583aae3b 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -134,7 +134,7 @@ Status BuildTable( file_writer.reset(new WritableFileWriter( std::move(file), fname, file_options, env, ioptions.statistics, - ioptions.listeners, ioptions.sst_file_checksum_func)); + ioptions.listeners, ioptions.file_checksum_gen_factory)); builder = NewTableBuilder( ioptions, mutable_cf_options, internal_comparator, @@ -203,9 +203,6 @@ Status BuildTable( if (table_properties) { *table_properties = tp; } - // Add the checksum information to file metadata. - meta->file_checksum = builder->GetFileChecksum(); - meta->file_checksum_func_name = builder->GetFileChecksumFuncName(); } delete builder; @@ -217,6 +214,12 @@ Status BuildTable( if (io_status->ok() && !empty) { *io_status = file_writer->Close(); } + if (io_status->ok() && !empty) { + // Add the checksum information to file metadata. + meta->file_checksum = file_writer->GetFileChecksum(); + meta->file_checksum_func_name = file_writer->GetFileChecksumFuncName(); + } + if (!io_status->ok()) { s = *io_status; } diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 84c8d08bf..a417585dd 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1309,11 +1309,6 @@ Status CompactionJob::FinishCompactionOutputFile( } const uint64_t current_bytes = sub_compact->builder->FileSize(); if (s.ok()) { - // Add the checksum information to file metadata. - meta->file_checksum = sub_compact->builder->GetFileChecksum(); - meta->file_checksum_func_name = - sub_compact->builder->GetFileChecksumFuncName(); - meta->fd.file_size = current_bytes; } sub_compact->current_output()->finished = true; @@ -1328,6 +1323,12 @@ Status CompactionJob::FinishCompactionOutputFile( if (io_s.ok()) { io_s = sub_compact->outfile->Close(); } + if (io_s.ok()) { + // Add the checksum information to file metadata. + meta->file_checksum = sub_compact->outfile->GetFileChecksum(); + meta->file_checksum_func_name = + sub_compact->outfile->GetFileChecksumFuncName(); + } if (!io_s.ok()) { io_status_ = io_s; s = io_s; @@ -1532,7 +1533,7 @@ Status CompactionJob::OpenCompactionOutputFile( sub_compact->outfile.reset( new WritableFileWriter(std::move(writable_file), fname, file_options_, env_, db_options_.statistics.get(), listeners, - db_options_.sst_file_checksum_func.get())); + db_options_.file_checksum_gen_factory.get())); // If the Column family flag is to only optimize filters for hits, // we can skip creating filters if this is the bottommost_level where diff --git a/file/writable_file_writer.cc b/file/writable_file_writer.cc index 87eef04ec..ec4e7c8c3 100644 --- a/file/writable_file_writer.cc +++ b/file/writable_file_writer.cc @@ -155,6 +155,11 @@ IOStatus WritableFileWriter::Close() { writable_file_.reset(); TEST_KILL_RANDOM("WritableFileWriter::Close:1", rocksdb_kill_odds); + if (s.ok() && checksum_generator_ != nullptr && !checksum_finalized_) { + checksum_generator_->Finalize(); + checksum_finalized_ = true; + } + return s; } @@ -216,9 +221,17 @@ IOStatus WritableFileWriter::Flush() { return s; } +std::string WritableFileWriter::GetFileChecksum() { + if (checksum_generator_ != nullptr) { + return checksum_generator_->GetChecksum(); + } else { + return kUnknownFileChecksum; + } +} + const char* WritableFileWriter::GetFileChecksumFuncName() const { - if (checksum_func_ != nullptr) { - return checksum_func_->Name(); + if (checksum_generator_ != nullptr) { + return checksum_generator_->Name(); } else { return kUnknownFileChecksumFuncName.c_str(); } @@ -332,14 +345,8 @@ IOStatus WritableFileWriter::WriteBuffered(const char* data, size_t size) { } void WritableFileWriter::CalculateFileChecksum(const Slice& data) { - if (checksum_func_ != nullptr) { - if (is_first_checksum_) { - file_checksum_ = checksum_func_->Value(data.data(), data.size()); - is_first_checksum_ = false; - } else { - file_checksum_ = - checksum_func_->Extend(file_checksum_, data.data(), data.size()); - } + if (checksum_generator_ != nullptr) { + checksum_generator_->Update(data.data(), data.size()); } } diff --git a/file/writable_file_writer.h b/file/writable_file_writer.h index 1a76bc1f8..6f1d8987b 100644 --- a/file/writable_file_writer.h +++ b/file/writable_file_writer.h @@ -72,9 +72,8 @@ class WritableFileWriter { RateLimiter* rate_limiter_; Statistics* stats_; std::vector> listeners_; - FileChecksumFunc* checksum_func_; - std::string file_checksum_ = kUnknownFileChecksum; - bool is_first_checksum_ = true; + std::unique_ptr checksum_generator_; + bool checksum_finalized_; public: WritableFileWriter( @@ -82,7 +81,7 @@ class WritableFileWriter { const FileOptions& options, Env* env = nullptr, Statistics* stats = nullptr, const std::vector>& listeners = {}, - FileChecksumFunc* checksum_func = nullptr) + FileChecksumGenFactory* file_checksum_gen_factory = nullptr) : writable_file_(std::move(file)), file_name_(_file_name), env_(env), @@ -98,7 +97,8 @@ class WritableFileWriter { rate_limiter_(options.rate_limiter), stats_(stats), listeners_(), - checksum_func_(checksum_func) { + checksum_generator_(nullptr), + checksum_finalized_(false) { TEST_SYNC_POINT_CALLBACK("WritableFileWriter::WritableFileWriter:0", reinterpret_cast(max_buffer_size_)); buf_.Alignment(writable_file_->GetRequiredBufferAlignment()); @@ -113,6 +113,13 @@ class WritableFileWriter { #else // !ROCKSDB_LITE (void)listeners; #endif + if (file_checksum_gen_factory != nullptr) { + FileChecksumGenContext checksum_gen_context; + checksum_gen_context.file_name = _file_name; + checksum_generator_ = + file_checksum_gen_factory->CreateFileChecksumGenerator( + checksum_gen_context); + } } WritableFileWriter(const WritableFileWriter&) = delete; @@ -150,11 +157,12 @@ class WritableFileWriter { bool TEST_BufferIsEmpty() { return buf_.CurrentSize() == 0; } - void TEST_SetFileChecksumFunc(FileChecksumFunc* checksum_func) { - checksum_func_ = checksum_func; + void TEST_SetFileChecksumGenerator( + FileChecksumGenerator* checksum_generator) { + checksum_generator_.reset(checksum_generator); } - const std::string& GetFileChecksum() const { return file_checksum_; } + std::string GetFileChecksum(); const char* GetFileChecksumFuncName() const; diff --git a/include/rocksdb/file_checksum.h b/include/rocksdb/file_checksum.h index 35f54f40b..61975f0f3 100644 --- a/include/rocksdb/file_checksum.h +++ b/include/rocksdb/file_checksum.h @@ -18,27 +18,44 @@ namespace ROCKSDB_NAMESPACE { -// FileChecksumFunc is the function class to generates the checksum value +struct FileChecksumGenContext { + std::string file_name; +}; + +// FileChecksumGenerator is the class to generates the checksum value // for each file when the file is written to the file system. -class FileChecksumFunc { +class FileChecksumGenerator { public: - virtual ~FileChecksumFunc() {} - // Return the checksum of concat (A, data[0,n-1]) where init_checksum is the - // returned value of some string A. It is used to maintain the checksum of a - // stream of data - virtual std::string Extend(const std::string& init_checksum, const char* data, - size_t n) = 0; + virtual ~FileChecksumGenerator() {} + + // Update the current result after process the data. For different checksum + // functions, the temporal results may be stored and used in Update to + // include the new data. + virtual void Update(const char* data, size_t n) = 0; - // Return the checksum value of data[0,n-1] - virtual std::string Value(const char* data, size_t n) = 0; + // Generate the final results if no further new data will be updated. + virtual void Finalize() = 0; - // Return a processed value of the checksum for store in somewhere - virtual std::string ProcessChecksum(const std::string& checksum) = 0; + // Get the checksum + virtual std::string GetChecksum() const = 0; // Returns a name that identifies the current file checksum function. virtual const char* Name() const = 0; }; +// Create the FileChecksumGenerator object for each SST file. +class FileChecksumGenFactory { + public: + virtual ~FileChecksumGenFactory() {} + + // Create a new FileChecksumGenerator. + virtual std::unique_ptr CreateFileChecksumGenerator( + const FileChecksumGenContext& context) = 0; + + // Return the name of this FileChecksumGenFactory. + virtual const char* Name() const = 0; +}; + // FileChecksumList stores the checksum information of a list of files (e.g., // SST files). The FileChecksumLIst can be used to store the checksum // information of all SST file getting from the MANIFEST, which are @@ -80,7 +97,4 @@ class FileChecksumList { // Create a new file checksum list. extern FileChecksumList* NewFileChecksumList(); -// Create a Crc32c based file checksum function -extern FileChecksumFunc* CreateFileChecksumFuncCrc32c(); - } // namespace ROCKSDB_NAMESPACE diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index aa1245b9f..dbc68b7cf 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1126,12 +1126,14 @@ struct DBOptions { // Default: 0 size_t log_readahead_size = 0; - // If user does NOT provide SST file checksum function, the SST file checksum - // will NOT be used. The single checksum instance are shared by options and - // file writers. Make sure the algorithm is thread safe. + // If user does NOT provide the checksum generator factory, the file checksum + // will NOT be used. A new file checksum generator object will be created + // when a SST file is created. Therefore, each created FileChecksumGenerator + // will only be used from a single thread and so does not need to be + // thread-safe. // // Default: nullptr - std::shared_ptr sst_file_checksum_func = nullptr; + std::shared_ptr file_checksum_gen_factory = nullptr; // By default, RocksDB recovery fails if any table file referenced in // MANIFEST are missing after scanning the MANIFEST. diff --git a/options/cf_options.cc b/options/cf_options.cc index bc9e301e6..e34e1065d 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -78,7 +78,7 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options, cf_options.memtable_insert_with_hint_prefix_extractor.get()), cf_paths(cf_options.cf_paths), compaction_thread_limiter(cf_options.compaction_thread_limiter), - sst_file_checksum_func(db_options.sst_file_checksum_func.get()) {} + file_checksum_gen_factory(db_options.file_checksum_gen_factory.get()) {} // Multiple two operands. If they overflow, return op1. uint64_t MultiplyCheckOverflow(uint64_t op1, double op2) { diff --git a/options/cf_options.h b/options/cf_options.h index b53ed840f..3c2a62409 100644 --- a/options/cf_options.h +++ b/options/cf_options.h @@ -126,7 +126,7 @@ struct ImmutableCFOptions { std::shared_ptr compaction_thread_limiter; - FileChecksumFunc* sst_file_checksum_func; + FileChecksumGenFactory* file_checksum_gen_factory; }; struct MutableCFOptions { diff --git a/options/db_options.cc b/options/db_options.cc index 2e572963f..8ef147c5e 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -95,7 +95,7 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options) persist_stats_to_disk(options.persist_stats_to_disk), write_dbid_to_manifest(options.write_dbid_to_manifest), log_readahead_size(options.log_readahead_size), - sst_file_checksum_func(options.sst_file_checksum_func), + file_checksum_gen_factory(options.file_checksum_gen_factory), best_efforts_recovery(options.best_efforts_recovery) { } @@ -247,9 +247,9 @@ void ImmutableDBOptions::Dump(Logger* log) const { ROCKS_LOG_HEADER( log, " Options.log_readahead_size: %" ROCKSDB_PRIszt, log_readahead_size); - ROCKS_LOG_HEADER(log, " Options.sst_file_checksum_func: %s", - sst_file_checksum_func - ? sst_file_checksum_func->Name() + ROCKS_LOG_HEADER(log, " Options.file_checksum_gen_factory: %s", + file_checksum_gen_factory + ? file_checksum_gen_factory->Name() : kUnknownFileChecksumFuncName.c_str()); ROCKS_LOG_HEADER(log, " Options.best_efforts_recovery: %d", static_cast(best_efforts_recovery)); diff --git a/options/db_options.h b/options/db_options.h index 3ab6b73eb..66945f912 100644 --- a/options/db_options.h +++ b/options/db_options.h @@ -87,7 +87,7 @@ struct ImmutableDBOptions { bool persist_stats_to_disk; bool write_dbid_to_manifest; size_t log_readahead_size; - std::shared_ptr sst_file_checksum_func; + std::shared_ptr file_checksum_gen_factory; bool best_efforts_recovery; }; diff --git a/options/options_helper.cc b/options/options_helper.cc index 70b56460d..568f45281 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -143,7 +143,8 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options, options.avoid_unnecessary_blocking_io = immutable_db_options.avoid_unnecessary_blocking_io; options.log_readahead_size = immutable_db_options.log_readahead_size; - options.sst_file_checksum_func = immutable_db_options.sst_file_checksum_func; + options.file_checksum_gen_factory = + immutable_db_options.file_checksum_gen_factory; options.best_efforts_recovery = immutable_db_options.best_efforts_recovery; return options; } diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 10c32707f..12a0ec9d8 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -197,8 +197,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { sizeof(std::vector>)}, {offsetof(struct DBOptions, row_cache), sizeof(std::shared_ptr)}, {offsetof(struct DBOptions, wal_filter), sizeof(const WalFilter*)}, - {offsetof(struct DBOptions, sst_file_checksum_func), - sizeof(std::shared_ptr)}, + {offsetof(struct DBOptions, file_checksum_gen_factory), + sizeof(std::shared_ptr)}, }; char* options_ptr = new char[sizeof(DBOptions)]; diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 9db331de9..2c2d5c967 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -1167,9 +1167,6 @@ Status BlockBasedTableBuilder::Finish() { if (ok()) { WriteFooter(metaindex_block_handle, index_block_handle); } - if (r->file != nullptr) { - file_checksum_ = r->file->GetFileChecksum(); - } r->state = Rep::State::kClosed; return r->status; } @@ -1205,6 +1202,14 @@ TableProperties BlockBasedTableBuilder::GetTableProperties() const { return ret; } +std::string BlockBasedTableBuilder::GetFileChecksum() const { + if (rep_->file != nullptr) { + return rep_->file->GetFileChecksum(); + } else { + return kUnknownFileChecksum; + } +} + const char* BlockBasedTableBuilder::GetFileChecksumFuncName() const { if (rep_->file != nullptr) { return rep_->file->GetFileChecksumFuncName(); diff --git a/table/block_based/block_based_table_builder.h b/table/block_based/block_based_table_builder.h index b85be99d8..353bc80ae 100644 --- a/table/block_based/block_based_table_builder.h +++ b/table/block_based/block_based_table_builder.h @@ -96,7 +96,7 @@ class BlockBasedTableBuilder : public TableBuilder { TableProperties GetTableProperties() const override; // Get file checksum - const std::string& GetFileChecksum() const override { return file_checksum_; } + std::string GetFileChecksum() const override; // Get file checksum function name const char* GetFileChecksumFuncName() const override; @@ -146,9 +146,6 @@ class BlockBasedTableBuilder : public TableBuilder { // Some compression libraries fail when the raw size is bigger than int. If // uncompressed size is bigger than kCompressionSizeLimit, don't compress it const uint64_t kCompressionSizeLimit = std::numeric_limits::max(); - - // Store file checksum. If checksum is disabled, its value is "0". - std::string file_checksum_ = kUnknownFileChecksum; }; Slice CompressBlock(const Slice& raw, const CompressionInfo& info, diff --git a/table/cuckoo/cuckoo_table_builder.cc b/table/cuckoo/cuckoo_table_builder.cc index 7d3086a61..11a081274 100644 --- a/table/cuckoo/cuckoo_table_builder.cc +++ b/table/cuckoo/cuckoo_table_builder.cc @@ -390,10 +390,6 @@ Status CuckooTableBuilder::Finish() { std::string footer_encoding; footer.EncodeTo(&footer_encoding); io_status_ = file_->Append(footer_encoding); - - if (file_ != nullptr) { - file_checksum_ = file_->GetFileChecksum(); - } status_ = io_status_; return status_; } @@ -520,6 +516,14 @@ bool CuckooTableBuilder::MakeSpaceForKey( return null_found; } +std::string CuckooTableBuilder::GetFileChecksum() const { + if (file_ != nullptr) { + return file_->GetFileChecksum(); + } else { + return kUnknownFileChecksum; + } +} + const char* CuckooTableBuilder::GetFileChecksumFuncName() const { if (file_ != nullptr) { return file_->GetFileChecksumFuncName(); diff --git a/table/cuckoo/cuckoo_table_builder.h b/table/cuckoo/cuckoo_table_builder.h index ab2ff3c5f..bf2f78ee3 100644 --- a/table/cuckoo/cuckoo_table_builder.h +++ b/table/cuckoo/cuckoo_table_builder.h @@ -71,7 +71,7 @@ class CuckooTableBuilder: public TableBuilder { TableProperties GetTableProperties() const override { return properties_; } // Get file checksum - const std::string& GetFileChecksum() const override { return file_checksum_; } + std::string GetFileChecksum() const override; // Get file checksum function name const char* GetFileChecksumFuncName() const override; @@ -130,9 +130,6 @@ class CuckooTableBuilder: public TableBuilder { std::string smallest_user_key_ = ""; bool closed_; // Either Finish() or Abandon() has been called. - - // Store file checksum. If checksum is disabled, its value is "0" - std::string file_checksum_ = kUnknownFileChecksum; }; } // namespace ROCKSDB_NAMESPACE diff --git a/table/mock_table.h b/table/mock_table.h index a99d6e4a9..798acb025 100644 --- a/table/mock_table.h +++ b/table/mock_table.h @@ -159,7 +159,7 @@ class MockTableBuilder : public TableBuilder { } // Get file checksum - const std::string& GetFileChecksum() const override { return file_checksum_; } + std::string GetFileChecksum() const override { return kUnknownFileChecksum; } // Get file checksum function name const char* GetFileChecksumFuncName() const override { return kUnknownFileChecksumFuncName.c_str(); @@ -169,7 +169,6 @@ class MockTableBuilder : public TableBuilder { uint32_t id_; MockTableFileSystem* file_system_; stl_wrappers::KVMap table_; - std::string file_checksum_ = kUnknownFileChecksum; }; class MockTableFactory : public TableFactory { diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc index bc4a04d74..8a1fc5dd1 100644 --- a/table/plain/plain_table_builder.cc +++ b/table/plain/plain_table_builder.cc @@ -285,10 +285,6 @@ Status PlainTableBuilder::Finish() { if (io_status_.ok()) { offset_ += footer_encoding.size(); } - - if (file_ != nullptr) { - file_checksum_ = file_->GetFileChecksum(); - } status_ = io_status_; return status_; } @@ -305,6 +301,14 @@ uint64_t PlainTableBuilder::FileSize() const { return offset_; } +std::string PlainTableBuilder::GetFileChecksum() const { + if (file_ != nullptr) { + return file_->GetFileChecksum(); + } else { + return kUnknownFileChecksum; + } +} + const char* PlainTableBuilder::GetFileChecksumFuncName() const { if (file_ != nullptr) { return file_->GetFileChecksumFuncName(); diff --git a/table/plain/plain_table_builder.h b/table/plain/plain_table_builder.h index d1d9e7785..cf2c7582b 100644 --- a/table/plain/plain_table_builder.h +++ b/table/plain/plain_table_builder.h @@ -88,7 +88,7 @@ class PlainTableBuilder: public TableBuilder { bool SaveIndexInFile() const { return store_index_in_file_; } // Get file checksum - const std::string& GetFileChecksum() const override { return file_checksum_; } + std::string GetFileChecksum() const override; // Get file checksum function name const char* GetFileChecksumFuncName() const override; @@ -119,9 +119,6 @@ class PlainTableBuilder: public TableBuilder { const SliceTransform* prefix_extractor_; - // Store file checksum. If checksum is disabled, its value is "0". - std::string file_checksum_ = kUnknownFileChecksum; - Slice GetPrefix(const Slice& target) const { assert(target.size() >= 8); // target is internal key return GetPrefixFromUserKey(GetUserKey(target)); diff --git a/table/table_builder.h b/table/table_builder.h index 3e2aacb86..bb8dc4df2 100644 --- a/table/table_builder.h +++ b/table/table_builder.h @@ -164,7 +164,7 @@ class TableBuilder { virtual TableProperties GetTableProperties() const = 0; // Return file checksum - virtual const std::string& GetFileChecksum() const = 0; + virtual std::string GetFileChecksum() const = 0; // Return file checksum function name virtual const char* GetFileChecksumFuncName() const = 0; diff --git a/table/table_test.cc b/table/table_test.cc index e679f6c6e..0a6a810fc 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -1187,9 +1187,9 @@ class FileChecksumTestHelper { file_writer_.reset(test::GetWritableFileWriter(sink_, "" /* don't care */)); } - void SetFileChecksumFunc(FileChecksumFunc* checksum_func) { + void SetFileChecksumGenerator(FileChecksumGenerator* checksum_generator) { if (file_writer_ != nullptr) { - file_writer_->TEST_SetFileChecksumFunc(checksum_func); + file_writer_->TEST_SetFileChecksumGenerator(checksum_generator); } } @@ -1230,15 +1230,18 @@ class FileChecksumTestHelper { return s; } - std::string GetFileChecksum() { return table_builder_->GetFileChecksum(); } + std::string GetFileChecksum() { + file_writer_->Close(); + return table_builder_->GetFileChecksum(); + } const char* GetFileChecksumFuncName() { return table_builder_->GetFileChecksumFuncName(); } - Status CalculateFileChecksum(FileChecksumFunc* file_checksum_func, + Status CalculateFileChecksum(FileChecksumGenerator* file_checksum_generator, std::string* checksum) { - assert(file_checksum_func != nullptr); + assert(file_checksum_generator != nullptr); cur_uniq_id_ = checksum_uniq_id_++; test::StringSink* ss_rw = ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter( @@ -1248,8 +1251,6 @@ class FileChecksumTestHelper { std::unique_ptr scratch(new char[2048]); Slice result; uint64_t offset = 0; - std::string tmp_checksum; - bool first_read = true; Status s; s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr, false); @@ -1257,13 +1258,7 @@ class FileChecksumTestHelper { return s; } while (result.size() != 0) { - if (first_read) { - first_read = false; - tmp_checksum = file_checksum_func->Value(scratch.get(), result.size()); - } else { - tmp_checksum = file_checksum_func->Extend(tmp_checksum, scratch.get(), - result.size()); - } + file_checksum_generator->Update(scratch.get(), result.size()); offset += static_cast(result.size()); s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr, false); @@ -1272,7 +1267,8 @@ class FileChecksumTestHelper { } } EXPECT_EQ(offset, static_cast(table_builder_->FileSize())); - *checksum = tmp_checksum; + file_checksum_generator->Finalize(); + *checksum = file_checksum_generator->GetChecksum(); return Status::OK(); } @@ -3279,9 +3275,10 @@ TEST_P(BlockBasedTableTest, NoFileChecksum) { } TEST_P(BlockBasedTableTest, Crc32FileChecksum) { + FileChecksumGenCrc32cFactory* file_checksum_gen_factory = + new FileChecksumGenCrc32cFactory(); Options options; - options.sst_file_checksum_func = - std::shared_ptr(CreateFileChecksumFuncCrc32c()); + options.file_checksum_gen_factory.reset(file_checksum_gen_factory); ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); @@ -3300,9 +3297,14 @@ TEST_P(BlockBasedTableTest, Crc32FileChecksum) { } std::string column_family_name; + FileChecksumGenContext gen_context; + gen_context.file_name = "db/tmp"; + std::unique_ptr checksum_crc32_gen1 = + options.file_checksum_gen_factory->CreateFileChecksumGenerator( + gen_context); FileChecksumTestHelper f(true); f.CreateWriteableFile(); - f.SetFileChecksumFunc(options.sst_file_checksum_func.get()); + f.SetFileChecksumGenerator(checksum_crc32_gen1.release()); std::unique_ptr builder; builder.reset(ioptions.table_factory->NewTableBuilder( TableBuilderOptions(ioptions, moptions, *comparator, @@ -3316,9 +3318,12 @@ TEST_P(BlockBasedTableTest, Crc32FileChecksum) { f.AddKVtoKVMap(1000); f.WriteKVAndFlushTable(); ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); + + std::unique_ptr checksum_crc32_gen2 = + options.file_checksum_gen_factory->CreateFileChecksumGenerator( + gen_context); std::string checksum; - ASSERT_OK( - f.CalculateFileChecksum(options.sst_file_checksum_func.get(), &checksum)); + ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum)); ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); } @@ -3420,9 +3425,10 @@ TEST_F(PlainTableTest, Crc32FileChecksum) { plain_table_options.hash_table_ratio = 0; PlainTableFactory factory(plain_table_options); + FileChecksumGenCrc32cFactory* file_checksum_gen_factory = + new FileChecksumGenCrc32cFactory(); Options options; - options.sst_file_checksum_func = - std::shared_ptr(CreateFileChecksumFuncCrc32c()); + options.file_checksum_gen_factory.reset(file_checksum_gen_factory); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); InternalKeyComparator ikc(options.comparator); @@ -3430,9 +3436,15 @@ TEST_F(PlainTableTest, Crc32FileChecksum) { int_tbl_prop_collector_factories; std::string column_family_name; int unknown_level = -1; + + FileChecksumGenContext gen_context; + gen_context.file_name = "db/tmp"; + std::unique_ptr checksum_crc32_gen1 = + options.file_checksum_gen_factory->CreateFileChecksumGenerator( + gen_context); FileChecksumTestHelper f(true); f.CreateWriteableFile(); - f.SetFileChecksumFunc(options.sst_file_checksum_func.get()); + f.SetFileChecksumGenerator(checksum_crc32_gen1.release()); std::unique_ptr builder(factory.NewTableBuilder( TableBuilderOptions( @@ -3445,9 +3457,12 @@ TEST_F(PlainTableTest, Crc32FileChecksum) { f.AddKVtoKVMap(1000); f.WriteKVAndFlushTable(); ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); + + std::unique_ptr checksum_crc32_gen2 = + options.file_checksum_gen_factory->CreateFileChecksumGenerator( + gen_context); std::string checksum; - ASSERT_OK( - f.CalculateFileChecksum(options.sst_file_checksum_func.get(), &checksum)); + ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum)); EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); } diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 0271c9027..bfcd118bf 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -48,7 +48,8 @@ namespace ROCKSDB_NAMESPACE { -class FileChecksumFuncCrc32c; +class FileChecksumGenCrc32c; +class FileChecksumGenCrc32cFactory; const std::string LDBCommand::ARG_ENV_URI = "env_uri"; const std::string LDBCommand::ARG_DB = "db"; diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc index cc370f4c8..db3f7e50a 100644 --- a/tools/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -126,33 +126,31 @@ class FileChecksumTestHelper { return s; } std::unique_ptr scratch(new char[2048]); - bool first_read = true; Slice result; - FileChecksumFunc* file_checksum_func = - options_.sst_file_checksum_func.get(); - if (file_checksum_func == nullptr) { + FileChecksumGenFactory* file_checksum_gen_factory = + options_.file_checksum_gen_factory.get(); + if (file_checksum_gen_factory == nullptr) { cur_checksum = kUnknownFileChecksum; checksum_func_name = kUnknownFileChecksumFuncName; } else { - checksum_func_name = file_checksum_func->Name(); + FileChecksumGenContext gen_context; + gen_context.file_name = file_meta.name; + std::unique_ptr file_checksum_gen = + file_checksum_gen_factory->CreateFileChecksumGenerator(gen_context); + checksum_func_name = file_checksum_gen->Name(); s = file_reader->Read(2048, &result, scratch.get()); if (!s.ok()) { return s; } while (result.size() != 0) { - if (first_read) { - first_read = false; - cur_checksum = - file_checksum_func->Value(scratch.get(), result.size()); - } else { - cur_checksum = file_checksum_func->Extend(cur_checksum, scratch.get(), - result.size()); - } + file_checksum_gen->Update(scratch.get(), result.size()); s = file_reader->Read(2048, &result, scratch.get()); if (!s.ok()) { return s; } } + file_checksum_gen->Finalize(); + cur_checksum = file_checksum_gen->GetChecksum(); } std::string stored_checksum = file_meta.file_checksum; @@ -346,8 +344,9 @@ TEST_F(LdbCmdTest, DumpFileChecksumCRC32) { Options opts; opts.env = env.get(); opts.create_if_missing = true; - opts.sst_file_checksum_func = - std::shared_ptr(CreateFileChecksumFuncCrc32c()); + FileChecksumGenCrc32cFactory* file_checksum_gen_factory = + new FileChecksumGenCrc32cFactory(); + opts.file_checksum_gen_factory.reset(file_checksum_gen_factory); DB* db = nullptr; std::string dbname = test::TmpDir(); diff --git a/util/file_checksum_helper.cc b/util/file_checksum_helper.cc index 51dcf6a82..5a8b6ded9 100644 --- a/util/file_checksum_helper.cc +++ b/util/file_checksum_helper.cc @@ -77,9 +77,4 @@ FileChecksumList* NewFileChecksumList() { return checksum_list; } -FileChecksumFunc* CreateFileChecksumFuncCrc32c() { - FileChecksumFunc* file_checksum_crc32c = new FileChecksumFuncCrc32c(); - return file_checksum_crc32c; -} - } // namespace ROCKSDB_NAMESPACE diff --git a/util/file_checksum_helper.h b/util/file_checksum_helper.h index 7ad9ea732..f83b80912 100644 --- a/util/file_checksum_helper.h +++ b/util/file_checksum_helper.h @@ -16,24 +16,19 @@ namespace ROCKSDB_NAMESPACE { // This is the class to generate the file checksum based on Crc32. It // will be used as the default checksum method for SST file checksum -class FileChecksumFuncCrc32c : public FileChecksumFunc { +class FileChecksumGenCrc32c : public FileChecksumGenerator { public: - std::string Extend(const std::string& init_checksum, const char* data, - size_t n) override { - assert(data != nullptr); - uint32_t checksum_value = StringToUint32(init_checksum); - return Uint32ToString(crc32c::Extend(checksum_value, data, n)); + FileChecksumGenCrc32c(const FileChecksumGenContext& /*context*/) { + checksum_ = 0; } - std::string Value(const char* data, size_t n) override { - assert(data != nullptr); - return Uint32ToString(crc32c::Value(data, n)); + void Update(const char* data, size_t n) override { + checksum_ = crc32c::Extend(checksum_, data, n); } - std::string ProcessChecksum(const std::string& checksum) override { - uint32_t checksum_value = StringToUint32(checksum); - return Uint32ToString(crc32c::Mask(checksum_value)); - } + void Finalize() override { checksum_str_ = Uint32ToString(checksum_); } + + std::string GetChecksum() const override { return checksum_str_; } const char* Name() const override { return "FileChecksumCrc32c"; } @@ -84,6 +79,21 @@ class FileChecksumFuncCrc32c : public FileChecksumFunc { } return v; } + + private: + uint32_t checksum_; + std::string checksum_str_; +}; + +class FileChecksumGenCrc32cFactory : public FileChecksumGenFactory { + public: + std::unique_ptr CreateFileChecksumGenerator( + const FileChecksumGenContext& context) override { + return std::unique_ptr( + new FileChecksumGenCrc32c(context)); + } + + const char* Name() const override { return "FileChecksumGenCrc32cFactory"; } }; // The default implementaion of FileChecksumList