Use FileChecksumGenFactory for SST file checksum (#6600)

Summary:
In the current implementation, the SST file checksum is calculated by a shared checksum function object, which makes some checksum functions, such as SHA1, hard to apply here. In this implementation, each SST file has its own checksum generator object, created by FileChecksumGenFactory. Users need to implement their own FileChecksumGenerator and FileChecksumGenFactory to plug in their checksum calculation method.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6600

Test Plan: tested with make asan_check

Reviewed By: riversand963

Differential Revision: D20717670

Pulled By: zhichao-cao

fbshipit-source-id: 2a74c1c280ac11a07a1980185b43b671acaa71c6
main
Zhichao Cao 5 years ago committed by Facebook GitHub Bot
parent ee50b8d499
commit e8d332d97e
  1. 2
      HISTORY.md
  2. 11
      db/builder.cc
  3. 13
      db/compaction/compaction_job.cc
  4. 27
      file/writable_file_writer.cc
  5. 24
      file/writable_file_writer.h
  6. 44
      include/rocksdb/file_checksum.h
  7. 10
      include/rocksdb/options.h
  8. 2
      options/cf_options.cc
  9. 2
      options/cf_options.h
  10. 8
      options/db_options.cc
  11. 2
      options/db_options.h
  12. 3
      options/options_helper.cc
  13. 4
      options/options_settable_test.cc
  14. 11
      table/block_based/block_based_table_builder.cc
  15. 5
      table/block_based/block_based_table_builder.h
  16. 12
      table/cuckoo/cuckoo_table_builder.cc
  17. 5
      table/cuckoo/cuckoo_table_builder.h
  18. 3
      table/mock_table.h
  19. 12
      table/plain/plain_table_builder.cc
  20. 5
      table/plain/plain_table_builder.h
  21. 2
      table/table_builder.h
  22. 65
      table/table_test.cc
  23. 3
      tools/ldb_cmd.cc
  24. 29
      tools/ldb_cmd_test.cc
  25. 5
      util/file_checksum_helper.cc
  26. 36
      util/file_checksum_helper.h

@ -5,6 +5,8 @@
* Updated default format_version in BlockBasedTableOptions from 2 to 4. SST files generated with the new default can be read by RocksDB versions 5.16 and newer, and use more efficient encoding of keys in index blocks. * Updated default format_version in BlockBasedTableOptions from 2 to 4. SST files generated with the new default can be read by RocksDB versions 5.16 and newer, and use more efficient encoding of keys in index blocks.
* `Cache::Insert` now expects clients to pass in function objects implementing the `Cache::Deleter` interface as deleters instead of plain function pointers. * `Cache::Insert` now expects clients to pass in function objects implementing the `Cache::Deleter` interface as deleters instead of plain function pointers.
* A new parameter `CreateBackupOptions` is added to both `BackupEngine::CreateNewBackup` and `BackupEngine::CreateNewBackupWithMetadata`, you can decrease CPU priority of `BackupEngine`'s background threads by setting `decrease_background_thread_cpu_priority` and `background_thread_cpu_priority` in `CreateBackupOptions`. * A new parameter `CreateBackupOptions` is added to both `BackupEngine::CreateNewBackup` and `BackupEngine::CreateNewBackupWithMetadata`, you can decrease CPU priority of `BackupEngine`'s background threads by setting `decrease_background_thread_cpu_priority` and `background_thread_cpu_priority` in `CreateBackupOptions`.
* Updated the public API of SST file checksum. Introduce the FileChecksumGenFactory to create the FileChecksumGenerator for each SST file, such that the FileChecksumGenerator is not shared and it can be more general for checksum implementations. Changed the FileChecksumGenerator interface from Value, Extend, and GetChecksum to Update, Finalize, and GetChecksum. Temporary data should be maintained by the FileChecksumGenerator object itself and finally it can return the checksum string.
* Updated the public API of SST file checksum. Introduce the FileChecksumGenFactory to create the FileChecksumGenerator for each SST file, such that the FileChecksumGenerator is not shared and it can be more general for checksum implementations. Changed the FileChecksumGenerator interface from Value, Extend, and GetChecksum to Update, Finalize, and GetChecksum. Finalize should be only called once after all data is processed to generate the final checksum. Temporary data should be maintained by the FileChecksumGenerator object itself and finally it can return the checksum string.
### Bug Fixes ### Bug Fixes
* Fix a bug where range tombstone blocks in ingested files were cached incorrectly during ingestion. If range tombstones were read from those incorrectly cached blocks, the keys they covered would be exposed. * Fix a bug where range tombstone blocks in ingested files were cached incorrectly during ingestion. If range tombstones were read from those incorrectly cached blocks, the keys they covered would be exposed.

@ -134,7 +134,7 @@ Status BuildTable(
file_writer.reset(new WritableFileWriter( file_writer.reset(new WritableFileWriter(
std::move(file), fname, file_options, env, ioptions.statistics, std::move(file), fname, file_options, env, ioptions.statistics,
ioptions.listeners, ioptions.sst_file_checksum_func)); ioptions.listeners, ioptions.file_checksum_gen_factory));
builder = NewTableBuilder( builder = NewTableBuilder(
ioptions, mutable_cf_options, internal_comparator, ioptions, mutable_cf_options, internal_comparator,
@ -203,9 +203,6 @@ Status BuildTable(
if (table_properties) { if (table_properties) {
*table_properties = tp; *table_properties = tp;
} }
// Add the checksum information to file metadata.
meta->file_checksum = builder->GetFileChecksum();
meta->file_checksum_func_name = builder->GetFileChecksumFuncName();
} }
delete builder; delete builder;
@ -217,6 +214,12 @@ Status BuildTable(
if (io_status->ok() && !empty) { if (io_status->ok() && !empty) {
*io_status = file_writer->Close(); *io_status = file_writer->Close();
} }
if (io_status->ok() && !empty) {
// Add the checksum information to file metadata.
meta->file_checksum = file_writer->GetFileChecksum();
meta->file_checksum_func_name = file_writer->GetFileChecksumFuncName();
}
if (!io_status->ok()) { if (!io_status->ok()) {
s = *io_status; s = *io_status;
} }

@ -1309,11 +1309,6 @@ Status CompactionJob::FinishCompactionOutputFile(
} }
const uint64_t current_bytes = sub_compact->builder->FileSize(); const uint64_t current_bytes = sub_compact->builder->FileSize();
if (s.ok()) { if (s.ok()) {
// Add the checksum information to file metadata.
meta->file_checksum = sub_compact->builder->GetFileChecksum();
meta->file_checksum_func_name =
sub_compact->builder->GetFileChecksumFuncName();
meta->fd.file_size = current_bytes; meta->fd.file_size = current_bytes;
} }
sub_compact->current_output()->finished = true; sub_compact->current_output()->finished = true;
@ -1328,6 +1323,12 @@ Status CompactionJob::FinishCompactionOutputFile(
if (io_s.ok()) { if (io_s.ok()) {
io_s = sub_compact->outfile->Close(); io_s = sub_compact->outfile->Close();
} }
if (io_s.ok()) {
// Add the checksum information to file metadata.
meta->file_checksum = sub_compact->outfile->GetFileChecksum();
meta->file_checksum_func_name =
sub_compact->outfile->GetFileChecksumFuncName();
}
if (!io_s.ok()) { if (!io_s.ok()) {
io_status_ = io_s; io_status_ = io_s;
s = io_s; s = io_s;
@ -1532,7 +1533,7 @@ Status CompactionJob::OpenCompactionOutputFile(
sub_compact->outfile.reset( sub_compact->outfile.reset(
new WritableFileWriter(std::move(writable_file), fname, file_options_, new WritableFileWriter(std::move(writable_file), fname, file_options_,
env_, db_options_.statistics.get(), listeners, env_, db_options_.statistics.get(), listeners,
db_options_.sst_file_checksum_func.get())); db_options_.file_checksum_gen_factory.get()));
// If the Column family flag is to only optimize filters for hits, // If the Column family flag is to only optimize filters for hits,
// we can skip creating filters if this is the bottommost_level where // we can skip creating filters if this is the bottommost_level where

@ -155,6 +155,11 @@ IOStatus WritableFileWriter::Close() {
writable_file_.reset(); writable_file_.reset();
TEST_KILL_RANDOM("WritableFileWriter::Close:1", rocksdb_kill_odds); TEST_KILL_RANDOM("WritableFileWriter::Close:1", rocksdb_kill_odds);
if (s.ok() && checksum_generator_ != nullptr && !checksum_finalized_) {
checksum_generator_->Finalize();
checksum_finalized_ = true;
}
return s; return s;
} }
@ -216,9 +221,17 @@ IOStatus WritableFileWriter::Flush() {
return s; return s;
} }
// Returns the checksum string reported by this writer's checksum generator,
// or kUnknownFileChecksum when no generator is attached to the writer.
std::string WritableFileWriter::GetFileChecksum() {
  if (checksum_generator_ == nullptr) {
    return kUnknownFileChecksum;
  }
  return checksum_generator_->GetChecksum();
}
const char* WritableFileWriter::GetFileChecksumFuncName() const { const char* WritableFileWriter::GetFileChecksumFuncName() const {
if (checksum_func_ != nullptr) { if (checksum_generator_ != nullptr) {
return checksum_func_->Name(); return checksum_generator_->Name();
} else { } else {
return kUnknownFileChecksumFuncName.c_str(); return kUnknownFileChecksumFuncName.c_str();
} }
@ -332,14 +345,8 @@ IOStatus WritableFileWriter::WriteBuffered(const char* data, size_t size) {
} }
void WritableFileWriter::CalculateFileChecksum(const Slice& data) { void WritableFileWriter::CalculateFileChecksum(const Slice& data) {
if (checksum_func_ != nullptr) { if (checksum_generator_ != nullptr) {
if (is_first_checksum_) { checksum_generator_->Update(data.data(), data.size());
file_checksum_ = checksum_func_->Value(data.data(), data.size());
is_first_checksum_ = false;
} else {
file_checksum_ =
checksum_func_->Extend(file_checksum_, data.data(), data.size());
}
} }
} }

@ -72,9 +72,8 @@ class WritableFileWriter {
RateLimiter* rate_limiter_; RateLimiter* rate_limiter_;
Statistics* stats_; Statistics* stats_;
std::vector<std::shared_ptr<EventListener>> listeners_; std::vector<std::shared_ptr<EventListener>> listeners_;
FileChecksumFunc* checksum_func_; std::unique_ptr<FileChecksumGenerator> checksum_generator_;
std::string file_checksum_ = kUnknownFileChecksum; bool checksum_finalized_;
bool is_first_checksum_ = true;
public: public:
WritableFileWriter( WritableFileWriter(
@ -82,7 +81,7 @@ class WritableFileWriter {
const FileOptions& options, Env* env = nullptr, const FileOptions& options, Env* env = nullptr,
Statistics* stats = nullptr, Statistics* stats = nullptr,
const std::vector<std::shared_ptr<EventListener>>& listeners = {}, const std::vector<std::shared_ptr<EventListener>>& listeners = {},
FileChecksumFunc* checksum_func = nullptr) FileChecksumGenFactory* file_checksum_gen_factory = nullptr)
: writable_file_(std::move(file)), : writable_file_(std::move(file)),
file_name_(_file_name), file_name_(_file_name),
env_(env), env_(env),
@ -98,7 +97,8 @@ class WritableFileWriter {
rate_limiter_(options.rate_limiter), rate_limiter_(options.rate_limiter),
stats_(stats), stats_(stats),
listeners_(), listeners_(),
checksum_func_(checksum_func) { checksum_generator_(nullptr),
checksum_finalized_(false) {
TEST_SYNC_POINT_CALLBACK("WritableFileWriter::WritableFileWriter:0", TEST_SYNC_POINT_CALLBACK("WritableFileWriter::WritableFileWriter:0",
reinterpret_cast<void*>(max_buffer_size_)); reinterpret_cast<void*>(max_buffer_size_));
buf_.Alignment(writable_file_->GetRequiredBufferAlignment()); buf_.Alignment(writable_file_->GetRequiredBufferAlignment());
@ -113,6 +113,13 @@ class WritableFileWriter {
#else // !ROCKSDB_LITE #else // !ROCKSDB_LITE
(void)listeners; (void)listeners;
#endif #endif
if (file_checksum_gen_factory != nullptr) {
FileChecksumGenContext checksum_gen_context;
checksum_gen_context.file_name = _file_name;
checksum_generator_ =
file_checksum_gen_factory->CreateFileChecksumGenerator(
checksum_gen_context);
}
} }
WritableFileWriter(const WritableFileWriter&) = delete; WritableFileWriter(const WritableFileWriter&) = delete;
@ -150,11 +157,12 @@ class WritableFileWriter {
bool TEST_BufferIsEmpty() { return buf_.CurrentSize() == 0; } bool TEST_BufferIsEmpty() { return buf_.CurrentSize() == 0; }
void TEST_SetFileChecksumFunc(FileChecksumFunc* checksum_func) { void TEST_SetFileChecksumGenerator(
checksum_func_ = checksum_func; FileChecksumGenerator* checksum_generator) {
checksum_generator_.reset(checksum_generator);
} }
const std::string& GetFileChecksum() const { return file_checksum_; } std::string GetFileChecksum();
const char* GetFileChecksumFuncName() const; const char* GetFileChecksumFuncName() const;

@ -18,27 +18,44 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
// FileChecksumFunc is the function class to generates the checksum value struct FileChecksumGenContext {
std::string file_name;
};
// FileChecksumGenerator is the class that generates the checksum value
// for each file when the file is written to the file system. // for each file when the file is written to the file system.
class FileChecksumFunc { class FileChecksumGenerator {
public: public:
virtual ~FileChecksumFunc() {} virtual ~FileChecksumGenerator() {}
// Return the checksum of concat (A, data[0,n-1]) where init_checksum is the
// returned value of some string A. It is used to maintain the checksum of a // Update the current result after process the data. For different checksum
// stream of data // functions, the temporal results may be stored and used in Update to
virtual std::string Extend(const std::string& init_checksum, const char* data, // include the new data.
size_t n) = 0; virtual void Update(const char* data, size_t n) = 0;
// Return the checksum value of data[0,n-1] // Generate the final results if no further new data will be updated.
virtual std::string Value(const char* data, size_t n) = 0; virtual void Finalize() = 0;
// Return a processed value of the checksum for store in somewhere // Get the checksum
virtual std::string ProcessChecksum(const std::string& checksum) = 0; virtual std::string GetChecksum() const = 0;
// Returns a name that identifies the current file checksum function. // Returns a name that identifies the current file checksum function.
virtual const char* Name() const = 0; virtual const char* Name() const = 0;
}; };
// Factory interface used to create the FileChecksumGenerator object for each
// SST file.
class FileChecksumGenFactory {
 public:
  virtual ~FileChecksumGenFactory() = default;

  // Creates a new FileChecksumGenerator. `context` is the
  // FileChecksumGenContext supplied by the caller for the file in question.
  virtual std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
      const FileChecksumGenContext& context) = 0;

  // Returns the name of this FileChecksumGenFactory.
  virtual const char* Name() const = 0;
};
// FileChecksumList stores the checksum information of a list of files (e.g., // FileChecksumList stores the checksum information of a list of files (e.g.,
// SST files). The FileChecksumLIst can be used to store the checksum // SST files). The FileChecksumLIst can be used to store the checksum
// information of all SST file getting from the MANIFEST, which are // information of all SST file getting from the MANIFEST, which are
@ -80,7 +97,4 @@ class FileChecksumList {
// Create a new file checksum list. // Create a new file checksum list.
extern FileChecksumList* NewFileChecksumList(); extern FileChecksumList* NewFileChecksumList();
// Create a Crc32c based file checksum function
extern FileChecksumFunc* CreateFileChecksumFuncCrc32c();
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -1126,12 +1126,14 @@ struct DBOptions {
// Default: 0 // Default: 0
size_t log_readahead_size = 0; size_t log_readahead_size = 0;
// If user does NOT provide SST file checksum function, the SST file checksum // If user does NOT provide the checksum generator factory, the file checksum
// will NOT be used. The single checksum instance are shared by options and // will NOT be used. A new file checksum generator object will be created
// file writers. Make sure the algorithm is thread safe. // when a SST file is created. Therefore, each created FileChecksumGenerator
// will only be used from a single thread and so does not need to be
// thread-safe.
// //
// Default: nullptr // Default: nullptr
std::shared_ptr<FileChecksumFunc> sst_file_checksum_func = nullptr; std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory = nullptr;
// By default, RocksDB recovery fails if any table file referenced in // By default, RocksDB recovery fails if any table file referenced in
// MANIFEST are missing after scanning the MANIFEST. // MANIFEST are missing after scanning the MANIFEST.

@ -78,7 +78,7 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options,
cf_options.memtable_insert_with_hint_prefix_extractor.get()), cf_options.memtable_insert_with_hint_prefix_extractor.get()),
cf_paths(cf_options.cf_paths), cf_paths(cf_options.cf_paths),
compaction_thread_limiter(cf_options.compaction_thread_limiter), compaction_thread_limiter(cf_options.compaction_thread_limiter),
sst_file_checksum_func(db_options.sst_file_checksum_func.get()) {} file_checksum_gen_factory(db_options.file_checksum_gen_factory.get()) {}
// Multiple two operands. If they overflow, return op1. // Multiple two operands. If they overflow, return op1.
uint64_t MultiplyCheckOverflow(uint64_t op1, double op2) { uint64_t MultiplyCheckOverflow(uint64_t op1, double op2) {

@ -126,7 +126,7 @@ struct ImmutableCFOptions {
std::shared_ptr<ConcurrentTaskLimiter> compaction_thread_limiter; std::shared_ptr<ConcurrentTaskLimiter> compaction_thread_limiter;
FileChecksumFunc* sst_file_checksum_func; FileChecksumGenFactory* file_checksum_gen_factory;
}; };
struct MutableCFOptions { struct MutableCFOptions {

@ -95,7 +95,7 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
persist_stats_to_disk(options.persist_stats_to_disk), persist_stats_to_disk(options.persist_stats_to_disk),
write_dbid_to_manifest(options.write_dbid_to_manifest), write_dbid_to_manifest(options.write_dbid_to_manifest),
log_readahead_size(options.log_readahead_size), log_readahead_size(options.log_readahead_size),
sst_file_checksum_func(options.sst_file_checksum_func), file_checksum_gen_factory(options.file_checksum_gen_factory),
best_efforts_recovery(options.best_efforts_recovery) { best_efforts_recovery(options.best_efforts_recovery) {
} }
@ -247,9 +247,9 @@ void ImmutableDBOptions::Dump(Logger* log) const {
ROCKS_LOG_HEADER( ROCKS_LOG_HEADER(
log, " Options.log_readahead_size: %" ROCKSDB_PRIszt, log, " Options.log_readahead_size: %" ROCKSDB_PRIszt,
log_readahead_size); log_readahead_size);
ROCKS_LOG_HEADER(log, " Options.sst_file_checksum_func: %s", ROCKS_LOG_HEADER(log, " Options.file_checksum_gen_factory: %s",
sst_file_checksum_func file_checksum_gen_factory
? sst_file_checksum_func->Name() ? file_checksum_gen_factory->Name()
: kUnknownFileChecksumFuncName.c_str()); : kUnknownFileChecksumFuncName.c_str());
ROCKS_LOG_HEADER(log, " Options.best_efforts_recovery: %d", ROCKS_LOG_HEADER(log, " Options.best_efforts_recovery: %d",
static_cast<int>(best_efforts_recovery)); static_cast<int>(best_efforts_recovery));

@ -87,7 +87,7 @@ struct ImmutableDBOptions {
bool persist_stats_to_disk; bool persist_stats_to_disk;
bool write_dbid_to_manifest; bool write_dbid_to_manifest;
size_t log_readahead_size; size_t log_readahead_size;
std::shared_ptr<FileChecksumFunc> sst_file_checksum_func; std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory;
bool best_efforts_recovery; bool best_efforts_recovery;
}; };

@ -143,7 +143,8 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
options.avoid_unnecessary_blocking_io = options.avoid_unnecessary_blocking_io =
immutable_db_options.avoid_unnecessary_blocking_io; immutable_db_options.avoid_unnecessary_blocking_io;
options.log_readahead_size = immutable_db_options.log_readahead_size; options.log_readahead_size = immutable_db_options.log_readahead_size;
options.sst_file_checksum_func = immutable_db_options.sst_file_checksum_func; options.file_checksum_gen_factory =
immutable_db_options.file_checksum_gen_factory;
options.best_efforts_recovery = immutable_db_options.best_efforts_recovery; options.best_efforts_recovery = immutable_db_options.best_efforts_recovery;
return options; return options;
} }

@ -197,8 +197,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
sizeof(std::vector<std::shared_ptr<EventListener>>)}, sizeof(std::vector<std::shared_ptr<EventListener>>)},
{offsetof(struct DBOptions, row_cache), sizeof(std::shared_ptr<Cache>)}, {offsetof(struct DBOptions, row_cache), sizeof(std::shared_ptr<Cache>)},
{offsetof(struct DBOptions, wal_filter), sizeof(const WalFilter*)}, {offsetof(struct DBOptions, wal_filter), sizeof(const WalFilter*)},
{offsetof(struct DBOptions, sst_file_checksum_func), {offsetof(struct DBOptions, file_checksum_gen_factory),
sizeof(std::shared_ptr<FileChecksumFunc>)}, sizeof(std::shared_ptr<FileChecksumGenFactory>)},
}; };
char* options_ptr = new char[sizeof(DBOptions)]; char* options_ptr = new char[sizeof(DBOptions)];

@ -1167,9 +1167,6 @@ Status BlockBasedTableBuilder::Finish() {
if (ok()) { if (ok()) {
WriteFooter(metaindex_block_handle, index_block_handle); WriteFooter(metaindex_block_handle, index_block_handle);
} }
if (r->file != nullptr) {
file_checksum_ = r->file->GetFileChecksum();
}
r->state = Rep::State::kClosed; r->state = Rep::State::kClosed;
return r->status; return r->status;
} }
@ -1205,6 +1202,14 @@ TableProperties BlockBasedTableBuilder::GetTableProperties() const {
return ret; return ret;
} }
// Forwards to the underlying file writer's GetFileChecksum(); yields
// kUnknownFileChecksum when the builder has no file writer.
std::string BlockBasedTableBuilder::GetFileChecksum() const {
  if (rep_->file == nullptr) {
    return kUnknownFileChecksum;
  }
  return rep_->file->GetFileChecksum();
}
const char* BlockBasedTableBuilder::GetFileChecksumFuncName() const { const char* BlockBasedTableBuilder::GetFileChecksumFuncName() const {
if (rep_->file != nullptr) { if (rep_->file != nullptr) {
return rep_->file->GetFileChecksumFuncName(); return rep_->file->GetFileChecksumFuncName();

@ -96,7 +96,7 @@ class BlockBasedTableBuilder : public TableBuilder {
TableProperties GetTableProperties() const override; TableProperties GetTableProperties() const override;
// Get file checksum // Get file checksum
const std::string& GetFileChecksum() const override { return file_checksum_; } std::string GetFileChecksum() const override;
// Get file checksum function name // Get file checksum function name
const char* GetFileChecksumFuncName() const override; const char* GetFileChecksumFuncName() const override;
@ -146,9 +146,6 @@ class BlockBasedTableBuilder : public TableBuilder {
// Some compression libraries fail when the raw size is bigger than int. If // Some compression libraries fail when the raw size is bigger than int. If
// uncompressed size is bigger than kCompressionSizeLimit, don't compress it // uncompressed size is bigger than kCompressionSizeLimit, don't compress it
const uint64_t kCompressionSizeLimit = std::numeric_limits<int>::max(); const uint64_t kCompressionSizeLimit = std::numeric_limits<int>::max();
// Store file checksum. If checksum is disabled, its value is "0".
std::string file_checksum_ = kUnknownFileChecksum;
}; };
Slice CompressBlock(const Slice& raw, const CompressionInfo& info, Slice CompressBlock(const Slice& raw, const CompressionInfo& info,

@ -390,10 +390,6 @@ Status CuckooTableBuilder::Finish() {
std::string footer_encoding; std::string footer_encoding;
footer.EncodeTo(&footer_encoding); footer.EncodeTo(&footer_encoding);
io_status_ = file_->Append(footer_encoding); io_status_ = file_->Append(footer_encoding);
if (file_ != nullptr) {
file_checksum_ = file_->GetFileChecksum();
}
status_ = io_status_; status_ = io_status_;
return status_; return status_;
} }
@ -520,6 +516,14 @@ bool CuckooTableBuilder::MakeSpaceForKey(
return null_found; return null_found;
} }
// Forwards to file_->GetFileChecksum(); yields kUnknownFileChecksum when
// file_ is null.
std::string CuckooTableBuilder::GetFileChecksum() const {
  if (file_ == nullptr) {
    return kUnknownFileChecksum;
  }
  return file_->GetFileChecksum();
}
const char* CuckooTableBuilder::GetFileChecksumFuncName() const { const char* CuckooTableBuilder::GetFileChecksumFuncName() const {
if (file_ != nullptr) { if (file_ != nullptr) {
return file_->GetFileChecksumFuncName(); return file_->GetFileChecksumFuncName();

@ -71,7 +71,7 @@ class CuckooTableBuilder: public TableBuilder {
TableProperties GetTableProperties() const override { return properties_; } TableProperties GetTableProperties() const override { return properties_; }
// Get file checksum // Get file checksum
const std::string& GetFileChecksum() const override { return file_checksum_; } std::string GetFileChecksum() const override;
// Get file checksum function name // Get file checksum function name
const char* GetFileChecksumFuncName() const override; const char* GetFileChecksumFuncName() const override;
@ -130,9 +130,6 @@ class CuckooTableBuilder: public TableBuilder {
std::string smallest_user_key_ = ""; std::string smallest_user_key_ = "";
bool closed_; // Either Finish() or Abandon() has been called. bool closed_; // Either Finish() or Abandon() has been called.
// Store file checksum. If checksum is disabled, its value is "0"
std::string file_checksum_ = kUnknownFileChecksum;
}; };
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -159,7 +159,7 @@ class MockTableBuilder : public TableBuilder {
} }
// Get file checksum // Get file checksum
const std::string& GetFileChecksum() const override { return file_checksum_; } std::string GetFileChecksum() const override { return kUnknownFileChecksum; }
// Get file checksum function name // Get file checksum function name
const char* GetFileChecksumFuncName() const override { const char* GetFileChecksumFuncName() const override {
return kUnknownFileChecksumFuncName.c_str(); return kUnknownFileChecksumFuncName.c_str();
@ -169,7 +169,6 @@ class MockTableBuilder : public TableBuilder {
uint32_t id_; uint32_t id_;
MockTableFileSystem* file_system_; MockTableFileSystem* file_system_;
stl_wrappers::KVMap table_; stl_wrappers::KVMap table_;
std::string file_checksum_ = kUnknownFileChecksum;
}; };
class MockTableFactory : public TableFactory { class MockTableFactory : public TableFactory {

@ -285,10 +285,6 @@ Status PlainTableBuilder::Finish() {
if (io_status_.ok()) { if (io_status_.ok()) {
offset_ += footer_encoding.size(); offset_ += footer_encoding.size();
} }
if (file_ != nullptr) {
file_checksum_ = file_->GetFileChecksum();
}
status_ = io_status_; status_ = io_status_;
return status_; return status_;
} }
@ -305,6 +301,14 @@ uint64_t PlainTableBuilder::FileSize() const {
return offset_; return offset_;
} }
// Forwards to file_->GetFileChecksum(); yields kUnknownFileChecksum when
// file_ is null.
std::string PlainTableBuilder::GetFileChecksum() const {
  if (file_ == nullptr) {
    return kUnknownFileChecksum;
  }
  return file_->GetFileChecksum();
}
const char* PlainTableBuilder::GetFileChecksumFuncName() const { const char* PlainTableBuilder::GetFileChecksumFuncName() const {
if (file_ != nullptr) { if (file_ != nullptr) {
return file_->GetFileChecksumFuncName(); return file_->GetFileChecksumFuncName();

@ -88,7 +88,7 @@ class PlainTableBuilder: public TableBuilder {
bool SaveIndexInFile() const { return store_index_in_file_; } bool SaveIndexInFile() const { return store_index_in_file_; }
// Get file checksum // Get file checksum
const std::string& GetFileChecksum() const override { return file_checksum_; } std::string GetFileChecksum() const override;
// Get file checksum function name // Get file checksum function name
const char* GetFileChecksumFuncName() const override; const char* GetFileChecksumFuncName() const override;
@ -119,9 +119,6 @@ class PlainTableBuilder: public TableBuilder {
const SliceTransform* prefix_extractor_; const SliceTransform* prefix_extractor_;
// Store file checksum. If checksum is disabled, its value is "0".
std::string file_checksum_ = kUnknownFileChecksum;
Slice GetPrefix(const Slice& target) const { Slice GetPrefix(const Slice& target) const {
assert(target.size() >= 8); // target is internal key assert(target.size() >= 8); // target is internal key
return GetPrefixFromUserKey(GetUserKey(target)); return GetPrefixFromUserKey(GetUserKey(target));

@ -164,7 +164,7 @@ class TableBuilder {
virtual TableProperties GetTableProperties() const = 0; virtual TableProperties GetTableProperties() const = 0;
// Return file checksum // Return file checksum
virtual const std::string& GetFileChecksum() const = 0; virtual std::string GetFileChecksum() const = 0;
// Return file checksum function name // Return file checksum function name
virtual const char* GetFileChecksumFuncName() const = 0; virtual const char* GetFileChecksumFuncName() const = 0;

@ -1187,9 +1187,9 @@ class FileChecksumTestHelper {
file_writer_.reset(test::GetWritableFileWriter(sink_, "" /* don't care */)); file_writer_.reset(test::GetWritableFileWriter(sink_, "" /* don't care */));
} }
void SetFileChecksumFunc(FileChecksumFunc* checksum_func) { void SetFileChecksumGenerator(FileChecksumGenerator* checksum_generator) {
if (file_writer_ != nullptr) { if (file_writer_ != nullptr) {
file_writer_->TEST_SetFileChecksumFunc(checksum_func); file_writer_->TEST_SetFileChecksumGenerator(checksum_generator);
} }
} }
@ -1230,15 +1230,18 @@ class FileChecksumTestHelper {
return s; return s;
} }
std::string GetFileChecksum() { return table_builder_->GetFileChecksum(); } std::string GetFileChecksum() {
file_writer_->Close();
return table_builder_->GetFileChecksum();
}
const char* GetFileChecksumFuncName() { const char* GetFileChecksumFuncName() {
return table_builder_->GetFileChecksumFuncName(); return table_builder_->GetFileChecksumFuncName();
} }
Status CalculateFileChecksum(FileChecksumFunc* file_checksum_func, Status CalculateFileChecksum(FileChecksumGenerator* file_checksum_generator,
std::string* checksum) { std::string* checksum) {
assert(file_checksum_func != nullptr); assert(file_checksum_generator != nullptr);
cur_uniq_id_ = checksum_uniq_id_++; cur_uniq_id_ = checksum_uniq_id_++;
test::StringSink* ss_rw = test::StringSink* ss_rw =
ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter( ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter(
@ -1248,8 +1251,6 @@ class FileChecksumTestHelper {
std::unique_ptr<char[]> scratch(new char[2048]); std::unique_ptr<char[]> scratch(new char[2048]);
Slice result; Slice result;
uint64_t offset = 0; uint64_t offset = 0;
std::string tmp_checksum;
bool first_read = true;
Status s; Status s;
s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr, s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr,
false); false);
@ -1257,13 +1258,7 @@ class FileChecksumTestHelper {
return s; return s;
} }
while (result.size() != 0) { while (result.size() != 0) {
if (first_read) { file_checksum_generator->Update(scratch.get(), result.size());
first_read = false;
tmp_checksum = file_checksum_func->Value(scratch.get(), result.size());
} else {
tmp_checksum = file_checksum_func->Extend(tmp_checksum, scratch.get(),
result.size());
}
offset += static_cast<uint64_t>(result.size()); offset += static_cast<uint64_t>(result.size());
s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr, s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr,
false); false);
@ -1272,7 +1267,8 @@ class FileChecksumTestHelper {
} }
} }
EXPECT_EQ(offset, static_cast<uint64_t>(table_builder_->FileSize())); EXPECT_EQ(offset, static_cast<uint64_t>(table_builder_->FileSize()));
*checksum = tmp_checksum; file_checksum_generator->Finalize();
*checksum = file_checksum_generator->GetChecksum();
return Status::OK(); return Status::OK();
} }
@ -3279,9 +3275,10 @@ TEST_P(BlockBasedTableTest, NoFileChecksum) {
} }
TEST_P(BlockBasedTableTest, Crc32FileChecksum) { TEST_P(BlockBasedTableTest, Crc32FileChecksum) {
FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
new FileChecksumGenCrc32cFactory();
Options options; Options options;
options.sst_file_checksum_func = options.file_checksum_gen_factory.reset(file_checksum_gen_factory);
std::shared_ptr<FileChecksumFunc>(CreateFileChecksumFuncCrc32c());
ImmutableCFOptions ioptions(options); ImmutableCFOptions ioptions(options);
MutableCFOptions moptions(options); MutableCFOptions moptions(options);
BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
@ -3300,9 +3297,14 @@ TEST_P(BlockBasedTableTest, Crc32FileChecksum) {
} }
std::string column_family_name; std::string column_family_name;
FileChecksumGenContext gen_context;
gen_context.file_name = "db/tmp";
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen1 =
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
gen_context);
FileChecksumTestHelper f(true); FileChecksumTestHelper f(true);
f.CreateWriteableFile(); f.CreateWriteableFile();
f.SetFileChecksumFunc(options.sst_file_checksum_func.get()); f.SetFileChecksumGenerator(checksum_crc32_gen1.release());
std::unique_ptr<TableBuilder> builder; std::unique_ptr<TableBuilder> builder;
builder.reset(ioptions.table_factory->NewTableBuilder( builder.reset(ioptions.table_factory->NewTableBuilder(
TableBuilderOptions(ioptions, moptions, *comparator, TableBuilderOptions(ioptions, moptions, *comparator,
@ -3316,9 +3318,12 @@ TEST_P(BlockBasedTableTest, Crc32FileChecksum) {
f.AddKVtoKVMap(1000); f.AddKVtoKVMap(1000);
f.WriteKVAndFlushTable(); f.WriteKVAndFlushTable();
ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen2 =
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
gen_context);
std::string checksum; std::string checksum;
ASSERT_OK( ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum));
f.CalculateFileChecksum(options.sst_file_checksum_func.get(), &checksum));
ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
} }
@ -3420,9 +3425,10 @@ TEST_F(PlainTableTest, Crc32FileChecksum) {
plain_table_options.hash_table_ratio = 0; plain_table_options.hash_table_ratio = 0;
PlainTableFactory factory(plain_table_options); PlainTableFactory factory(plain_table_options);
FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
new FileChecksumGenCrc32cFactory();
Options options; Options options;
options.sst_file_checksum_func = options.file_checksum_gen_factory.reset(file_checksum_gen_factory);
std::shared_ptr<FileChecksumFunc>(CreateFileChecksumFuncCrc32c());
const ImmutableCFOptions ioptions(options); const ImmutableCFOptions ioptions(options);
const MutableCFOptions moptions(options); const MutableCFOptions moptions(options);
InternalKeyComparator ikc(options.comparator); InternalKeyComparator ikc(options.comparator);
@ -3430,9 +3436,15 @@ TEST_F(PlainTableTest, Crc32FileChecksum) {
int_tbl_prop_collector_factories; int_tbl_prop_collector_factories;
std::string column_family_name; std::string column_family_name;
int unknown_level = -1; int unknown_level = -1;
FileChecksumGenContext gen_context;
gen_context.file_name = "db/tmp";
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen1 =
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
gen_context);
FileChecksumTestHelper f(true); FileChecksumTestHelper f(true);
f.CreateWriteableFile(); f.CreateWriteableFile();
f.SetFileChecksumFunc(options.sst_file_checksum_func.get()); f.SetFileChecksumGenerator(checksum_crc32_gen1.release());
std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder( std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
TableBuilderOptions( TableBuilderOptions(
@ -3445,9 +3457,12 @@ TEST_F(PlainTableTest, Crc32FileChecksum) {
f.AddKVtoKVMap(1000); f.AddKVtoKVMap(1000);
f.WriteKVAndFlushTable(); f.WriteKVAndFlushTable();
ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen2 =
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
gen_context);
std::string checksum; std::string checksum;
ASSERT_OK( ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum));
f.CalculateFileChecksum(options.sst_file_checksum_func.get(), &checksum));
EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
} }

@ -48,7 +48,8 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class FileChecksumFuncCrc32c; class FileChecksumGenCrc32c;
class FileChecksumGenCrc32cFactory;
const std::string LDBCommand::ARG_ENV_URI = "env_uri"; const std::string LDBCommand::ARG_ENV_URI = "env_uri";
const std::string LDBCommand::ARG_DB = "db"; const std::string LDBCommand::ARG_DB = "db";

@ -126,33 +126,31 @@ class FileChecksumTestHelper {
return s; return s;
} }
std::unique_ptr<char[]> scratch(new char[2048]); std::unique_ptr<char[]> scratch(new char[2048]);
bool first_read = true;
Slice result; Slice result;
FileChecksumFunc* file_checksum_func = FileChecksumGenFactory* file_checksum_gen_factory =
options_.sst_file_checksum_func.get(); options_.file_checksum_gen_factory.get();
if (file_checksum_func == nullptr) { if (file_checksum_gen_factory == nullptr) {
cur_checksum = kUnknownFileChecksum; cur_checksum = kUnknownFileChecksum;
checksum_func_name = kUnknownFileChecksumFuncName; checksum_func_name = kUnknownFileChecksumFuncName;
} else { } else {
checksum_func_name = file_checksum_func->Name(); FileChecksumGenContext gen_context;
gen_context.file_name = file_meta.name;
std::unique_ptr<FileChecksumGenerator> file_checksum_gen =
file_checksum_gen_factory->CreateFileChecksumGenerator(gen_context);
checksum_func_name = file_checksum_gen->Name();
s = file_reader->Read(2048, &result, scratch.get()); s = file_reader->Read(2048, &result, scratch.get());
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
while (result.size() != 0) { while (result.size() != 0) {
if (first_read) { file_checksum_gen->Update(scratch.get(), result.size());
first_read = false;
cur_checksum =
file_checksum_func->Value(scratch.get(), result.size());
} else {
cur_checksum = file_checksum_func->Extend(cur_checksum, scratch.get(),
result.size());
}
s = file_reader->Read(2048, &result, scratch.get()); s = file_reader->Read(2048, &result, scratch.get());
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
} }
file_checksum_gen->Finalize();
cur_checksum = file_checksum_gen->GetChecksum();
} }
std::string stored_checksum = file_meta.file_checksum; std::string stored_checksum = file_meta.file_checksum;
@ -346,8 +344,9 @@ TEST_F(LdbCmdTest, DumpFileChecksumCRC32) {
Options opts; Options opts;
opts.env = env.get(); opts.env = env.get();
opts.create_if_missing = true; opts.create_if_missing = true;
opts.sst_file_checksum_func = FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
std::shared_ptr<FileChecksumFunc>(CreateFileChecksumFuncCrc32c()); new FileChecksumGenCrc32cFactory();
opts.file_checksum_gen_factory.reset(file_checksum_gen_factory);
DB* db = nullptr; DB* db = nullptr;
std::string dbname = test::TmpDir(); std::string dbname = test::TmpDir();

@ -77,9 +77,4 @@ FileChecksumList* NewFileChecksumList() {
return checksum_list; return checksum_list;
} }
FileChecksumFunc* CreateFileChecksumFuncCrc32c() {
FileChecksumFunc* file_checksum_crc32c = new FileChecksumFuncCrc32c();
return file_checksum_crc32c;
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -16,24 +16,19 @@ namespace ROCKSDB_NAMESPACE {
// This is the class to generate the file checksum based on Crc32. It // This is the class to generate the file checksum based on Crc32. It
// will be used as the default checksum method for SST file checksum // will be used as the default checksum method for SST file checksum
class FileChecksumFuncCrc32c : public FileChecksumFunc { class FileChecksumGenCrc32c : public FileChecksumGenerator {
public: public:
std::string Extend(const std::string& init_checksum, const char* data, FileChecksumGenCrc32c(const FileChecksumGenContext& /*context*/) {
size_t n) override { checksum_ = 0;
assert(data != nullptr);
uint32_t checksum_value = StringToUint32(init_checksum);
return Uint32ToString(crc32c::Extend(checksum_value, data, n));
} }
std::string Value(const char* data, size_t n) override { void Update(const char* data, size_t n) override {
assert(data != nullptr); checksum_ = crc32c::Extend(checksum_, data, n);
return Uint32ToString(crc32c::Value(data, n));
} }
std::string ProcessChecksum(const std::string& checksum) override { void Finalize() override { checksum_str_ = Uint32ToString(checksum_); }
uint32_t checksum_value = StringToUint32(checksum);
return Uint32ToString(crc32c::Mask(checksum_value)); std::string GetChecksum() const override { return checksum_str_; }
}
const char* Name() const override { return "FileChecksumCrc32c"; } const char* Name() const override { return "FileChecksumCrc32c"; }
@ -84,6 +79,21 @@ class FileChecksumFuncCrc32c : public FileChecksumFunc {
} }
return v; return v;
} }
private:
uint32_t checksum_;
std::string checksum_str_;
};
// Factory for the CRC32c file checksum generator. Every call to
// CreateFileChecksumGenerator() allocates a new FileChecksumGenCrc32c, so
// each requester receives its own generator object.
class FileChecksumGenCrc32cFactory : public FileChecksumGenFactory {
public:
// Constructs a FileChecksumGenCrc32c from `context` and hands ownership of
// it to the caller through the returned unique_ptr.
std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
const FileChecksumGenContext& context) override {
return std::unique_ptr<FileChecksumGenerator>(
new FileChecksumGenCrc32c(context));
}
// Returns the fixed name string of this factory.
const char* Name() const override { return "FileChecksumGenCrc32cFactory"; }
}; };
// The default implementation of FileChecksumList

Loading…
Cancel
Save