Use FileChecksumGenFactory for SST file checksum (#6600)

Summary:
In the current implementation, sst file checksum is calculated by a shared checksum function object, which may make some checksum function hard to be applied here such as SHA1. In this implementation, each sst file will have its own checksum generator obejct, created by FileChecksumGenFactory. User needs to implement its own FilechecksumGenerator and Factory to plugin the in checksum calculation method.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6600

Test Plan: tested with make asan_check

Reviewed By: riversand963

Differential Revision: D20717670

Pulled By: zhichao-cao

fbshipit-source-id: 2a74c1c280ac11a07a1980185b43b671acaa71c6
main
Zhichao Cao 5 years ago committed by Facebook GitHub Bot
parent ee50b8d499
commit e8d332d97e
  1. 2
      HISTORY.md
  2. 11
      db/builder.cc
  3. 13
      db/compaction/compaction_job.cc
  4. 27
      file/writable_file_writer.cc
  5. 24
      file/writable_file_writer.h
  6. 44
      include/rocksdb/file_checksum.h
  7. 10
      include/rocksdb/options.h
  8. 2
      options/cf_options.cc
  9. 2
      options/cf_options.h
  10. 8
      options/db_options.cc
  11. 2
      options/db_options.h
  12. 3
      options/options_helper.cc
  13. 4
      options/options_settable_test.cc
  14. 11
      table/block_based/block_based_table_builder.cc
  15. 5
      table/block_based/block_based_table_builder.h
  16. 12
      table/cuckoo/cuckoo_table_builder.cc
  17. 5
      table/cuckoo/cuckoo_table_builder.h
  18. 3
      table/mock_table.h
  19. 12
      table/plain/plain_table_builder.cc
  20. 5
      table/plain/plain_table_builder.h
  21. 2
      table/table_builder.h
  22. 65
      table/table_test.cc
  23. 3
      tools/ldb_cmd.cc
  24. 29
      tools/ldb_cmd_test.cc
  25. 5
      util/file_checksum_helper.cc
  26. 36
      util/file_checksum_helper.h

@ -5,6 +5,8 @@
* Updated default format_version in BlockBasedTableOptions from 2 to 4. SST files generated with the new default can be read by RocksDB versions 5.16 and newer, and use more efficient encoding of keys in index blocks.
* `Cache::Insert` now expects clients to pass in function objects implementing the `Cache::Deleter` interface as deleters instead of plain function pointers.
* A new parameter `CreateBackupOptions` is added to both `BackupEngine::CreateNewBackup` and `BackupEngine::CreateNewBackupWithMetadata`, you can decrease CPU priority of `BackupEngine`'s background threads by setting `decrease_background_thread_cpu_priority` and `background_thread_cpu_priority` in `CreateBackupOptions`.
* Updated the public API of SST file checksum. Introduce the FileChecksumGenFactory to create the FileChecksumGenerator for each SST file, such that the FileChecksumGenerator is not shared and it can be more general for checksum implementations. Changed the FileChecksumGenerator interface from Value, Extend, and GetChecksum to Update, Finalize, and GetChecksum. Temproal data should be maintained by the FileChecksumGenerator object itself and finally it can return the checksum string.
* Updated the public API of SST file checksum. Introduce the FileChecksumGenFactory to create the FileChecksumGenerator for each SST file, such that the FileChecksumGenerator is not shared and it can be more general for checksum implementations. Changed the FileChecksumGenerator interface from Value, Extend, and GetChecksum to Update, Finalize, and GetChecksum. Finalize should be only called once after all data is processed to generate the final checksum. Temproal data should be maintained by the FileChecksumGenerator object itself and finally it can return the checksum string.
### Bug Fixes
* Fix a bug where range tombstone blocks in ingested files were cached incorrectly during ingestion. If range tombstones were read from those incorrectly cached blocks, the keys they covered would be exposed.

@ -134,7 +134,7 @@ Status BuildTable(
file_writer.reset(new WritableFileWriter(
std::move(file), fname, file_options, env, ioptions.statistics,
ioptions.listeners, ioptions.sst_file_checksum_func));
ioptions.listeners, ioptions.file_checksum_gen_factory));
builder = NewTableBuilder(
ioptions, mutable_cf_options, internal_comparator,
@ -203,9 +203,6 @@ Status BuildTable(
if (table_properties) {
*table_properties = tp;
}
// Add the checksum information to file metadata.
meta->file_checksum = builder->GetFileChecksum();
meta->file_checksum_func_name = builder->GetFileChecksumFuncName();
}
delete builder;
@ -217,6 +214,12 @@ Status BuildTable(
if (io_status->ok() && !empty) {
*io_status = file_writer->Close();
}
if (io_status->ok() && !empty) {
// Add the checksum information to file metadata.
meta->file_checksum = file_writer->GetFileChecksum();
meta->file_checksum_func_name = file_writer->GetFileChecksumFuncName();
}
if (!io_status->ok()) {
s = *io_status;
}

@ -1309,11 +1309,6 @@ Status CompactionJob::FinishCompactionOutputFile(
}
const uint64_t current_bytes = sub_compact->builder->FileSize();
if (s.ok()) {
// Add the checksum information to file metadata.
meta->file_checksum = sub_compact->builder->GetFileChecksum();
meta->file_checksum_func_name =
sub_compact->builder->GetFileChecksumFuncName();
meta->fd.file_size = current_bytes;
}
sub_compact->current_output()->finished = true;
@ -1328,6 +1323,12 @@ Status CompactionJob::FinishCompactionOutputFile(
if (io_s.ok()) {
io_s = sub_compact->outfile->Close();
}
if (io_s.ok()) {
// Add the checksum information to file metadata.
meta->file_checksum = sub_compact->outfile->GetFileChecksum();
meta->file_checksum_func_name =
sub_compact->outfile->GetFileChecksumFuncName();
}
if (!io_s.ok()) {
io_status_ = io_s;
s = io_s;
@ -1532,7 +1533,7 @@ Status CompactionJob::OpenCompactionOutputFile(
sub_compact->outfile.reset(
new WritableFileWriter(std::move(writable_file), fname, file_options_,
env_, db_options_.statistics.get(), listeners,
db_options_.sst_file_checksum_func.get()));
db_options_.file_checksum_gen_factory.get()));
// If the Column family flag is to only optimize filters for hits,
// we can skip creating filters if this is the bottommost_level where

@ -155,6 +155,11 @@ IOStatus WritableFileWriter::Close() {
writable_file_.reset();
TEST_KILL_RANDOM("WritableFileWriter::Close:1", rocksdb_kill_odds);
if (s.ok() && checksum_generator_ != nullptr && !checksum_finalized_) {
checksum_generator_->Finalize();
checksum_finalized_ = true;
}
return s;
}
@ -216,9 +221,17 @@ IOStatus WritableFileWriter::Flush() {
return s;
}
std::string WritableFileWriter::GetFileChecksum() {
if (checksum_generator_ != nullptr) {
return checksum_generator_->GetChecksum();
} else {
return kUnknownFileChecksum;
}
}
const char* WritableFileWriter::GetFileChecksumFuncName() const {
if (checksum_func_ != nullptr) {
return checksum_func_->Name();
if (checksum_generator_ != nullptr) {
return checksum_generator_->Name();
} else {
return kUnknownFileChecksumFuncName.c_str();
}
@ -332,14 +345,8 @@ IOStatus WritableFileWriter::WriteBuffered(const char* data, size_t size) {
}
void WritableFileWriter::CalculateFileChecksum(const Slice& data) {
if (checksum_func_ != nullptr) {
if (is_first_checksum_) {
file_checksum_ = checksum_func_->Value(data.data(), data.size());
is_first_checksum_ = false;
} else {
file_checksum_ =
checksum_func_->Extend(file_checksum_, data.data(), data.size());
}
if (checksum_generator_ != nullptr) {
checksum_generator_->Update(data.data(), data.size());
}
}

@ -72,9 +72,8 @@ class WritableFileWriter {
RateLimiter* rate_limiter_;
Statistics* stats_;
std::vector<std::shared_ptr<EventListener>> listeners_;
FileChecksumFunc* checksum_func_;
std::string file_checksum_ = kUnknownFileChecksum;
bool is_first_checksum_ = true;
std::unique_ptr<FileChecksumGenerator> checksum_generator_;
bool checksum_finalized_;
public:
WritableFileWriter(
@ -82,7 +81,7 @@ class WritableFileWriter {
const FileOptions& options, Env* env = nullptr,
Statistics* stats = nullptr,
const std::vector<std::shared_ptr<EventListener>>& listeners = {},
FileChecksumFunc* checksum_func = nullptr)
FileChecksumGenFactory* file_checksum_gen_factory = nullptr)
: writable_file_(std::move(file)),
file_name_(_file_name),
env_(env),
@ -98,7 +97,8 @@ class WritableFileWriter {
rate_limiter_(options.rate_limiter),
stats_(stats),
listeners_(),
checksum_func_(checksum_func) {
checksum_generator_(nullptr),
checksum_finalized_(false) {
TEST_SYNC_POINT_CALLBACK("WritableFileWriter::WritableFileWriter:0",
reinterpret_cast<void*>(max_buffer_size_));
buf_.Alignment(writable_file_->GetRequiredBufferAlignment());
@ -113,6 +113,13 @@ class WritableFileWriter {
#else // !ROCKSDB_LITE
(void)listeners;
#endif
if (file_checksum_gen_factory != nullptr) {
FileChecksumGenContext checksum_gen_context;
checksum_gen_context.file_name = _file_name;
checksum_generator_ =
file_checksum_gen_factory->CreateFileChecksumGenerator(
checksum_gen_context);
}
}
WritableFileWriter(const WritableFileWriter&) = delete;
@ -150,11 +157,12 @@ class WritableFileWriter {
bool TEST_BufferIsEmpty() { return buf_.CurrentSize() == 0; }
void TEST_SetFileChecksumFunc(FileChecksumFunc* checksum_func) {
checksum_func_ = checksum_func;
void TEST_SetFileChecksumGenerator(
FileChecksumGenerator* checksum_generator) {
checksum_generator_.reset(checksum_generator);
}
const std::string& GetFileChecksum() const { return file_checksum_; }
std::string GetFileChecksum();
const char* GetFileChecksumFuncName() const;

@ -18,27 +18,44 @@
namespace ROCKSDB_NAMESPACE {
// FileChecksumFunc is the function class to generates the checksum value
struct FileChecksumGenContext {
std::string file_name;
};
// FileChecksumGenerator is the class to generates the checksum value
// for each file when the file is written to the file system.
class FileChecksumFunc {
class FileChecksumGenerator {
public:
virtual ~FileChecksumFunc() {}
// Return the checksum of concat (A, data[0,n-1]) where init_checksum is the
// returned value of some string A. It is used to maintain the checksum of a
// stream of data
virtual std::string Extend(const std::string& init_checksum, const char* data,
size_t n) = 0;
virtual ~FileChecksumGenerator() {}
// Update the current result after process the data. For different checksum
// functions, the temporal results may be stored and used in Update to
// include the new data.
virtual void Update(const char* data, size_t n) = 0;
// Return the checksum value of data[0,n-1]
virtual std::string Value(const char* data, size_t n) = 0;
// Generate the final results if no further new data will be updated.
virtual void Finalize() = 0;
// Return a processed value of the checksum for store in somewhere
virtual std::string ProcessChecksum(const std::string& checksum) = 0;
// Get the checksum
virtual std::string GetChecksum() const = 0;
// Returns a name that identifies the current file checksum function.
virtual const char* Name() const = 0;
};
// Create the FileChecksumGenerator object for each SST file.
class FileChecksumGenFactory {
public:
virtual ~FileChecksumGenFactory() {}
// Create a new FileChecksumGenerator.
virtual std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
const FileChecksumGenContext& context) = 0;
// Return the name of this FileChecksumGenFactory.
virtual const char* Name() const = 0;
};
// FileChecksumList stores the checksum information of a list of files (e.g.,
// SST files). The FileChecksumLIst can be used to store the checksum
// information of all SST file getting from the MANIFEST, which are
@ -80,7 +97,4 @@ class FileChecksumList {
// Create a new file checksum list.
extern FileChecksumList* NewFileChecksumList();
// Create a Crc32c based file checksum function
extern FileChecksumFunc* CreateFileChecksumFuncCrc32c();
} // namespace ROCKSDB_NAMESPACE

@ -1126,12 +1126,14 @@ struct DBOptions {
// Default: 0
size_t log_readahead_size = 0;
// If user does NOT provide SST file checksum function, the SST file checksum
// will NOT be used. The single checksum instance are shared by options and
// file writers. Make sure the algorithm is thread safe.
// If user does NOT provide the checksum generator factory, the file checksum
// will NOT be used. A new file checksum generator object will be created
// when a SST file is created. Therefore, each created FileChecksumGenerator
// will only be used from a single thread and so does not need to be
// thread-safe.
//
// Default: nullptr
std::shared_ptr<FileChecksumFunc> sst_file_checksum_func = nullptr;
std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory = nullptr;
// By default, RocksDB recovery fails if any table file referenced in
// MANIFEST are missing after scanning the MANIFEST.

@ -78,7 +78,7 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options,
cf_options.memtable_insert_with_hint_prefix_extractor.get()),
cf_paths(cf_options.cf_paths),
compaction_thread_limiter(cf_options.compaction_thread_limiter),
sst_file_checksum_func(db_options.sst_file_checksum_func.get()) {}
file_checksum_gen_factory(db_options.file_checksum_gen_factory.get()) {}
// Multiple two operands. If they overflow, return op1.
uint64_t MultiplyCheckOverflow(uint64_t op1, double op2) {

@ -126,7 +126,7 @@ struct ImmutableCFOptions {
std::shared_ptr<ConcurrentTaskLimiter> compaction_thread_limiter;
FileChecksumFunc* sst_file_checksum_func;
FileChecksumGenFactory* file_checksum_gen_factory;
};
struct MutableCFOptions {

@ -95,7 +95,7 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
persist_stats_to_disk(options.persist_stats_to_disk),
write_dbid_to_manifest(options.write_dbid_to_manifest),
log_readahead_size(options.log_readahead_size),
sst_file_checksum_func(options.sst_file_checksum_func),
file_checksum_gen_factory(options.file_checksum_gen_factory),
best_efforts_recovery(options.best_efforts_recovery) {
}
@ -247,9 +247,9 @@ void ImmutableDBOptions::Dump(Logger* log) const {
ROCKS_LOG_HEADER(
log, " Options.log_readahead_size: %" ROCKSDB_PRIszt,
log_readahead_size);
ROCKS_LOG_HEADER(log, " Options.sst_file_checksum_func: %s",
sst_file_checksum_func
? sst_file_checksum_func->Name()
ROCKS_LOG_HEADER(log, " Options.file_checksum_gen_factory: %s",
file_checksum_gen_factory
? file_checksum_gen_factory->Name()
: kUnknownFileChecksumFuncName.c_str());
ROCKS_LOG_HEADER(log, " Options.best_efforts_recovery: %d",
static_cast<int>(best_efforts_recovery));

@ -87,7 +87,7 @@ struct ImmutableDBOptions {
bool persist_stats_to_disk;
bool write_dbid_to_manifest;
size_t log_readahead_size;
std::shared_ptr<FileChecksumFunc> sst_file_checksum_func;
std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory;
bool best_efforts_recovery;
};

@ -143,7 +143,8 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
options.avoid_unnecessary_blocking_io =
immutable_db_options.avoid_unnecessary_blocking_io;
options.log_readahead_size = immutable_db_options.log_readahead_size;
options.sst_file_checksum_func = immutable_db_options.sst_file_checksum_func;
options.file_checksum_gen_factory =
immutable_db_options.file_checksum_gen_factory;
options.best_efforts_recovery = immutable_db_options.best_efforts_recovery;
return options;
}

@ -197,8 +197,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
sizeof(std::vector<std::shared_ptr<EventListener>>)},
{offsetof(struct DBOptions, row_cache), sizeof(std::shared_ptr<Cache>)},
{offsetof(struct DBOptions, wal_filter), sizeof(const WalFilter*)},
{offsetof(struct DBOptions, sst_file_checksum_func),
sizeof(std::shared_ptr<FileChecksumFunc>)},
{offsetof(struct DBOptions, file_checksum_gen_factory),
sizeof(std::shared_ptr<FileChecksumGenFactory>)},
};
char* options_ptr = new char[sizeof(DBOptions)];

@ -1167,9 +1167,6 @@ Status BlockBasedTableBuilder::Finish() {
if (ok()) {
WriteFooter(metaindex_block_handle, index_block_handle);
}
if (r->file != nullptr) {
file_checksum_ = r->file->GetFileChecksum();
}
r->state = Rep::State::kClosed;
return r->status;
}
@ -1205,6 +1202,14 @@ TableProperties BlockBasedTableBuilder::GetTableProperties() const {
return ret;
}
std::string BlockBasedTableBuilder::GetFileChecksum() const {
if (rep_->file != nullptr) {
return rep_->file->GetFileChecksum();
} else {
return kUnknownFileChecksum;
}
}
const char* BlockBasedTableBuilder::GetFileChecksumFuncName() const {
if (rep_->file != nullptr) {
return rep_->file->GetFileChecksumFuncName();

@ -96,7 +96,7 @@ class BlockBasedTableBuilder : public TableBuilder {
TableProperties GetTableProperties() const override;
// Get file checksum
const std::string& GetFileChecksum() const override { return file_checksum_; }
std::string GetFileChecksum() const override;
// Get file checksum function name
const char* GetFileChecksumFuncName() const override;
@ -146,9 +146,6 @@ class BlockBasedTableBuilder : public TableBuilder {
// Some compression libraries fail when the raw size is bigger than int. If
// uncompressed size is bigger than kCompressionSizeLimit, don't compress it
const uint64_t kCompressionSizeLimit = std::numeric_limits<int>::max();
// Store file checksum. If checksum is disabled, its value is "0".
std::string file_checksum_ = kUnknownFileChecksum;
};
Slice CompressBlock(const Slice& raw, const CompressionInfo& info,

@ -390,10 +390,6 @@ Status CuckooTableBuilder::Finish() {
std::string footer_encoding;
footer.EncodeTo(&footer_encoding);
io_status_ = file_->Append(footer_encoding);
if (file_ != nullptr) {
file_checksum_ = file_->GetFileChecksum();
}
status_ = io_status_;
return status_;
}
@ -520,6 +516,14 @@ bool CuckooTableBuilder::MakeSpaceForKey(
return null_found;
}
std::string CuckooTableBuilder::GetFileChecksum() const {
if (file_ != nullptr) {
return file_->GetFileChecksum();
} else {
return kUnknownFileChecksum;
}
}
const char* CuckooTableBuilder::GetFileChecksumFuncName() const {
if (file_ != nullptr) {
return file_->GetFileChecksumFuncName();

@ -71,7 +71,7 @@ class CuckooTableBuilder: public TableBuilder {
TableProperties GetTableProperties() const override { return properties_; }
// Get file checksum
const std::string& GetFileChecksum() const override { return file_checksum_; }
std::string GetFileChecksum() const override;
// Get file checksum function name
const char* GetFileChecksumFuncName() const override;
@ -130,9 +130,6 @@ class CuckooTableBuilder: public TableBuilder {
std::string smallest_user_key_ = "";
bool closed_; // Either Finish() or Abandon() has been called.
// Store file checksum. If checksum is disabled, its value is "0"
std::string file_checksum_ = kUnknownFileChecksum;
};
} // namespace ROCKSDB_NAMESPACE

@ -159,7 +159,7 @@ class MockTableBuilder : public TableBuilder {
}
// Get file checksum
const std::string& GetFileChecksum() const override { return file_checksum_; }
std::string GetFileChecksum() const override { return kUnknownFileChecksum; }
// Get file checksum function name
const char* GetFileChecksumFuncName() const override {
return kUnknownFileChecksumFuncName.c_str();
@ -169,7 +169,6 @@ class MockTableBuilder : public TableBuilder {
uint32_t id_;
MockTableFileSystem* file_system_;
stl_wrappers::KVMap table_;
std::string file_checksum_ = kUnknownFileChecksum;
};
class MockTableFactory : public TableFactory {

@ -285,10 +285,6 @@ Status PlainTableBuilder::Finish() {
if (io_status_.ok()) {
offset_ += footer_encoding.size();
}
if (file_ != nullptr) {
file_checksum_ = file_->GetFileChecksum();
}
status_ = io_status_;
return status_;
}
@ -305,6 +301,14 @@ uint64_t PlainTableBuilder::FileSize() const {
return offset_;
}
std::string PlainTableBuilder::GetFileChecksum() const {
if (file_ != nullptr) {
return file_->GetFileChecksum();
} else {
return kUnknownFileChecksum;
}
}
const char* PlainTableBuilder::GetFileChecksumFuncName() const {
if (file_ != nullptr) {
return file_->GetFileChecksumFuncName();

@ -88,7 +88,7 @@ class PlainTableBuilder: public TableBuilder {
bool SaveIndexInFile() const { return store_index_in_file_; }
// Get file checksum
const std::string& GetFileChecksum() const override { return file_checksum_; }
std::string GetFileChecksum() const override;
// Get file checksum function name
const char* GetFileChecksumFuncName() const override;
@ -119,9 +119,6 @@ class PlainTableBuilder: public TableBuilder {
const SliceTransform* prefix_extractor_;
// Store file checksum. If checksum is disabled, its value is "0".
std::string file_checksum_ = kUnknownFileChecksum;
Slice GetPrefix(const Slice& target) const {
assert(target.size() >= 8); // target is internal key
return GetPrefixFromUserKey(GetUserKey(target));

@ -164,7 +164,7 @@ class TableBuilder {
virtual TableProperties GetTableProperties() const = 0;
// Return file checksum
virtual const std::string& GetFileChecksum() const = 0;
virtual std::string GetFileChecksum() const = 0;
// Return file checksum function name
virtual const char* GetFileChecksumFuncName() const = 0;

@ -1187,9 +1187,9 @@ class FileChecksumTestHelper {
file_writer_.reset(test::GetWritableFileWriter(sink_, "" /* don't care */));
}
void SetFileChecksumFunc(FileChecksumFunc* checksum_func) {
void SetFileChecksumGenerator(FileChecksumGenerator* checksum_generator) {
if (file_writer_ != nullptr) {
file_writer_->TEST_SetFileChecksumFunc(checksum_func);
file_writer_->TEST_SetFileChecksumGenerator(checksum_generator);
}
}
@ -1230,15 +1230,18 @@ class FileChecksumTestHelper {
return s;
}
std::string GetFileChecksum() { return table_builder_->GetFileChecksum(); }
std::string GetFileChecksum() {
file_writer_->Close();
return table_builder_->GetFileChecksum();
}
const char* GetFileChecksumFuncName() {
return table_builder_->GetFileChecksumFuncName();
}
Status CalculateFileChecksum(FileChecksumFunc* file_checksum_func,
Status CalculateFileChecksum(FileChecksumGenerator* file_checksum_generator,
std::string* checksum) {
assert(file_checksum_func != nullptr);
assert(file_checksum_generator != nullptr);
cur_uniq_id_ = checksum_uniq_id_++;
test::StringSink* ss_rw =
ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter(
@ -1248,8 +1251,6 @@ class FileChecksumTestHelper {
std::unique_ptr<char[]> scratch(new char[2048]);
Slice result;
uint64_t offset = 0;
std::string tmp_checksum;
bool first_read = true;
Status s;
s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr,
false);
@ -1257,13 +1258,7 @@ class FileChecksumTestHelper {
return s;
}
while (result.size() != 0) {
if (first_read) {
first_read = false;
tmp_checksum = file_checksum_func->Value(scratch.get(), result.size());
} else {
tmp_checksum = file_checksum_func->Extend(tmp_checksum, scratch.get(),
result.size());
}
file_checksum_generator->Update(scratch.get(), result.size());
offset += static_cast<uint64_t>(result.size());
s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr,
false);
@ -1272,7 +1267,8 @@ class FileChecksumTestHelper {
}
}
EXPECT_EQ(offset, static_cast<uint64_t>(table_builder_->FileSize()));
*checksum = tmp_checksum;
file_checksum_generator->Finalize();
*checksum = file_checksum_generator->GetChecksum();
return Status::OK();
}
@ -3279,9 +3275,10 @@ TEST_P(BlockBasedTableTest, NoFileChecksum) {
}
TEST_P(BlockBasedTableTest, Crc32FileChecksum) {
FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
new FileChecksumGenCrc32cFactory();
Options options;
options.sst_file_checksum_func =
std::shared_ptr<FileChecksumFunc>(CreateFileChecksumFuncCrc32c());
options.file_checksum_gen_factory.reset(file_checksum_gen_factory);
ImmutableCFOptions ioptions(options);
MutableCFOptions moptions(options);
BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
@ -3300,9 +3297,14 @@ TEST_P(BlockBasedTableTest, Crc32FileChecksum) {
}
std::string column_family_name;
FileChecksumGenContext gen_context;
gen_context.file_name = "db/tmp";
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen1 =
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
gen_context);
FileChecksumTestHelper f(true);
f.CreateWriteableFile();
f.SetFileChecksumFunc(options.sst_file_checksum_func.get());
f.SetFileChecksumGenerator(checksum_crc32_gen1.release());
std::unique_ptr<TableBuilder> builder;
builder.reset(ioptions.table_factory->NewTableBuilder(
TableBuilderOptions(ioptions, moptions, *comparator,
@ -3316,9 +3318,12 @@ TEST_P(BlockBasedTableTest, Crc32FileChecksum) {
f.AddKVtoKVMap(1000);
f.WriteKVAndFlushTable();
ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen2 =
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
gen_context);
std::string checksum;
ASSERT_OK(
f.CalculateFileChecksum(options.sst_file_checksum_func.get(), &checksum));
ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum));
ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
}
@ -3420,9 +3425,10 @@ TEST_F(PlainTableTest, Crc32FileChecksum) {
plain_table_options.hash_table_ratio = 0;
PlainTableFactory factory(plain_table_options);
FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
new FileChecksumGenCrc32cFactory();
Options options;
options.sst_file_checksum_func =
std::shared_ptr<FileChecksumFunc>(CreateFileChecksumFuncCrc32c());
options.file_checksum_gen_factory.reset(file_checksum_gen_factory);
const ImmutableCFOptions ioptions(options);
const MutableCFOptions moptions(options);
InternalKeyComparator ikc(options.comparator);
@ -3430,9 +3436,15 @@ TEST_F(PlainTableTest, Crc32FileChecksum) {
int_tbl_prop_collector_factories;
std::string column_family_name;
int unknown_level = -1;
FileChecksumGenContext gen_context;
gen_context.file_name = "db/tmp";
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen1 =
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
gen_context);
FileChecksumTestHelper f(true);
f.CreateWriteableFile();
f.SetFileChecksumFunc(options.sst_file_checksum_func.get());
f.SetFileChecksumGenerator(checksum_crc32_gen1.release());
std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
TableBuilderOptions(
@ -3445,9 +3457,12 @@ TEST_F(PlainTableTest, Crc32FileChecksum) {
f.AddKVtoKVMap(1000);
f.WriteKVAndFlushTable();
ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");
std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen2 =
options.file_checksum_gen_factory->CreateFileChecksumGenerator(
gen_context);
std::string checksum;
ASSERT_OK(
f.CalculateFileChecksum(options.sst_file_checksum_func.get(), &checksum));
ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum));
EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str());
}

@ -48,7 +48,8 @@
namespace ROCKSDB_NAMESPACE {
class FileChecksumFuncCrc32c;
class FileChecksumGenCrc32c;
class FileChecksumGenCrc32cFactory;
const std::string LDBCommand::ARG_ENV_URI = "env_uri";
const std::string LDBCommand::ARG_DB = "db";

@ -126,33 +126,31 @@ class FileChecksumTestHelper {
return s;
}
std::unique_ptr<char[]> scratch(new char[2048]);
bool first_read = true;
Slice result;
FileChecksumFunc* file_checksum_func =
options_.sst_file_checksum_func.get();
if (file_checksum_func == nullptr) {
FileChecksumGenFactory* file_checksum_gen_factory =
options_.file_checksum_gen_factory.get();
if (file_checksum_gen_factory == nullptr) {
cur_checksum = kUnknownFileChecksum;
checksum_func_name = kUnknownFileChecksumFuncName;
} else {
checksum_func_name = file_checksum_func->Name();
FileChecksumGenContext gen_context;
gen_context.file_name = file_meta.name;
std::unique_ptr<FileChecksumGenerator> file_checksum_gen =
file_checksum_gen_factory->CreateFileChecksumGenerator(gen_context);
checksum_func_name = file_checksum_gen->Name();
s = file_reader->Read(2048, &result, scratch.get());
if (!s.ok()) {
return s;
}
while (result.size() != 0) {
if (first_read) {
first_read = false;
cur_checksum =
file_checksum_func->Value(scratch.get(), result.size());
} else {
cur_checksum = file_checksum_func->Extend(cur_checksum, scratch.get(),
result.size());
}
file_checksum_gen->Update(scratch.get(), result.size());
s = file_reader->Read(2048, &result, scratch.get());
if (!s.ok()) {
return s;
}
}
file_checksum_gen->Finalize();
cur_checksum = file_checksum_gen->GetChecksum();
}
std::string stored_checksum = file_meta.file_checksum;
@ -346,8 +344,9 @@ TEST_F(LdbCmdTest, DumpFileChecksumCRC32) {
Options opts;
opts.env = env.get();
opts.create_if_missing = true;
opts.sst_file_checksum_func =
std::shared_ptr<FileChecksumFunc>(CreateFileChecksumFuncCrc32c());
FileChecksumGenCrc32cFactory* file_checksum_gen_factory =
new FileChecksumGenCrc32cFactory();
opts.file_checksum_gen_factory.reset(file_checksum_gen_factory);
DB* db = nullptr;
std::string dbname = test::TmpDir();

@ -77,9 +77,4 @@ FileChecksumList* NewFileChecksumList() {
return checksum_list;
}
FileChecksumFunc* CreateFileChecksumFuncCrc32c() {
FileChecksumFunc* file_checksum_crc32c = new FileChecksumFuncCrc32c();
return file_checksum_crc32c;
}
} // namespace ROCKSDB_NAMESPACE

@ -16,24 +16,19 @@ namespace ROCKSDB_NAMESPACE {
// This is the class to generate the file checksum based on Crc32. It
// will be used as the default checksum method for SST file checksum
class FileChecksumFuncCrc32c : public FileChecksumFunc {
class FileChecksumGenCrc32c : public FileChecksumGenerator {
public:
std::string Extend(const std::string& init_checksum, const char* data,
size_t n) override {
assert(data != nullptr);
uint32_t checksum_value = StringToUint32(init_checksum);
return Uint32ToString(crc32c::Extend(checksum_value, data, n));
FileChecksumGenCrc32c(const FileChecksumGenContext& /*context*/) {
checksum_ = 0;
}
std::string Value(const char* data, size_t n) override {
assert(data != nullptr);
return Uint32ToString(crc32c::Value(data, n));
void Update(const char* data, size_t n) override {
checksum_ = crc32c::Extend(checksum_, data, n);
}
std::string ProcessChecksum(const std::string& checksum) override {
uint32_t checksum_value = StringToUint32(checksum);
return Uint32ToString(crc32c::Mask(checksum_value));
}
void Finalize() override { checksum_str_ = Uint32ToString(checksum_); }
std::string GetChecksum() const override { return checksum_str_; }
const char* Name() const override { return "FileChecksumCrc32c"; }
@ -84,6 +79,21 @@ class FileChecksumFuncCrc32c : public FileChecksumFunc {
}
return v;
}
private:
uint32_t checksum_;
std::string checksum_str_;
};
class FileChecksumGenCrc32cFactory : public FileChecksumGenFactory {
public:
std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
const FileChecksumGenContext& context) override {
return std::unique_ptr<FileChecksumGenerator>(
new FileChecksumGenCrc32c(context));
}
const char* Name() const override { return "FileChecksumGenCrc32cFactory"; }
};
// The default implementaion of FileChecksumList

Loading…
Cancel
Save