diff --git a/HISTORY.md b/HISTORY.md index 6ab13e5a3..5561305c5 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -21,6 +21,7 @@ * Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead. * Update DB::StartIOTrace and remove Env object from the arguments as its redundant and DB already has Env object that is passed down to IOTracer::StartIOTrace * Added `FlushReason::kWalFull`, which is reported when a memtable is flushed due to the WAL reaching its size limit; those flushes were previously reported as `FlushReason::kWriteBufferManager`. Also, changed the reason for flushes triggered by the write buffer manager to `FlushReason::kWriteBufferManager`; they were previously reported as `FlushReason::kWriteBufferFull`. +* Extended the `file_checksum_dump` ldb command and the `DB::GetLiveFilesChecksumInfo` API to support integrated BlobDB: they now report the checksums of blob files along with those of SST files. ### New Features * Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true. diff --git a/db/version_set.cc b/db/version_set.cc index 7aad3557f..e2eb161be 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -4937,7 +4937,7 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname, } // Get the checksum information including the checksum and checksum function -// name of all SST files in VersionSet. Store the information in +// name of all SST and blob files in VersionSet. Store the information in // FileChecksumList which contains a map from file number to its checksum info. 
// If DB is not running, make sure call VersionSet::Recover() to load the file // metadata from Manifest to VersionSet before calling this function. @@ -4954,6 +4954,7 @@ Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) { if (cfd->IsDropped() || !cfd->initialized()) { continue; } + /* SST files */ for (int level = 0; level < cfd->NumberLevels(); level++) { for (const auto& file : cfd->current()->storage_info()->LevelFiles(level)) { @@ -4961,17 +4962,36 @@ Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) { file->file_checksum, file->file_checksum_func_name); if (!s.ok()) { - break; + return s; } } + } + + /* Blob files */ + const auto& blob_files = cfd->current()->storage_info()->GetBlobFiles(); + for (const auto& pair : blob_files) { + const uint64_t blob_file_number = pair.first; + const auto& meta = pair.second; + + assert(meta); + assert(blob_file_number == meta->GetBlobFileNumber()); + + std::string checksum_value = meta->GetChecksumValue(); + std::string checksum_method = meta->GetChecksumMethod(); + assert(checksum_value.empty() == checksum_method.empty()); + if (meta->GetChecksumMethod().empty()) { + checksum_value = kUnknownFileChecksum; + checksum_method = kUnknownFileChecksumFuncName; + } + + s = checksum_list->InsertOneFileChecksum(blob_file_number, checksum_value, + checksum_method); if (!s.ok()) { - break; + return s; } } - if (!s.ok()) { - break; - } } + return s; } diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index b97583a9d..13a974d8c 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1368,7 +1368,7 @@ class DB { virtual void GetLiveFilesMetaData( std::vector* /*metadata*/) {} - // Return a list of all table file checksum info. + // Return a list of all table and blob files checksum info. // Note: This function might be of limited use because it cannot be // synchronized with GetLiveFiles. 
virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0; diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc index 159d752d8..5d9179356 100644 --- a/tools/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -349,6 +349,85 @@ TEST_F(LdbCmdTest, DumpFileChecksumNoChecksum) { ASSERT_OK(fct_helper_ac.VerifyChecksumInManifest(live_files)); } +TEST_F(LdbCmdTest, BlobDBDumpFileChecksumNoChecksum) { + Env* base_env = TryLoadCustomOrDefaultEnv(); + std::unique_ptr env(NewMemEnv(base_env)); + Options opts; + opts.env = env.get(); + opts.create_if_missing = true; + opts.enable_blob_files = true; + + DB* db = nullptr; + std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); + ASSERT_OK(DB::Open(opts, dbname, &db)); + + WriteOptions wopts; + FlushOptions fopts; + fopts.wait = true; + Random rnd(test::RandomSeed()); + for (int i = 0; i < 200; i++) { + std::ostringstream oss; + oss << std::setfill('0') << std::setw(8) << std::fixed << i; + std::string v = rnd.RandomString(100); + ASSERT_OK(db->Put(wopts, oss.str(), v)); + } + ASSERT_OK(db->Flush(fopts)); + for (int i = 100; i < 300; i++) { + std::ostringstream oss; + oss << std::setfill('0') << std::setw(8) << std::fixed << i; + std::string v = rnd.RandomString(100); + ASSERT_OK(db->Put(wopts, oss.str(), v)); + } + ASSERT_OK(db->Flush(fopts)); + for (int i = 200; i < 400; i++) { + std::ostringstream oss; + oss << std::setfill('0') << std::setw(8) << std::fixed << i; + std::string v = rnd.RandomString(100); + ASSERT_OK(db->Put(wopts, oss.str(), v)); + } + ASSERT_OK(db->Flush(fopts)); + for (int i = 300; i < 400; i++) { + std::ostringstream oss; + oss << std::setfill('0') << std::setw(8) << std::fixed << i; + std::string v = rnd.RandomString(100); + ASSERT_OK(db->Put(wopts, oss.str(), v)); + } + ASSERT_OK(db->Flush(fopts)); + + char arg1[] = "./ldb"; + std::string arg2_str = "--db=" + dbname; + char arg3[] = "file_checksum_dump"; + char* argv[] = {arg1, const_cast(arg2_str.c_str()), arg3}; + + 
ASSERT_EQ(0, + LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr)); + + // Verify each sst and blob file checksum value and checksum name + FileChecksumTestHelper fct_helper(opts, db, dbname); + ASSERT_OK(fct_helper.VerifyEachFileChecksum()); + + // Manually trigger compaction + std::ostringstream oss_b_buf; + oss_b_buf << std::setfill('0') << std::setw(8) << std::fixed << 0; + std::ostringstream oss_e_buf; + oss_e_buf << std::setfill('0') << std::setw(8) << std::fixed << 399; + std::string b_buf = oss_b_buf.str(); + std::string e_buf = oss_e_buf.str(); + Slice begin(b_buf); + Slice end(e_buf); + + CompactRangeOptions options; + ASSERT_OK(db->CompactRange(options, &begin, &end)); + // Verify each sst and blob file checksum after compaction + FileChecksumTestHelper fct_helper_ac(opts, db, dbname); + ASSERT_OK(fct_helper_ac.VerifyEachFileChecksum()); + + ASSERT_EQ(0, + LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr)); + + delete db; +} + TEST_F(LdbCmdTest, DumpFileChecksumCRC32) { Env* base_env = TryLoadCustomOrDefaultEnv(); std::unique_ptr env(NewMemEnv(base_env)); @@ -430,6 +509,85 @@ TEST_F(LdbCmdTest, DumpFileChecksumCRC32) { ASSERT_OK(fct_helper_ac.VerifyChecksumInManifest(live_files)); } +TEST_F(LdbCmdTest, BlobDBDumpFileChecksumCRC32) { + Env* base_env = TryLoadCustomOrDefaultEnv(); + std::unique_ptr env(NewMemEnv(base_env)); + Options opts; + opts.env = env.get(); + opts.create_if_missing = true; + opts.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); + opts.enable_blob_files = true; + + DB* db = nullptr; + std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); + ASSERT_OK(DB::Open(opts, dbname, &db)); + + WriteOptions wopts; + FlushOptions fopts; + fopts.wait = true; + Random rnd(test::RandomSeed()); + for (int i = 0; i < 100; i++) { + std::ostringstream oss; + oss << std::setfill('0') << std::setw(8) << std::fixed << i; + std::string v = rnd.RandomString(100); + ASSERT_OK(db->Put(wopts, oss.str(), v)); 
+ } + ASSERT_OK(db->Flush(fopts)); + for (int i = 50; i < 150; i++) { + std::ostringstream oss; + oss << std::setfill('0') << std::setw(8) << std::fixed << i; + std::string v = rnd.RandomString(100); + ASSERT_OK(db->Put(wopts, oss.str(), v)); + } + ASSERT_OK(db->Flush(fopts)); + for (int i = 100; i < 200; i++) { + std::ostringstream oss; + oss << std::setfill('0') << std::setw(8) << std::fixed << i; + std::string v = rnd.RandomString(100); + ASSERT_OK(db->Put(wopts, oss.str(), v)); + } + ASSERT_OK(db->Flush(fopts)); + for (int i = 150; i < 250; i++) { + std::ostringstream oss; + oss << std::setfill('0') << std::setw(8) << std::fixed << i; + std::string v = rnd.RandomString(100); + ASSERT_OK(db->Put(wopts, oss.str(), v)); + } + ASSERT_OK(db->Flush(fopts)); + + char arg1[] = "./ldb"; + std::string arg2_str = "--db=" + dbname; + char arg3[] = "file_checksum_dump"; + char* argv[] = {arg1, const_cast(arg2_str.c_str()), arg3}; + + ASSERT_EQ(0, + LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr)); + + // Verify each sst and blob file checksum value and checksum name + FileChecksumTestHelper fct_helper(opts, db, dbname); + ASSERT_OK(fct_helper.VerifyEachFileChecksum()); + + // Manually trigger compaction + std::ostringstream oss_b_buf; + oss_b_buf << std::setfill('0') << std::setw(8) << std::fixed << 0; + std::ostringstream oss_e_buf; + oss_e_buf << std::setfill('0') << std::setw(8) << std::fixed << 249; + std::string b_buf = oss_b_buf.str(); + std::string e_buf = oss_e_buf.str(); + Slice begin(b_buf); + Slice end(e_buf); + + CompactRangeOptions options; + ASSERT_OK(db->CompactRange(options, &begin, &end)); + // Verify each sst and blob file checksum after compaction + FileChecksumTestHelper fct_helper_ac(opts, db, dbname); + ASSERT_OK(fct_helper_ac.VerifyEachFileChecksum()); + + ASSERT_EQ(0, + LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr)); + delete db; +} + TEST_F(LdbCmdTest, OptionParsing) { // test parsing flags Options opts;