Extend file_checksum_dump ldb command and DB::GetLiveFilesChecksumInfo to blob files (#8179)

Summary:
Extend the DB::GetLiveFilesChecksumInfo API to cover blob files.
This API is also used by the file_checksum_dump ldb command, which previously
dumped the checksums of SST files only and now dumps blob file checksums as well.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8179

Test Plan: Added new unit tests (BlobDBDumpFileChecksumNoChecksum and BlobDBDumpFileChecksumCRC32) to tools/ldb_cmd_test.cc.

Reviewed By: zhichao-cao

Differential Revision: D27714965

Pulled By: akankshamahajan15

fbshipit-source-id: d8b7343ea845a64c83800336d88cced7152a8c92
main
Akanksha Mahajan 4 years ago committed by Facebook GitHub Bot
parent b1f62be10e
commit 296b47db25
  1. 1
      HISTORY.md
  2. 32
      db/version_set.cc
  3. 2
      include/rocksdb/db.h
  4. 158
      tools/ldb_cmd_test.cc

@ -21,6 +21,7 @@
* Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead. * Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead.
* Update DB::StartIOTrace and remove Env object from the arguments as it's redundant and DB already has Env object that is passed down to IOTracer::StartIOTrace * Update DB::StartIOTrace and remove Env object from the arguments as it's redundant and DB already has Env object that is passed down to IOTracer::StartIOTrace
* Added `FlushReason::kWalFull`, which is reported when a memtable is flushed due to the WAL reaching its size limit; those flushes were previously reported as `FlushReason::kWriteBufferManager`. Also, changed the reason for flushes triggered by the write buffer manager to `FlushReason::kWriteBufferManager`; they were previously reported as `FlushReason::kWriteBufferFull`. * Added `FlushReason::kWalFull`, which is reported when a memtable is flushed due to the WAL reaching its size limit; those flushes were previously reported as `FlushReason::kWriteBufferManager`. Also, changed the reason for flushes triggered by the write buffer manager to `FlushReason::kWriteBufferManager`; they were previously reported as `FlushReason::kWriteBufferFull`.
* Extend the file_checksum_dump ldb command and the DB::GetLiveFilesChecksumInfo API to IntegratedBlobDB, so that they report the checksums of blob files along with those of SST files.
### New Features ### New Features
* Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true. * Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true.

@ -4937,7 +4937,7 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname,
} }
// Get the checksum information including the checksum and checksum function // Get the checksum information including the checksum and checksum function
// name of all SST files in VersionSet. Store the information in // name of all SST and blob files in VersionSet. Store the information in
// FileChecksumList which contains a map from file number to its checksum info. // FileChecksumList which contains a map from file number to its checksum info.
// If DB is not running, make sure call VersionSet::Recover() to load the file // If DB is not running, make sure call VersionSet::Recover() to load the file
// metadata from Manifest to VersionSet before calling this function. // metadata from Manifest to VersionSet before calling this function.
@ -4954,6 +4954,7 @@ Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) {
if (cfd->IsDropped() || !cfd->initialized()) { if (cfd->IsDropped() || !cfd->initialized()) {
continue; continue;
} }
/* SST files */
for (int level = 0; level < cfd->NumberLevels(); level++) { for (int level = 0; level < cfd->NumberLevels(); level++) {
for (const auto& file : for (const auto& file :
cfd->current()->storage_info()->LevelFiles(level)) { cfd->current()->storage_info()->LevelFiles(level)) {
@ -4961,17 +4962,36 @@ Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) {
file->file_checksum, file->file_checksum,
file->file_checksum_func_name); file->file_checksum_func_name);
if (!s.ok()) { if (!s.ok()) {
break; return s;
} }
} }
}
/* Blob files */
const auto& blob_files = cfd->current()->storage_info()->GetBlobFiles();
for (const auto& pair : blob_files) {
const uint64_t blob_file_number = pair.first;
const auto& meta = pair.second;
assert(meta);
assert(blob_file_number == meta->GetBlobFileNumber());
std::string checksum_value = meta->GetChecksumValue();
std::string checksum_method = meta->GetChecksumMethod();
assert(checksum_value.empty() == checksum_method.empty());
if (meta->GetChecksumMethod().empty()) {
checksum_value = kUnknownFileChecksum;
checksum_method = kUnknownFileChecksumFuncName;
}
s = checksum_list->InsertOneFileChecksum(blob_file_number, checksum_value,
checksum_method);
if (!s.ok()) { if (!s.ok()) {
break; return s;
} }
} }
if (!s.ok()) {
break;
}
} }
return s; return s;
} }

@ -1368,7 +1368,7 @@ class DB {
virtual void GetLiveFilesMetaData( virtual void GetLiveFilesMetaData(
std::vector<LiveFileMetaData>* /*metadata*/) {} std::vector<LiveFileMetaData>* /*metadata*/) {}
// Return a list of all table file checksum info. // Return a list of all table and blob files checksum info.
// Note: This function might be of limited use because it cannot be // Note: This function might be of limited use because it cannot be
// synchronized with GetLiveFiles. // synchronized with GetLiveFiles.
virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0; virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;

@ -349,6 +349,85 @@ TEST_F(LdbCmdTest, DumpFileChecksumNoChecksum) {
ASSERT_OK(fct_helper_ac.VerifyChecksumInManifest(live_files)); ASSERT_OK(fct_helper_ac.VerifyChecksumInManifest(live_files));
} }
// Exercises the file_checksum_dump ldb command and the per-file checksum
// verification helper on a DB opened with enable_blob_files = true and with
// file_checksum_gen_factory left unset. The checks run once after four
// overlapping batches have been flushed and once more after a manual
// compaction over the whole key range.
TEST_F(LdbCmdTest, BlobDBDumpFileChecksumNoChecksum) {
  Env* base_env = TryLoadCustomOrDefaultEnv();
  std::unique_ptr<Env> env(NewMemEnv(base_env));
  Options opts;
  opts.env = env.get();
  opts.create_if_missing = true;
  opts.enable_blob_files = true;

  std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test");
  DB* raw_db = nullptr;
  ASSERT_OK(DB::Open(opts, dbname, &raw_db));
  // Own the DB through a smart pointer: a fatal ASSERT_* returns from the
  // test body immediately, so a trailing `delete` would be skipped and the DB
  // would be leaked. Declared after `env`, so the DB is destroyed before the
  // environment it uses.
  std::unique_ptr<DB> db(raw_db);

  WriteOptions wopts;
  FlushOptions fopts;
  fopts.wait = true;
  Random rnd(test::RandomSeed());

  // Keys are the decimal index, zero-padded to a width of 8 characters.
  const auto make_key = [](int i) {
    std::ostringstream oss;
    oss << std::setfill('0') << std::setw(8) << std::fixed << i;
    return oss.str();
  };

  // Each half-open [first, last) key range is written with random 100-byte
  // values and then flushed; the ranges overlap on purpose so that later
  // batches overwrite keys from earlier ones.
  const int key_ranges[][2] = {{0, 200}, {100, 300}, {200, 400}, {300, 400}};
  for (const auto& range : key_ranges) {
    for (int i = range[0]; i < range[1]; i++) {
      std::string v = rnd.RandomString(100);
      ASSERT_OK(db->Put(wopts, make_key(i), v));
    }
    ASSERT_OK(db->Flush(fopts));
  }

  char arg1[] = "./ldb";
  std::string arg2_str = "--db=" + dbname;
  char arg3[] = "file_checksum_dump";
  char* argv[] = {arg1, const_cast<char*>(arg2_str.c_str()), arg3};

  ASSERT_EQ(0,
            LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr));

  // Verify each sst and blob file checksum value and checksum name
  FileChecksumTestHelper fct_helper(opts, db.get(), dbname);
  ASSERT_OK(fct_helper.VerifyEachFileChecksum());

  // Manually trigger compaction over the full range of written keys
  std::string b_buf = make_key(0);
  std::string e_buf = make_key(399);
  Slice begin(b_buf);
  Slice end(e_buf);
  CompactRangeOptions options;
  ASSERT_OK(db->CompactRange(options, &begin, &end));

  // Verify each sst and blob file checksum after compaction
  FileChecksumTestHelper fct_helper_ac(opts, db.get(), dbname);
  ASSERT_OK(fct_helper_ac.VerifyEachFileChecksum());

  ASSERT_EQ(0,
            LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr));
}
TEST_F(LdbCmdTest, DumpFileChecksumCRC32) { TEST_F(LdbCmdTest, DumpFileChecksumCRC32) {
Env* base_env = TryLoadCustomOrDefaultEnv(); Env* base_env = TryLoadCustomOrDefaultEnv();
std::unique_ptr<Env> env(NewMemEnv(base_env)); std::unique_ptr<Env> env(NewMemEnv(base_env));
@ -430,6 +509,85 @@ TEST_F(LdbCmdTest, DumpFileChecksumCRC32) {
ASSERT_OK(fct_helper_ac.VerifyChecksumInManifest(live_files)); ASSERT_OK(fct_helper_ac.VerifyChecksumInManifest(live_files));
} }
// Exercises the file_checksum_dump ldb command and the per-file checksum
// verification helper on a DB opened with enable_blob_files = true and with
// the CRC32c file checksum generator factory configured. The checks run once
// after four overlapping batches have been flushed and once more after a
// manual compaction over the whole key range.
TEST_F(LdbCmdTest, BlobDBDumpFileChecksumCRC32) {
  Env* base_env = TryLoadCustomOrDefaultEnv();
  std::unique_ptr<Env> env(NewMemEnv(base_env));
  Options opts;
  opts.env = env.get();
  opts.create_if_missing = true;
  opts.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  opts.enable_blob_files = true;

  std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test");
  DB* raw_db = nullptr;
  ASSERT_OK(DB::Open(opts, dbname, &raw_db));
  // Own the DB through a smart pointer: a fatal ASSERT_* returns from the
  // test body immediately, so a trailing `delete` would be skipped and the DB
  // would be leaked. Declared after `env`, so the DB is destroyed before the
  // environment it uses.
  std::unique_ptr<DB> db(raw_db);

  WriteOptions wopts;
  FlushOptions fopts;
  fopts.wait = true;
  Random rnd(test::RandomSeed());

  // Keys are the decimal index, zero-padded to a width of 8 characters.
  const auto make_key = [](int i) {
    std::ostringstream oss;
    oss << std::setfill('0') << std::setw(8) << std::fixed << i;
    return oss.str();
  };

  // Each half-open [first, last) key range is written with random 100-byte
  // values and then flushed; the ranges overlap on purpose so that later
  // batches overwrite keys from earlier ones.
  const int key_ranges[][2] = {{0, 100}, {50, 150}, {100, 200}, {150, 250}};
  for (const auto& range : key_ranges) {
    for (int i = range[0]; i < range[1]; i++) {
      std::string v = rnd.RandomString(100);
      ASSERT_OK(db->Put(wopts, make_key(i), v));
    }
    ASSERT_OK(db->Flush(fopts));
  }

  char arg1[] = "./ldb";
  std::string arg2_str = "--db=" + dbname;
  char arg3[] = "file_checksum_dump";
  char* argv[] = {arg1, const_cast<char*>(arg2_str.c_str()), arg3};

  ASSERT_EQ(0,
            LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr));

  // Verify each sst and blob file checksum value and checksum name
  FileChecksumTestHelper fct_helper(opts, db.get(), dbname);
  ASSERT_OK(fct_helper.VerifyEachFileChecksum());

  // Manually trigger compaction over the full range of written keys
  std::string b_buf = make_key(0);
  std::string e_buf = make_key(249);
  Slice begin(b_buf);
  Slice end(e_buf);
  CompactRangeOptions options;
  ASSERT_OK(db->CompactRange(options, &begin, &end));

  // Verify each sst and blob file checksum after compaction
  FileChecksumTestHelper fct_helper_ac(opts, db.get(), dbname);
  ASSERT_OK(fct_helper_ac.VerifyEachFileChecksum());

  ASSERT_EQ(0,
            LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr));
}
TEST_F(LdbCmdTest, OptionParsing) { TEST_F(LdbCmdTest, OptionParsing) {
// test parsing flags // test parsing flags
Options opts; Options opts;

Loading…
Cancel
Save