diff --git a/HISTORY.md b/HISTORY.md
index 2f9eb04fe..3ed1bb77b 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -6,6 +6,9 @@
 ### Bug Fixes
 * Blob file checksums are now printed in hexadecimal format when using the `manifest_dump` `ldb` command.
 
+### New Features
+* `ldb` has a new command, `list_live_files_metadata`, that shows the live SST files, as well as their LSM storage level and the column family they belong to. Pass `--sort_by_filename` to get a flat listing sorted by file name instead of one grouped by column family and level.
+
 ## 6.22.0 (2021-06-18)
 ### Behavior Changes
 * Added two additional tickers, MEMTABLE_PAYLOAD_BYTES_AT_FLUSH and MEMTABLE_GARBAGE_BYTES_AT_FLUSH. These stats can be used to estimate the ratio of "garbage" (outdated) bytes in the memtable that are discarded at flush time.
diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc
index ec4e0d0f6..471dcbb5a 100644
--- a/tools/ldb_cmd.cc
+++ b/tools/ldb_cmd.cc
@@ -247,6 +247,10 @@ LDBCommand* LDBCommand::SelectCommand(const ParsedParams& parsed_params) {
     return new DBFileDumperCommand(parsed_params.cmd_params,
                                    parsed_params.option_map,
                                    parsed_params.flags);
+  } else if (parsed_params.cmd == DBLiveFilesMetadataDumperCommand::Name()) {
+    return new DBLiveFilesMetadataDumperCommand(parsed_params.cmd_params,
+                                                parsed_params.option_map,
+                                                parsed_params.flags);
   } else if (parsed_params.cmd == InternalDumpCommand::Name()) {
     return new InternalDumpCommand(parsed_params.cmd_params,
                                    parsed_params.option_map,
@@ -3396,6 +3400,119 @@ void DBFileDumperCommand::DoCommand() {
   }
 }
 
+const std::string DBLiveFilesMetadataDumperCommand::ARG_SORT_BY_FILENAME =
+    "sort_by_filename";
+
+DBLiveFilesMetadataDumperCommand::DBLiveFilesMetadataDumperCommand(
+    const std::vector<std::string>& /*params*/,
+    const std::map<std::string, std::string>& options,
+    const std::vector<std::string>& flags)
+    : LDBCommand(options, flags, true,
+                 BuildCmdLineOptions({ARG_SORT_BY_FILENAME})) {
+  sort_by_filename_ = IsFlagPresent(flags, ARG_SORT_BY_FILENAME);
+}
+
+void DBLiveFilesMetadataDumperCommand::Help(std::string& ret) {
+  ret.append(" ");
+  ret.append(DBLiveFilesMetadataDumperCommand::Name());
+  ret.append(" [--" + ARG_SORT_BY_FILENAME + "] ");
+  ret.append("\n");
+}
+
+// Prints the metadata of every live SST file of the opened DB to stdout.
+//
+// With --sort_by_filename, prints one line per file in the form
+//   <path> : level <n>, column family '<cf>'
+// ordered by db_path + name. Otherwise the files are grouped by column
+// family and then by LSM level; every level from 0 up to the highest
+// populated one is printed, even if it holds no files.
+// Returns without printing anything when db_ is null.
+void DBLiveFilesMetadataDumperCommand::DoCommand() {
+  if (!db_) {
+    assert(GetExecuteState().IsFailed());
+    return;
+  }
+
+  std::cout << "Live SST Files:" << std::endl;
+  std::vector<LiveFileMetaData> metadata;
+  db_->GetLiveFilesMetaData(&metadata);
+  if (sort_by_filename_) {
+    // Sort metadata vector by filename.
+    std::sort(metadata.begin(), metadata.end(),
+              [](const LiveFileMetaData& a, const LiveFileMetaData& b) -> bool {
+                std::string aName = a.db_path + a.name;
+                std::string bName = b.db_path + b.name;
+                return (aName.compare(bName) < 0);
+              });
+    for (const auto& fileMetadata : metadata) {
+      // fileMetadata.name always starts with "/", whereas
+      // fileMetadata.db_path is the string provided by the user
+      // as an input and may or may not end with "/". Join the two
+      // with an explicit "/" and rely on the normalization below
+      // to drop the resulting extra separators.
+      std::string filename = fileMetadata.db_path + "/" + fileMetadata.name;
+      // Drops any repeating '/' character that could happen during
+      // concatenation of db path and file name.
+      filename = NormalizePath(filename);
+      const std::string& cf = fileMetadata.column_family_name;
+      int level = fileMetadata.level;
+      std::cout << filename << " : level " << level << ", column family '" << cf
+                << "'" << std::endl;
+    }
+  } else {
+    std::map<std::string, std::map<int, std::vector<std::string>>>
+        filesPerLevelPerCf;
+    // Collect live files metadata.
+    // Store filenames into a 2D map, that will automatically
+    // sort by column family (first key) and by level (second key).
+    for (const auto& fileMetadata : metadata) {
+      const std::string& cf = fileMetadata.column_family_name;
+      int level = fileMetadata.level;
+
+      // fileMetadata.name always starts with "/", whereas
+      // fileMetadata.db_path is the string provided by the user
+      // as an input and may or may not end with "/". Join the two
+      // with an explicit "/" and rely on the normalization below
+      // to drop the resulting extra separators.
+      std::string filename = fileMetadata.db_path + "/" + fileMetadata.name;
+      // Drops any repeating '/' character that could happen during
+      // concatenation of db path and file name.
+      filename = NormalizePath(filename);
+      // operator[] creates the per-CF and per-level entries on first use.
+      filesPerLevelPerCf[cf][level].push_back(filename);
+    }
+    // For each column family,
+    // iterate through the levels and print out the live SST file names.
+    for (auto it = filesPerLevelPerCf.begin(); it != filesPerLevelPerCf.end();
+         ++it) {
+      // it->first: Column Family name (string)
+      // it->second: map[level]={SST files...}.
+      std::cout << "===== Column Family: " << it->first
+                << " =====" << std::endl;
+
+      // For simplicity, create reference to the inner map (level={live SST
+      // files}).
+      std::map<int, std::vector<std::string>>& filesPerLevel = it->second;
+      int maxLevel = filesPerLevel.rbegin()->first;
+
+      // Even if the first few levels are empty, they are printed out.
+      for (int level = 0; level <= maxLevel; ++level) {
+        std::cout << "---------- level " << level << " ----------" << std::endl;
+        if (filesPerLevel.find(level) != filesPerLevel.end()) {
+          std::vector<std::string>& fileList = filesPerLevel[level];
+
+          // Locally sort by filename for better information display.
+          std::sort(fileList.begin(), fileList.end());
+          for (const std::string& filename : fileList) {
+            std::cout << filename << std::endl;
+          }
+        }
+      }  // End of for-loop over levels.
+    }    // End of for-loop over filesPerLevelPerCf.
+  }      // End of else ("not sort_by_filename").
+  std::cout << "------------------------------" << std::endl;
+}
+
 void WriteExternalSstFilesCommand::Help(std::string& ret) {
   ret.append(" ");
   ret.append(WriteExternalSstFilesCommand::Name());
diff --git a/tools/ldb_cmd_impl.h b/tools/ldb_cmd_impl.h
index f5f7eff2e..9944dc11e 100644
--- a/tools/ldb_cmd_impl.h
+++ b/tools/ldb_cmd_impl.h
@@ -46,6 +46,30 @@ class DBFileDumperCommand : public LDBCommand {
   virtual void DoCommand() override;
 };
 
+// ldb command `list_live_files_metadata`: prints the live SST files of the
+// DB together with their LSM level and column family. By default the output
+// is grouped by column family and level; the --sort_by_filename flag
+// switches to a flat listing sorted by file name.
+class DBLiveFilesMetadataDumperCommand : public LDBCommand {
+ public:
+  static std::string Name() { return "list_live_files_metadata"; }
+
+  DBLiveFilesMetadataDumperCommand(
+      const std::vector<std::string>& params,
+      const std::map<std::string, std::string>& options,
+      const std::vector<std::string>& flags);
+
+  static void Help(std::string& ret);
+
+  virtual void DoCommand() override;
+
+ private:
+  // Set in the constructor: true when the sort_by_filename flag was given.
+  bool sort_by_filename_;
+
+  static const std::string ARG_SORT_BY_FILENAME;
+};
+
 class DBDumperCommand : public LDBCommand {
  public:
   static std::string Name() { return "dump"; }
diff --git a/tools/ldb_test.py b/tools/ldb_test.py
index 699317b95..c94d9efaf 100644
--- a/tools/ldb_test.py
+++ b/tools/ldb_test.py
@@ -452,6 +452,95 @@ class LDBTestCase(unittest.TestCase):
         filenumber = re.findall(r"(?<=MANIFEST-)\d+", manifestFilename)[0]
         self.assertEqual(manifestFilename, dbPath+"MANIFEST-"+filenumber)
 
+    def listLiveFilesMetadata(self, params, dumpFile):
+        # Runs `ldb list_live_files_metadata` with the given params, sending
+        # its stdout to dumpFile. True when run_err_null() reports 0.
+        return 0 == run_err_null("./ldb list_live_files_metadata %s > %s" % (
+            params, dumpFile))
+
+    def testListLiveFilesMetadata(self):
+        print("Running testListLiveFilesMetadata...")
+
+        dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
+        self.assertRunOK("put x1 y1 --create_if_missing", "OK")
+        self.assertRunOK("put x2 y2", "OK")
+
+        # Compare the SST filename and the level of list_live_files_metadata
+        # with the data collected from dump_live_files.
+        dumpFilePath1 = os.path.join(self.TMP_DIR, "dump1")
+        self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath1))
+        dumpFilePath2 = os.path.join(self.TMP_DIR, "dump2")
+        self.assertTrue(self.listLiveFilesMetadata("--sort_by_filename --db=%s" % dbPath, dumpFilePath2))
+
+        # Collect SST filename and level from dump_live_files
+        with open(dumpFilePath1, "r") as tmp:
+            data = tmp.read()
+            filename1 = re.findall(r".*\d+\.sst", data)[0]
+            level1 = re.findall(r"level:\d+", data)[0].split(':')[1]
+
+        # Collect SST filename and level from list_live_files_metadata
+        with open(dumpFilePath2, "r") as tmp:
+            data = tmp.read()
+            filename2 = re.findall(r".*\d+\.sst", data)[0]
+            level2 = re.findall(r"level \d+", data)[0].split(' ')[1]
+
+        # Assert equality between filenames and levels.
+        self.assertEqual(filename1, filename2)
+        self.assertEqual(level1, level2)
+
+        # Create multiple column families and compare the output
+        # of list_live_files_metadata with dump_live_files once again.
+        # Create new CF, and insert data:
+        self.assertRunOK("create_column_family mycol1", "OK")
+        self.assertRunOK("put --column_family=mycol1 v1 v2", "OK")
+        self.assertRunOK("create_column_family mycol2", "OK")
+        self.assertRunOK("put --column_family=mycol2 h1 h2", "OK")
+        self.assertRunOK("put --column_family=mycol2 h3 h4", "OK")
+
+        # Call dump_live_files and list_live_files_metadata
+        # and pipe the output to compare them later.
+        dumpFilePath3 = os.path.join(self.TMP_DIR, "dump3")
+        self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath3))
+        dumpFilePath4 = os.path.join(self.TMP_DIR, "dump4")
+        self.assertTrue(self.listLiveFilesMetadata("--sort_by_filename --db=%s" % dbPath, dumpFilePath4))
+
+        # dump_live_files:
+        # parse the output and create a map:
+        # [key: sstFilename]->[value:[LSM level, Column Family Name]]
+        referenceMap = {}
+        with open(dumpFilePath3, "r") as tmp:
+            data = tmp.read()
+            # Note: the following regexes are contingent on what
+            # dump_live_files outputs.
+            namesAndLevels = re.findall(r"\d+\.sst level:\d+", data)
+            cfs = re.findall(r"(?<=column family name=)\w+", data)
+            # re.findall should not reorder the data.
+            # Therefore namesAndLevels[i] matches the data from cfs[i].
+            for count, nameAndLevel in enumerate(namesAndLevels):
+                sstFilename = re.findall(r"\d+\.sst", nameAndLevel)[0]
+                sstLevel = re.findall(r"(?<=level:)\d+", nameAndLevel)[0]
+                cf = cfs[count]
+                referenceMap[sstFilename] = [sstLevel, cf]
+
+        # list_live_files_metadata:
+        # parse the output and create a map:
+        # [key: sstFilename]->[value:[LSM level, Column Family Name]]
+        testMap = {}
+        with open(dumpFilePath4, "r") as tmp:
+            data = tmp.read()
+            # Since for each SST file, all the information is contained
+            # on one line, the parsing is easy to perform and relies on
+            # the appearance of an "00xxx.sst" pattern.
+            sstLines = re.findall(r".*\d+\.sst.*", data)
+            for line in sstLines:
+                sstFilename = re.findall(r"\d+\.sst", line)[0]
+                sstLevel = re.findall(r"(?<=level )\d+", line)[0]
+                cf = re.findall(r"(?<=column family \')\w+(?=\')", line)[0]
+                testMap[sstFilename] = [sstLevel, cf]
+
+        # Compare the map obtained from dump_live_files and the map
+        # obtained from list_live_files_metadata. Everything should match.
+        self.assertEqual(referenceMap, testMap)
 
     def getManifests(self, directory):
         return glob.glob(directory + "/MANIFEST-*")
diff --git a/tools/ldb_tool.cc b/tools/ldb_tool.cc
index f8f7e7181..08a22c0ad 100644
--- a/tools/ldb_tool.cc
+++ b/tools/ldb_tool.cc
@@ -94,6 +94,7 @@ void LDBCommandRunner::PrintHelp(const LDBOptions& ldb_options,
   DropColumnFamilyCommand::Help(ret);
   DBFileDumperCommand::Help(ret);
   InternalDumpCommand::Help(ret);
+  DBLiveFilesMetadataDumperCommand::Help(ret);
   RepairCommand::Help(ret);
   BackupCommand::Help(ret);
   RestoreCommand::Help(ret);