From fff28a7725f8f3be0122151f13c1af3c27f2b7ac Mon Sep 17 00:00:00 2001 From: yuzhangyu Date: Thu, 21 Apr 2022 20:37:07 -0700 Subject: [PATCH] Add blob dump support to the dump command (#9881) Summary: This patch is the first part of adding blob dump support. It only adds blob dump support to the dump command. A follow-up patch will add blob dump support to the dump_live_files command. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9881 Reviewed By: ltamasi Differential Revision: D35796731 Pulled By: jowlyzhang fbshipit-source-id: 2cc5973b222d505a331ac7b969edcf992b47c5ee --- include/rocksdb/utilities/ldb_cmd.h | 1 + tools/ldb_cmd.cc | 46 +++++++++++++++++++++++++---- tools/ldb_cmd_impl.h | 1 + tools/ldb_test.py | 17 +++++++++++ 4 files changed, 59 insertions(+), 6 deletions(-) diff --git a/include/rocksdb/utilities/ldb_cmd.h b/include/rocksdb/utilities/ldb_cmd.h index 5283269d1..71fedc5d4 100644 --- a/include/rocksdb/utilities/ldb_cmd.h +++ b/include/rocksdb/utilities/ldb_cmd.h @@ -70,6 +70,7 @@ class LDBCommand { static const std::string ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD; static const std::string ARG_BLOB_COMPACTION_READAHEAD_SIZE; static const std::string ARG_DECODE_BLOB_INDEX; + static const std::string ARG_DUMP_UNCOMPRESSED_BLOBS; struct ParsedParams { std::string cmd; diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 2f9b5227b..a6a055dd4 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -45,6 +45,7 @@ #include "util/file_checksum_helper.h" #include "util/stderr_logger.h" #include "util/string_util.h" +#include "utilities/blob_db/blob_dump_tool.h" #include "utilities/merge_operators.h" #include "utilities/ttl/db_ttl_impl.h" @@ -100,6 +101,8 @@ const std::string LDBCommand::ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD = const std::string LDBCommand::ARG_BLOB_COMPACTION_READAHEAD_SIZE = "blob_compaction_readahead_size"; const std::string LDBCommand::ARG_DECODE_BLOB_INDEX = "decode_blob_index"; +const std::string 
LDBCommand::ARG_DUMP_UNCOMPRESSED_BLOBS = + "dump_uncompressed_blobs"; const char* LDBCommand::DELIM = " ==> "; @@ -111,6 +114,9 @@ void DumpWalFile(Options options, std::string wal_file, bool print_header, void DumpSstFile(Options options, std::string filename, bool output_hex, bool show_properties, bool decode_blob_index); + +void DumpBlobFile(const std::string& filename, bool is_key_hex, + bool is_value_hex, bool dump_uncompressed_blobs); }; LDBCommand* LDBCommand::InitFromCmdLineArgs( @@ -1853,12 +1859,13 @@ DBDumperCommand::DBDumperCommand( const std::vector& /*params*/, const std::map& options, const std::vector& flags) - : LDBCommand(options, flags, true, - BuildCmdLineOptions( - {ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, - ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM, - ARG_STATS, ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET, - ARG_TIMESTAMP, ARG_PATH, ARG_DECODE_BLOB_INDEX})), + : LDBCommand( + options, flags, true, + BuildCmdLineOptions( + {ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, ARG_TO, + ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_STATS, + ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET, ARG_TIMESTAMP, + ARG_PATH, ARG_DECODE_BLOB_INDEX, ARG_DUMP_UNCOMPRESSED_BLOBS})), null_from_(true), null_to_(true), max_keys_(-1), @@ -1906,6 +1913,7 @@ DBDumperCommand::DBDumperCommand( print_stats_ = IsFlagPresent(flags, ARG_STATS); count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY); decode_blob_index_ = IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX); + dump_uncompressed_blobs_ = IsFlagPresent(flags, ARG_DUMP_UNCOMPRESSED_BLOBS); if (is_key_hex_) { if (!null_from_) { @@ -1940,6 +1948,7 @@ void DBDumperCommand::Help(std::string& ret) { ret.append(" [--" + ARG_TTL_END + "=:- is exclusive]"); ret.append(" [--" + ARG_PATH + "=]"); ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "]"); + ret.append(" [--" + ARG_DUMP_UNCOMPRESSED_BLOBS + "]"); ret.append("\n"); } @@ -1984,6 +1993,10 @@ void DBDumperCommand::DoCommand() { 
DumpManifestFile(options_, path_, /* verbose_ */ false, is_key_hex_, /* json_ */ false); break; + case kBlobFile: + DumpBlobFile(path_, is_key_hex_, is_value_hex_, + dump_uncompressed_blobs_); + break; default: exec_state_ = LDBCommandExecuteResult::Failed( "File type not supported: " + path_); @@ -3533,6 +3546,27 @@ void DumpSstFile(Options options, std::string filename, bool output_hex, } } +void DumpBlobFile(const std::string& filename, bool is_key_hex, + bool is_value_hex, bool dump_uncompressed_blobs) { + using ROCKSDB_NAMESPACE::blob_db::BlobDumpTool; + BlobDumpTool tool; + BlobDumpTool::DisplayType blob_type = is_value_hex + ? BlobDumpTool::DisplayType::kHex + : BlobDumpTool::DisplayType::kRaw; + BlobDumpTool::DisplayType show_uncompressed_blob = + dump_uncompressed_blobs ? blob_type : BlobDumpTool::DisplayType::kNone; + BlobDumpTool::DisplayType show_blob = + dump_uncompressed_blobs ? BlobDumpTool::DisplayType::kNone : blob_type; + + BlobDumpTool::DisplayType show_key = is_key_hex + ? 
BlobDumpTool::DisplayType::kHex + : BlobDumpTool::DisplayType::kRaw; + Status s = tool.Run(filename, show_key, show_blob, show_uncompressed_blob, + /* show_summary */ true); + if (!s.ok()) { + fprintf(stderr, "Failed: %s\n", s.ToString().c_str()); + } +} } // namespace DBFileDumperCommand::DBFileDumperCommand( diff --git a/tools/ldb_cmd_impl.h b/tools/ldb_cmd_impl.h index b1cbfdac3..04a81f8c3 100644 --- a/tools/ldb_cmd_impl.h +++ b/tools/ldb_cmd_impl.h @@ -108,6 +108,7 @@ class DBDumperCommand : public LDBCommand { bool print_stats_; std::string path_; bool decode_blob_index_; + bool dump_uncompressed_blobs_; static const std::string ARG_COUNT_ONLY; static const std::string ARG_COUNT_DELIM; diff --git a/tools/ldb_test.py b/tools/ldb_test.py index f931a6bd2..c518eb282 100644 --- a/tools/ldb_test.py +++ b/tools/ldb_test.py @@ -716,6 +716,23 @@ class LDBTestCase(unittest.TestCase): expected_pattern, unexpected=False, isPattern=True) + def testBlobDump(self): + print("Running testBlobDump") + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK") + self.assertRunOK("batchput --enable_blob_files x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK") + + # Pattern to expect from blob file dump. + regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary" + expected_pattern = re.compile(regex) + blob_files = self.getBlobFiles(dbPath) + self.assertTrue(len(blob_files) >= 1) + cmd = "dump --path=%s --dump_uncompressed_blobs" + self.assertRunOKFull((cmd) + % (blob_files[0]), + expected_pattern, unexpected=False, + isPattern=True) + def testWALDump(self): print("Running testWALDump...")