From 9b5790f0189af5e13d01872df615057cf7466eea Mon Sep 17 00:00:00 2001 From: yuzhangyu Date: Wed, 20 Apr 2022 11:10:20 -0700 Subject: [PATCH] Add --decode_blob_index option to idump and dump commands (#9870) Summary: This patch completes the first part of the task: "Extend all three commands so they can decode and print blob references if a new option --decode_blob_index is specified" Pull Request resolved: https://github.com/facebook/rocksdb/pull/9870 Reviewed By: ltamasi Differential Revision: D35753932 Pulled By: jowlyzhang fbshipit-source-id: 9d2bbba0eef2ed86b982767eba9de1b4881f35c9 --- tools/ldb_cmd.cc | 46 +++++++++++++++++++++++++++++++------------- tools/ldb_cmd_impl.h | 2 ++ tools/ldb_test.py | 20 ++++++++++++++++--- 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 2a0bd5985..2f9b5227b 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -18,6 +18,7 @@ #include #include +#include "db/blob/blob_index.h" #include "db/db_impl/db_impl.h" #include "db/dbformat.h" #include "db/log_reader.h" @@ -1696,11 +1697,11 @@ InternalDumpCommand::InternalDumpCommand( const std::vector& /*params*/, const std::map& options, const std::vector& flags) - : LDBCommand( - options, flags, true, - BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, - ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, - ARG_COUNT_DELIM, ARG_STATS, ARG_INPUT_KEY_HEX})), + : LDBCommand(options, flags, true, + BuildCmdLineOptions( + {ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, ARG_TO, + ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_STATS, + ARG_INPUT_KEY_HEX, ARG_DECODE_BLOB_INDEX})), has_from_(false), has_to_(false), max_keys_(-1), @@ -1708,7 +1709,8 @@ InternalDumpCommand::InternalDumpCommand( count_only_(false), count_delim_(false), print_stats_(false), - is_input_key_hex_(false) { + is_input_key_hex_(false), + decode_blob_index_(false) { has_from_ = ParseStringOption(options, ARG_FROM, &from_); has_to_ = ParseStringOption(options, ARG_TO, &to_); @@ -1726,6 +1728,7 @@ InternalDumpCommand::InternalDumpCommand( print_stats_ = IsFlagPresent(flags, ARG_STATS); count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY); is_input_key_hex_ = IsFlagPresent(flags, ARG_INPUT_KEY_HEX); + decode_blob_index_ = IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX); if (is_input_key_hex_) { if (has_from_) { @@ -1746,6 +1749,7 @@ void InternalDumpCommand::Help(std::string& ret) { ret.append(" [--" + ARG_COUNT_ONLY + "]"); ret.append(" [--" + ARG_COUNT_DELIM + "=]"); ret.append(" [--" + ARG_STATS + "]"); + ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "]"); ret.append("\n"); } @@ -1777,8 +1781,8 @@ void InternalDumpCommand::DoCommand() { long long count = 0; for (auto& key_version : key_versions) { - InternalKey ikey(key_version.user_key, key_version.sequence, - static_cast(key_version.type)); + ValueType value_type = static_cast(key_version.type); + InternalKey ikey(key_version.user_key, key_version.sequence, value_type); if (has_to_ && ikey.user_key() == to_) { // GetAllKeyVersions() includes keys with user key `to_`, but idump has // traditionally excluded such keys. @@ -1812,8 +1816,21 @@ void InternalDumpCommand::DoCommand() { if (!count_only_ && !count_delim_) { std::string key = ikey.DebugString(is_key_hex_); - std::string value = Slice(key_version.value).ToString(is_value_hex_); - std::cout << key << " => " << value << "\n"; + Slice value(key_version.value); + if (!decode_blob_index_ || value_type != kTypeBlobIndex) { + fprintf(stdout, "%s => %s\n", key.c_str(), + value.ToString(is_value_hex_).c_str()); + } else { + BlobIndex blob_index; + + const Status s = blob_index.DecodeFrom(value); + if (!s.ok()) { + fprintf(stderr, "%s => error decoding blob index =>\n", key.c_str()); + } else { + fprintf(stdout, "%s => %s\n", key.c_str(), + blob_index.DebugString(is_value_hex_).c_str()); + } + } } // Terminate if maximum number of keys have been dumped @@ -1841,13 +1858,14 @@ DBDumperCommand::DBDumperCommand( {ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_STATS, ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET, - ARG_TIMESTAMP, ARG_PATH})), + ARG_TIMESTAMP, ARG_PATH, ARG_DECODE_BLOB_INDEX})), null_from_(true), null_to_(true), max_keys_(-1), count_only_(false), count_delim_(false), - print_stats_(false) { + print_stats_(false), + decode_blob_index_(false) { auto itr = options.find(ARG_FROM); if (itr != options.end()) { null_from_ = false; @@ -1887,6 +1905,7 @@ DBDumperCommand::DBDumperCommand( print_stats_ = IsFlagPresent(flags, ARG_STATS); count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY); + decode_blob_index_ = IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX); if (is_key_hex_) { if (!null_from_) { @@ -1920,6 +1939,7 @@ void DBDumperCommand::Help(std::string& ret) { ret.append(" [--" + ARG_TTL_START + "=:- is inclusive]"); ret.append(" [--" + ARG_TTL_END + "=:- is exclusive]"); ret.append(" [--" + ARG_PATH + "=]"); + ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "]"); ret.append("\n"); } @@ -1958,7 +1978,7 @@ void DBDumperCommand::DoCommand() { break; case kTableFile: DumpSstFile(options_, path_, is_key_hex_, /* show_properties */ true, - /* decode_blob_index */ false); + decode_blob_index_); break; case kDescriptorFile: DumpManifestFile(options_, path_, /* verbose_ */ false, is_key_hex_, diff --git a/tools/ldb_cmd_impl.h b/tools/ldb_cmd_impl.h index 8cfdc79a5..b1cbfdac3 100644 --- a/tools/ldb_cmd_impl.h +++ b/tools/ldb_cmd_impl.h @@ -107,6 +107,7 @@ class DBDumperCommand : public LDBCommand { bool count_delim_; bool print_stats_; std::string path_; + bool decode_blob_index_; static const std::string ARG_COUNT_ONLY; static const std::string ARG_COUNT_DELIM; @@ -137,6 +138,7 @@ class InternalDumpCommand : public LDBCommand { bool count_delim_; bool print_stats_; bool is_input_key_hex_; + bool decode_blob_index_; static const std::string ARG_DELIM; static const std::string ARG_COUNT_ONLY; diff --git a/tools/ldb_test.py b/tools/ldb_test.py index cd2f0cc08..f931a6bd2 100644 --- a/tools/ldb_test.py +++ b/tools/ldb_test.py @@ -393,6 +393,20 @@ class LDBTestCase(unittest.TestCase): hex(ord('b'))), "'a' seq:1, type:1 => val\nInternal keys in range: 1") + def testIDumpDecodeBlobIndex(self): + print("Running testIDumpDecodeBlobIndex...") + self.assertRunOK("put a val --create_if_missing", "OK") + self.assertRunOK("put b val --enable_blob_files", "OK") + + # Pattern to expect from dump with decode_blob_index flag enabled. + regex = ".*\[blob ref\].*" + expected_pattern = re.compile(regex) + cmd = "idump %s --decode_blob_index" + self.assertRunOKFull((cmd) + % (self.dbParam(self.DB_NAME)), + expected_pattern, unexpected=False, + isPattern=True) + def testMiscAdminTask(self): print("Running testMiscAdminTask...") # These tests need to be improved; for example with asserts about @@ -687,16 +701,16 @@ class LDBTestCase(unittest.TestCase): dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) self.assertRunOK("put sst1 sst1_val --create_if_missing", "OK") - self.assertRunOK("put sst2 sst2_val", "OK") + self.assertRunOK("put sst2 sst2_val --enable_blob_files", "OK") self.assertRunOK("get sst1", "sst1_val") # Pattern to expect from SST dump. - regex = ".*Sst file format:.*" + regex = ".*Sst file format:.*\n.*\[blob ref\].*" expected_pattern = re.compile(regex) sst_files = self.getSSTFiles(dbPath) self.assertTrue(len(sst_files) >= 1) - cmd = "dump --path=%s" + cmd = "dump --path=%s --decode_blob_index" self.assertRunOKFull((cmd) % (sst_files[0]), expected_pattern, unexpected=False,