Add --decode_blob_index option to idump and dump commands (#9870)

Summary:
This patch completes the first part of the task "Extend all three commands so they can decode and print blob references if a new option --decode_blob_index is specified": it adds the --decode_blob_index option to the idump and dump commands.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9870

Reviewed By: ltamasi

Differential Revision: D35753932

Pulled By: jowlyzhang

fbshipit-source-id: 9d2bbba0eef2ed86b982767eba9de1b4881f35c9
main
yuzhangyu 3 years ago committed by Facebook GitHub Bot
parent a5063c8931
commit 9b5790f018
  1. 46
      tools/ldb_cmd.cc
  2. 2
      tools/ldb_cmd_impl.h
  3. 20
      tools/ldb_test.py

@ -18,6 +18,7 @@
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include "db/blob/blob_index.h"
#include "db/db_impl/db_impl.h" #include "db/db_impl/db_impl.h"
#include "db/dbformat.h" #include "db/dbformat.h"
#include "db/log_reader.h" #include "db/log_reader.h"
@ -1696,11 +1697,11 @@ InternalDumpCommand::InternalDumpCommand(
const std::vector<std::string>& /*params*/, const std::vector<std::string>& /*params*/,
const std::map<std::string, std::string>& options, const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags) const std::vector<std::string>& flags)
: LDBCommand( : LDBCommand(options, flags, true,
options, flags, true, BuildCmdLineOptions(
BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, {ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, ARG_TO,
ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_STATS,
ARG_COUNT_DELIM, ARG_STATS, ARG_INPUT_KEY_HEX})), ARG_INPUT_KEY_HEX, ARG_DECODE_BLOB_INDEX})),
has_from_(false), has_from_(false),
has_to_(false), has_to_(false),
max_keys_(-1), max_keys_(-1),
@ -1708,7 +1709,8 @@ InternalDumpCommand::InternalDumpCommand(
count_only_(false), count_only_(false),
count_delim_(false), count_delim_(false),
print_stats_(false), print_stats_(false),
is_input_key_hex_(false) { is_input_key_hex_(false),
decode_blob_index_(false) {
has_from_ = ParseStringOption(options, ARG_FROM, &from_); has_from_ = ParseStringOption(options, ARG_FROM, &from_);
has_to_ = ParseStringOption(options, ARG_TO, &to_); has_to_ = ParseStringOption(options, ARG_TO, &to_);
@ -1726,6 +1728,7 @@ InternalDumpCommand::InternalDumpCommand(
print_stats_ = IsFlagPresent(flags, ARG_STATS); print_stats_ = IsFlagPresent(flags, ARG_STATS);
count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY); count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY);
is_input_key_hex_ = IsFlagPresent(flags, ARG_INPUT_KEY_HEX); is_input_key_hex_ = IsFlagPresent(flags, ARG_INPUT_KEY_HEX);
decode_blob_index_ = IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX);
if (is_input_key_hex_) { if (is_input_key_hex_) {
if (has_from_) { if (has_from_) {
@ -1746,6 +1749,7 @@ void InternalDumpCommand::Help(std::string& ret) {
ret.append(" [--" + ARG_COUNT_ONLY + "]"); ret.append(" [--" + ARG_COUNT_ONLY + "]");
ret.append(" [--" + ARG_COUNT_DELIM + "=<char>]"); ret.append(" [--" + ARG_COUNT_DELIM + "=<char>]");
ret.append(" [--" + ARG_STATS + "]"); ret.append(" [--" + ARG_STATS + "]");
ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "]");
ret.append("\n"); ret.append("\n");
} }
@ -1777,8 +1781,8 @@ void InternalDumpCommand::DoCommand() {
long long count = 0; long long count = 0;
for (auto& key_version : key_versions) { for (auto& key_version : key_versions) {
InternalKey ikey(key_version.user_key, key_version.sequence, ValueType value_type = static_cast<ValueType>(key_version.type);
static_cast<ValueType>(key_version.type)); InternalKey ikey(key_version.user_key, key_version.sequence, value_type);
if (has_to_ && ikey.user_key() == to_) { if (has_to_ && ikey.user_key() == to_) {
// GetAllKeyVersions() includes keys with user key `to_`, but idump has // GetAllKeyVersions() includes keys with user key `to_`, but idump has
// traditionally excluded such keys. // traditionally excluded such keys.
@ -1812,8 +1816,21 @@ void InternalDumpCommand::DoCommand() {
if (!count_only_ && !count_delim_) { if (!count_only_ && !count_delim_) {
std::string key = ikey.DebugString(is_key_hex_); std::string key = ikey.DebugString(is_key_hex_);
std::string value = Slice(key_version.value).ToString(is_value_hex_); Slice value(key_version.value);
std::cout << key << " => " << value << "\n"; if (!decode_blob_index_ || value_type != kTypeBlobIndex) {
fprintf(stdout, "%s => %s\n", key.c_str(),
value.ToString(is_value_hex_).c_str());
} else {
BlobIndex blob_index;
const Status s = blob_index.DecodeFrom(value);
if (!s.ok()) {
fprintf(stderr, "%s => error decoding blob index =>\n", key.c_str());
} else {
fprintf(stdout, "%s => %s\n", key.c_str(),
blob_index.DebugString(is_value_hex_).c_str());
}
}
} }
// Terminate if maximum number of keys have been dumped // Terminate if maximum number of keys have been dumped
@ -1841,13 +1858,14 @@ DBDumperCommand::DBDumperCommand(
{ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, {ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM,
ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM,
ARG_STATS, ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET, ARG_STATS, ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET,
ARG_TIMESTAMP, ARG_PATH})), ARG_TIMESTAMP, ARG_PATH, ARG_DECODE_BLOB_INDEX})),
null_from_(true), null_from_(true),
null_to_(true), null_to_(true),
max_keys_(-1), max_keys_(-1),
count_only_(false), count_only_(false),
count_delim_(false), count_delim_(false),
print_stats_(false) { print_stats_(false),
decode_blob_index_(false) {
auto itr = options.find(ARG_FROM); auto itr = options.find(ARG_FROM);
if (itr != options.end()) { if (itr != options.end()) {
null_from_ = false; null_from_ = false;
@ -1887,6 +1905,7 @@ DBDumperCommand::DBDumperCommand(
print_stats_ = IsFlagPresent(flags, ARG_STATS); print_stats_ = IsFlagPresent(flags, ARG_STATS);
count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY); count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY);
decode_blob_index_ = IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX);
if (is_key_hex_) { if (is_key_hex_) {
if (!null_from_) { if (!null_from_) {
@ -1920,6 +1939,7 @@ void DBDumperCommand::Help(std::string& ret) {
ret.append(" [--" + ARG_TTL_START + "=<N>:- is inclusive]"); ret.append(" [--" + ARG_TTL_START + "=<N>:- is inclusive]");
ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]"); ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]");
ret.append(" [--" + ARG_PATH + "=<path_to_a_file>]"); ret.append(" [--" + ARG_PATH + "=<path_to_a_file>]");
ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "]");
ret.append("\n"); ret.append("\n");
} }
@ -1958,7 +1978,7 @@ void DBDumperCommand::DoCommand() {
break; break;
case kTableFile: case kTableFile:
DumpSstFile(options_, path_, is_key_hex_, /* show_properties */ true, DumpSstFile(options_, path_, is_key_hex_, /* show_properties */ true,
/* decode_blob_index */ false); decode_blob_index_);
break; break;
case kDescriptorFile: case kDescriptorFile:
DumpManifestFile(options_, path_, /* verbose_ */ false, is_key_hex_, DumpManifestFile(options_, path_, /* verbose_ */ false, is_key_hex_,

@ -107,6 +107,7 @@ class DBDumperCommand : public LDBCommand {
bool count_delim_; bool count_delim_;
bool print_stats_; bool print_stats_;
std::string path_; std::string path_;
bool decode_blob_index_;
static const std::string ARG_COUNT_ONLY; static const std::string ARG_COUNT_ONLY;
static const std::string ARG_COUNT_DELIM; static const std::string ARG_COUNT_DELIM;
@ -137,6 +138,7 @@ class InternalDumpCommand : public LDBCommand {
bool count_delim_; bool count_delim_;
bool print_stats_; bool print_stats_;
bool is_input_key_hex_; bool is_input_key_hex_;
bool decode_blob_index_;
static const std::string ARG_DELIM; static const std::string ARG_DELIM;
static const std::string ARG_COUNT_ONLY; static const std::string ARG_COUNT_ONLY;

@ -393,6 +393,20 @@ class LDBTestCase(unittest.TestCase):
hex(ord('b'))), hex(ord('b'))),
"'a' seq:1, type:1 => val\nInternal keys in range: 1") "'a' seq:1, type:1 => val\nInternal keys in range: 1")
def testIDumpDecodeBlobIndex(self):
print("Running testIDumpDecodeBlobIndex...")
self.assertRunOK("put a val --create_if_missing", "OK")
self.assertRunOK("put b val --enable_blob_files", "OK")
# Pattern to expect from dump with decode_blob_index flag enabled.
regex = ".*\[blob ref\].*"
expected_pattern = re.compile(regex)
cmd = "idump %s --decode_blob_index"
self.assertRunOKFull((cmd)
% (self.dbParam(self.DB_NAME)),
expected_pattern, unexpected=False,
isPattern=True)
def testMiscAdminTask(self): def testMiscAdminTask(self):
print("Running testMiscAdminTask...") print("Running testMiscAdminTask...")
# These tests need to be improved; for example with asserts about # These tests need to be improved; for example with asserts about
@ -687,16 +701,16 @@ class LDBTestCase(unittest.TestCase):
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put sst1 sst1_val --create_if_missing", "OK") self.assertRunOK("put sst1 sst1_val --create_if_missing", "OK")
self.assertRunOK("put sst2 sst2_val", "OK") self.assertRunOK("put sst2 sst2_val --enable_blob_files", "OK")
self.assertRunOK("get sst1", "sst1_val") self.assertRunOK("get sst1", "sst1_val")
# Pattern to expect from SST dump. # Pattern to expect from SST dump.
regex = ".*Sst file format:.*" regex = ".*Sst file format:.*\n.*\[blob ref\].*"
expected_pattern = re.compile(regex) expected_pattern = re.compile(regex)
sst_files = self.getSSTFiles(dbPath) sst_files = self.getSSTFiles(dbPath)
self.assertTrue(len(sst_files) >= 1) self.assertTrue(len(sst_files) >= 1)
cmd = "dump --path=%s" cmd = "dump --path=%s --decode_blob_index"
self.assertRunOKFull((cmd) self.assertRunOKFull((cmd)
% (sst_files[0]), % (sst_files[0]),
expected_pattern, unexpected=False, expected_pattern, unexpected=False,

Loading…
Cancel
Save