From 58a0ae06dca85529fcc8c0b04b96ef019b27a8d3 Mon Sep 17 00:00:00 2001 From: Haobo Xu Date: Wed, 7 Aug 2013 13:36:26 -0700 Subject: [PATCH 1/2] [RocksDB] Improve sst_dump to take user key range Summary: The ability to dump internal keys associated with certain user keys, directly from sst files, is very useful for diagnosis. Will incorporate it directly into ldb later. Test Plan: run it Reviewers: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D12075 --- tools/sst_dump.cc | 112 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 98 insertions(+), 14 deletions(-) diff --git a/tools/sst_dump.cc b/tools/sst_dump.cc index 27afe437e..074f17376 100644 --- a/tools/sst_dump.cc +++ b/tools/sst_dump.cc @@ -14,6 +14,7 @@ #include "table/block.h" #include "table/block_builder.h" #include "table/format.h" +#include "util/ldb_cmd.h" #include "util/random.h" #include "util/testharness.h" #include "util/testutil.h" @@ -21,11 +22,17 @@ namespace leveldb { class SstFileReader { -public: - explicit SstFileReader(std::string file_name, - bool verify_checksum = false, - bool output_hex = false); - Status ReadSequential(bool print_kv, uint64_t read_num = -1); + public: + explicit SstFileReader(const std::string& file_name, + bool verify_checksum, + bool output_hex); + + Status ReadSequential(bool print_kv, + uint64_t read_num, + bool has_from, + const std::string& from_key, + bool has_to, + const std::string& to_key); uint64_t GetReadNumber() { return read_num_; } @@ -37,17 +44,25 @@ private: EnvOptions soptions_; }; -SstFileReader::SstFileReader(std::string file_path, +SstFileReader::SstFileReader(const std::string& file_path, bool verify_checksum, bool output_hex) :file_name_(file_path), read_num_(0), verify_checksum_(verify_checksum), output_hex_(output_hex) { + std::cout << "Process " << file_path << "\n"; } -Status SstFileReader::ReadSequential(bool print_kv, uint64_t read_num) +Status SstFileReader::ReadSequential(bool print_kv, + uint64_t 
read_num, + bool has_from, + const std::string& from_key, + bool has_to, + const std::string& to_key) { unique_ptr table; + InternalKeyComparator internal_comparator_(BytewiseComparator()); Options table_options; + table_options.comparator = &internal_comparator_; unique_ptr file; Status s = table_options.env->NewRandomAccessFile(file_name_, &file, soptions_); @@ -63,17 +78,38 @@ Status SstFileReader::ReadSequential(bool print_kv, uint64_t read_num) Iterator* iter = table->NewIterator(ReadOptions(verify_checksum_, false)); uint64_t i = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + if (has_from) { + InternalKey ikey(from_key, kMaxSequenceNumber, kValueTypeForSeek); + iter->Seek(ikey.Encode()); + } else { + iter->SeekToFirst(); + } + for (; iter->Valid(); iter->Next()) { Slice key = iter->key(); Slice value = iter->value(); ++i; if (read_num > 0 && i > read_num) break; + + ParsedInternalKey ikey; + if (!ParseInternalKey(key, &ikey)) { + std::cerr << "Internal Key [" + << key.ToString(true /* in hex*/) + << "] parse error!\n"; + continue; + } + + // If end marker was specified, we stop before it + if (has_to && BytewiseComparator()->Compare(ikey.user_key, to_key) >= 0) { + break; + } + if (print_kv) { - fprintf(stdout, "%s ==> %s\n", - key.ToString(output_hex_).c_str(), - value.ToString(output_hex_).c_str()); + std::cout << ikey.DebugString(output_hex_) + << " => " + << value.ToString(output_hex_) << "\n"; } + } read_num_ += i; @@ -90,9 +126,29 @@ static void print_help() { "sst_dump [--command=check|scan] [--verify_checksum] " "--file=data_dir_OR_sst_file" " [--output_hex]" + " [--input_key_hex]" + " [--from=]" + " [--to=]" " [--read_num=NUM]\n"); } +string HexToString(const string& str) { + string parsed; + if (str[0] != '0' || str[1] != 'x') { + fprintf(stderr, "Invalid hex input %s. 
Must start with 0x\n", + str.c_str()); + throw "Invalid hex input"; + } + + for (unsigned int i = 2; i < str.length();) { + int c; + sscanf(str.c_str() + i, "%2X", &c); + parsed.push_back(c); + i += 2; + } + return parsed; +} + int main(int argc, char** argv) { const char* dir_or_file = nullptr; @@ -103,24 +159,47 @@ int main(int argc, char** argv) { uint64_t n; bool verify_checksum = false; bool output_hex = false; + bool input_key_hex = false; + bool has_from = false; + bool has_to = false; + std::string from_key; + std::string to_key; for (int i = 1; i < argc; i++) { if (strncmp(argv[i], "--file=", 7) == 0) { dir_or_file = argv[i] + 7; } else if (strcmp(argv[i], "--output_hex") == 0) { output_hex = true; + } else if (strcmp(argv[i], "--input_key_hex") == 0) { + input_key_hex = true; } else if (sscanf(argv[i], "--read_num=%ld%c", &n, &junk) == 1) { read_num = n; } else if (strcmp(argv[i], "--verify_checksum") == 0) { verify_checksum = true; } else if (strncmp(argv[i], "--command=", 10) == 0) { command = argv[i] + 10; - } else { + } else if (strncmp(argv[i], "--from=", 7) == 0) { + from_key = argv[i] + 7; + has_from = true; + } else if (strncmp(argv[i], "--to=", 5) == 0) { + to_key = argv[i] + 5; + has_to = true; + }else { print_help(); exit(1); } } + + if (input_key_hex) { + if (has_from) { + from_key = HexToString(from_key); + } + if (has_to) { + to_key = HexToString(to_key); + } + } + if(dir_or_file == nullptr) { print_help(); exit(1); @@ -136,6 +215,9 @@ int main(int argc, char** argv) { dir = false; } + std::cout << "from [" << leveldb::Slice(from_key).ToString(true) + << "] to [" << leveldb::Slice(to_key).ToString(true) << "]\n"; + uint64_t total_read = 0; for (size_t i = 0; i < filenames.size(); i++) { std::string filename = filenames.at(i); @@ -145,7 +227,7 @@ int main(int argc, char** argv) { continue; } if(dir) { - filename = std::string(dir_or_file) + "//" + filename; + filename = std::string(dir_or_file) + "/" + filename; } leveldb::SstFileReader 
reader(filename, verify_checksum, output_hex); @@ -153,7 +235,9 @@ int main(int argc, char** argv) { // scan all files in give file path. if (command == "" || command == "scan" || command == "check") { st = reader.ReadSequential(command != "check", - read_num > 0 ? (read_num - total_read) : read_num); + read_num > 0 ? (read_num - total_read) : + read_num, + has_from, from_key, has_to, to_key); if (!st.ok()) { fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str()); From 3a3b1c3e6cb0f39ed55a9699ed30e5e1eb18eead Mon Sep 17 00:00:00 2001 From: Haobo Xu Date: Thu, 8 Aug 2013 15:51:16 -0700 Subject: [PATCH 2/2] [RocksDB] Improve manifest dump to print internal keys in hex for version edits. Summary: Currently, VersionEdit::DebugString always displays internal keys in the original ASCII format. This could cause manifest dump to be truncated if internal keys contain special characters (like null). Also added an option --input_key_hex for ldb idump to indicate that the passed in user keys are in hex. 
Test Plan: run ldb manifest_dump Reviewers: dhruba, emayanke CC: leveldb Differential Revision: https://reviews.facebook.net/D12111 --- db/version_edit.cc | 8 ++++---- db/version_edit.h | 2 +- db/version_set.cc | 2 +- util/ldb_cmd.cc | 13 +++++++++---- util/ldb_cmd.h | 2 ++ 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/db/version_edit.cc b/db/version_edit.cc index ed63c1013..f85bec086 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -218,7 +218,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) { return result; } -std::string VersionEdit::DebugString() const { +std::string VersionEdit::DebugString(bool hex_key) const { std::string r; r.append("VersionEdit {"); if (has_comparator_) { @@ -245,7 +245,7 @@ std::string VersionEdit::DebugString() const { r.append("\n CompactPointer: "); AppendNumberTo(&r, compact_pointers_[i].first); r.append(" "); - r.append(compact_pointers_[i].second.DebugString()); + r.append(compact_pointers_[i].second.DebugString(hex_key)); } for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); iter != deleted_files_.end(); @@ -264,9 +264,9 @@ std::string VersionEdit::DebugString() const { r.append(" "); AppendNumberTo(&r, f.file_size); r.append(" "); - r.append(f.smallest.DebugString()); + r.append(f.smallest.DebugString(hex_key)); r.append(" .. 
"); - r.append(f.largest.DebugString()); + r.append(f.largest.DebugString(hex_key)); } r.append("\n}\n"); return r; diff --git a/db/version_edit.h b/db/version_edit.h index 2743e9e0d..20b234470 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -89,7 +89,7 @@ class VersionEdit { void EncodeTo(std::string* dst) const; Status DecodeFrom(const Slice& src); - std::string DebugString() const; + std::string DebugString(bool hex_key = false) const; private: friend class VersionSet; diff --git a/db/version_set.cc b/db/version_set.cc index 9ea9ad809..807d99c3c 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1496,7 +1496,7 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname, // Write out each individual edit if (verbose) { printf("*************************Edit[%d] = %s\n", - count, edit.DebugString().c_str()); + count, edit.DebugString(hex).c_str()); } count++; diff --git a/util/ldb_cmd.cc b/util/ldb_cmd.cc index 083683ee5..597e75bff 100644 --- a/util/ldb_cmd.cc +++ b/util/ldb_cmd.cc @@ -578,6 +578,7 @@ void PrintBucketCounts(const vector& bucket_counts, int ttl_start, const string InternalDumpCommand::ARG_COUNT_ONLY = "count_only"; const string InternalDumpCommand::ARG_STATS = "stats"; +const string InternalDumpCommand::ARG_INPUT_KEY_HEX = "input_key_hex"; InternalDumpCommand::InternalDumpCommand(const vector& params, const map& options, @@ -585,12 +586,14 @@ InternalDumpCommand::InternalDumpCommand(const vector& params, LDBCommand(options, flags, true, BuildCmdLineOptions({ ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, ARG_TO, ARG_MAX_KEYS, - ARG_COUNT_ONLY, ARG_STATS})), + ARG_COUNT_ONLY, ARG_STATS, + ARG_INPUT_KEY_HEX})), has_from_(false), has_to_(false), max_keys_(-1), count_only_(false), - print_stats_(false) { + print_stats_(false), + is_input_key_hex_(false) { has_from_ = ParseStringOption(options, ARG_FROM, &from_); has_to_ = ParseStringOption(options, ARG_TO, &to_); @@ -599,8 +602,9 @@ 
InternalDumpCommand::InternalDumpCommand(const vector& params, print_stats_ = IsFlagPresent(flags, ARG_STATS); count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY); + is_input_key_hex_ = IsFlagPresent(flags, ARG_INPUT_KEY_HEX); - if (is_key_hex_) { + if (is_input_key_hex_) { if (has_from_) { from_ = HexToString(from_); } @@ -614,6 +618,7 @@ void InternalDumpCommand::Help(string& ret) { ret.append(" "); ret.append(InternalDumpCommand::Name()); ret.append(HelpRangeCmdArgs()); + ret.append(" [--" + ARG_INPUT_KEY_HEX + "]"); ret.append(" [--" + ARG_MAX_KEYS + "=]"); ret.append(" [--" + ARG_COUNT_ONLY + "]"); ret.append(" [--" + ARG_STATS + "]"); @@ -674,7 +679,7 @@ void InternalDumpCommand::DoCommand() { if (!count_only_) { string key = ikey.DebugString(is_key_hex_); string value = iter->value().ToString(is_value_hex_); - fprintf(stdout, "%s => %s\n", key.data(), value.data()); + std::cout << key << " => " << value << "\n"; } // Terminate if maximum number of keys have been dumped diff --git a/util/ldb_cmd.h b/util/ldb_cmd.h index cc0b93c06..753322b61 100644 --- a/util/ldb_cmd.h +++ b/util/ldb_cmd.h @@ -424,9 +424,11 @@ private: int max_keys_; bool count_only_; bool print_stats_; + bool is_input_key_hex_; static const string ARG_COUNT_ONLY; static const string ARG_STATS; + static const string ARG_INPUT_KEY_HEX; }; class DBLoaderCommand: public LDBCommand {