From 92ca816a60556846a954289d183a290a1a6b2dc2 Mon Sep 17 00:00:00 2001 From: Haobo Xu Date: Thu, 20 Jun 2013 16:02:36 -0700 Subject: [PATCH] [RocksDB] Support internal key/value dump for ldb Summary: This diff added a command 'idump' to ldb tool, which dumps the internal key/value pairs. It could be useful for diagnosis and estimating the per user key 'overhead'. Also cleaned up the ldb code a bit where I touched. Test Plan: make check; ldb idump Reviewers: emayanke, sheki, dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D11517 --- util/ldb_cmd.cc | 148 +++++++++++++++++++++++++++++++++++++++++++---- util/ldb_cmd.h | 46 ++++++++++++--- util/ldb_tool.cc | 1 + 3 files changed, 176 insertions(+), 19 deletions(-) diff --git a/util/ldb_cmd.cc b/util/ldb_cmd.cc index 780d3d2a4..0134e93b4 100644 --- a/util/ldb_cmd.cc +++ b/util/ldb_cmd.cc @@ -5,6 +5,7 @@ #include "util/ldb_cmd.h" #include "db/dbformat.h" +#include "db/db_impl.h" #include "db/log_reader.h" #include "db/filename.h" #include "db/write_batch_internal.h" @@ -45,7 +46,7 @@ const char* LDBCommand::DELIM = " ==> "; LDBCommand* LDBCommand::InitFromCmdLineArgs( int argc, char** argv, - Options options + const Options& options ) { vector args; for (int i = 1; i < argc; i++) { @@ -66,7 +67,7 @@ LDBCommand* LDBCommand::InitFromCmdLineArgs( */ LDBCommand* LDBCommand::InitFromCmdLineArgs( const vector& args, - Options options + const Options& options ) { // --x=y command line arguments are added as x->y map entries. map option_map; @@ -80,9 +81,7 @@ LDBCommand* LDBCommand::InitFromCmdLineArgs( const string OPTION_PREFIX = "--"; - for (vector::const_iterator itr = args.begin(); - itr != args.end(); itr++) { - string arg = *itr; + for (const auto& arg : args) { if (arg[0] == '-' && arg[1] == '-'){ vector splits = stringSplit(arg, '='); if (splits.size() == 2) { @@ -93,7 +92,7 @@ LDBCommand* LDBCommand::InitFromCmdLineArgs( flags.push_back(optionKey); } } else { - cmdTokens.push_back(string(arg)); + cmdTokens.push_back(arg); } } @@ -119,9 +118,9 @@ LDBCommand* LDBCommand::InitFromCmdLineArgs( LDBCommand* LDBCommand::SelectCommand( const std::string& cmd, - vector& cmdParams, - map& option_map, - vector& flags + const vector& cmdParams, + const map& option_map, + const vector& flags ) { if (cmd == GetCommand::Name()) { @@ -150,6 +149,8 @@ LDBCommand* LDBCommand::SelectCommand( return new DBLoaderCommand(cmdParams, option_map, flags); } else if (cmd == ManifestDumpCommand::Name()) { return new ManifestDumpCommand(cmdParams, option_map, flags); + } else if (cmd == InternalDumpCommand::Name()) { + return new InternalDumpCommand(cmdParams, option_map, flags); } return nullptr; } @@ -163,7 +164,8 @@ LDBCommand* LDBCommand::SelectCommand( * updated. */ bool LDBCommand::ParseIntOption(const map& options, - string option, int& value, LDBCommandExecuteResult& exec_state) { + const string& option, int& value, + LDBCommandExecuteResult& exec_state) { map::const_iterator itr = option_map_.find(option); if (itr != option_map_.end()) { @@ -181,6 +183,21 @@ bool LDBCommand::ParseIntOption(const map& options, return false; } +/** + * Parses the specified option and fills in the value. + * Returns true if the option is found. + * Returns false otherwise. + */ +bool LDBCommand::ParseStringOption(const map& options, + const string& option, string* value) { + auto itr = option_map_.find(option); + if (itr != option_map_.end()) { + *value = itr->second; + return true; + } + return false; +} + Options LDBCommand::PrepareOptionsForOpenDB() { Options opt = options_; @@ -453,7 +470,7 @@ void ManifestDumpCommand::Help(string& ret) { ManifestDumpCommand::ManifestDumpCommand(const vector& params, const map& options, const vector& flags) : LDBCommand(options, flags, false, - BuildCmdLineOptions({ARG_VERBOSE,ARG_PATH})), + BuildCmdLineOptions({ARG_VERBOSE, ARG_PATH, ARG_HEX})), verbose_(false), path_("") { @@ -559,6 +576,115 @@ void PrintBucketCounts(const vector& bucket_counts, int ttl_start, ReadableTime(ttl_end).c_str(), bucket_counts[num_buckets - 1]); } +const string InternalDumpCommand::ARG_COUNT_ONLY = "count_only"; +const string InternalDumpCommand::ARG_STATS = "stats"; + +InternalDumpCommand::InternalDumpCommand(const vector& params, + const map& options, + const vector& flags) : + LDBCommand(options, flags, true, + BuildCmdLineOptions({ ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, + ARG_FROM, ARG_TO, ARG_MAX_KEYS, + ARG_COUNT_ONLY, ARG_STATS})), + has_from_(false), + has_to_(false), + max_keys_(-1), + count_only_(false), + print_stats_(false) { + + has_from_ = ParseStringOption(options, ARG_FROM, &from_); + has_to_ = ParseStringOption(options, ARG_TO, &to_); + + ParseIntOption(options, ARG_MAX_KEYS, max_keys_, exec_state_); + + print_stats_ = IsFlagPresent(flags, ARG_STATS); + count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY); + + if (is_key_hex_) { + if (has_from_) { + from_ = HexToString(from_); + } + if (has_to_) { + to_ = HexToString(to_); + } + } +} + +void InternalDumpCommand::Help(string& ret) { + ret.append(" "); + ret.append(InternalDumpCommand::Name()); + ret.append(HelpRangeCmdArgs()); + ret.append(" [--" + ARG_MAX_KEYS + "=]"); + ret.append(" [--" + ARG_COUNT_ONLY + "]"); + ret.append(" [--" + ARG_STATS + "]"); + ret.append("\n"); +} + +void InternalDumpCommand::DoCommand() { + if (!db_) { + return; + } + + if (print_stats_) { + string stats; + if (db_->GetProperty("leveldb.stats", &stats)) { + fprintf(stdout, "%s\n", stats.c_str()); + } + } + + // Cast as DBImpl to get internal iterator + DBImpl* idb = dynamic_cast(db_); + if (!idb) { + exec_state_ = LDBCommandExecuteResult::FAILED("DB is not DBImpl"); + return; + } + + // Setup internal key iterator + auto iter = unique_ptr(idb->TEST_NewInternalIterator()); + Status st = iter->status(); + if (!st.ok()) { + exec_state_ = LDBCommandExecuteResult::FAILED("Iterator error:" + + st.ToString()); + } + + if (has_from_) { + InternalKey ikey(from_, kMaxSequenceNumber, kValueTypeForSeek); + iter->Seek(ikey.Encode()); + } else { + iter->SeekToFirst(); + } + + long long count = 0; + for (; iter->Valid(); iter->Next()) { + ParsedInternalKey ikey; + if (!ParseInternalKey(iter->key(), &ikey)) { + fprintf(stderr, "Internal Key [%s] parse error!\n", + iter->key().ToString(true /* in hex*/).data()); + // TODO: add error counter + continue; + } + + // If end marker was specified, we stop before it + if (has_to_ && options_.comparator->Compare(ikey.user_key, to_) >= 0) { + break; + } + + ++count; + + if (!count_only_) { + string key = ikey.DebugString(is_key_hex_); + string value = iter->value().ToString(is_value_hex_); + fprintf(stdout, "%s => %s\n", key.data(), value.data()); + } + + // Terminate if maximum number of keys have been dumped + if (max_keys_ > 0 && count >= max_keys_) break; + } + + fprintf(stdout, "Internal keys in range: %lld\n", (long long) count); +} + + const string DBDumperCommand::ARG_COUNT_ONLY = "count_only"; const string DBDumperCommand::ARG_STATS = "stats"; const string DBDumperCommand::ARG_TTL_BUCKET = "bucket"; diff --git a/util/ldb_cmd.h b/util/ldb_cmd.h index d8e4c4b11..240ebca75 100644 --- a/util/ldb_cmd.h +++ b/util/ldb_cmd.h @@ -55,13 +55,13 @@ public: static LDBCommand* InitFromCmdLineArgs( const vector& args, - Options options = Options() + const Options& options = Options() ); static LDBCommand* InitFromCmdLineArgs( int argc, char** argv, - Options options = Options() + const Options& options = Options() ); bool ValidateCmdLineOptions(); @@ -230,6 +230,8 @@ protected: string msg = st.ToString(); exec_state_ = LDBCommandExecuteResult::FAILED(msg); } + + options_ = opt; } void CloseDB () { @@ -281,13 +283,16 @@ protected: return ret; } - bool ParseIntOption(const map& options, string option, - int& value, LDBCommandExecuteResult& exec_state); + bool ParseIntOption(const map& options, const string& option, + int& value, LDBCommandExecuteResult& exec_state); -private: + bool ParseStringOption(const map& options, + const string& option, string* value); Options options_; +private: + /** * Interpret command line options and flags to determine if the key * should be input/output in hex. @@ -347,9 +352,9 @@ private: static LDBCommand* SelectCommand( const string& cmd, - vector& cmdParams, - map& option_map, - vector& flags + const vector& cmdParams, + const map& option_map, + const vector& flags ); }; @@ -397,6 +402,31 @@ private: static const string ARG_TTL_BUCKET; }; +class InternalDumpCommand: public LDBCommand { +public: + static string Name() { return "idump"; } + + InternalDumpCommand(const vector& params, + const map& options, + const vector& flags); + + static void Help(string& ret); + + virtual void DoCommand(); + +private: + bool has_from_; + string from_; + bool has_to_; + string to_; + int max_keys_; + bool count_only_; + bool print_stats_; + + static const string ARG_COUNT_ONLY; + static const string ARG_STATS; +}; + class DBLoaderCommand: public LDBCommand { public: static string Name() { return "load"; } diff --git a/util/ldb_tool.cc b/util/ldb_tool.cc index e46aee39d..eec1f4e0e 100644 --- a/util/ldb_tool.cc +++ b/util/ldb_tool.cc @@ -59,6 +59,7 @@ public: DBDumperCommand::Help(ret); DBLoaderCommand::Help(ret); ManifestDumpCommand::Help(ret); + InternalDumpCommand::Help(ret); fprintf(stderr, "%s\n", ret.c_str()); }