From cef6f84393f691b618b837d41df62c58542f81fe Mon Sep 17 00:00:00 2001 From: Qiao Yang Date: Mon, 24 Nov 2014 10:04:16 -0800 Subject: [PATCH] Added 'dump_live_files' command to ldb tool. Summary: Preliminary diff to solicit comments. Given DB path, dump all SST files (key/value and properties), WAL file and manifest files. What command options do we need to support for this command? Maybe output_hex for keys? Test Plan: Create additional ldb unit tests. Reviewers: sdong, rven Reviewed By: rven Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D29547 --- tools/ldb_test.py | 16 +++ util/ldb_cmd.cc | 281 ++++++++++++++++++++++++++++++--------- util/ldb_cmd.h | 15 +++ util/ldb_tool.cc | 1 + util/sst_dump_tool.cc | 69 +--------- util/sst_dump_tool_imp.h | 78 +++++++++++ 6 files changed, 326 insertions(+), 134 deletions(-) create mode 100644 util/sst_dump_tool_imp.h diff --git a/tools/ldb_test.py b/tools/ldb_test.py index b4ef5221f..f248f88cd 100644 --- a/tools/ldb_test.py +++ b/tools/ldb_test.py @@ -378,6 +378,22 @@ class LDBTestCase(unittest.TestCase): my_check_output("rm -f %s" % sstFilePath, shell=True) self.assertRunFAIL("checkconsistency") + def dumpLiveFiles(self, params, dumpFile): + return 0 == run_err_null("./ldb dump_live_files %s > %s" % ( + params, dumpFile)) + + def testDumpLiveFiles(self): + print "Running testDumpLiveFiles..." 
+ + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put x1 y1 --create_if_missing", "OK") + self.assertRunOK("put x2 y2", "OK") + dumpFilePath = os.path.join(self.TMP_DIR, "dump1") + self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath)) + self.assertRunOK("delete x1", "OK") + self.assertRunOK("put x3 y3", "OK") + dumpFilePath = os.path.join(self.TMP_DIR, "dump2") + self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath)) if __name__ == "__main__": unittest.main() diff --git a/util/ldb_cmd.cc b/util/ldb_cmd.cc index 8a8fa7a2e..5547fc085 100644 --- a/util/ldb_cmd.cc +++ b/util/ldb_cmd.cc @@ -14,7 +14,9 @@ #include "db/write_batch_internal.h" #include "rocksdb/write_batch.h" #include "rocksdb/cache.h" +#include "rocksdb/table_properties.h" #include "util/coding.h" +#include "util/sst_dump_tool_imp.h" #include "util/scoped_arena_iterator.h" #include "utilities/ttl/db_ttl_impl.h" @@ -165,6 +167,8 @@ LDBCommand* LDBCommand::SelectCommand( return new ManifestDumpCommand(cmdParams, option_map, flags); } else if (cmd == ListColumnFamiliesCommand::Name()) { return new ListColumnFamiliesCommand(cmdParams, option_map, flags); + } else if (cmd == DBFileDumperCommand::Name()) { + return new DBFileDumperCommand(cmdParams, option_map, flags); } else if (cmd == InternalDumpCommand::Name()) { return new InternalDumpCommand(cmdParams, option_map, flags); } else if (cmd == CheckConsistencyCommand::Name()) { @@ -438,6 +442,8 @@ void CompactorCommand::DoCommand() { delete end; } +// ---------------------------------------------------------------------------- + const string DBLoaderCommand::ARG_DISABLE_WAL = "disable_wal"; const string DBLoaderCommand::ARG_BULK_LOAD = "bulk_load"; const string DBLoaderCommand::ARG_COMPACT = "compact"; @@ -513,6 +519,31 @@ void DBLoaderCommand::DoCommand() { // ---------------------------------------------------------------------------- +namespace { + +void DumpManifestFile(std::string file, bool verbose, 
bool hex) { + Options options; + EnvOptions sopt; + std::string dbname("dummy"); + std::shared_ptr tc( + NewLRUCache(options.max_open_files - 10, options.table_cache_numshardbits, + options.table_cache_remove_scan_count_limit)); + // Notice we are using the default options not through SanitizeOptions(), + // if VersionSet::DumpManifest() depends on any option done by + // SanitizeOptions(), we need to initialize it manually. + options.db_paths.emplace_back("dummy", 0); + WriteController wc; + WriteBuffer wb(options.db_write_buffer_size); + VersionSet versions(dbname, &options, sopt, tc.get(), &wb, &wc); + Status s = versions.DumpManifest(options, file, verbose, hex); + if (!s.ok()) { + printf("Error in processing file %s %s\n", file.c_str(), + s.ToString().c_str()); + } +} + +} // namespace + const string ManifestDumpCommand::ARG_VERBOSE = "verbose"; const string ManifestDumpCommand::ARG_PATH = "path"; @@ -585,25 +616,7 @@ void ManifestDumpCommand::DoCommand() { printf("Processing Manifest file %s\n", manifestfile.c_str()); } - Options options; - EnvOptions sopt; - std::string file(manifestfile); - std::string dbname("dummy"); - std::shared_ptr tc(NewLRUCache( - options.max_open_files - 10, options.table_cache_numshardbits, - options.table_cache_remove_scan_count_limit)); - // Notice we are using the default options not through SanitizeOptions(), - // if VersionSet::DumpManifest() depends on any option done by - // SanitizeOptions(), we need to initialize it manually. 
- options.db_paths.emplace_back("dummy", 0); - WriteController wc; - WriteBuffer wb(options.db_write_buffer_size); - VersionSet versions(dbname, &options, sopt, tc.get(), &wb, &wc); - Status s = versions.DumpManifest(options, file, verbose_, is_key_hex_); - if (!s.ok()) { - printf("Error in processing file %s %s\n", manifestfile.c_str(), - s.ToString().c_str()); - } + DumpManifestFile(manifestfile, verbose_, is_key_hex_); if (verbose_) { printf("Processing Manifest file %s done\n", manifestfile.c_str()); } @@ -1325,9 +1338,19 @@ void ChangeCompactionStyleCommand::DoCommand() { files_per_level.c_str()); } +// ---------------------------------------------------------------------------- + +namespace { + +struct StdErrReporter : public log::Reader::Reporter { + virtual void Corruption(size_t bytes, const Status& s) { + cerr << "Corruption detected in log file " << s.ToString() << "\n"; + } +}; + class InMemoryHandler : public WriteBatch::Handler { public: - InMemoryHandler(stringstream& row, bool print_values) : Handler(),row_(row) { + InMemoryHandler(stringstream& row, bool print_values) : Handler(), row_(row) { print_values_ = print_values; } @@ -1357,13 +1380,63 @@ class InMemoryHandler : public WriteBatch::Handler { row_ << LDBCommand::StringToHex(key.ToString()) << " "; } - virtual ~InMemoryHandler() { }; + virtual ~InMemoryHandler() {} private: stringstream & row_; bool print_values_; }; +void DumpWalFile(std::string wal_file, bool print_header, bool print_values, + LDBCommandExecuteResult* exec_state) { + unique_ptr file; + Env* env_ = Env::Default(); + EnvOptions soptions; + Status status = env_->NewSequentialFile(wal_file, &file, soptions); + if (!status.ok()) { + if (exec_state) { + *exec_state = LDBCommandExecuteResult::FAILED("Failed to open WAL file " + + status.ToString()); + } else { + cerr << "Error: Failed to open WAL file " << status.ToString() + << std::endl; + } + } else { + StdErrReporter reporter; + log::Reader reader(move(file), &reporter, true, 
0); + string scratch; + WriteBatch batch; + Slice record; + stringstream row; + if (print_header) { + cout << "Sequence,Count,ByteSize,Physical Offset,Key(s)"; + if (print_values) { + cout << " : value "; + } + cout << "\n"; + } + while (reader.ReadRecord(&record, &scratch)) { + row.str(""); + if (record.size() < 12) { + reporter.Corruption(record.size(), + Status::Corruption("log record too small")); + } else { + WriteBatchInternal::SetContents(&batch, record); + row << WriteBatchInternal::Sequence(&batch) << ","; + row << WriteBatchInternal::Count(&batch) << ","; + row << WriteBatchInternal::ByteSize(&batch) << ","; + row << reader.LastRecordOffset() << ","; + InMemoryHandler handler(row, print_values); + batch.Iterate(&handler); + row << "\n"; + } + cout << row.str(); + } + } +} + +} // namespace + const string WALDumperCommand::ARG_WAL_FILE = "walfile"; const string WALDumperCommand::ARG_PRINT_VALUE = "print_value"; const string WALDumperCommand::ARG_PRINT_HEADER = "header"; @@ -1401,53 +1474,10 @@ void WALDumperCommand::Help(string& ret) { } void WALDumperCommand::DoCommand() { - struct StdErrReporter : public log::Reader::Reporter { - virtual void Corruption(size_t bytes, const Status& s) { - cerr<<"Corruption detected in log file "< file; - Env* env_ = Env::Default(); - EnvOptions soptions; - Status status = env_->NewSequentialFile(wal_file_, &file, soptions); - if (!status.ok()) { - exec_state_ = LDBCommandExecuteResult::FAILED("Failed to open WAL file " + - status.ToString()); - } else { - StdErrReporter reporter; - log::Reader reader(move(file), &reporter, true, 0); - string scratch; - WriteBatch batch; - Slice record; - stringstream row; - if (print_header_) { - cout<<"Sequence,Count,ByteSize,Physical Offset,Key(s)"; - if (print_values_) { - cout << " : value "; - } - cout << "\n"; - } - while(reader.ReadRecord(&record, &scratch)) { - row.str(""); - if (record.size() < 12) { - reporter.Corruption( - record.size(), Status::Corruption("log record too 
small")); - } else { - WriteBatchInternal::SetContents(&batch, record); - row<& params, const map& options, const vector& flags) : @@ -1486,6 +1516,7 @@ void GetCommand::DoCommand() { } } +// ---------------------------------------------------------------------------- ApproxSizeCommand::ApproxSizeCommand(const vector& params, const map& options, const vector& flags) : @@ -1537,6 +1568,7 @@ void ApproxSizeCommand::DoCommand() { */ } +// ---------------------------------------------------------------------------- BatchPutCommand::BatchPutCommand(const vector& params, const map& options, const vector& flags) : @@ -1590,6 +1622,7 @@ Options BatchPutCommand::PrepareOptionsForOpenDB() { return opt; } +// ---------------------------------------------------------------------------- ScanCommand::ScanCommand(const vector& params, const map& options, const vector& flags) : @@ -1701,6 +1734,7 @@ void ScanCommand::DoCommand() { delete it; } +// ---------------------------------------------------------------------------- DeleteCommand::DeleteCommand(const vector& params, const map& options, const vector& flags) : @@ -1780,6 +1814,7 @@ Options PutCommand::PrepareOptionsForOpenDB() { return opt; } +// ---------------------------------------------------------------------------- const char* DBQuerierCommand::HELP_CMD = "help"; const char* DBQuerierCommand::GET_CMD = "get"; @@ -1861,6 +1896,8 @@ void DBQuerierCommand::DoCommand() { } } +// ---------------------------------------------------------------------------- + CheckConsistencyCommand::CheckConsistencyCommand(const vector& params, const map& options, const vector& flags) : LDBCommand(options, flags, false, @@ -1889,5 +1926,117 @@ void CheckConsistencyCommand::DoCommand() { } } +// ---------------------------------------------------------------------------- + +namespace { + +void DumpSstFile(std::string filename, bool output_hex, bool show_properties) { + std::string from_key; + std::string to_key; + if (filename.length() <= 4 
|| + filename.rfind(".sst") != filename.length() - 4) { + std::cout << "Invalid sst file name." << std::endl; + return; + } + // no verification + rocksdb::SstFileReader reader(filename, false, output_hex); + Status st = reader.ReadSequential(true, -1, false, // has_from + from_key, false, // has_to + to_key); + if (!st.ok()) { + std::cerr << "Error in reading SST file " << filename << st.ToString() + << std::endl; + return; + } + + if (show_properties) { + const rocksdb::TableProperties* table_properties; + + std::shared_ptr + table_properties_from_reader; + st = reader.ReadTableProperties(&table_properties_from_reader); + if (!st.ok()) { + std::cerr << filename << ": " << st.ToString() + << ". Try to use initial table properties" << std::endl; + table_properties = reader.GetInitTableProperties(); + } else { + table_properties = table_properties_from_reader.get(); + } + if (table_properties != nullptr) { + std::cout << std::endl << "Table Properties:" << std::endl; + std::cout << table_properties->ToString("\n") << std::endl; + std::cout << "# deleted keys: " + << rocksdb::GetDeletedKeys( + table_properties->user_collected_properties) + << std::endl; + } + } +} + +} // namespace + +DBFileDumperCommand::DBFileDumperCommand(const vector& params, + const map& options, + const vector& flags) + : LDBCommand(options, flags, true, BuildCmdLineOptions({})) {} + +void DBFileDumperCommand::Help(string& ret) { + ret.append(" "); + ret.append(DBFileDumperCommand::Name()); + ret.append("\n"); +} + +void DBFileDumperCommand::DoCommand() { + if (!db_) { + return; + } + Status s; + + std::cout << "Manifest File" << std::endl; + std::cout << "==============================" << std::endl; + std::string manifest_filename; + s = ReadFileToString(db_->GetEnv(), CurrentFileName(db_->GetName()), + &manifest_filename); + if (!s.ok() || manifest_filename.empty() || + manifest_filename.back() != '\n') { + std::cerr << "Error when reading CURRENT file " + << CurrentFileName(db_->GetName()) 
<< std::endl; + } + // remove the trailing '\n' + manifest_filename.resize(manifest_filename.size() - 1); + string manifest_filepath = db_->GetName() + "/" + manifest_filename; + std::cout << manifest_filepath << std::endl; + DumpManifestFile(manifest_filepath, false, false); + std::cout << std::endl; + + std::cout << "SST Files" << std::endl; + std::cout << "==============================" << std::endl; + std::vector metadata; + db_->GetLiveFilesMetaData(&metadata); + for (auto& fileMetadata : metadata) { + std::string filename = fileMetadata.db_path + fileMetadata.name; + std::cout << filename << " level:" << fileMetadata.level << std::endl; + std::cout << "------------------------------" << std::endl; + DumpSstFile(filename, false, true); + std::cout << std::endl; + } + std::cout << std::endl; + + std::cout << "Write Ahead Log Files" << std::endl; + std::cout << "==============================" << std::endl; + rocksdb::VectorLogPtr wal_files; + s = db_->GetSortedWalFiles(wal_files); + if (!s.ok()) { + std::cerr << "Error when getting WAL files" << std::endl; + } else { + for (auto& wal : wal_files) { + // TODO(qyang): option.wal_dir should be passed into ldb command + std::string filename = db_->GetOptions().wal_dir + wal->PathName(); + std::cout << filename << std::endl; + DumpWalFile(filename, true, true, &exec_state_); + } + } +} + } // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/util/ldb_cmd.h b/util/ldb_cmd.h index fd4d4d4b9..e75433e76 100644 --- a/util/ldb_cmd.h +++ b/util/ldb_cmd.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "db/version_set.h" #include "rocksdb/env.h" @@ -392,6 +394,19 @@ private: string to_; }; +class DBFileDumperCommand : public LDBCommand { + public: + static string Name() { return "dump_live_files"; } + + DBFileDumperCommand(const vector& params, + const map& options, + const vector& flags); + + static void Help(string& ret); + + virtual void DoCommand(); +}; + class DBDumperCommand: public 
LDBCommand { public: static string Name() { return "dump"; } diff --git a/util/ldb_tool.cc b/util/ldb_tool.cc index fe2d7d538..fe84fa933 100644 --- a/util/ldb_tool.cc +++ b/util/ldb_tool.cc @@ -80,6 +80,7 @@ public: DBLoaderCommand::Help(ret); ManifestDumpCommand::Help(ret); ListColumnFamiliesCommand::Help(ret); + DBFileDumperCommand::Help(ret); InternalDumpCommand::Help(ret); fprintf(stderr, "%s\n", ret.c_str()); diff --git a/util/sst_dump_tool.cc b/util/sst_dump_tool.cc index 8d2233de8..d0bef3e36 100644 --- a/util/sst_dump_tool.cc +++ b/util/sst_dump_tool.cc @@ -5,83 +5,16 @@ // #ifndef ROCKSDB_LITE -#include "rocksdb/sst_dump_tool.h" +#include "util/sst_dump_tool_imp.h" #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif -#include -#include -#include #include -#include "db/dbformat.h" -#include "db/memtable.h" -#include "db/write_batch_internal.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/immutable_options.h" -#include "rocksdb/iterator.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/status.h" -#include "rocksdb/table.h" -#include "rocksdb/table_properties.h" -#include "table/block.h" -#include "table/block_based_table_factory.h" -#include "table/block_builder.h" -#include "table/format.h" -#include "table/meta_blocks.h" -#include "table/plain_table_factory.h" -#include "util/ldb_cmd.h" -#include "util/random.h" -#include "util/testharness.h" -#include "util/testutil.h" - namespace rocksdb { -class SstFileReader { - public: - explicit SstFileReader(const std::string& file_name, - bool verify_checksum, - bool output_hex); - - Status ReadSequential(bool print_kv, - uint64_t read_num, - bool has_from, - const std::string& from_key, - bool has_to, - const std::string& to_key); - - Status ReadTableProperties( - std::shared_ptr* table_properties); - uint64_t GetReadNumber() { return read_num_; } - TableProperties* GetInitTableProperties() { return table_properties_.get(); } - - private: - Status NewTableReader(const 
std::string& file_path); - Status ReadTableProperties(uint64_t table_magic_number, - RandomAccessFile* file, uint64_t file_size); - Status SetTableOptionsByMagicNumber(uint64_t table_magic_number); - Status SetOldTableOptions(); - - std::string file_name_; - uint64_t read_num_; - bool verify_checksum_; - bool output_hex_; - EnvOptions soptions_; - - Status init_result_; - unique_ptr table_reader_; - unique_ptr file_; - // options_ and internal_comparator_ will also be used in - // ReadSequential internally (specifically, seek-related operations) - Options options_; - const ImmutableCFOptions ioptions_; - InternalKeyComparator internal_comparator_; - unique_ptr table_properties_; -}; - SstFileReader::SstFileReader(const std::string& file_path, bool verify_checksum, bool output_hex) diff --git a/util/sst_dump_tool_imp.h b/util/sst_dump_tool_imp.h new file mode 100644 index 000000000..833f62a42 --- /dev/null +++ b/util/sst_dump_tool_imp.h @@ -0,0 +1,78 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+#ifndef ROCKSDB_LITE +#pragma once + +#include "rocksdb/sst_dump_tool.h" + +#include +#include +#include + +#include "db/dbformat.h" +#include "db/memtable.h" +#include "db/write_batch_internal.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/immutable_options.h" +#include "rocksdb/iterator.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" +#include "rocksdb/table_properties.h" +#include "table/block.h" +#include "table/block_based_table_factory.h" +#include "table/block_builder.h" +#include "table/format.h" +#include "table/meta_blocks.h" +#include "table/plain_table_factory.h" +#include "util/ldb_cmd.h" +#include "util/random.h" +#include "util/testharness.h" +#include "util/testutil.h" + +namespace rocksdb { + +class SstFileReader { + public: + explicit SstFileReader(const std::string& file_name, bool verify_checksum, + bool output_hex); + + Status ReadSequential(bool print_kv, uint64_t read_num, bool has_from, + const std::string& from_key, bool has_to, + const std::string& to_key); + + Status ReadTableProperties( + std::shared_ptr* table_properties); + uint64_t GetReadNumber() { return read_num_; } + TableProperties* GetInitTableProperties() { return table_properties_.get(); } + + private: + Status NewTableReader(const std::string& file_path); + Status ReadTableProperties(uint64_t table_magic_number, + RandomAccessFile* file, uint64_t file_size); + Status SetTableOptionsByMagicNumber(uint64_t table_magic_number); + Status SetOldTableOptions(); + + std::string file_name_; + uint64_t read_num_; + bool verify_checksum_; + bool output_hex_; + EnvOptions soptions_; + + Status init_result_; + unique_ptr table_reader_; + unique_ptr file_; + // options_ and internal_comparator_ will also be used in + // ReadSequential internally (specifically, seek-related operations) + Options options_; + const ImmutableCFOptions ioptions_; + InternalKeyComparator internal_comparator_; + unique_ptr 
table_properties_; +}; + +} // namespace rocksdb + +#endif // ROCKSDB_LITE