From 8254e9b57c801498f285c5135f5cea7581c63d08 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 11 Aug 2017 15:49:17 -0700 Subject: [PATCH] make sst_dump compression size command consistent Summary: - like other subcommands, reporting compression sizes should be specified with the `--command` CLI arg. - also added `--compression_types` arg as it's useful to restrict the types of compression used, at least in my dictionary compression experiments. Closes https://github.com/facebook/rocksdb/pull/2706 Differential Revision: D5589520 Pulled By: ajkr fbshipit-source-id: 305bb4ebcc95eecc8a85523cd3b1050619c9ddc5 --- tools/sst_dump_test.cc | 2 +- tools/sst_dump_tool.cc | 81 ++++++++++++++++++++++++--------------- tools/sst_dump_tool_imp.h | 5 ++- 3 files changed, 56 insertions(+), 32 deletions(-) diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc index 0a222afa3..460b5a2cc 100644 --- a/tools/sst_dump_test.cc +++ b/tools/sst_dump_test.cc @@ -195,7 +195,7 @@ TEST_F(SSTDumpToolTest, CompressedSizes) { } snprintf(usage[0], optLength, "./sst_dump"); - snprintf(usage[1], optLength, "--show_compression_sizes"); + snprintf(usage[1], optLength, "--command=recompress"); snprintf(usage[2], optLength, "--file=rocksdb_sst_test.sst"); rocksdb::SSTDumpTool tool; ASSERT_TRUE(!tool.Run(3, usage)); diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index fa89e6cdd..4dca284cc 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -60,6 +60,17 @@ extern const uint64_t kLegacyPlainTableMagicNumber; const char* testFileName = "test_file_name"; +static const std::vector> + kCompressions = { + {CompressionType::kNoCompression, "kNoCompression"}, + {CompressionType::kSnappyCompression, "kSnappyCompression"}, + {CompressionType::kZlibCompression, "kZlibCompression"}, + {CompressionType::kBZip2Compression, "kBZip2Compression"}, + {CompressionType::kLZ4Compression, "kLZ4Compression"}, + {CompressionType::kLZ4HCCompression, "kLZ4HCCompression"}, + {CompressionType::kXpressCompression, "kXpressCompression"}, + {CompressionType::kZSTD, "kZSTD"}}; + Status SstFileReader::GetTableReader(const std::string& file_path) { // Warning about 'magic_number' being uninitialized shows up only in UBsan // builds. Though access is guarded by 's.ok()' checks, fix the issue to @@ -174,7 +185,10 @@ uint64_t SstFileReader::CalculateCompressedTableSize( return size; } -int SstFileReader::ShowAllCompressionSizes(size_t block_size) { +int SstFileReader::ShowAllCompressionSizes( + size_t block_size, + const std::vector>& + compression_types) { ReadOptions read_options; Options opts; const ImmutableCFOptions imoptions(opts); @@ -184,17 +198,7 @@ int SstFileReader::ShowAllCompressionSizes(size_t block_size) { fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size); - std::pair compressions[] = { - {CompressionType::kNoCompression, "kNoCompression"}, - {CompressionType::kSnappyCompression, "kSnappyCompression"}, - {CompressionType::kZlibCompression, "kZlibCompression"}, - {CompressionType::kBZip2Compression, "kBZip2Compression"}, - {CompressionType::kLZ4Compression, "kLZ4Compression"}, - {CompressionType::kLZ4HCCompression, "kLZ4HCCompression"}, - {CompressionType::kXpressCompression, "kXpressCompression"}, - {CompressionType::kZSTD, "kZSTD"}}; - - for (auto& i : compressions) { + for (auto& i : compression_types) { if (CompressionTypeSupported(i.first)) { CompressionOptions compress_opt; std::string column_family_name; @@ -359,6 +363,8 @@ void print_help() { scan: Iterate over entries in files and print them to screen raw: Dump all the table contents to _dump.txt verify: Iterate all the blocks in files verifying checksum to detect possible coruption but dont print anything except if a corruption is encountered + recompress: reports the SST file size if recompressed with different + compression types --output_hex Can be combined with scan command to print the keys and values in Hex @@ -383,15 +389,17 @@ void print_help() { Can be combined with --from and --to to indicate that these values are encoded in Hex --show_properties - Print table properties after iterating over the file - - --show_compression_sizes - Independent command that will recreate the SST file using 16K block size with different - compressions and report the size of the file using such compression + Print table properties after iterating over the file when executing + check|scan|raw --set_block_size= - Can be combined with --show_compression_sizes to set the block size that will be used - when trying different compression algorithms + Can be combined with --command=recompress to set the block size that will + be used when trying different compression algorithms + + --compression_types= + Can be combined with --command=recompress to run recompression for this + list of compression types --parse_internal_key=<0xKEY> Convenience option to parse an internal key on the command line. Dumps the @@ -415,13 +423,13 @@ int SSTDumpTool::Run(int argc, char** argv) { bool has_to = false; bool use_from_as_prefix = false; bool show_properties = false; - bool show_compression_sizes = false; bool show_summary = false; bool set_block_size = false; std::string from_key; std::string to_key; std::string block_size_str; size_t block_size; + std::vector> compression_types; uint64_t total_num_files = 0; uint64_t total_num_data_blocks = 0; uint64_t total_data_block_size = 0; @@ -453,19 +461,34 @@ int SSTDumpTool::Run(int argc, char** argv) { use_from_as_prefix = true; } else if (strcmp(argv[i], "--show_properties") == 0) { show_properties = true; - } else if (strcmp(argv[i], "--show_compression_sizes") == 0) { - show_compression_sizes = true; } else if (strcmp(argv[i], "--show_summary") == 0) { show_summary = true; } else if (strncmp(argv[i], "--set_block_size=", 17) == 0) { set_block_size = true; block_size_str = argv[i] + 17; std::istringstream iss(block_size_str); + iss >> block_size; if (iss.fail()) { - fprintf(stderr, "block size must be numeric"); + fprintf(stderr, "block size must be numeric\n"); exit(1); } - iss >> block_size; + } else if (strncmp(argv[i], "--compression_types=", 20) == 0) { + std::string compression_types_csv = argv[i] + 20; + std::istringstream iss(compression_types_csv); + std::string compression_type; + while (std::getline(iss, compression_type, ',')) { + auto iter = std::find_if( + kCompressions.begin(), kCompressions.end(), + [&compression_type](std::pair curr) { + return curr.second == compression_type; + }); + if (iter == kCompressions.end()) { + fprintf(stderr, "%s is not a valid CompressionType\n", + compression_type.c_str()); + exit(1); + } + compression_types.emplace_back(*iter); + } } else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) { std::string in_key(argv[i] + 21); try { @@ -547,12 +570,10 @@ int SSTDumpTool::Run(int argc, char** argv) { continue; } - if (show_compression_sizes) { - if (set_block_size) { - reader.ShowAllCompressionSizes(block_size); - } else { - reader.ShowAllCompressionSizes(16384); - } + if (command == "recompress") { + reader.ShowAllCompressionSizes( + set_block_size ? block_size : 16384, + compression_types.empty() ? kCompressions : compression_types); return 0; } diff --git a/tools/sst_dump_tool_imp.h b/tools/sst_dump_tool_imp.h index e2b639607..9531b5415 100644 --- a/tools/sst_dump_tool_imp.h +++ b/tools/sst_dump_tool_imp.h @@ -34,7 +34,10 @@ class SstFileReader { Status DumpTable(const std::string& out_filename); Status getStatus() { return init_result_; } - int ShowAllCompressionSizes(size_t block_size); + int ShowAllCompressionSizes( + size_t block_size, + const std::vector>& + compression_types); private: // Get the TableReader implementation for the sst file