From 2ed91622fbdcedc5e56006369d08b9819b02d1eb Mon Sep 17 00:00:00 2001 From: "Peter (Stig) Edwards" Date: Fri, 13 Sep 2019 16:29:16 -0700 Subject: [PATCH] sst_dump recompress show #blocks compressed and not compressed (#5791) Summary: Closes https://github.com/facebook/rocksdb/issues/1474 Helps show when the 12.5% threshold for GoodCompressionRatio (originally from ldb) is hit. Example output: ``` > ./sst_dump --file=/tmp/test.sst --command=recompress from [] to [] Process /tmp/test.sst Sst file format: block-based Block Size: 16384 Compression: kNoCompression Size: 122579836 Blocks: 2300 Compressed: 0 ( 0.0%) Not compressed (ratio): 2300 (100.0%) Not compressed (abort): 0 ( 0.0%) Compression: kSnappyCompression Size: 46289962 Blocks: 2300 Compressed: 2119 ( 92.1%) Not compressed (ratio): 181 ( 7.9%) Not compressed (abort): 0 ( 0.0%) Compression: kZlibCompression Size: 29689825 Blocks: 2300 Compressed: 2301 (100.0%) Not compressed (ratio): 0 ( 0.0%) Not compressed (abort): 0 ( 0.0%) Unsupported compression type: kBZip2Compression. Compression: kLZ4Compression Size: 44785490 Blocks: 2300 Compressed: 1950 ( 84.8%) Not compressed (ratio): 350 ( 15.2%) Not compressed (abort): 0 ( 0.0%) Compression: kLZ4HCCompression Size: 37498895 Blocks: 2300 Compressed: 2301 (100.0%) Not compressed (ratio): 0 ( 0.0%) Not compressed (abort): 0 ( 0.0%) Unsupported compression type: kXpressCompression. 
Compression: kZSTD Size: 32208707 Blocks: 2300 Compressed: 2301 (100.0%) Not compressed (ratio): 0 ( 0.0%) Not compressed (abort): 0 ( 0.0%) ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/5791 Differential Revision: D17347870 fbshipit-source-id: af10849c010b46b20e54162b70123c2805ffe526 --- tools/sst_dump_tool.cc | 57 +++++++++++++++++++++++++++++++++------ tools/sst_dump_tool_imp.h | 3 ++- 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index cbd9c0c87..efe272c09 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -158,7 +158,8 @@ Status SstFileDumper::DumpTable(const std::string& out_filename) { } uint64_t SstFileDumper::CalculateCompressedTableSize( - const TableBuilderOptions& tb_options, size_t block_size) { + const TableBuilderOptions& tb_options, size_t block_size, + uint64_t* num_data_blocks) { std::unique_ptr out_file; std::unique_ptr env(NewMemEnv(Env::Default())); env->NewWritableFile(testFileName, &out_file, soptions_); @@ -189,6 +190,8 @@ uint64_t SstFileDumper::CalculateCompressedTableSize( exit(1); } uint64_t size = table_builder->FileSize(); + assert(num_data_blocks != nullptr); + *num_data_blocks = table_builder->GetTableProperties().num_data_blocks; env->DeleteFile(testFileName); return size; } @@ -199,6 +202,8 @@ int SstFileDumper::ShowAllCompressionSizes( compression_types) { ReadOptions read_options; Options opts; + opts.statistics = rocksdb::CreateDBStatistics(); + opts.statistics->set_stats_level(StatsLevel::kAll); const ImmutableCFOptions imoptions(opts); const ColumnFamilyOptions cfo(opts); const MutableCFOptions moptions(cfo); @@ -217,16 +222,52 @@ int SstFileDumper::ShowAllCompressionSizes( imoptions, moptions, ikc, &block_based_table_factories, i.first, 0 /* sample_for_compression */, compress_opt, false /* skip_filters */, column_family_name, unknown_level); - uint64_t file_size = CalculateCompressedTableSize(tb_opts, block_size); - 
fprintf(stdout, "Compression: %s", i.second); - fprintf(stdout, " Size: %" PRIu64 "\n", file_size); + uint64_t num_data_blocks = 0; + uint64_t file_size = + CalculateCompressedTableSize(tb_opts, block_size, &num_data_blocks); + fprintf(stdout, "Compression: %-24s", i.second); + fprintf(stdout, " Size: %10" PRIu64, file_size); + fprintf(stdout, " Blocks: %6" PRIu64, num_data_blocks); + const uint64_t compressed_blocks = + opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_COMPRESSED); + const uint64_t not_compressed_blocks = + opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_NOT_COMPRESSED); + // When the option enable_index_compression is true, + // NUMBER_BLOCK_COMPRESSED is incremented for index block(s). + if ((compressed_blocks + not_compressed_blocks) > num_data_blocks) { + num_data_blocks = compressed_blocks + not_compressed_blocks; + } + const uint64_t ratio_not_compressed_blocks = + (num_data_blocks - compressed_blocks) - not_compressed_blocks; + const double compressed_pcnt = + (0 == num_data_blocks) ? 0.0 + : ((static_cast<double>(compressed_blocks) / + static_cast<double>(num_data_blocks)) * + 100.0); + const double ratio_not_compressed_pcnt = + (0 == num_data_blocks) + ? 0.0 + : ((static_cast<double>(ratio_not_compressed_blocks) / + static_cast<double>(num_data_blocks)) * + 100.0); + const double not_compressed_pcnt = + (0 == num_data_blocks) + ? 
0.0 + : ((static_cast<double>(not_compressed_blocks) / + static_cast<double>(num_data_blocks)) * + 100.0); + fprintf(stdout, " Compressed: %6" PRIu64 " (%5.1f%%)", compressed_blocks, + compressed_pcnt); + fprintf(stdout, " Not compressed (ratio): %6" PRIu64 " (%5.1f%%)", + ratio_not_compressed_blocks, ratio_not_compressed_pcnt); + fprintf(stdout, " Not compressed (abort): %6" PRIu64 " (%5.1f%%)\n", + not_compressed_blocks, not_compressed_pcnt); } else { fprintf(stdout, "Unsupported compression type: %s.\n", i.second); } } return 0; } - Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number, RandomAccessFileReader* file, uint64_t file_size) { @@ -365,15 +406,15 @@ namespace { void print_help() { fprintf(stderr, - R"(sst_dump --file= [--command=check|scan|raw] + R"(sst_dump --file= [--command=check|scan|raw|recompress] --file= Path to SST file or directory containing SST files --command=check|scan|raw|verify - check: Iterate over entries in files but dont print anything except if an error is encounterd (default command) + check: Iterate over entries in files but don't print anything except if an error is encountered (default command) scan: Iterate over entries in files and print them to screen raw: Dump all the table contents to _dump.txt - verify: Iterate all the blocks in files verifying checksum to detect possible coruption but dont print anything except if a corruption is encountered + verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered recompress: reports the SST file size if recompressed with different compression types diff --git a/tools/sst_dump_tool_imp.h b/tools/sst_dump_tool_imp.h index 846738a40..51c15c8aa 100644 --- a/tools/sst_dump_tool_imp.h +++ b/tools/sst_dump_tool_imp.h @@ -46,7 +46,8 @@ class SstFileDumper { RandomAccessFileReader* file, uint64_t file_size); uint64_t CalculateCompressedTableSize(const TableBuilderOptions& tb_options, - size_t 
block_size); + size_t block_size, + uint64_t* num_data_blocks); Status SetTableOptionsByMagicNumber(uint64_t table_magic_number); Status SetOldTableOptions();