sst_dump recompress: show number of blocks compressed and not compressed (#5791)

Summary:
Closes https://github.com/facebook/rocksdb/issues/1474
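This helps show when the 12.5% threshold for GoodCompressionRatio (originally from ldb) is hit, i.e. when a block is left uncompressed because compression did not shrink it enough (reported below as "Not compressed (ratio)").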

Example output:

```
> ./sst_dump --file=/tmp/test.sst --command=recompress
from [] to []
Process /tmp/test.sst
Sst file format: block-based
Block Size: 16384
Compression: kNoCompression           Size:  122579836 Blocks:   2300 Compressed:      0 (  0.0%) Not compressed (ratio):   2300 (100.0%) Not compressed (abort):      0 (  0.0%)
Compression: kSnappyCompression       Size:   46289962 Blocks:   2300 Compressed:   2119 ( 92.1%) Not compressed (ratio):    181 (  7.9%) Not compressed (abort):      0 (  0.0%)
Compression: kZlibCompression         Size:   29689825 Blocks:   2300 Compressed:   2301 (100.0%) Not compressed (ratio):      0 (  0.0%) Not compressed (abort):      0 (  0.0%)
Unsupported compression type: kBZip2Compression.
Compression: kLZ4Compression          Size:   44785490 Blocks:   2300 Compressed:   1950 ( 84.8%) Not compressed (ratio):    350 ( 15.2%) Not compressed (abort):      0 (  0.0%)
Compression: kLZ4HCCompression        Size:   37498895 Blocks:   2300 Compressed:   2301 (100.0%) Not compressed (ratio):      0 (  0.0%) Not compressed (abort):      0 (  0.0%)
Unsupported compression type: kXpressCompression.
Compression: kZSTD                    Size:   32208707 Blocks:   2300 Compressed:   2301 (100.0%) Not compressed (ratio):      0 (  0.0%) Not compressed (abort):      0 (  0.0%)
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5791

Differential Revision: D17347870

fbshipit-source-id: af10849c010b46b20e54162b70123c2805ffe526
main
Peter (Stig) Edwards 5 years ago committed by Facebook Github Bot
parent bf5dbc17e3
commit 2ed91622fb
  1. 57
      tools/sst_dump_tool.cc
  2. 3
      tools/sst_dump_tool_imp.h

@ -158,7 +158,8 @@ Status SstFileDumper::DumpTable(const std::string& out_filename) {
} }
uint64_t SstFileDumper::CalculateCompressedTableSize( uint64_t SstFileDumper::CalculateCompressedTableSize(
const TableBuilderOptions& tb_options, size_t block_size) { const TableBuilderOptions& tb_options, size_t block_size,
uint64_t* num_data_blocks) {
std::unique_ptr<WritableFile> out_file; std::unique_ptr<WritableFile> out_file;
std::unique_ptr<Env> env(NewMemEnv(Env::Default())); std::unique_ptr<Env> env(NewMemEnv(Env::Default()));
env->NewWritableFile(testFileName, &out_file, soptions_); env->NewWritableFile(testFileName, &out_file, soptions_);
@ -189,6 +190,8 @@ uint64_t SstFileDumper::CalculateCompressedTableSize(
exit(1); exit(1);
} }
uint64_t size = table_builder->FileSize(); uint64_t size = table_builder->FileSize();
assert(num_data_blocks != nullptr);
*num_data_blocks = table_builder->GetTableProperties().num_data_blocks;
env->DeleteFile(testFileName); env->DeleteFile(testFileName);
return size; return size;
} }
@ -199,6 +202,8 @@ int SstFileDumper::ShowAllCompressionSizes(
compression_types) { compression_types) {
ReadOptions read_options; ReadOptions read_options;
Options opts; Options opts;
opts.statistics = rocksdb::CreateDBStatistics();
opts.statistics->set_stats_level(StatsLevel::kAll);
const ImmutableCFOptions imoptions(opts); const ImmutableCFOptions imoptions(opts);
const ColumnFamilyOptions cfo(opts); const ColumnFamilyOptions cfo(opts);
const MutableCFOptions moptions(cfo); const MutableCFOptions moptions(cfo);
@ -217,16 +222,52 @@ int SstFileDumper::ShowAllCompressionSizes(
imoptions, moptions, ikc, &block_based_table_factories, i.first, imoptions, moptions, ikc, &block_based_table_factories, i.first,
0 /* sample_for_compression */, compress_opt, 0 /* sample_for_compression */, compress_opt,
false /* skip_filters */, column_family_name, unknown_level); false /* skip_filters */, column_family_name, unknown_level);
uint64_t file_size = CalculateCompressedTableSize(tb_opts, block_size); uint64_t num_data_blocks = 0;
fprintf(stdout, "Compression: %s", i.second); uint64_t file_size =
fprintf(stdout, " Size: %" PRIu64 "\n", file_size); CalculateCompressedTableSize(tb_opts, block_size, &num_data_blocks);
fprintf(stdout, "Compression: %-24s", i.second);
fprintf(stdout, " Size: %10" PRIu64, file_size);
fprintf(stdout, " Blocks: %6" PRIu64, num_data_blocks);
const uint64_t compressed_blocks =
opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_COMPRESSED);
const uint64_t not_compressed_blocks =
opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_NOT_COMPRESSED);
// When the option enable_index_compression is true,
// NUMBER_BLOCK_COMPRESSED is incremented for index block(s).
if ((compressed_blocks + not_compressed_blocks) > num_data_blocks) {
num_data_blocks = compressed_blocks + not_compressed_blocks;
}
const uint64_t ratio_not_compressed_blocks =
(num_data_blocks - compressed_blocks) - not_compressed_blocks;
const double compressed_pcnt =
(0 == num_data_blocks) ? 0.0
: ((static_cast<double>(compressed_blocks) /
static_cast<double>(num_data_blocks)) *
100.0);
const double ratio_not_compressed_pcnt =
(0 == num_data_blocks)
? 0.0
: ((static_cast<double>(ratio_not_compressed_blocks) /
static_cast<double>(num_data_blocks)) *
100.0);
const double not_compressed_pcnt =
(0 == num_data_blocks)
? 0.0
: ((static_cast<double>(not_compressed_blocks) /
static_cast<double>(num_data_blocks)) *
100.0);
fprintf(stdout, " Compressed: %6" PRIu64 " (%5.1f%%)", compressed_blocks,
compressed_pcnt);
fprintf(stdout, " Not compressed (ratio): %6" PRIu64 " (%5.1f%%)",
ratio_not_compressed_blocks, ratio_not_compressed_pcnt);
fprintf(stdout, " Not compressed (abort): %6" PRIu64 " (%5.1f%%)\n",
not_compressed_blocks, not_compressed_pcnt);
} else { } else {
fprintf(stdout, "Unsupported compression type: %s.\n", i.second); fprintf(stdout, "Unsupported compression type: %s.\n", i.second);
} }
} }
return 0; return 0;
} }
Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number, Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number,
RandomAccessFileReader* file, RandomAccessFileReader* file,
uint64_t file_size) { uint64_t file_size) {
@ -365,15 +406,15 @@ namespace {
void print_help() { void print_help() {
fprintf(stderr, fprintf(stderr,
R"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw] R"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw|recompress]
--file=<data_dir_OR_sst_file> --file=<data_dir_OR_sst_file>
Path to SST file or directory containing SST files Path to SST file or directory containing SST files
--command=check|scan|raw|verify --command=check|scan|raw|verify
check: Iterate over entries in files but dont print anything except if an error is encounterd (default command) check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
scan: Iterate over entries in files and print them to screen scan: Iterate over entries in files and print them to screen
raw: Dump all the table contents to <file_name>_dump.txt raw: Dump all the table contents to <file_name>_dump.txt
verify: Iterate all the blocks in files verifying checksum to detect possible coruption but dont print anything except if a corruption is encountered verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered
recompress: reports the SST file size if recompressed with different recompress: reports the SST file size if recompressed with different
compression types compression types

@ -46,7 +46,8 @@ class SstFileDumper {
RandomAccessFileReader* file, uint64_t file_size); RandomAccessFileReader* file, uint64_t file_size);
uint64_t CalculateCompressedTableSize(const TableBuilderOptions& tb_options, uint64_t CalculateCompressedTableSize(const TableBuilderOptions& tb_options,
size_t block_size); size_t block_size,
uint64_t* num_data_blocks);
Status SetTableOptionsByMagicNumber(uint64_t table_magic_number); Status SetTableOptionsByMagicNumber(uint64_t table_magic_number);
Status SetOldTableOptions(); Status SetOldTableOptions();

Loading…
Cancel
Save