Allow sst_dump to check size of different compression levels and report time (#6634)

Summary:
1. Add two arguments, --compression_level_from and --compression_level_to, to check
   the compressed size at each compression level in the given range. Users must
   specify exactly one compression type; otherwise the tool errors out. Both the
   from and to levels must also be specified together.
2. Display the time taken to compress each file with each compression type by default.

Test Plan : make -j64 check
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6634

Test Plan: make -j64 check

Reviewed By: anand1976

Differential Revision: D20810282

Pulled By: akankshamahajan15

fbshipit-source-id: ac9098d3c079a1fad098f6678dbedb4d888a791b
main
Akanksha Mahajan 5 years ago committed by Facebook GitHub Bot
parent 791e5714a5
commit 75b13ea94a
  1. 1
      HISTORY.md
  2. 183
      tools/sst_dump_tool.cc
  3. 9
      tools/sst_dump_tool_imp.h

@ -14,6 +14,7 @@
* Added support for pipelined & parallel compression optimization for `BlockBasedTableBuilder`. This optimization makes block building, block compression and block appending a pipeline, and uses multiple threads to accelerate block compression. Users can set `CompressionOptions::parallel_threads` greater than 1 to enable compression parallelism. * Added support for pipelined & parallel compression optimization for `BlockBasedTableBuilder`. This optimization makes block building, block compression and block appending a pipeline, and uses multiple threads to accelerate block compression. Users can set `CompressionOptions::parallel_threads` greater than 1 to enable compression parallelism.
* Provide an allocator for memkind to be used with block cache. This is to work with memory technologies (Intel DCPMM is one such technology currently available) that require different libraries for allocation and management (such as PMDK and memkind). The high capacities available make it possible to provision large caches (up to several TBs in size) beyond what is achievable with DRAM. * Provide an allocator for memkind to be used with block cache. This is to work with memory technologies (Intel DCPMM is one such technology currently available) that require different libraries for allocation and management (such as PMDK and memkind). The high capacities available make it possible to provision large caches (up to several TBs in size) beyond what is achievable with DRAM.
* Option `max_background_flushes` can be set dynamically using DB::SetDBOptions(). * Option `max_background_flushes` can be set dynamically using DB::SetDBOptions().
* Added functionality in sst_dump tool to check the compressed file size for different compression levels and print the time spent on compressing files with each compression type. Added arguments `--compression_level_from` and `--compression_level_to` to report the compressed size at every compression level in the given range; exactly one compression type must be specified with them, so that the tool reports that type's compressed sizes across the different levels.
### Bug Fixes ### Bug Fixes
* Fix a bug when making options.bottommost_compression, options.compression_opts and options.bottommost_compression_opts dynamically changeable: the modified values are not written to option files or returned back to users when being queried. * Fix a bug when making options.bottommost_compression, options.compression_opts and options.bottommost_compression_opts dynamically changeable: the modified values are not written to option files or returned back to users when being queried.

@ -9,6 +9,7 @@
#include "tools/sst_dump_tool_imp.h" #include "tools/sst_dump_tool_imp.h"
#include <cinttypes> #include <cinttypes>
#include <chrono>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <memory> #include <memory>
@ -207,7 +208,32 @@ uint64_t SstFileDumper::CalculateCompressedTableSize(
int SstFileDumper::ShowAllCompressionSizes( int SstFileDumper::ShowAllCompressionSizes(
size_t block_size, size_t block_size,
const std::vector<std::pair<CompressionType, const char*>>& const std::vector<std::pair<CompressionType, const char*>>&
compression_types) { compression_types,
int32_t compress_level_from,
int32_t compress_level_to) {
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
for (auto& i : compression_types) {
if (CompressionTypeSupported(i.first)) {
fprintf(stdout, "Compression: %-24s\n", i.second);
CompressionOptions compress_opt;
for(int32_t j = compress_level_from; j <= compress_level_to;
j++) {
fprintf(stdout, "Compression level: %d", j);
compress_opt.level = j;
ShowCompressionSize(block_size, i.first, compress_opt);
}
} else {
fprintf(stdout, "Unsupported compression type: %s.\n", i.second);
}
}
return 0;
}
int SstFileDumper::ShowCompressionSize(
size_t block_size,
CompressionType compress_type,
const CompressionOptions& compress_opt) {
ReadOptions read_options; ReadOptions read_options;
Options opts; Options opts;
opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
@ -219,63 +245,60 @@ int SstFileDumper::ShowAllCompressionSizes(
std::vector<std::unique_ptr<IntTblPropCollectorFactory> > std::vector<std::unique_ptr<IntTblPropCollectorFactory> >
block_based_table_factories; block_based_table_factories;
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size); std::string column_family_name;
int unknown_level = -1;
for (auto& i : compression_types) { TableBuilderOptions tb_opts(
if (CompressionTypeSupported(i.first)) { imoptions, moptions, ikc, &block_based_table_factories, compress_type,
CompressionOptions compress_opt; 0 /* sample_for_compression */, compress_opt,
std::string column_family_name; false /* skip_filters */, column_family_name, unknown_level);
int unknown_level = -1; uint64_t num_data_blocks = 0;
TableBuilderOptions tb_opts( std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
imoptions, moptions, ikc, &block_based_table_factories, i.first, uint64_t file_size =
0 /* sample_for_compression */, compress_opt, CalculateCompressedTableSize(tb_opts, block_size, &num_data_blocks);
false /* skip_filters */, column_family_name, unknown_level); std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
uint64_t num_data_blocks = 0; fprintf(stdout, " Size: %10" PRIu64, file_size);
uint64_t file_size = fprintf(stdout, " Blocks: %6" PRIu64, num_data_blocks);
CalculateCompressedTableSize(tb_opts, block_size, &num_data_blocks); fprintf(stdout, " Time Taken: %10s microsecs",
fprintf(stdout, "Compression: %-24s", i.second); std::to_string(std::chrono::duration_cast<std::chrono::microseconds>
fprintf(stdout, " Size: %10" PRIu64, file_size); (end-start).count()).c_str());
fprintf(stdout, " Blocks: %6" PRIu64, num_data_blocks); const uint64_t compressed_blocks =
const uint64_t compressed_blocks = opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_COMPRESSED);
opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_COMPRESSED); const uint64_t not_compressed_blocks =
const uint64_t not_compressed_blocks = opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_NOT_COMPRESSED);
opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_NOT_COMPRESSED); // When the option enable_index_compression is true,
// When the option enable_index_compression is true, // NUMBER_BLOCK_COMPRESSED is incremented for index block(s).
// NUMBER_BLOCK_COMPRESSED is incremented for index block(s). if ((compressed_blocks + not_compressed_blocks) > num_data_blocks) {
if ((compressed_blocks + not_compressed_blocks) > num_data_blocks) { num_data_blocks = compressed_blocks + not_compressed_blocks;
num_data_blocks = compressed_blocks + not_compressed_blocks;
}
const uint64_t ratio_not_compressed_blocks =
(num_data_blocks - compressed_blocks) - not_compressed_blocks;
const double compressed_pcnt =
(0 == num_data_blocks) ? 0.0
: ((static_cast<double>(compressed_blocks) /
static_cast<double>(num_data_blocks)) *
100.0);
const double ratio_not_compressed_pcnt =
(0 == num_data_blocks)
? 0.0
: ((static_cast<double>(ratio_not_compressed_blocks) /
static_cast<double>(num_data_blocks)) *
100.0);
const double not_compressed_pcnt =
(0 == num_data_blocks)
? 0.0
: ((static_cast<double>(not_compressed_blocks) /
static_cast<double>(num_data_blocks)) *
100.0);
fprintf(stdout, " Compressed: %6" PRIu64 " (%5.1f%%)", compressed_blocks,
compressed_pcnt);
fprintf(stdout, " Not compressed (ratio): %6" PRIu64 " (%5.1f%%)",
ratio_not_compressed_blocks, ratio_not_compressed_pcnt);
fprintf(stdout, " Not compressed (abort): %6" PRIu64 " (%5.1f%%)\n",
not_compressed_blocks, not_compressed_pcnt);
} else {
fprintf(stdout, "Unsupported compression type: %s.\n", i.second);
}
} }
const uint64_t ratio_not_compressed_blocks =
(num_data_blocks - compressed_blocks) - not_compressed_blocks;
const double compressed_pcnt =
(0 == num_data_blocks) ? 0.0
: ((static_cast<double>(compressed_blocks) /
static_cast<double>(num_data_blocks)) *
100.0);
const double ratio_not_compressed_pcnt =
(0 == num_data_blocks)
? 0.0
: ((static_cast<double>(ratio_not_compressed_blocks) /
static_cast<double>(num_data_blocks)) *
100.0);
const double not_compressed_pcnt =
(0 == num_data_blocks)
? 0.0
: ((static_cast<double>(not_compressed_blocks) /
static_cast<double>(num_data_blocks)) *
100.0);
fprintf(stdout, " Compressed: %6" PRIu64 " (%5.1f%%)", compressed_blocks,
compressed_pcnt);
fprintf(stdout, " Not compressed (ratio): %6" PRIu64 " (%5.1f%%)",
ratio_not_compressed_blocks, ratio_not_compressed_pcnt);
fprintf(stdout, " Not compressed (abort): %6" PRIu64 " (%5.1f%%)\n",
not_compressed_blocks, not_compressed_pcnt);
return 0; return 0;
} }
Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number, Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number,
RandomAccessFileReader* file, RandomAccessFileReader* file,
uint64_t file_size) { uint64_t file_size) {
@ -484,6 +507,14 @@ void print_help() {
--parse_internal_key=<0xKEY> --parse_internal_key=<0xKEY>
Convenience option to parse an internal key on the command line. Dumps the Convenience option to parse an internal key on the command line. Dumps the
internal key in hex format {'key' @ SN: type} internal key in hex format {'key' @ SN: type}
--compression_level_from=<compression_level>
Compression level to start compressing when executing recompress. One compression type
and compression_level_to must also be specified
--compression_level_to=<compression_level>
Compression level to stop compressing when executing recompress. One compression type
and compression_level_from must also be specified
)"); )");
} }
@ -507,9 +538,14 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
bool show_properties = false; bool show_properties = false;
bool show_summary = false; bool show_summary = false;
bool set_block_size = false; bool set_block_size = false;
bool has_compression_level_from = false;
bool has_compression_level_to = false;
bool has_specified_compression_types = false;
std::string from_key; std::string from_key;
std::string to_key; std::string to_key;
std::string block_size_str; std::string block_size_str;
std::string compression_level_from_str;
std::string compression_level_to_str;
size_t block_size = 0; size_t block_size = 0;
std::vector<std::pair<CompressionType, const char*>> compression_types; std::vector<std::pair<CompressionType, const char*>> compression_types;
uint64_t total_num_files = 0; uint64_t total_num_files = 0;
@ -517,6 +553,8 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
uint64_t total_data_block_size = 0; uint64_t total_data_block_size = 0;
uint64_t total_index_block_size = 0; uint64_t total_index_block_size = 0;
uint64_t total_filter_block_size = 0; uint64_t total_filter_block_size = 0;
int32_t compress_level_from = CompressionOptions::kDefaultCompressionLevel;
int32_t compress_level_to = CompressionOptions::kDefaultCompressionLevel;
for (int i = 1; i < argc; i++) { for (int i = 1; i < argc; i++) {
if (strncmp(argv[i], "--env_uri=", 10) == 0) { if (strncmp(argv[i], "--env_uri=", 10) == 0) {
env_uri = argv[i] + 10; env_uri = argv[i] + 10;
@ -561,6 +599,7 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
std::string compression_types_csv = argv[i] + 20; std::string compression_types_csv = argv[i] + 20;
std::istringstream iss(compression_types_csv); std::istringstream iss(compression_types_csv);
std::string compression_type; std::string compression_type;
has_specified_compression_types = true;
while (std::getline(iss, compression_type, ',')) { while (std::getline(iss, compression_type, ',')) {
auto iter = std::find_if( auto iter = std::find_if(
kCompressions.begin(), kCompressions.end(), kCompressions.begin(), kCompressions.end(),
@ -594,13 +633,42 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
} }
fprintf(stdout, "key=%s\n", ikey.DebugString(true).c_str()); fprintf(stdout, "key=%s\n", ikey.DebugString(true).c_str());
return retc; return retc;
} else { } else if (strncmp(argv[i], "--compression_level_from=", 25) == 0) {
compression_level_from_str = argv[i] + 25;
has_compression_level_from = true;
std::istringstream iss(compression_level_from_str);
iss >> compress_level_from;
if (iss.fail()) {
fprintf(stderr, "compression_level_from must be numeric\n");
exit(1);
}
} else if (strncmp(argv[i], "--compression_level_to=", 22) == 0) {
compression_level_to_str = argv[i]+23 ;
has_compression_level_to = true;
std::istringstream iss(compression_level_to_str);
iss >> compress_level_to;
if (iss.fail()) {
fprintf(stderr, "compression_level_to must be numeric\n");
exit(1);
}
}else {
fprintf(stderr, "Unrecognized argument '%s'\n\n", argv[i]); fprintf(stderr, "Unrecognized argument '%s'\n\n", argv[i]);
print_help(); print_help();
exit(1); exit(1);
} }
} }
if(has_compression_level_from && has_compression_level_to) {
if(!has_specified_compression_types || compression_types.size() != 1) {
fprintf(stderr, "Specify one compression type.\n\n");
exit(1);
}
} else if(has_compression_level_from || has_compression_level_to) {
fprintf(stderr, "Specify both --compression_level_from and "
"--compression_level_to.\n\n");
exit(1);
}
if (use_from_as_prefix && has_from) { if (use_from_as_prefix && has_from) {
fprintf(stderr, "Cannot specify --prefix and --from\n\n"); fprintf(stderr, "Cannot specify --prefix and --from\n\n");
exit(1); exit(1);
@ -675,7 +743,8 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
if (command == "recompress") { if (command == "recompress") {
dumper.ShowAllCompressionSizes( dumper.ShowAllCompressionSizes(
set_block_size ? block_size : 16384, set_block_size ? block_size : 16384,
compression_types.empty() ? kCompressions : compression_types); compression_types.empty() ? kCompressions : compression_types,
compress_level_from, compress_level_to);
return 0; return 0;
} }

@ -38,7 +38,14 @@ class SstFileDumper {
int ShowAllCompressionSizes( int ShowAllCompressionSizes(
size_t block_size, size_t block_size,
const std::vector<std::pair<CompressionType, const char*>>& const std::vector<std::pair<CompressionType, const char*>>&
compression_types); compression_types,
int32_t compress_level_from,
int32_t compress_level_to);
int ShowCompressionSize(
size_t block_size,
CompressionType compress_type,
const CompressionOptions& compress_opt);
private: private:
// Get the TableReader implementation for the sst file // Get the TableReader implementation for the sst file

Loading…
Cancel
Save