make sst_dump compression size command consistent

Summary:
- Like other subcommands, reporting compression sizes is now selected with the `--command` CLI arg (`--command=recompress`) instead of the standalone `--show_compression_sizes` flag.
- Also added a `--compression_types` arg, since it is useful to restrict which compression types are tried, at least in my dictionary compression experiments.
Closes https://github.com/facebook/rocksdb/pull/2706

Differential Revision: D5589520

Pulled By: ajkr

fbshipit-source-id: 305bb4ebcc95eecc8a85523cd3b1050619c9ddc5
main
Andrew Kryczka 7 years ago committed by Facebook Github Bot
parent 74f18c1301
commit 8254e9b57c
  1. 2
      tools/sst_dump_test.cc
  2. 81
      tools/sst_dump_tool.cc
  3. 5
      tools/sst_dump_tool_imp.h

@ -195,7 +195,7 @@ TEST_F(SSTDumpToolTest, CompressedSizes) {
} }
snprintf(usage[0], optLength, "./sst_dump"); snprintf(usage[0], optLength, "./sst_dump");
snprintf(usage[1], optLength, "--show_compression_sizes"); snprintf(usage[1], optLength, "--command=recompress");
snprintf(usage[2], optLength, "--file=rocksdb_sst_test.sst"); snprintf(usage[2], optLength, "--file=rocksdb_sst_test.sst");
rocksdb::SSTDumpTool tool; rocksdb::SSTDumpTool tool;
ASSERT_TRUE(!tool.Run(3, usage)); ASSERT_TRUE(!tool.Run(3, usage));

@ -60,6 +60,17 @@ extern const uint64_t kLegacyPlainTableMagicNumber;
const char* testFileName = "test_file_name"; const char* testFileName = "test_file_name";
// Table of compression types paired with their textual names.
// The name is what a value passed via --compression_types is matched
// against, and the whole table is the default set handed to
// ShowAllCompressionSizes() when no explicit list is given.
static const std::vector<std::pair<CompressionType, const char*>>
kCompressions = {
{CompressionType::kNoCompression, "kNoCompression"},
{CompressionType::kSnappyCompression, "kSnappyCompression"},
{CompressionType::kZlibCompression, "kZlibCompression"},
{CompressionType::kBZip2Compression, "kBZip2Compression"},
{CompressionType::kLZ4Compression, "kLZ4Compression"},
{CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
{CompressionType::kXpressCompression, "kXpressCompression"},
{CompressionType::kZSTD, "kZSTD"}};
Status SstFileReader::GetTableReader(const std::string& file_path) { Status SstFileReader::GetTableReader(const std::string& file_path) {
// Warning about 'magic_number' being uninitialized shows up only in UBsan // Warning about 'magic_number' being uninitialized shows up only in UBsan
// builds. Though access is guarded by 's.ok()' checks, fix the issue to // builds. Though access is guarded by 's.ok()' checks, fix the issue to
@ -174,7 +185,10 @@ uint64_t SstFileReader::CalculateCompressedTableSize(
return size; return size;
} }
int SstFileReader::ShowAllCompressionSizes(size_t block_size) { int SstFileReader::ShowAllCompressionSizes(
size_t block_size,
const std::vector<std::pair<CompressionType, const char*>>&
compression_types) {
ReadOptions read_options; ReadOptions read_options;
Options opts; Options opts;
const ImmutableCFOptions imoptions(opts); const ImmutableCFOptions imoptions(opts);
@ -184,17 +198,7 @@ int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size); fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
std::pair<CompressionType, const char*> compressions[] = { for (auto& i : compression_types) {
{CompressionType::kNoCompression, "kNoCompression"},
{CompressionType::kSnappyCompression, "kSnappyCompression"},
{CompressionType::kZlibCompression, "kZlibCompression"},
{CompressionType::kBZip2Compression, "kBZip2Compression"},
{CompressionType::kLZ4Compression, "kLZ4Compression"},
{CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
{CompressionType::kXpressCompression, "kXpressCompression"},
{CompressionType::kZSTD, "kZSTD"}};
for (auto& i : compressions) {
if (CompressionTypeSupported(i.first)) { if (CompressionTypeSupported(i.first)) {
CompressionOptions compress_opt; CompressionOptions compress_opt;
std::string column_family_name; std::string column_family_name;
@ -359,6 +363,8 @@ void print_help() {
scan: Iterate over entries in files and print them to screen scan: Iterate over entries in files and print them to screen
raw: Dump all the table contents to <file_name>_dump.txt raw: Dump all the table contents to <file_name>_dump.txt
verify: Iterate all the blocks in files verifying checksum to detect possible coruption but dont print anything except if a corruption is encountered verify: Iterate all the blocks in files verifying checksum to detect possible coruption but dont print anything except if a corruption is encountered
recompress: reports the SST file size if recompressed with different
compression types
--output_hex --output_hex
Can be combined with scan command to print the keys and values in Hex Can be combined with scan command to print the keys and values in Hex
@ -383,15 +389,17 @@ void print_help() {
Can be combined with --from and --to to indicate that these values are encoded in Hex Can be combined with --from and --to to indicate that these values are encoded in Hex
--show_properties --show_properties
Print table properties after iterating over the file Print table properties after iterating over the file when executing
check|scan|raw
--show_compression_sizes
Independent command that will recreate the SST file using 16K block size with different
compressions and report the size of the file using such compression
--set_block_size=<block_size> --set_block_size=<block_size>
Can be combined with --show_compression_sizes to set the block size that will be used Can be combined with --command=recompress to set the block size that will
when trying different compression algorithms be used when trying different compression algorithms
--compression_types=<comma-separated list of CompressionType members, e.g.,
kSnappyCompression>
Can be combined with --command=recompress to run recompression for this
list of compression types
--parse_internal_key=<0xKEY> --parse_internal_key=<0xKEY>
Convenience option to parse an internal key on the command line. Dumps the Convenience option to parse an internal key on the command line. Dumps the
@ -415,13 +423,13 @@ int SSTDumpTool::Run(int argc, char** argv) {
bool has_to = false; bool has_to = false;
bool use_from_as_prefix = false; bool use_from_as_prefix = false;
bool show_properties = false; bool show_properties = false;
bool show_compression_sizes = false;
bool show_summary = false; bool show_summary = false;
bool set_block_size = false; bool set_block_size = false;
std::string from_key; std::string from_key;
std::string to_key; std::string to_key;
std::string block_size_str; std::string block_size_str;
size_t block_size; size_t block_size;
std::vector<std::pair<CompressionType, const char*>> compression_types;
uint64_t total_num_files = 0; uint64_t total_num_files = 0;
uint64_t total_num_data_blocks = 0; uint64_t total_num_data_blocks = 0;
uint64_t total_data_block_size = 0; uint64_t total_data_block_size = 0;
@ -453,19 +461,34 @@ int SSTDumpTool::Run(int argc, char** argv) {
use_from_as_prefix = true; use_from_as_prefix = true;
} else if (strcmp(argv[i], "--show_properties") == 0) { } else if (strcmp(argv[i], "--show_properties") == 0) {
show_properties = true; show_properties = true;
} else if (strcmp(argv[i], "--show_compression_sizes") == 0) {
show_compression_sizes = true;
} else if (strcmp(argv[i], "--show_summary") == 0) { } else if (strcmp(argv[i], "--show_summary") == 0) {
show_summary = true; show_summary = true;
} else if (strncmp(argv[i], "--set_block_size=", 17) == 0) { } else if (strncmp(argv[i], "--set_block_size=", 17) == 0) {
set_block_size = true; set_block_size = true;
block_size_str = argv[i] + 17; block_size_str = argv[i] + 17;
std::istringstream iss(block_size_str); std::istringstream iss(block_size_str);
iss >> block_size;
if (iss.fail()) { if (iss.fail()) {
fprintf(stderr, "block size must be numeric"); fprintf(stderr, "block size must be numeric\n");
exit(1); exit(1);
} }
iss >> block_size; } else if (strncmp(argv[i], "--compression_types=", 20) == 0) {
std::string compression_types_csv = argv[i] + 20;
std::istringstream iss(compression_types_csv);
std::string compression_type;
while (std::getline(iss, compression_type, ',')) {
auto iter = std::find_if(
kCompressions.begin(), kCompressions.end(),
[&compression_type](std::pair<CompressionType, const char*> curr) {
return curr.second == compression_type;
});
if (iter == kCompressions.end()) {
fprintf(stderr, "%s is not a valid CompressionType\n",
compression_type.c_str());
exit(1);
}
compression_types.emplace_back(*iter);
}
} else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) { } else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) {
std::string in_key(argv[i] + 21); std::string in_key(argv[i] + 21);
try { try {
@ -547,12 +570,10 @@ int SSTDumpTool::Run(int argc, char** argv) {
continue; continue;
} }
if (show_compression_sizes) { if (command == "recompress") {
if (set_block_size) { reader.ShowAllCompressionSizes(
reader.ShowAllCompressionSizes(block_size); set_block_size ? block_size : 16384,
} else { compression_types.empty() ? kCompressions : compression_types);
reader.ShowAllCompressionSizes(16384);
}
return 0; return 0;
} }

@ -34,7 +34,10 @@ class SstFileReader {
Status DumpTable(const std::string& out_filename); Status DumpTable(const std::string& out_filename);
Status getStatus() { return init_result_; } Status getStatus() { return init_result_; }
int ShowAllCompressionSizes(size_t block_size); int ShowAllCompressionSizes(
size_t block_size,
const std::vector<std::pair<CompressionType, const char*>>&
compression_types);
private: private:
// Get the TableReader implementation for the sst file // Get the TableReader implementation for the sst file

Loading…
Cancel
Save