|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
|
|
|
|
#include "util/sst_dump_tool_imp.h"
|
|
|
|
|
|
|
|
#ifndef __STDC_FORMAT_MACROS
|
|
|
|
#define __STDC_FORMAT_MACROS
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <inttypes.h>
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
using std::dynamic_pointer_cast;
|
|
|
|
|
|
|
|
SstFileReader::SstFileReader(const std::string& file_path,
|
|
|
|
bool verify_checksum,
|
|
|
|
bool output_hex)
|
|
|
|
:file_name_(file_path), read_num_(0), verify_checksum_(verify_checksum),
|
|
|
|
output_hex_(output_hex), ioptions_(options_),
|
|
|
|
internal_comparator_(BytewiseComparator()) {
|
|
|
|
fprintf(stdout, "Process %s\n", file_path.c_str());
|
|
|
|
init_result_ = GetTableReader(file_name_);
|
|
|
|
}
|
|
|
|
|
|
|
|
extern const uint64_t kBlockBasedTableMagicNumber;
|
|
|
|
extern const uint64_t kLegacyBlockBasedTableMagicNumber;
|
|
|
|
extern const uint64_t kPlainTableMagicNumber;
|
|
|
|
extern const uint64_t kLegacyPlainTableMagicNumber;
|
|
|
|
|
|
|
|
Status SstFileReader::GetTableReader(const std::string& file_path) {
|
|
|
|
uint64_t magic_number;
|
|
|
|
|
|
|
|
// read table magic number
|
|
|
|
Footer footer;
|
|
|
|
|
|
|
|
unique_ptr<RandomAccessFile> file;
|
|
|
|
uint64_t file_size;
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
Status s = options_.env->NewRandomAccessFile(file_path, &file, soptions_);
|
|
|
|
if (s.ok()) {
|
|
|
|
s = options_.env->GetFileSize(file_path, &file_size);
|
|
|
|
}
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
|
|
|
|
file_.reset(new RandomAccessFileReader(std::move(file)));
|
|
|
|
|
|
|
|
if (s.ok()) {
|
|
|
|
s = ReadFooterFromFile(file_.get(), file_size, &footer);
|
|
|
|
}
|
|
|
|
if (s.ok()) {
|
|
|
|
magic_number = footer.table_magic_number();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s.ok()) {
|
|
|
|
if (magic_number == kPlainTableMagicNumber ||
|
|
|
|
magic_number == kLegacyPlainTableMagicNumber) {
|
|
|
|
soptions_.use_mmap_reads = true;
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
options_.env->NewRandomAccessFile(file_path, &file, soptions_);
|
|
|
|
file_.reset(new RandomAccessFileReader(std::move(file)));
|
|
|
|
}
|
|
|
|
options_.comparator = &internal_comparator_;
|
|
|
|
// For old sst format, ReadTableProperties might fail but file can be read
|
|
|
|
if (ReadTableProperties(magic_number, file_.get(), file_size).ok()) {
|
|
|
|
SetTableOptionsByMagicNumber(magic_number);
|
|
|
|
} else {
|
|
|
|
SetOldTableOptions();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s.ok()) {
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
s = NewTableReader(ioptions_, soptions_, internal_comparator_, file_size,
|
|
|
|
&table_reader_);
|
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status SstFileReader::NewTableReader(
|
|
|
|
const ImmutableCFOptions& ioptions, const EnvOptions& soptions,
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
const InternalKeyComparator& internal_comparator, uint64_t file_size,
|
|
|
|
unique_ptr<TableReader>* table_reader) {
|
|
|
|
// We need to turn off pre-fetching of index and filter nodes for
|
|
|
|
// BlockBasedTable
|
|
|
|
shared_ptr<BlockBasedTableFactory> block_table_factory =
|
|
|
|
dynamic_pointer_cast<BlockBasedTableFactory>(options_.table_factory);
|
|
|
|
|
|
|
|
if (block_table_factory) {
|
|
|
|
return block_table_factory->NewTableReader(
|
|
|
|
ioptions_, soptions_, internal_comparator_, std::move(file_), file_size,
|
|
|
|
&table_reader_, /*enable_prefetch=*/false);
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(!block_table_factory);
|
|
|
|
|
|
|
|
// For all other factory implementation
|
|
|
|
return options_.table_factory->NewTableReader(
|
|
|
|
ioptions_, soptions_, internal_comparator_, std::move(file_), file_size,
|
|
|
|
&table_reader_);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status SstFileReader::DumpTable(const std::string& out_filename) {
|
|
|
|
unique_ptr<WritableFile> out_file;
|
|
|
|
Env* env = Env::Default();
|
|
|
|
env->NewWritableFile(out_filename, &out_file, soptions_);
|
|
|
|
Status s = table_reader_->DumpTable(out_file.get());
|
|
|
|
out_file->Close();
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status SstFileReader::ReadTableProperties(uint64_t table_magic_number,
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
9 years ago
|
|
|
RandomAccessFileReader* file,
|
|
|
|
uint64_t file_size) {
|
|
|
|
TableProperties* table_properties = nullptr;
|
|
|
|
Status s = rocksdb::ReadTableProperties(file, file_size, table_magic_number,
|
|
|
|
options_.env, options_.info_log.get(),
|
|
|
|
&table_properties);
|
|
|
|
if (s.ok()) {
|
|
|
|
table_properties_.reset(table_properties);
|
|
|
|
} else {
|
|
|
|
fprintf(stdout, "Not able to read table properties\n");
|
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status SstFileReader::SetTableOptionsByMagicNumber(
|
|
|
|
uint64_t table_magic_number) {
|
|
|
|
assert(table_properties_);
|
|
|
|
if (table_magic_number == kBlockBasedTableMagicNumber ||
|
|
|
|
table_magic_number == kLegacyBlockBasedTableMagicNumber) {
|
|
|
|
options_.table_factory = std::make_shared<BlockBasedTableFactory>();
|
|
|
|
fprintf(stdout, "Sst file format: block-based\n");
|
|
|
|
auto& props = table_properties_->user_collected_properties;
|
|
|
|
auto pos = props.find(BlockBasedTablePropertyNames::kIndexType);
|
|
|
|
if (pos != props.end()) {
|
|
|
|
auto index_type_on_file = static_cast<BlockBasedTableOptions::IndexType>(
|
|
|
|
DecodeFixed32(pos->second.c_str()));
|
|
|
|
if (index_type_on_file ==
|
|
|
|
BlockBasedTableOptions::IndexType::kHashSearch) {
|
|
|
|
options_.prefix_extractor.reset(NewNoopTransform());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (table_magic_number == kPlainTableMagicNumber ||
|
|
|
|
table_magic_number == kLegacyPlainTableMagicNumber) {
|
|
|
|
options_.allow_mmap_reads = true;
|
|
|
|
|
|
|
|
PlainTableOptions plain_table_options;
|
|
|
|
plain_table_options.user_key_len = kPlainTableVariableLength;
|
|
|
|
plain_table_options.bloom_bits_per_key = 0;
|
|
|
|
plain_table_options.hash_table_ratio = 0;
|
|
|
|
plain_table_options.index_sparseness = 1;
|
|
|
|
plain_table_options.huge_page_tlb_size = 0;
|
|
|
|
plain_table_options.encoding_type = kPlain;
|
|
|
|
plain_table_options.full_scan_mode = true;
|
|
|
|
|
|
|
|
options_.table_factory.reset(NewPlainTableFactory(plain_table_options));
|
|
|
|
fprintf(stdout, "Sst file format: plain table\n");
|
|
|
|
} else {
|
|
|
|
char error_msg_buffer[80];
|
|
|
|
snprintf(error_msg_buffer, sizeof(error_msg_buffer) - 1,
|
|
|
|
"Unsupported table magic number --- %lx",
|
|
|
|
(long)table_magic_number);
|
|
|
|
return Status::InvalidArgument(error_msg_buffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
|
|
|
Status SstFileReader::SetOldTableOptions() {
|
|
|
|
assert(table_properties_ == nullptr);
|
|
|
|
options_.table_factory = std::make_shared<BlockBasedTableFactory>();
|
|
|
|
fprintf(stdout, "Sst file format: block-based(old version)\n");
|
|
|
|
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
|
|
|
Status SstFileReader::ReadSequential(bool print_kv,
|
|
|
|
uint64_t read_num,
|
|
|
|
bool has_from,
|
|
|
|
const std::string& from_key,
|
|
|
|
bool has_to,
|
|
|
|
const std::string& to_key) {
|
|
|
|
if (!table_reader_) {
|
|
|
|
return init_result_;
|
|
|
|
}
|
|
|
|
|
|
|
|
Iterator* iter = table_reader_->NewIterator(ReadOptions(verify_checksum_,
|
|
|
|
false));
|
|
|
|
uint64_t i = 0;
|
|
|
|
if (has_from) {
|
|
|
|
InternalKey ikey;
|
|
|
|
ikey.SetMaxPossibleForUserKey(from_key);
|
|
|
|
iter->Seek(ikey.Encode());
|
|
|
|
} else {
|
|
|
|
iter->SeekToFirst();
|
|
|
|
}
|
|
|
|
for (; iter->Valid(); iter->Next()) {
|
|
|
|
Slice key = iter->key();
|
|
|
|
Slice value = iter->value();
|
|
|
|
++i;
|
|
|
|
if (read_num > 0 && i > read_num)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ParsedInternalKey ikey;
|
|
|
|
if (!ParseInternalKey(key, &ikey)) {
|
|
|
|
std::cerr << "Internal Key ["
|
|
|
|
<< key.ToString(true /* in hex*/)
|
|
|
|
<< "] parse error!\n";
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If end marker was specified, we stop before it
|
|
|
|
if (has_to && BytewiseComparator()->Compare(ikey.user_key, to_key) >= 0) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (print_kv) {
|
|
|
|
fprintf(stdout, "%s => %s\n",
|
|
|
|
ikey.DebugString(output_hex_).c_str(),
|
|
|
|
value.ToString(output_hex_).c_str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
read_num_ += i;
|
|
|
|
|
|
|
|
Status ret = iter->status();
|
|
|
|
delete iter;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status SstFileReader::ReadTableProperties(
|
|
|
|
std::shared_ptr<const TableProperties>* table_properties) {
|
|
|
|
if (!table_reader_) {
|
|
|
|
return init_result_;
|
|
|
|
}
|
|
|
|
|
|
|
|
*table_properties = table_reader_->GetTableProperties();
|
|
|
|
return init_result_;
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
void print_help() {
|
|
|
|
fprintf(stderr,
|
|
|
|
"sst_dump [--command=check|scan|none|raw] [--verify_checksum] "
|
|
|
|
"--file=data_dir_OR_sst_file"
|
|
|
|
" [--output_hex]"
|
|
|
|
" [--input_key_hex]"
|
|
|
|
" [--from=<user_key>]"
|
|
|
|
" [--to=<user_key>]"
|
|
|
|
" [--read_num=NUM]"
|
|
|
|
" [--show_properties]\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
string HexToString(const string& str) {
|
|
|
|
string parsed;
|
|
|
|
if (str[0] != '0' || str[1] != 'x') {
|
|
|
|
fprintf(stderr, "Invalid hex input %s. Must start with 0x\n",
|
|
|
|
str.c_str());
|
|
|
|
throw "Invalid hex input";
|
|
|
|
}
|
|
|
|
|
|
|
|
for (unsigned int i = 2; i < str.length();) {
|
|
|
|
int c;
|
|
|
|
sscanf(str.c_str() + i, "%2X", &c);
|
|
|
|
parsed.push_back(c);
|
|
|
|
i += 2;
|
|
|
|
}
|
|
|
|
return parsed;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
int SSTDumpTool::Run(int argc, char** argv) {
|
|
|
|
const char* dir_or_file = nullptr;
|
|
|
|
uint64_t read_num = -1;
|
|
|
|
std::string command;
|
|
|
|
|
|
|
|
char junk;
|
|
|
|
uint64_t n;
|
|
|
|
bool verify_checksum = false;
|
|
|
|
bool output_hex = false;
|
|
|
|
bool input_key_hex = false;
|
|
|
|
bool has_from = false;
|
|
|
|
bool has_to = false;
|
|
|
|
bool show_properties = false;
|
|
|
|
std::string from_key;
|
|
|
|
std::string to_key;
|
|
|
|
for (int i = 1; i < argc; i++) {
|
|
|
|
if (strncmp(argv[i], "--file=", 7) == 0) {
|
|
|
|
dir_or_file = argv[i] + 7;
|
|
|
|
} else if (strcmp(argv[i], "--output_hex") == 0) {
|
|
|
|
output_hex = true;
|
|
|
|
} else if (strcmp(argv[i], "--input_key_hex") == 0) {
|
|
|
|
input_key_hex = true;
|
|
|
|
} else if (sscanf(argv[i],
|
|
|
|
"--read_num=%lu%c",
|
|
|
|
(unsigned long*)&n, &junk) == 1) {
|
|
|
|
read_num = n;
|
|
|
|
} else if (strcmp(argv[i], "--verify_checksum") == 0) {
|
|
|
|
verify_checksum = true;
|
|
|
|
} else if (strncmp(argv[i], "--command=", 10) == 0) {
|
|
|
|
command = argv[i] + 10;
|
|
|
|
} else if (strncmp(argv[i], "--from=", 7) == 0) {
|
|
|
|
from_key = argv[i] + 7;
|
|
|
|
has_from = true;
|
|
|
|
} else if (strncmp(argv[i], "--to=", 5) == 0) {
|
|
|
|
to_key = argv[i] + 5;
|
|
|
|
has_to = true;
|
|
|
|
} else if (strcmp(argv[i], "--show_properties") == 0) {
|
|
|
|
show_properties = true;
|
|
|
|
} else {
|
|
|
|
print_help();
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (input_key_hex) {
|
|
|
|
if (has_from) {
|
|
|
|
from_key = HexToString(from_key);
|
|
|
|
}
|
|
|
|
if (has_to) {
|
|
|
|
to_key = HexToString(to_key);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dir_or_file == nullptr) {
|
|
|
|
print_help();
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<std::string> filenames;
|
|
|
|
rocksdb::Env* env = rocksdb::Env::Default();
|
|
|
|
rocksdb::Status st = env->GetChildren(dir_or_file, &filenames);
|
|
|
|
bool dir = true;
|
|
|
|
if (!st.ok()) {
|
|
|
|
filenames.clear();
|
|
|
|
filenames.push_back(dir_or_file);
|
|
|
|
dir = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(stdout, "from [%s] to [%s]\n",
|
|
|
|
rocksdb::Slice(from_key).ToString(true).c_str(),
|
|
|
|
rocksdb::Slice(to_key).ToString(true).c_str());
|
|
|
|
|
|
|
|
uint64_t total_read = 0;
|
|
|
|
for (size_t i = 0; i < filenames.size(); i++) {
|
|
|
|
std::string filename = filenames.at(i);
|
|
|
|
if (filename.length() <= 4 ||
|
|
|
|
filename.rfind(".sst") != filename.length() - 4) {
|
|
|
|
// ignore
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (dir) {
|
|
|
|
filename = std::string(dir_or_file) + "/" + filename;
|
|
|
|
}
|
|
|
|
|
|
|
|
rocksdb::SstFileReader reader(filename, verify_checksum,
|
|
|
|
output_hex);
|
|
|
|
if (!reader.getStatus().ok()) {
|
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(),
|
|
|
|
reader.getStatus().ToString().c_str());
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (command == "raw") {
|
|
|
|
std::string out_filename = filename.substr(0, filename.length() - 4);
|
|
|
|
out_filename.append("_dump.txt");
|
|
|
|
|
|
|
|
st = reader.DumpTable(out_filename);
|
|
|
|
if (!st.ok()) {
|
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
|
|
|
|
exit(1);
|
|
|
|
} else {
|
|
|
|
fprintf(stdout, "raw dump written to file %s\n", &out_filename[0]);
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// scan all files in give file path.
|
|
|
|
if (command == "" || command == "scan" || command == "check") {
|
|
|
|
st = reader.ReadSequential(command == "scan",
|
|
|
|
read_num > 0 ? (read_num - total_read) :
|
|
|
|
read_num,
|
|
|
|
has_from, from_key, has_to, to_key);
|
|
|
|
if (!st.ok()) {
|
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(),
|
|
|
|
st.ToString().c_str());
|
|
|
|
}
|
|
|
|
total_read += reader.GetReadNumber();
|
|
|
|
if (read_num > 0 && total_read > read_num) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (show_properties) {
|
|
|
|
const rocksdb::TableProperties* table_properties;
|
|
|
|
|
|
|
|
std::shared_ptr<const rocksdb::TableProperties>
|
|
|
|
table_properties_from_reader;
|
|
|
|
st = reader.ReadTableProperties(&table_properties_from_reader);
|
|
|
|
if (!st.ok()) {
|
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
|
|
|
|
fprintf(stderr, "Try to use initial table properties\n");
|
|
|
|
table_properties = reader.GetInitTableProperties();
|
|
|
|
} else {
|
|
|
|
table_properties = table_properties_from_reader.get();
|
|
|
|
}
|
|
|
|
if (table_properties != nullptr) {
|
|
|
|
fprintf(stdout,
|
|
|
|
"Table Properties:\n"
|
|
|
|
"------------------------------\n"
|
|
|
|
" %s",
|
|
|
|
table_properties->ToString("\n ", ": ").c_str());
|
|
|
|
fprintf(stdout, "# deleted keys: %" PRIu64 "\n",
|
|
|
|
rocksdb::GetDeletedKeys(
|
|
|
|
table_properties->user_collected_properties));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
#endif // ROCKSDB_LITE
|