Dump routine to BlockBasedTableReader

Summary: Added necessary routines for dumping block based SST with block filter

Test Plan: Added "raw" mode to utility sst_dump

Reviewers: sdong, rven

Reviewed By: rven

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D29679
main
Manish Patil 10 years ago
parent ae508df90e
commit 7ea7bdf04d
  1. 6
      Makefile
  2. 2
      include/rocksdb/sst_dump_tool.h
  3. 54
      table/block_based_filter_block.cc
  4. 3
      table/block_based_filter_block.h
  5. 213
      table/block_based_table_reader.cc
  6. 7
      table/block_based_table_reader.h
  7. 6
      table/filter_block.h
  8. 40
      table/format.cc
  9. 6
      table/format.h
  10. 5
      table/table_reader.h
  11. 152
      util/sst_dump_test.cc
  12. 35
      util/sst_dump_tool.cc
  13. 3
      util/sst_dump_tool_imp.h

@ -165,7 +165,8 @@ TESTS = \
wal_manager_test \
listener_test \
compaction_job_test \
thread_list_test
thread_list_test \
sst_dump_test
SUBSET := $(shell echo $(TESTS) |sed s/^.*$(ROCKSDBTESTS_START)/$(ROCKSDBTESTS_START)/)
@ -539,6 +540,9 @@ compactor_test: utilities/compaction/compactor_test.o $(LIBOBJECTS) $(TESTHARNES
options_test: util/options_test.o util/options_helper.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) util/options_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
sst_dump_test: util/sst_dump_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) util/sst_dump_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
$(MEMENVLIBRARY) : $(MEMENVOBJECTS)
rm -f $@
$(AR) -rs $@ $(MEMENVOBJECTS)

@ -9,7 +9,7 @@ namespace rocksdb {
// Entry point object for the sst_dump command line utility.
class SSTDumpTool {
public:
// Runs the tool against the given command line (argc/argv as received by
// main()). The int-returning form lets a caller observe the outcome; the
// implementation returns 0 when it reaches the end of processing.
void Run(int argc, char** argv);
int Run(int argc, char** argv);
};
} // namespace rocksdb

@ -7,6 +7,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <algorithm>
#include "table/block_based_filter_block.h"
#include "db/dbformat.h"
@ -29,6 +30,38 @@ bool SamePrefix(const SliceTransform* prefix_extractor,
prefix_extractor->Transform(key2));
}
}
// Appends one "key: value" entry, terminated by '\n', to *props.
//
// Layout:
//  - the key is right-aligned in a 14 character field (keys of 14 or more
//    characters are written unpadded), followed by ": ";
//  - the value is emitted 64 bytes per line; every continuation line starts
//    with a newline and 16 spaces so it lines up under the first chunk.
// An empty value still produces the "key: " line.
void AppendItem(std::string* props, const std::string& key,
                const std::string& value) {
  const size_t kChunkWidth = 64;   // value bytes written per output line
  const size_t kValueColumn = 16;  // indent of continuation lines
  const size_t kKeyWidth = kValueColumn - 2;

  std::string entry;
  if (key.size() < kKeyWidth) {
    entry.assign(kKeyWidth - key.size(), ' ');
  }
  entry += key;
  entry += ": ";

  // The first chunk is always written (even when empty); each later chunk is
  // preceded by a line break and the continuation indent.
  size_t pos = 0;
  do {
    if (pos != 0) {
      entry += '\n';
      entry.append(kValueColumn, ' ');
    }
    entry.append(value, pos, kChunkWidth);
    pos += kChunkWidth;
  } while (pos < value.size());

  entry += '\n';
  props->append(entry);
}
// Overload for keys that are not strings: the key is rendered with
// std::to_string() and then forwarded to the std::string overload of
// AppendItem().
template <class TKey>
void AppendItem(std::string* props, const TKey& key, const std::string& value) {
  const std::string key_text = std::to_string(key);
  AppendItem(props, key_text, value);
}
} // namespace
@ -196,4 +229,25 @@ bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry,
// Estimates the number of bytes held by this reader: the (offset_ - data_)
// bytes that precede the offset array, 4 bytes for each of the num_ offset
// entries, plus 5 further bytes.
// NOTE(review): the extra 5 bytes presumably cover the trailing fields of the
// filter block encoding -- confirm against the block builder.
size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
return num_ * 4 + 5 + (offset_ - data_);
}
std::string BlockBasedFilterBlockReader::ToString() const {
std::string result, filter_meta;
result.reserve(1024);
std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
AppendItem(&result, s_fb, std::to_string(num_));
AppendItem(&result, s_bo, s_hd);
for (size_t index = 0; index < num_; index++) {
uint32_t start = DecodeFixed32(offset_ + index * 4);
uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
if (start != limit) {
result.append(" filter block # " + std::to_string(index + 1) + "\n");
Slice filter = Slice(data_ + start, limit - start);
AppendItem(&result, start, filter.ToString(true));
}
}
return result;
}
} // namespace rocksdb

@ -82,6 +82,9 @@ class BlockBasedFilterBlockReader : public FilterBlockReader {
uint64_t block_offset = kNotValid) override;
virtual size_t ApproximateMemoryUsage() const override;
// convert this object to a human readable form
std::string ToString() const override;
private:
const FilterPolicy* policy_;
const SliceTransform* prefix_extractor_;

@ -1312,4 +1312,217 @@ bool BlockBasedTable::TEST_index_reader_preloaded() const {
return rep_->index_reader != nullptr;
}
// Writes a human readable description of this table to out_file, in order:
// footer, metaindex entries (properties / filter block handles), table
// properties, filter block, index block and finally all data blocks.
//
// Returns the first error hit while reading the metaindex block or while
// dumping the index / data blocks; otherwise the status of DumpDataBlocks().
Status BlockBasedTable::DumpTable(WritableFile* out_file) {
  // Output Footer
  out_file->Append(
      "Footer Details:\n"
      "--------------------------------------\n"
      " ");
  out_file->Append(rep_->footer.ToString().c_str());
  out_file->Append("\n");

  // Output MetaIndex
  out_file->Append(
      "Metaindex Details:\n"
      "--------------------------------------\n");
  std::unique_ptr<Block> meta;
  std::unique_ptr<Iterator> meta_iter;
  Status s = ReadMetaBlock(rep_, &meta, &meta_iter);
  if (!s.ok()) {
    return s;
  }
  for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) {
    s = meta_iter->status();
    if (!s.ok()) {
      return s;
    }
    if (meta_iter->key() == rocksdb::kPropertiesBlock) {
      out_file->Append(" Properties block handle: ");
      out_file->Append(meta_iter->value().ToString(true).c_str());
      out_file->Append("\n");
    } else if (strstr(meta_iter->key().ToString().c_str(),
                      "filter.rocksdb.") != nullptr) {
      out_file->Append(" Filter block handle: ");
      out_file->Append(meta_iter->value().ToString(true).c_str());
      out_file->Append("\n");
    }
  }
  out_file->Append("\n");

  // Output TableProperties
  const rocksdb::TableProperties* table_properties =
      rep_->table_properties.get();
  if (table_properties != nullptr) {
    out_file->Append(
        "Table Properties:\n"
        "--------------------------------------\n"
        " ");
    out_file->Append(table_properties->ToString("\n ", ": ").c_str());
    out_file->Append("\n");
  }

  // Output Filter blocks
  // If no filter reader is loaded yet, try to build one from the filter block
  // named in the table properties. This needs the properties, so it is
  // skipped (instead of dereferencing a null pointer) when they are absent.
  if (table_properties != nullptr && !rep_->filter &&
      !table_properties->filter_policy_name.empty()) {
    // Support only BloomFilter as of now
    rocksdb::BlockBasedTableOptions table_options;
    table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(1));
    if (table_properties->filter_policy_name.compare(
            table_options.filter_policy->Name()) == 0) {
      std::string filter_block_key = kFilterBlockPrefix;
      filter_block_key.append(table_properties->filter_policy_name);
      BlockHandle handle;
      if (FindMetaBlock(meta_iter.get(), filter_block_key, &handle).ok()) {
        BlockContents block;
        if (ReadBlockContents(rep_->file.get(), rep_->footer, ReadOptions(),
                              handle, &block, rep_->ioptions.env, false)
                .ok()) {
          rep_->filter.reset(
              new BlockBasedFilterBlockReader(rep_->ioptions.prefix_extractor,
                                              table_options, std::move(block)));
        }
      }
    }
  }
  if (rep_->filter) {
    out_file->Append(
        "Filter Details:\n"
        "--------------------------------------\n"
        " ");
    out_file->Append(rep_->filter->ToString().c_str());
    out_file->Append("\n");
  }

  // Output Index block
  s = DumpIndexBlock(out_file);
  if (!s.ok()) {
    return s;
  }

  // Output Data blocks
  s = DumpDataBlocks(out_file);
  return s;
}
// Writes the index block to out_file. For every index entry it prints the
// user-key part of the entry's key in hex next to the hex of the entry's
// value (labelled "Data block handle" in the header), followed by the same
// user key as characters separated by single spaces.
//
// Returns the index iterator's error if the index cannot be read or if
// iteration stops on an error; Status::OK() otherwise.
Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) {
  out_file->Append(
      "Index Details:\n"
      "--------------------------------------\n");
  std::unique_ptr<Iterator> blockhandles_iter(NewIndexIterator(ReadOptions()));
  Status s = blockhandles_iter->status();
  if (!s.ok()) {
    out_file->Append("Can not read Index Block \n\n");
    return s;
  }

  out_file->Append(" Block key hex dump: Data block handle\n");
  out_file->Append(" Block key ascii\n\n");
  for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid();
       blockhandles_iter->Next()) {
    s = blockhandles_iter->status();
    if (!s.ok()) {
      break;
    }
    InternalKey ikey;
    ikey.DecodeFrom(blockhandles_iter->key());

    out_file->Append(" HEX ");
    out_file->Append(ikey.user_key().ToString(true).c_str());
    out_file->Append(": ");
    out_file->Append(blockhandles_iter->value().ToString(true).c_str());
    out_file->Append("\n");

    // Same key again, one character at a time, each followed by a space.
    const std::string str_key = ikey.user_key().ToString();
    std::string res_key;
    res_key.reserve(str_key.size() * 2);
    for (const char c : str_key) {
      res_key.push_back(c);
      res_key.push_back(' ');
    }
    out_file->Append(" ASCII ");
    out_file->Append(res_key.c_str());
    out_file->Append("\n ------\n");
  }
  out_file->Append("\n");

  // Report an iterator error that ended the loop instead of masking it as
  // success; this is OK() when the whole index was walked.
  return blockhandles_iter->status();
}
// Writes every data block referenced by the index to out_file. Each block
// gets a "Data Block # <n> @ <handle hex>" header; each entry is printed as
// "<user key>: <value>" in hex and then again as characters separated by
// single spaces.
//
// A data block that cannot be read is reported in the output and skipped
// (best effort). An error from the index iterator itself is returned to the
// caller; otherwise Status::OK() is returned.
Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) {
  std::unique_ptr<Iterator> blockhandles_iter(NewIndexIterator(ReadOptions()));
  Status s = blockhandles_iter->status();
  if (!s.ok()) {
    out_file->Append("Can not read Index Block \n\n");
    return s;
  }

  // Returns `raw` with a single space inserted after every character.
  const auto space_out = [](const std::string& raw) -> std::string {
    std::string spaced;
    spaced.reserve(raw.size() * 2);
    for (const char c : raw) {
      spaced.push_back(c);
      spaced.push_back(' ');
    }
    return spaced;
  };

  size_t block_id = 1;
  for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid();
       block_id++, blockhandles_iter->Next()) {
    s = blockhandles_iter->status();
    if (!s.ok()) {
      break;
    }

    out_file->Append("Data Block # ");
    out_file->Append(std::to_string(block_id));
    out_file->Append(" @ ");
    out_file->Append(blockhandles_iter->value().ToString(true).c_str());
    out_file->Append("\n");
    out_file->Append("--------------------------------------\n");

    std::unique_ptr<Iterator> datablock_iter(
        NewDataBlockIterator(rep_, ReadOptions(), blockhandles_iter->value()));
    s = datablock_iter->status();
    if (!s.ok()) {
      out_file->Append("Error reading the block - Skipped \n\n");
      continue;
    }

    for (datablock_iter->SeekToFirst(); datablock_iter->Valid();
         datablock_iter->Next()) {
      s = datablock_iter->status();
      if (!s.ok()) {
        out_file->Append("Error reading the block - Skipped \n");
        break;
      }

      // Only the key is an internal key. The value is opaque user data and
      // is printed in full: pushing it through InternalKey would drop its
      // trailing bytes and is not safe for values shorter than the
      // internal-key footer.
      InternalKey ikey;
      ikey.DecodeFrom(datablock_iter->key());
      const Slice value = datablock_iter->value();

      out_file->Append(" HEX ");
      out_file->Append(ikey.user_key().ToString(true).c_str());
      out_file->Append(": ");
      out_file->Append(value.ToString(true).c_str());
      out_file->Append("\n");

      const std::string res_key = space_out(ikey.user_key().ToString());
      const std::string res_value = space_out(value.ToString());
      out_file->Append(" ASCII ");
      out_file->Append(res_key.c_str());
      out_file->Append(": ");
      out_file->Append(res_value.c_str());
      out_file->Append("\n ------\n");
    }
    out_file->Append("\n");
  }

  // `s` may hold the status of a skipped data block at this point, so report
  // the index iterator's own status: OK() unless index iteration failed.
  return blockhandles_iter->status();
}
} // namespace rocksdb

@ -100,6 +100,9 @@ class BlockBasedTable : public TableReader {
size_t ApproximateMemoryUsage() const override;
// convert SST file to a human readable form
Status DumpTable(WritableFile* out_file) override;
~BlockBasedTable();
bool TEST_filter_block_preloaded() const;
@ -204,6 +207,10 @@ class BlockBasedTable : public TableReader {
// For Posix files the unique ID is three varints.
static const size_t kMaxCacheKeyPrefixSize = kMaxVarint64Length*3+1;
// Helper functions for DumpTable()
Status DumpIndexBlock(WritableFile* out_file);
Status DumpDataBlocks(WritableFile* out_file);
// No copying allowed
explicit BlockBasedTable(const TableReader&) = delete;
void operator=(const TableReader&) = delete;

@ -75,6 +75,12 @@ class FilterBlockReader {
uint64_t block_offset = kNotValid) = 0;
virtual size_t ApproximateMemoryUsage() const = 0;
// Human readable rendering of this filter block. This base implementation
// only reports that the filter type is unsupported; readers that can be
// dumped override it.
virtual std::string ToString() const {
  return std::string("Unsupported filter \n");
}
private:
// No copying allowed
FilterBlockReader(const FilterBlockReader&);

@ -51,6 +51,25 @@ Status BlockHandle::DecodeFrom(Slice* input) {
return Status::Corruption("bad block handle");
}
}
// Returns the encoding of this handle as produced by EncodeTo(). With
// hex == true every encoded byte is rendered as two upper-case hexadecimal
// digits; otherwise the raw encoded bytes are returned unchanged.
std::string BlockHandle::ToString(bool hex) const {
  std::string encoded;
  EncodeTo(&encoded);
  if (!hex) {
    return encoded;
  }

  std::string hex_text;
  char digits[10];
  for (const char byte : encoded) {
    // Go through unsigned char so bytes >= 0x80 are not sign-extended.
    snprintf(digits, sizeof(digits), "%02X", static_cast<unsigned char>(byte));
    hex_text += digits;
  }
  return hex_text;
}
const BlockHandle BlockHandle::kNullBlockHandle(0, 0);
// legacy footer format:
@ -179,6 +198,27 @@ Status Footer::DecodeFrom(Slice* input) {
return result;
}
// Renders the footer as "name: value" lines. The metaindex handle, index
// handle and table magic number are always printed; the checksum (first) and
// the footer version (before the magic number) are added only when the magic
// number does not denote the legacy footer format.
std::string Footer::ToString() const {
  const bool legacy = IsLegacyFooterFormat(table_magic_number_);

  std::string text;
  text.reserve(1024);
  if (!legacy) {
    text.append("checksum: " + std::to_string(checksum_) + "\n ");
  }
  text.append("metaindex handle: " + metaindex_handle_.ToString() + "\n ");
  text.append("index handle: " + index_handle_.ToString() + "\n ");
  if (!legacy) {
    text.append("footer version: " + std::to_string(version_) + "\n ");
  }
  text.append("table_magic_number: " + std::to_string(table_magic_number_) +
              "\n ");
  return text;
}
Status ReadFooterFromFile(RandomAccessFile* file,
uint64_t file_size,
Footer* footer) {

@ -42,6 +42,9 @@ class BlockHandle {
void EncodeTo(std::string* dst) const;
Status DecodeFrom(Slice* input);
// Return a string that contains the copy of handle.
std::string ToString(bool hex = true) const;
// if the block handle's offset and size are both "0", we will view it
// as a null block handle that points to no where.
bool IsNull() const {
@ -129,6 +132,9 @@ class Footer {
static const uint64_t kInvalidTableMagicNumber = 0;
// convert this object to a human readable form
std::string ToString() const;
private:
// REQUIRES: magic number wasn't initialized.
void set_table_magic_number(uint64_t magic_number) {

@ -67,6 +67,11 @@ class TableReader {
// key is the key to search for
virtual Status Get(const ReadOptions& readOptions, const Slice& key,
GetContext* get_context) = 0;
// convert db file to a human readable form
//
// out_file: destination for the dump.
// This default implementation does not touch out_file and returns
// Status::NotSupported; table readers that can be dumped override it.
virtual Status DumpTable(WritableFile* out_file) {
return Status::NotSupported("DumpTable() not supported");
}
};
} // namespace rocksdb

@ -0,0 +1,152 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <stdint.h>
#include "rocksdb/sst_dump_tool.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based_table_factory.h"
#include "table/table_builder.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace rocksdb {
const uint32_t optLength = 100;
namespace {
static std::string MakeKey(int i) {
char buf[100];
snprintf(buf, sizeof(buf), "k_%04d", i);
InternalKey key(std::string(buf), 0, ValueType::kTypeValue);
return key.Encode().ToString();
}
static std::string MakeValue(int i) {
char buf[100];
snprintf(buf, sizeof(buf), "v_%04d", i);
InternalKey key(std::string(buf), 0, ValueType::kTypeValue);
return key.Encode().ToString();
}
void createSST(const std::string& file_name,
const BlockBasedTableOptions& table_options) {
std::shared_ptr<rocksdb::TableFactory> tf;
tf.reset(new rocksdb::BlockBasedTableFactory(table_options));
unique_ptr<WritableFile> file;
Env* env = Env::Default();
EnvOptions env_options;
ReadOptions read_options;
Options opts;
const ImmutableCFOptions imoptions(opts);
rocksdb::InternalKeyComparator ikc(opts.comparator);
TableBuilder* tb = nullptr;
env->NewWritableFile(file_name, &file, env_options);
opts.table_factory = tf;
tb = opts.table_factory->NewTableBuilder(imoptions, ikc, file.get(),
CompressionType::kNoCompression,
CompressionOptions());
// Populate slightly more than 1K keys
uint32_t num_keys = 1024;
for (uint32_t i = 0; i < num_keys; i++) {
tb->Add(MakeKey(i), MakeValue(i));
}
tb->Finish();
file->Close();
}
void cleanup(const std::string& file_name) {
Env* env = Env::Default();
env->DeleteFile(file_name);
std::string outfile_name = file_name.substr(0, file_name.length() - 4);
outfile_name.append("_dump.txt");
env->DeleteFile(outfile_name);
}
} // namespace
// Test for sst dump tool "raw" mode
//
// Fixture for the TEST cases in this file. It only carries the table options
// that a test may adjust before creating its SST file.
class SSTDumpToolTest {
public:
// Options handed to createSST(); default constructed unless a test changes
// them (e.g. to install a filter policy).
BlockBasedTableOptions table_options_;
SSTDumpToolTest() {}
~SSTDumpToolTest() {}
};
// Raw dump of an SST built with default table options (no filter policy
// configured): the tool must report success.
TEST(SSTDumpToolTest, EmptyFilter) {
  std::string file_name = "rocksdb_sst_test.sst";
  createSST(file_name, table_options_);

  // argv needs writable storage; fixed-size stack buffers avoid manual
  // new[]/delete[] bookkeeping.
  char arg_program[optLength];
  char arg_command[optLength];
  char arg_file[optLength];
  snprintf(arg_program, sizeof(arg_program), "./sst_dump");
  snprintf(arg_command, sizeof(arg_command), "--command=raw");
  snprintf(arg_file, sizeof(arg_file), "--file=%s", file_name.c_str());
  char* usage[3] = {arg_program, arg_command, arg_file};

  rocksdb::SSTDumpTool tool;
  ASSERT_TRUE(!tool.Run(3, usage));

  cleanup(file_name);
}
// Raw dump of an SST built with NewBloomFilterPolicy(10, true): the tool
// must report success.
TEST(SSTDumpToolTest, FilterBlock) {
  table_options_.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
  std::string file_name = "rocksdb_sst_test.sst";
  createSST(file_name, table_options_);

  // argv needs writable storage; fixed-size stack buffers avoid manual
  // new[]/delete[] bookkeeping.
  char arg_program[optLength];
  char arg_command[optLength];
  char arg_file[optLength];
  snprintf(arg_program, sizeof(arg_program), "./sst_dump");
  snprintf(arg_command, sizeof(arg_command), "--command=raw");
  snprintf(arg_file, sizeof(arg_file), "--file=%s", file_name.c_str());
  char* usage[3] = {arg_program, arg_command, arg_file};

  rocksdb::SSTDumpTool tool;
  ASSERT_TRUE(!tool.Run(3, usage));

  cleanup(file_name);
}
// Raw dump of an SST built with NewBloomFilterPolicy(10, false): the tool
// must report success.
TEST(SSTDumpToolTest, FullFilterBlock) {
  table_options_.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
  std::string file_name = "rocksdb_sst_test.sst";
  createSST(file_name, table_options_);

  // argv needs writable storage; fixed-size stack buffers avoid manual
  // new[]/delete[] bookkeeping.
  char arg_program[optLength];
  char arg_command[optLength];
  char arg_file[optLength];
  snprintf(arg_program, sizeof(arg_program), "./sst_dump");
  snprintf(arg_command, sizeof(arg_command), "--command=raw");
  snprintf(arg_file, sizeof(arg_file), "--file=%s", file_name.c_str());
  char* usage[3] = {arg_program, arg_command, arg_file};

  rocksdb::SSTDumpTool tool;
  ASSERT_TRUE(!tool.Run(3, usage));

  cleanup(file_name);
}
} // namespace rocksdb
// Test binary entry point: runs all tests through the rocksdb test harness
// and uses its result as the process exit code. argc/argv are not used.
int main(int argc, char** argv) { return rocksdb::test::RunAllTests(); }

@ -73,6 +73,15 @@ Status SstFileReader::NewTableReader(const std::string& file_path) {
return s;
}
// Dumps the table opened by this reader, in human readable form, into a new
// file named out_filename.
//
// Returns the error from creating the output file, else the error from the
// table reader's DumpTable(), else the status of closing the output file.
Status SstFileReader::DumpTable(const std::string& out_filename) {
  unique_ptr<WritableFile> out_file;
  Env* env = Env::Default();
  Status s = env->NewWritableFile(out_filename, &out_file, soptions_);
  if (!s.ok()) {
    // No file was opened; bail out instead of dumping through a null pointer.
    return s;
  }

  s = table_reader_->DumpTable(out_file.get());

  // Always close the file. A close failure is only reported when the dump
  // itself succeeded, so the first error is the one the caller sees.
  const Status close_status = out_file->Close();
  if (s.ok()) {
    s = close_status;
  }
  return s;
}
Status SstFileReader::ReadTableProperties(uint64_t table_magic_number,
RandomAccessFile* file,
uint64_t file_size) {
@ -206,7 +215,7 @@ namespace {
void print_help() {
fprintf(stderr,
"sst_dump [--command=check|scan|none] [--verify_checksum] "
"sst_dump [--command=check|scan|none|raw] [--verify_checksum] "
"--file=data_dir_OR_sst_file"
" [--output_hex]"
" [--input_key_hex]"
@ -235,7 +244,7 @@ string HexToString(const string& str) {
} // namespace
void SSTDumpTool::Run(int argc, char** argv) {
int SSTDumpTool::Run(int argc, char** argv) {
const char* dir_or_file = nullptr;
uint64_t read_num = -1;
std::string command;
@ -318,8 +327,29 @@ void SSTDumpTool::Run(int argc, char** argv) {
if (dir) {
filename = std::string(dir_or_file) + "/" + filename;
}
rocksdb::SstFileReader reader(filename, verify_checksum,
output_hex);
if (!reader.getStatus().ok()) {
fprintf(stderr, "%s: %s\n", filename.c_str(),
reader.getStatus().ToString().c_str());
exit(1);
}
if (command == "raw") {
std::string out_filename = filename.substr(0, filename.length() - 4);
out_filename.append("_dump.txt");
st = reader.DumpTable(out_filename);
if (!st.ok()) {
fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
exit(1);
} else {
fprintf(stdout, "raw dump written to file %s\n", &out_filename[0]);
}
continue;
}
// scan all files in give file path.
if (command == "" || command == "scan" || command == "check") {
st = reader.ReadSequential(command != "check",
@ -360,6 +390,7 @@ void SSTDumpTool::Run(int argc, char** argv) {
}
}
}
return 0;
}
} // namespace rocksdb

@ -49,6 +49,9 @@ class SstFileReader {
uint64_t GetReadNumber() { return read_num_; }
TableProperties* GetInitTableProperties() { return table_properties_.get(); }
Status DumpTable(const std::string& out_filename);
Status getStatus() { return init_result_; }
private:
Status NewTableReader(const std::string& file_path);
Status ReadTableProperties(uint64_t table_magic_number,

Loading…
Cancel
Save