diff --git a/.gitignore b/.gitignore
index 462f5b3c2..87d5b98a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,6 +31,7 @@ build/
 ldb
 manifest_dump
 sst_dump
+blob_dump
 column_aware_encoding_exp
 util/build_version.cc
 build_tools/VALGRIND_LOGS/
diff --git a/Makefile b/Makefile
index f6a39892d..33930d8ac 100644
--- a/Makefile
+++ b/Makefile
@@ -485,7 +485,8 @@ TOOLS = \
 	ldb \
 	db_repl_stress \
 	rocksdb_dump \
-	rocksdb_undump
+	rocksdb_undump \
+	blob_dump
 
 TEST_LIBS = \
 	librocksdb_env_basic_test.a
@@ -1343,6 +1344,9 @@ transaction_test: utilities/transactions/transaction_test.o $(LIBOBJECTS) $(TEST
 sst_dump: tools/sst_dump.o $(LIBOBJECTS)
 	$(AM_LINK)
 
+blob_dump: tools/blob_dump.o $(LIBOBJECTS)
+	$(AM_LINK)
+
 column_aware_encoding_exp: utilities/column_aware_encoding_exp.o $(EXPOBJECTS)
 	$(AM_LINK)
 
diff --git a/src.mk b/src.mk
index fa1c376fe..8024eb0a4 100644
--- a/src.mk
+++ b/src.mk
@@ -202,6 +202,7 @@ TOOL_LIB_SOURCES = \
   tools/ldb_cmd.cc \
   tools/ldb_tool.cc \
   tools/sst_dump_tool.cc \
+  utilities/blob_db/blob_dump_tool.cc \
 
 MOCK_LIB_SOURCES = \
   env/mock_env.cc \
diff --git a/tools/blob_dump.cc b/tools/blob_dump.cc
new file mode 100644
index 000000000..9b9e91304
--- /dev/null
+++ b/tools/blob_dump.cc
@@ -0,0 +1,106 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+// This source code is also licensed under the GPLv2 license found in the
+// COPYING file in the root directory of this source tree.
+
+#ifndef ROCKSDB_LITE
+#include <getopt.h>
+#include <cstdio>
+#include <string>
+#include <unordered_map>
+
+#include "utilities/blob_db/blob_dump_tool.h"
+
+using namespace rocksdb;
+using namespace rocksdb::blob_db;
+
+// Entry point of the blob_dump tool. Parses the command line and passes the
+// selected file and display modes to BlobDumpTool::Run(). Returns 0 on
+// success or after printing the help text, and -1 on an invalid option, a
+// missing --file, or a failed dump.
+int main(int argc, char** argv) {
+  using DisplayType = BlobDumpTool::DisplayType;
+  const char* const usage =
+      "Usage: blob_dump --file=filename "
+      "[--show_key[=none|raw|hex|detail]] "
+      "[--show_blob[=none|raw|hex|detail]]\n";
+  const std::unordered_map<std::string, DisplayType> display_types = {
+      {"none", DisplayType::kNone},
+      {"raw", DisplayType::kRaw},
+      {"hex", DisplayType::kHex},
+      {"detail", DisplayType::kDetail},
+  };
+  const struct option options[] = {
+      {"help", no_argument, nullptr, 'h'},
+      {"file", required_argument, nullptr, 'f'},
+      {"show_key", optional_argument, nullptr, 'k'},
+      {"show_blob", optional_argument, nullptr, 'b'},
+      // getopt_long() requires the array to end with an all-zero element.
+      {nullptr, 0, nullptr, 0},
+  };
+  DisplayType show_key = DisplayType::kRaw;
+  DisplayType show_blob = DisplayType::kNone;
+  std::string file;
+  while (true) {
+    int c = getopt_long(argc, argv, "hk::b::f:", options, nullptr);
+    if (c < 0) {
+      break;
+    }
+    std::string arg_str(optarg ? optarg : "");
+    switch (c) {
+      case 'h':
+        fprintf(stdout, "%s", usage);
+        return 0;
+      case 'f':
+        file = optarg;
+        break;
+      case 'k':
+        // A bare --show_key keeps the default display type (raw).
+        if (optarg) {
+          auto it = display_types.find(arg_str);
+          if (it == display_types.end()) {
+            fprintf(stderr, "Unrecognized key display type.\n");
+            return -1;
+          }
+          show_key = it->second;
+        }
+        break;
+      case 'b':
+        // A bare --show_blob selects the detailed display.
+        if (optarg) {
+          auto it = display_types.find(arg_str);
+          if (it == display_types.end()) {
+            fprintf(stderr, "Unrecognized blob display type.\n");
+            return -1;
+          }
+          show_blob = it->second;
+        } else {
+          show_blob = DisplayType::kDetail;
+        }
+        break;
+      default:
+        fprintf(stderr, "Unrecognized option.\n");
+        return -1;
+    }
+  }
+  if (file.empty()) {
+    fprintf(stderr, "Missing --file option.\n%s", usage);
+    return -1;
+  }
+  BlobDumpTool tool;
+  Status s = tool.Run(file, show_key, show_blob);
+  if (!s.ok()) {
+    fprintf(stderr, "Failed: %s\n", s.ToString().c_str());
+    return -1;
+  }
+  return 0;
+}
+#else
+#include <stdio.h>
+int main(int /*argc*/, char** /*argv*/) {
+  fprintf(stderr, "Not supported in lite mode.\n");
+  return -1;
+}
+#endif  // ROCKSDB_LITE
diff --git
a/utilities/blob_db/blob_dump_tool.cc b/utilities/blob_db/blob_dump_tool.cc
new file mode 100644
index 000000000..2789cfa78
--- /dev/null
+++ b/utilities/blob_db/blob_dump_tool.cc
@@ -0,0 +1,309 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+// This source code is also licensed under the GPLv2 license found in the
+// COPYING file in the root directory of this source tree.
+#ifndef ROCKSDB_LITE
+#include "utilities/blob_db/blob_dump_tool.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <memory>
+#include <string>
+#include <utility>
+#include "port/port.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/env.h"
+#include "util/coding.h"
+#include "util/crc32c.h"
+#include "util/string_util.h"
+
+namespace rocksdb {
+namespace blob_db {
+
+BlobDumpTool::BlobDumpTool()
+    : reader_(nullptr), buffer_(nullptr), buffer_size_(0), file_size_(0) {}
+
+// Dumps the blob log file `filename` to stdout: header, footer (if present)
+// and, unless show_key is kNone, every record between them. Returns the first
+// non-OK status encountered, or Corruption for an empty file.
+Status BlobDumpTool::Run(const std::string& filename, DisplayType show_key,
+                         DisplayType show_blob) {
+  Status s;
+  Env* env = Env::Default();
+  s = env->FileExists(filename);
+  if (!s.ok()) {
+    return s;
+  }
+  uint64_t file_size = 0;
+  s = env->GetFileSize(filename, &file_size);
+  if (!s.ok()) {
+    return s;
+  }
+  std::unique_ptr<RandomAccessFile> file;
+  s = env->NewRandomAccessFile(filename, &file, EnvOptions());
+  if (!s.ok()) {
+    return s;
+  }
+  if (file_size == 0) {
+    return Status::Corruption("File is empty.");
+  }
+  file_size_ = file_size;
+  reader_.reset(new RandomAccessFileReader(std::move(file)));
+  uint64_t offset = 0;
+  uint64_t footer_offset = 0;
+  s = DumpBlobLogHeader(&offset);
+  if (!s.ok()) {
+    return s;
+  }
+  s = DumpBlobLogFooter(file_size, &footer_offset);
+  if (!s.ok()) {
+    return s;
+  }
+  // Records are only walked when keys are displayed; with show_key == kNone
+  // just the header and footer are printed.
+  if (show_key != DisplayType::kNone) {
+    while (offset < footer_offset) {
+      s = DumpRecord(show_key, show_blob, &offset);
+      if (!s.ok()) {
+        return s;
+      }
+    }
+  }
+  return s;
+}
+
+// Reads exactly `size` bytes at `offset`, using the internal buffer as
+// scratch space, and stores the bytes read in `result`. The buffer is reused
+// by the next Read(), so `result` should be consumed before then. Returns
+// Corruption if the range is not fully inside the file.
+Status BlobDumpTool::Read(uint64_t offset, size_t size, Slice* result) {
+  // Check the range before allocating, so that a corrupt length field cannot
+  // trigger a huge allocation.
+  if (offset > file_size_ || size > file_size_ - offset) {
+    return Status::Corruption("Reach the end of the file unexpectedly.");
+  }
+  if (buffer_size_ < size) {
+    if (buffer_size_ == 0) {
+      buffer_size_ = 4096;
+    }
+    while (buffer_size_ < size) {
+      buffer_size_ *= 2;
+    }
+    buffer_.reset(new char[buffer_size_]);
+  }
+  Status s = reader_->Read(offset, size, result, buffer_.get());
+  if (!s.ok()) {
+    return s;
+  }
+  if (result->size() != size) {
+    return Status::Corruption("Reach the end of the file unexpectedly.");
+  }
+  return s;
+}
+
+// Reads, decodes and prints the blob log header at the start of the file.
+// On success, sets *offset to the first byte after the header.
+Status BlobDumpTool::DumpBlobLogHeader(uint64_t* offset) {
+  Slice slice;
+  Status s = Read(0, BlobLogHeader::kHeaderSize, &slice);
+  if (!s.ok()) {
+    return s;
+  }
+  BlobLogHeader header;
+  s = header.DecodeFrom(slice);
+  if (!s.ok()) {
+    return s;
+  }
+  fprintf(stdout, "Blob log header:\n");
+  fprintf(stdout, "  Magic Number   : %u\n", header.magic_number());
+  fprintf(stdout, "  Version        : %u\n", header.version());
+  CompressionType compression = header.compression();
+  std::string compression_str;
+  if (!GetStringFromCompressionType(&compression_str, compression).ok()) {
+    compression_str = "Unrecognized compression type (" +
+                      ToString(static_cast<int>(compression)) + ")";
+  }
+  fprintf(stdout, "  Compression    : %s\n", compression_str.c_str());
+  fprintf(stdout, "  TTL Range      : %s\n",
+          GetString(header.ttl_range()).c_str());
+  fprintf(stdout, "  Timestamp Range: %s\n",
+          GetString(header.ts_range()).c_str());
+  *offset = BlobLogHeader::kHeaderSize;
+  return s;
+}
+
+// Detects and prints the blob log footer. A file that is too short to hold
+// one, or whose last four bytes are not the magic number, is treated as
+// having no footer and *footer_offset is set to file_size. Otherwise
+// *footer_offset is set to the first byte of the footer.
+Status BlobDumpTool::DumpBlobLogFooter(uint64_t file_size,
+                                       uint64_t* footer_offset) {
+  auto no_footer = [&]() {
+    *footer_offset = file_size;
+    fprintf(stdout, "No blob log footer.\n");
+    return Status::OK();
+  };
+  if (file_size < BlobLogHeader::kHeaderSize + BlobLogFooter::kFooterSize) {
+    return no_footer();
+  }
+  // The footer ends with the magic number; peek at it before decoding.
+  Slice slice;
+  Status s = Read(file_size - 4, 4, &slice);
+  if (!s.ok()) {
+    return s;
+  }
+  uint32_t magic_number = DecodeFixed32(slice.data());
+  if (magic_number != kMagicNumber) {
+    return no_footer();
+  }
+  *footer_offset = file_size - BlobLogFooter::kFooterSize;
+  s = Read(*footer_offset, BlobLogFooter::kFooterSize, &slice);
+  if (!s.ok()) {
+    return s;
+  }
+  BlobLogFooter footer;
+  s = footer.DecodeFrom(slice);
+  if (!s.ok()) {
+    return s;
+  }
+  fprintf(stdout, "Blob log footer:\n");
+  fprintf(stdout, "  Blob count     : %" PRIu64 "\n", footer.GetBlobCount());
+  fprintf(stdout, "  TTL Range      : %s\n",
+          GetString(footer.GetTTLRange()).c_str());
+  fprintf(stdout, "  Time Range     : %s\n",
+          GetString(footer.GetTimeRange()).c_str());
+  fprintf(stdout, "  Sequence Range : %s\n",
+          GetString(footer.GetSNRange()).c_str());
+  return s;
+}
+
+// Reads the record at *offset, prints its fields, verifies the header and
+// blob checksums, and advances *offset past the record. The key is printed
+// unless show_key is kNone; the blob is printed only when both show_key and
+// show_blob are not kNone.
+Status BlobDumpTool::DumpRecord(DisplayType show_key, DisplayType show_blob,
+                                uint64_t* offset) {
+  fprintf(stdout, "Read record with offset 0x%" PRIx64 " (%" PRIu64 "):\n",
+          *offset, *offset);
+  Slice slice;
+  Status s = Read(*offset, BlobLogRecord::kHeaderSize, &slice);
+  if (!s.ok()) {
+    return s;
+  }
+  BlobLogRecord record;
+  s = record.DecodeHeaderFrom(slice);
+  if (!s.ok()) {
+    return s;
+  }
+  uint32_t key_size = record.GetKeySize();
+  uint64_t blob_size = record.GetBlobSize();
+  fprintf(stdout, "  key size   : %u\n", key_size);
+  fprintf(stdout, "  blob size  : %" PRIu64 "\n", blob_size);
+  fprintf(stdout, "  TTL        : %u\n", record.GetTTL());
+  fprintf(stdout, "  time       : %" PRIu64 "\n", record.GetTimeVal());
+  fprintf(stdout, "  type       : %d, %d\n", record.type(), record.subtype());
+  fprintf(stdout, "  header CRC : %u\n", record.header_checksum());
+  fprintf(stdout, "  CRC        : %u\n", record.checksum());
+  // Checksum the header minus its last 2 * sizeof(uint32_t) bytes (presumably
+  // the two stored checksums); the key is folded in once it has been read.
+  uint32_t header_crc =
+      crc32c::Extend(0, slice.data(), slice.size() - 2 * sizeof(uint32_t));
+  *offset += BlobLogRecord::kHeaderSize;
+  // Validate the sizes against the rest of the file before adding them up, so
+  // that a corrupt header cannot make the sum wrap around.
+  const uint64_t remaining = file_size_ - *offset;
+  const uint64_t key_and_footer =
+      static_cast<uint64_t>(key_size) + BlobLogRecord::kFooterSize;
+  if (blob_size > remaining || key_and_footer > remaining - blob_size) {
+    return Status::Corruption("Reach the end of the file unexpectedly.");
+  }
+  const uint64_t body_size = key_and_footer + blob_size;
+  s = Read(*offset, static_cast<size_t>(body_size), &slice);
+  if (!s.ok()) {
+    return s;
+  }
+  header_crc = crc32c::Extend(header_crc, slice.data(), key_size);
+  header_crc = crc32c::Mask(header_crc);
+  if (header_crc != record.header_checksum()) {
+    return Status::Corruption("Record header checksum mismatch.");
+  }
+  uint32_t blob_crc = crc32c::Extend(0, slice.data() + key_size, blob_size);
+  blob_crc = crc32c::Mask(blob_crc);
+  if (blob_crc != record.checksum()) {
+    return Status::Corruption("Blob checksum mismatch.");
+  }
+  if (show_key != DisplayType::kNone) {
+    fprintf(stdout, "  key        : ");
+    DumpSlice(Slice(slice.data(), key_size), show_key);
+    if (show_blob != DisplayType::kNone) {
+      fprintf(stdout, "  blob       : ");
+      DumpSlice(Slice(slice.data() + key_size, blob_size), show_blob);
+    }
+  }
+  Slice footer_slice(slice.data() + key_size + blob_size,
+                     BlobLogRecord::kFooterSize);
+  s = record.DecodeFooterFrom(footer_slice);
+  if (!s.ok()) {
+    return s;
+  }
+  fprintf(stdout, "  footer CRC : %u\n", record.footer_checksum());
+  fprintf(stdout, "  sequence   : %" PRIu64 "\n", record.GetSN());
+  *offset += body_size;
+  return s;
+}
+
+// Prints `s` followed by a newline. kRaw writes the bytes as-is, kHex writes
+// them as a hex string, and kDetail writes a hex dump with 16 bytes per line
+// next to their printable-ASCII rendering. kNone prints nothing.
+void BlobDumpTool::DumpSlice(const Slice s, DisplayType type) {
+  if (type == DisplayType::kRaw) {
+    // fwrite() keeps embedded NUL bytes, which "%s" would stop at.
+    fwrite(s.data(), 1, s.size(), stdout);
+    fputc('\n', stdout);
+  } else if (type == DisplayType::kHex) {
+    fprintf(stdout, "%s\n", s.ToString(true /*hex*/).c_str());
+  } else if (type == DisplayType::kDetail) {
+    if (s.empty()) {
+      // Still terminate the label line printed by the caller.
+      fputc('\n', stdout);
+    }
+    // Line layout: columns 0-14 stay blank for the field label, the two hex
+    // digits of byte j start at column 15 + 3 * j, and its character is put
+    // at column 65 + j.
+    char buf[100];
+    for (size_t i = 0; i < s.size(); i += 16) {
+      memset(buf, 0, sizeof(buf));
+      for (size_t j = 0; j < 16 && i + j < s.size(); j++) {
+        unsigned char c = s[i + j];
+        snprintf(buf + j * 3 + 15, 2, "%x", c >> 4);
+        snprintf(buf + j * 3 + 16, 2, "%x", c & 0xf);
+        snprintf(buf + j + 65, 2, "%c", (0x20 <= c && c <= 0x7e) ? c : '.');
+      }
+      // Turn the untouched bytes into spaces, keeping the final terminator.
+      for (size_t p = 0; p < sizeof(buf) - 1; p++) {
+        if (buf[p] == 0) {
+          buf[p] = ' ';
+        }
+      }
+      // The caller already printed the label on the first line, so skip the
+      // label columns there.
+      fprintf(stdout, "%s\n", i == 0 ? buf + 15 : buf);
+    }
+  }
+}
+
+// Formats a range as "(first, second)"; a (0, 0) range is shown as "nil".
+template <class T>
+std::string BlobDumpTool::GetString(std::pair<T, T> p) {
+  if (p.first == 0 && p.second == 0) {
+    return "nil";
+  }
+  return "(" + ToString(p.first) + ", " + ToString(p.second) + ")";
+}
+
+}  // namespace blob_db
+}  // namespace rocksdb
+
+#endif  // ROCKSDB_LITE
diff --git a/utilities/blob_db/blob_dump_tool.h b/utilities/blob_db/blob_dump_tool.h
new file mode 100644
index 000000000..87d291b18
--- /dev/null
+++ b/utilities/blob_db/blob_dump_tool.h
@@ -0,0 +1,62 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+// This source code is also licensed under the GPLv2 license found in the
+// COPYING file in the root directory of this source tree.
+#pragma once
+#ifndef ROCKSDB_LITE
+
+#include <memory>
+#include <string>
+#include <utility>
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "util/file_reader_writer.h"
+#include "utilities/blob_db/blob_log_format.h"
+
+namespace rocksdb {
+namespace blob_db {
+
+// Prints the contents of a blob log file (header, records and footer) to
+// stdout in human-readable form.
+class BlobDumpTool {
+ public:
+  // How a key or a blob is rendered.
+  enum class DisplayType {
+    kNone,    // do not print
+    kRaw,     // print the bytes as-is
+    kHex,     // print as a hex string
+    kDetail,  // print a hex dump next to the printable characters
+  };
+
+  BlobDumpTool();
+
+  // Dumps the file `filename`. Records are only dumped when key_type is not
+  // kNone. Returns a non-OK status if the file cannot be read or decoded.
+  Status Run(const std::string& filename, DisplayType key_type,
+             DisplayType blob_type);
+
+ private:
+  std::unique_ptr<RandomAccessFileReader> reader_;
+  // Scratch buffer for Read(); an array, since it is allocated with new[].
+  std::unique_ptr<char[]> buffer_;
+  size_t buffer_size_;
+  // Size of the file being dumped; Read() rejects ranges beyond it.
+  uint64_t file_size_;
+
+  Status Read(uint64_t offset, size_t size, Slice* result);
+  Status DumpBlobLogHeader(uint64_t* offset);
+  Status DumpBlobLogFooter(uint64_t file_size, uint64_t* footer_offset);
+  Status DumpRecord(DisplayType show_key, DisplayType show_blob,
+                    uint64_t* offset);
+  void DumpSlice(const Slice s, DisplayType type);
+
+  template <class T>
+  std::string GetString(std::pair<T, T> p);
+};
+
+}  // namespace blob_db
+}  // namespace rocksdb
+
+#endif  // ROCKSDB_LITE
diff --git a/utilities/blob_db/blob_file.cc
b/utilities/blob_db/blob_file.cc index 411de255d..83032ab3e 100644 --- a/utilities/blob_db/blob_file.cc +++ b/utilities/blob_db/blob_file.cc @@ -153,7 +153,7 @@ Status BlobFile::ReadFooter(BlobLogFooter* bf) { return Status::IOError("EOF reached before footer"); } - s = bf->DecodeFrom(&result); + s = bf->DecodeFrom(result); return s; } diff --git a/utilities/blob_db/blob_log_format.cc b/utilities/blob_db/blob_log_format.cc index 051e9bb01..1e55f8e6d 100644 --- a/utilities/blob_db/blob_log_format.cc +++ b/utilities/blob_db/blob_log_format.cc @@ -32,9 +32,10 @@ BlobLogHeader& BlobLogHeader::operator=(BlobLogHeader&& in) noexcept { BlobLogFooter::BlobLogFooter() : magic_number_(kMagicNumber), blob_count_(0) {} -Status BlobLogFooter::DecodeFrom(Slice* input) { +Status BlobLogFooter::DecodeFrom(const Slice& input) { + Slice slice(input); uint32_t val; - if (!GetFixed32(input, &val)) { + if (!GetFixed32(&slice, &val)) { return Status::Corruption("Invalid Blob Footer: flags"); } @@ -55,33 +56,34 @@ Status BlobLogFooter::DecodeFrom(Slice* input) { return Status::Corruption("Invalid Blob Footer: flags_val"); } - if (!GetFixed64(input, &blob_count_)) { + if (!GetFixed64(&slice, &blob_count_)) { return Status::Corruption("Invalid Blob Footer: blob_count"); } ttlrange_t temp_ttl; - if (!GetFixed32(input, &temp_ttl.first) || - !GetFixed32(input, &temp_ttl.second)) { + if (!GetFixed32(&slice, &temp_ttl.first) || + !GetFixed32(&slice, &temp_ttl.second)) { return Status::Corruption("Invalid Blob Footer: ttl_range"); } if (has_ttl) { - printf("has ttl\n"); ttl_range_.reset(new ttlrange_t(temp_ttl)); } - if (!GetFixed64(input, &sn_range_.first) || - !GetFixed64(input, &sn_range_.second)) { + if (!GetFixed64(&slice, &sn_range_.first) || + !GetFixed64(&slice, &sn_range_.second)) { return Status::Corruption("Invalid Blob Footer: sn_range"); } tsrange_t temp_ts; - if (!GetFixed64(input, &temp_ts.first) || - !GetFixed64(input, &temp_ts.second)) { + if (!GetFixed64(&slice, &temp_ts.first) 
|| + !GetFixed64(&slice, &temp_ts.second)) { return Status::Corruption("Invalid Blob Footer: ts_range"); } - if (has_ts) ts_range_.reset(new tsrange_t(temp_ts)); + if (has_ts) { + ts_range_.reset(new tsrange_t(temp_ts)); + } - if (!GetFixed32(input, &magic_number_) || magic_number_ != kMagicNumber) { + if (!GetFixed32(&slice, &magic_number_) || magic_number_ != kMagicNumber) { return Status::Corruption("Invalid Blob Footer: magic"); } @@ -163,18 +165,19 @@ void BlobLogHeader::EncodeTo(std::string* dst) const { } } -Status BlobLogHeader::DecodeFrom(Slice* input) { - if (!GetFixed32(input, &magic_number_) || magic_number_ != kMagicNumber) { +Status BlobLogHeader::DecodeFrom(const Slice& input) { + Slice slice(input); + if (!GetFixed32(&slice, &magic_number_) || magic_number_ != kMagicNumber) { return Status::Corruption("Invalid Blob Log Header: magic"); } // as of today, we only support 1 version - if (!GetFixed32(input, &version_) || version_ != kVersion1) { + if (!GetFixed32(&slice, &version_) || version_ != kVersion1) { return Status::Corruption("Invalid Blob Log Header: version"); } uint32_t val; - if (!GetFixed32(input, &val)) { + if (!GetFixed32(&slice, &val)) { return Status::Corruption("Invalid Blob Log Header: subtype"); } @@ -196,15 +199,15 @@ Status BlobLogHeader::DecodeFrom(Slice* input) { } ttlrange_t temp_ttl; - if (!GetFixed32(input, &temp_ttl.first) || - !GetFixed32(input, &temp_ttl.second)) { + if (!GetFixed32(&slice, &temp_ttl.first) || + !GetFixed32(&slice, &temp_ttl.second)) { return Status::Corruption("Invalid Blob Log Header: ttl"); } if (has_ttl) set_ttl_guess(temp_ttl); tsrange_t temp_ts; - if (!GetFixed64(input, &temp_ts.first) || - !GetFixed64(input, &temp_ts.second)) { + if (!GetFixed64(&slice, &temp_ts.first) || + !GetFixed64(&slice, &temp_ts.second)) { return Status::Corruption("Invalid Blob Log Header: timestamp"); } if (has_ts) set_ts_guess(temp_ts); diff --git a/utilities/blob_db/blob_log_format.h b/utilities/blob_db/blob_log_format.h 
index c688ed400..4f6896455 100644 --- a/utilities/blob_db/blob_log_format.h +++ b/utilities/blob_db/blob_log_format.h @@ -70,16 +70,35 @@ class BlobLogHeader { void set_ts_guess(const tsrange_t& ts) { ts_guess_.reset(new tsrange_t(ts)); } public: - // magic number + version + flags + ttl guess + timestamp range + // magic number + version + flags + ttl guess + timestamp range = 36 static const size_t kHeaderSize = 4 + 4 + 4 + 4 * 2 + 8 * 2; - // 32 void EncodeTo(std::string* dst) const; - Status DecodeFrom(Slice* input); + Status DecodeFrom(const Slice& input); BlobLogHeader(); + uint32_t magic_number() const { return magic_number_; } + + uint32_t version() const { return version_; } + + CompressionType compression() const { return compression_; } + + ttlrange_t ttl_range() const { + if (!ttl_guess_) { + return {0, 0}; + } + return *ttl_guess_; + } + + tsrange_t ts_range() const { + if (!ts_guess_) { + return {0, 0}; + } + return *ts_guess_; + } + bool HasTTL() const { return !!ttl_guess_; } bool HasTimestamp() const { return !!ts_guess_; } @@ -97,11 +116,11 @@ class BlobLogFooter { // EncodeTo(). Never use this constructor with DecodeFrom(). 
BlobLogFooter(); - uint64_t magic_number() const { return magic_number_; } + uint32_t magic_number() const { return magic_number_; } void EncodeTo(std::string* dst) const; - Status DecodeFrom(Slice* input); + Status DecodeFrom(const Slice& input); // convert this object to a human readable form std::string ToString() const; @@ -214,8 +233,18 @@ class BlobLogRecord { uint64_t GetTimeVal() const { return time_val_; } + char type() const { return type_; } + + char subtype() const { return subtype_; } + SequenceNumber GetSN() const { return sn_; } + uint32_t header_checksum() const { return header_cksum_; } + + uint32_t checksum() const { return checksum_; } + + uint32_t footer_checksum() const { return footer_cksum_; } + Status DecodeHeaderFrom(const Slice& hdrslice); Status DecodeFooterFrom(const Slice& footerslice); diff --git a/utilities/blob_db/blob_log_reader.cc b/utilities/blob_db/blob_log_reader.cc index c93a520ae..19c9bf325 100644 --- a/utilities/blob_db/blob_log_reader.cc +++ b/utilities/blob_db/blob_log_reader.cc @@ -36,7 +36,7 @@ Status Reader::ReadHeader(BlobLogHeader* header) { return Status::IOError("EOF reached before file header"); } - status = header->DecodeFrom(&buffer_); + status = header->DecodeFrom(buffer_); return status; } @@ -55,7 +55,9 @@ Status Reader::ReadRecord(BlobLogRecord* record, ReadLevel level, } status = record->DecodeHeaderFrom(buffer_); - if (!status.ok()) return status; + if (!status.ok()) { + return status; + } uint32_t header_crc = 0; uint32_t blob_crc = 0; diff --git a/utilities/blob_db/blob_log_reader.h b/utilities/blob_db/blob_log_reader.h index 5805ceb5e..cff26ed6a 100644 --- a/utilities/blob_db/blob_log_reader.h +++ b/utilities/blob_db/blob_log_reader.h @@ -70,6 +70,8 @@ class Reader { uint64_t GetNextByte() const { return next_byte_; } + const SequentialFileReader* file_reader() const { return file_.get(); } + private: char* GetReadBuffer() { return &(backing_store_[0]); }