Add support for plain table format to sst_dump.

Summary:
This diff enables the command line tool `sst_dump` to work for sst files
under plain table format.  Changes include:
  * In tools/sst_dump.cc:
    - add support for plain table format
    - display prefix_extractor information when --show_properties is on
  * In table/format.cc
    - Now the table magic number of a Footer can be later initialized
      via ReadFooterFromFile().
  * In table/meta_blocks:
    - add function ReadTableMagicNumber() that reads the magic number of
      the specified file.

Minor fixes:
 - remove a duplicate #include in table/table_test.cc
 - fix a commentary typo in include/rocksdb/memtablerep.h
 - fix lint errors.

Test Plan:
Runs sst_dump with both block-based and plain-table format files with
different arguments, specifically those with --show_properties and --from.

* sample output:
  https://reviews.facebook.net/P261

Reviewers: kailiu, sdong, xjin

CC: leveldb

Differential Revision: https://reviews.facebook.net/D15903
main
Yueh-Hsuan Chiang 11 years ago
parent 1560bb913e
commit 3ce8d9a988
  1. 17
      include/rocksdb/memtablerep.h
  2. 22
      table/format.cc
  3. 45
      table/format.h
  4. 105
      table/meta_blocks.cc
  5. 7
      table/meta_blocks.h
  6. 51
      table/table_properties.cc
  7. 117
      table/table_test.cc
  8. 72
      tools/sst_dump.cc

@ -21,7 +21,7 @@
// types built in: // types built in:
// - SkipListRep: This is the default; it is backed by a skip list. // - SkipListRep: This is the default; it is backed by a skip list.
// - HashSkipListRep: The memtable rep that is best used for keys that are // - HashSkipListRep: The memtable rep that is best used for keys that are
// structured like "prefix:suffix" where iteration withing a prefix is // structured like "prefix:suffix" where iteration within a prefix is
// common and iteration across different prefixes is rare. It is backed by // common and iteration across different prefixes is rare. It is backed by
// a hash map where each bucket is a skip list. // a hash map where each bucket is a skip list.
// - VectorRep: This is backed by an unordered std::vector. On iteration, the // - VectorRep: This is backed by an unordered std::vector. On iteration, the
@ -85,7 +85,7 @@ class MemTableRep {
// Initialize an iterator over the specified collection. // Initialize an iterator over the specified collection.
// The returned iterator is not valid. // The returned iterator is not valid.
// explicit Iterator(const MemTableRep* collection); // explicit Iterator(const MemTableRep* collection);
virtual ~Iterator() { }; virtual ~Iterator() {}
// Returns true iff the iterator is positioned at a valid node. // Returns true iff the iterator is positioned at a valid node.
virtual bool Valid() const = 0; virtual bool Valid() const = 0;
@ -143,7 +143,7 @@ class MemTableRep {
// new MemTableRep objects // new MemTableRep objects
class MemTableRepFactory { class MemTableRepFactory {
public: public:
virtual ~MemTableRepFactory() { }; virtual ~MemTableRepFactory() {}
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&, virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&,
Arena*) = 0; Arena*) = 0;
virtual const char* Name() const = 0; virtual const char* Name() const = 0;
@ -159,7 +159,8 @@ class MemTableRepFactory {
// bytes reserved for usage. // bytes reserved for usage.
class VectorRepFactory : public MemTableRepFactory { class VectorRepFactory : public MemTableRepFactory {
const size_t count_; const size_t count_;
public:
public:
explicit VectorRepFactory(size_t count = 0) : count_(count) { } explicit VectorRepFactory(size_t count = 0) : count_(count) { }
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&, virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&,
Arena*) override; Arena*) override;
@ -170,9 +171,9 @@ public:
// This uses a skip list to store keys. It is the default. // This uses a skip list to store keys. It is the default.
class SkipListFactory : public MemTableRepFactory { class SkipListFactory : public MemTableRepFactory {
public: public:
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&, virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&,
Arena*) override; Arena*) override;
virtual const char* Name() const override { virtual const char* Name() const override {
return "SkipListFactory"; return "SkipListFactory";
} }
@ -196,4 +197,4 @@ extern MemTableRepFactory* NewHashSkipListRepFactory(
extern MemTableRepFactory* NewHashLinkListRepFactory( extern MemTableRepFactory* NewHashLinkListRepFactory(
const SliceTransform* transform, size_t bucket_count = 50000); const SliceTransform* transform, size_t bucket_count = 50000);
} } // namespace rocksdb

@ -9,6 +9,8 @@
#include "table/format.h" #include "table/format.h"
#include <string>
#include "port/port.h" #include "port/port.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "table/block.h" #include "table/block.h"
@ -43,8 +45,8 @@ void Footer::EncodeTo(std::string* dst) const {
metaindex_handle_.EncodeTo(dst); metaindex_handle_.EncodeTo(dst);
index_handle_.EncodeTo(dst); index_handle_.EncodeTo(dst);
dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber & 0xffffffffu)); PutFixed32(dst, static_cast<uint32_t>(table_magic_number() & 0xffffffffu));
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32)); PutFixed32(dst, static_cast<uint32_t>(table_magic_number() >> 32));
assert(dst->size() == original_size + kEncodedLength); assert(dst->size() == original_size + kEncodedLength);
} }
@ -52,13 +54,21 @@ Status Footer::DecodeFrom(Slice* input) {
assert(input != nullptr); assert(input != nullptr);
assert(input->size() >= kEncodedLength); assert(input->size() >= kEncodedLength);
const char* magic_ptr = input->data() + kEncodedLength - 8; const char* magic_ptr =
input->data() + kEncodedLength - kMagicNumberLengthByte;
const uint32_t magic_lo = DecodeFixed32(magic_ptr); const uint32_t magic_lo = DecodeFixed32(magic_ptr);
const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4); const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4);
const uint64_t magic = ((static_cast<uint64_t>(magic_hi) << 32) | const uint64_t magic = ((static_cast<uint64_t>(magic_hi) << 32) |
(static_cast<uint64_t>(magic_lo))); (static_cast<uint64_t>(magic_lo)));
if (magic != kTableMagicNumber) { if (HasInitializedTableMagicNumber()) {
return Status::InvalidArgument("not an sstable (bad magic number)"); if (magic != table_magic_number()) {
char buffer[80];
snprintf(buffer, sizeof(buffer) - 1,
"not an sstable (bad magic number --- %lx)", magic);
return Status::InvalidArgument(buffer);
}
} else {
set_table_magic_number(magic);
} }
Status result = metaindex_handle_.DecodeFrom(input); Status result = metaindex_handle_.DecodeFrom(input);
@ -221,7 +231,7 @@ Status UncompressBlockContents(const char* data, size_t n,
default: default:
return Status::Corruption("bad block type"); return Status::Corruption("bad block type");
} }
result->compression_type = kNoCompression; // not compressed any more result->compression_type = kNoCompression; // not compressed any more
return Status::OK(); return Status::OK();
} }

@ -21,6 +21,9 @@ class Block;
class RandomAccessFile; class RandomAccessFile;
struct ReadOptions; struct ReadOptions;
// the length of the magic number in bytes.
const int kMagicNumberLengthByte = 8;
// BlockHandle is a pointer to the extent of a file that stores a data // BlockHandle is a pointer to the extent of a file that stores a data
// block or a meta block. // block or a meta block.
class BlockHandle { class BlockHandle {
@ -63,12 +66,16 @@ class BlockHandle {
// end of every table file. // end of every table file.
class Footer { class Footer {
public: public:
// Constructs a footer without specifying its table magic number.
// In such case, the table magic number of such footer should be
// initialized via @ReadFooterFromFile().
Footer() : Footer(kInvalidTableMagicNumber) {}
// @table_magic_number serves two purposes: // @table_magic_number serves two purposes:
// 1. Identify different types of the tables. // 1. Identify different types of the tables.
// 2. Help us to identify if a given file is a valid sst. // 2. Help us to identify if a given file is a valid sst.
Footer(uint64_t table_magic_number) : explicit Footer(uint64_t table_magic_number)
kTableMagicNumber(table_magic_number) { : table_magic_number_(table_magic_number) {}
}
// The block handle for the metaindex block of the table // The block handle for the metaindex block of the table
const BlockHandle& metaindex_handle() const { return metaindex_handle_; } const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
@ -78,24 +85,52 @@ class Footer {
const BlockHandle& index_handle() const { const BlockHandle& index_handle() const {
return index_handle_; return index_handle_;
} }
void set_index_handle(const BlockHandle& h) { void set_index_handle(const BlockHandle& h) {
index_handle_ = h; index_handle_ = h;
} }
uint64_t table_magic_number() const { return table_magic_number_; }
void EncodeTo(std::string* dst) const; void EncodeTo(std::string* dst) const;
// Set the current footer based on the input slice. If table_magic_number_
// is not set (i.e., HasInitializedTableMagicNumber() is false), then this
// function will also initialize table_magic_number_. Otherwise, this
// function will verify whether the magic number specified in the input
// slice matches table_magic_number_ and update the current footer only
// when the test passes.
Status DecodeFrom(Slice* input); Status DecodeFrom(Slice* input);
// Encoded length of a Footer. Note that the serialization of a // Encoded length of a Footer. Note that the serialization of a
// Footer will always occupy exactly this many bytes. It consists // Footer will always occupy exactly this many bytes. It consists
// of two block handles and a magic number. // of two block handles and a magic number.
enum { enum {
kEncodedLength = 2*BlockHandle::kMaxEncodedLength + 8 kEncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8
}; };
const uint64_t kInvalidTableMagicNumber = 0;
private: private:
// Set table_magic_number_ only when it has not been initialized yet, i.e.
// while it still equals kInvalidTableMagicNumber.
//
// Returns true if @magic_number was stored. Returns false and leaves the
// current value untouched if a magic number had already been set.
bool set_table_magic_number(uint64_t magic_number) {
  // An already-initialized magic number must never be overwritten; callers
  // are expected to verify against it instead.
  if (HasInitializedTableMagicNumber()) {
    return false;
  }
  table_magic_number_ = magic_number;
  return true;
}
// Reports whether a magic number has been recorded for this footer, i.e.
// whether @table_magic_number_ holds anything other than
// @kInvalidTableMagicNumber.
bool HasInitializedTableMagicNumber() const {
  const bool still_invalid = (table_magic_number_ == kInvalidTableMagicNumber);
  return !still_invalid;
}
BlockHandle metaindex_handle_; BlockHandle metaindex_handle_;
BlockHandle index_handle_; BlockHandle index_handle_;
const uint64_t kTableMagicNumber; uint64_t table_magic_number_;
}; };
// Read the footer from file // Read the footer from file

@ -2,12 +2,13 @@
// This source code is licensed under the BSD-style license found in the // This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant // LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory. // of patent rights can be found in the PATENTS file in the same directory.
#include "table/meta_blocks.h" #include "table/meta_blocks.h"
#include <map> #include <map>
#include <string>
#include "rocksdb/table.h" #include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "table/block.h" #include "table/block.h"
#include "table/format.h" #include "table/format.h"
#include "util/coding.h" #include "util/coding.h"
@ -104,9 +105,8 @@ bool NotifyCollectTableCollectorsOnAdd(
Status s = collector->Add(key, value); Status s = collector->Add(key, value);
all_succeeded = all_succeeded && s.ok(); all_succeeded = all_succeeded && s.ok();
if (!s.ok()) { if (!s.ok()) {
LogPropertiesCollectionError( LogPropertiesCollectionError(info_log, "Add" /* method */,
info_log, "Add", /* method */ collector->Name() collector->Name());
);
} }
} }
return all_succeeded; return all_succeeded;
@ -123,9 +123,8 @@ bool NotifyCollectTableCollectorsOnFinish(
all_succeeded = all_succeeded && s.ok(); all_succeeded = all_succeeded && s.ok();
if (!s.ok()) { if (!s.ok()) {
LogPropertiesCollectionError( LogPropertiesCollectionError(info_log, "Finish" /* method */,
info_log, "Finish", /* method */ collector->Name() collector->Name());
);
} else { } else {
builder->Add(user_collected_properties); builder->Add(user_collected_properties);
} }
@ -151,14 +150,8 @@ Status ReadProperties(
BlockContents block_contents; BlockContents block_contents;
ReadOptions read_options; ReadOptions read_options;
read_options.verify_checksums = false; read_options.verify_checksums = false;
Status s = ReadBlockContents( Status s = ReadBlockContents(file, read_options, handle, &block_contents, env,
file, false);
read_options,
handle,
&block_contents,
env,
false
);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -166,22 +159,20 @@ Status ReadProperties(
Block properties_block(block_contents); Block properties_block(block_contents);
std::unique_ptr<Iterator> iter( std::unique_ptr<Iterator> iter(
properties_block.NewIterator(BytewiseComparator()) properties_block.NewIterator(BytewiseComparator()));
);
// All pre-defined properties of type uint64_t // All pre-defined properties of type uint64_t
std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = { std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
{ TablePropertiesNames::kDataSize, &table_properties->data_size }, {TablePropertiesNames::kDataSize, &table_properties->data_size},
{ TablePropertiesNames::kIndexSize, &table_properties->index_size }, {TablePropertiesNames::kIndexSize, &table_properties->index_size},
{ TablePropertiesNames::kFilterSize, &table_properties->filter_size }, {TablePropertiesNames::kFilterSize, &table_properties->filter_size},
{ TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size }, {TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size},
{ TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size }, {TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size},
{ TablePropertiesNames::kNumDataBlocks, {TablePropertiesNames::kNumDataBlocks,
&table_properties->num_data_blocks }, &table_properties->num_data_blocks},
{ TablePropertiesNames::kNumEntries, &table_properties->num_entries }, {TablePropertiesNames::kNumEntries, &table_properties->num_entries},
{ TablePropertiesNames::kFormatVersion, &table_properties->format_version }, {TablePropertiesNames::kFormatVersion, &table_properties->format_version},
{ TablePropertiesNames::kFixedKeyLen, &table_properties->fixed_key_len }, {TablePropertiesNames::kFixedKeyLen, &table_properties->fixed_key_len}};
};
std::string last_key; std::string last_key;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
@ -192,10 +183,8 @@ Status ReadProperties(
auto key = iter->key().ToString(); auto key = iter->key().ToString();
// properties block is strictly sorted with no duplicate key. // properties block is strictly sorted with no duplicate key.
assert( assert(last_key.empty() ||
last_key.empty() || BytewiseComparator()->Compare(key, last_key) > 0);
BytewiseComparator()->Compare(key, last_key) > 0
);
last_key = key; last_key = key;
auto raw_val = iter->value(); auto raw_val = iter->value();
@ -218,8 +207,7 @@ Status ReadProperties(
} else { } else {
// handle user-collected properties // handle user-collected properties
table_properties->user_collected_properties.insert( table_properties->user_collected_properties.insert(
std::make_pair(key, raw_val.ToString()) {key, raw_val.ToString()});
);
} }
} }
@ -244,21 +232,14 @@ Status ReadTableProperties(
BlockContents metaindex_contents; BlockContents metaindex_contents;
ReadOptions read_options; ReadOptions read_options;
read_options.verify_checksums = false; read_options.verify_checksums = false;
s = ReadBlockContents( s = ReadBlockContents(file, read_options, metaindex_handle,
file, &metaindex_contents, env, false);
read_options,
metaindex_handle,
&metaindex_contents,
env,
false
);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
Block metaindex_block(metaindex_contents); Block metaindex_block(metaindex_contents);
std::unique_ptr<Iterator> meta_iter( std::unique_ptr<Iterator> meta_iter(
metaindex_block.NewIterator(BytewiseComparator()) metaindex_block.NewIterator(BytewiseComparator()));
);
// -- Read property block // -- Read property block
meta_iter->Seek(kPropertiesBlock); meta_iter->Seek(kPropertiesBlock);
@ -266,21 +247,39 @@ Status ReadTableProperties(
if (meta_iter->Valid() && if (meta_iter->Valid() &&
meta_iter->key() == kPropertiesBlock && meta_iter->key() == kPropertiesBlock &&
meta_iter->status().ok()) { meta_iter->status().ok()) {
s = ReadProperties( s = ReadProperties(meta_iter->value(), file, env, info_log, properties);
meta_iter->value(),
file,
env,
info_log,
properties
);
} else { } else {
s = Status::Corruption( s = Status::Corruption(
"Unable to read the property block from the plain table" "Unable to read the property block from the plain table");
);
} }
return s; return s;
} }
// Opens the file at @file_path through options.env, reads its footer and
// stores the footer's magic number in @table_magic_number.
//
// Returns a non-OK status (and leaves @table_magic_number untouched) when
// the file cannot be opened, its size cannot be queried, it is shorter than
// an encoded footer, or the footer cannot be read.
Status ReadTableMagicNumber(const std::string& file_path,
                            const Options& options,
                            const EnvOptions& env_options,
                            uint64_t* table_magic_number) {
  unique_ptr<RandomAccessFile> file;
  Status s = options.env->NewRandomAccessFile(file_path, &file, env_options);
  if (!s.ok()) {
    return s;
  }

  // Propagate a failed size query rather than comparing an indeterminate
  // file_size against the footer length below.
  uint64_t file_size = 0;
  s = options.env->GetFileSize(file_path, &file_size);
  if (!s.ok()) {
    return s;
  }
  if (file_size < Footer::kEncodedLength) {
    return Status::InvalidArgument("file is too short to be an sstable");
  }

  // The footer is default-constructed, i.e. without an expected magic
  // number, so the value is taken from the file instead of being verified.
  Footer footer;
  s = ReadFooterFromFile(file.get(), file_size, &footer);
  if (!s.ok()) {
    return s;
  }
  *table_magic_number = footer.table_magic_number();
  return Status::OK();
}
} // namespace rocksdb } // namespace rocksdb

@ -8,6 +8,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include "db/builder.h"
#include "rocksdb/comparator.h" #include "rocksdb/comparator.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
@ -118,4 +119,10 @@ Status ReadTableProperties(
Logger* info_log, Logger* info_log,
TableProperties* properties); TableProperties* properties);
// Read the magic number of the specified file directly. The magic number
// of a valid sst table is the last 8 bytes of the file.
Status ReadTableMagicNumber(const std::string& file_path,
const Options& options,
const EnvOptions& env_options,
uint64_t* table_magic_number);
} // namespace rocksdb } // namespace rocksdb

@ -40,50 +40,31 @@ std::string TableProperties::ToString(
result.reserve(1024); result.reserve(1024);
// Basic Info // Basic Info
AppendProperty( AppendProperty(result, "# data blocks", num_data_blocks, prop_delim,
result, "# data blocks", num_data_blocks, prop_delim, kv_delim kv_delim);
);
AppendProperty(result, "# entries", num_entries, prop_delim, kv_delim); AppendProperty(result, "# entries", num_entries, prop_delim, kv_delim);
AppendProperty(result, "raw key size", raw_key_size, prop_delim, kv_delim); AppendProperty(result, "raw key size", raw_key_size, prop_delim, kv_delim);
AppendProperty( AppendProperty(result, "raw average key size",
result, num_entries != 0 ? 1.0 * raw_key_size / num_entries : 0.0,
"raw average key size", prop_delim, kv_delim);
num_entries != 0 ? 1.0 * raw_key_size / num_entries : 0.0, AppendProperty(result, "raw value size", raw_value_size, prop_delim,
prop_delim, kv_delim);
kv_delim AppendProperty(result, "raw average value size",
); num_entries != 0 ? 1.0 * raw_value_size / num_entries : 0.0,
AppendProperty( prop_delim, kv_delim);
result, "raw value size", raw_value_size, prop_delim, kv_delim
);
AppendProperty(
result,
"raw average value size",
num_entries != 0 ? 1.0 * raw_value_size / num_entries : 0.0,
prop_delim,
kv_delim
);
AppendProperty(result, "data block size", data_size, prop_delim, kv_delim); AppendProperty(result, "data block size", data_size, prop_delim, kv_delim);
AppendProperty(result, "index block size", index_size, prop_delim, kv_delim); AppendProperty(result, "index block size", index_size, prop_delim, kv_delim);
AppendProperty( AppendProperty(result, "filter block size", filter_size, prop_delim,
result, "filter block size", filter_size, prop_delim, kv_delim kv_delim);
); AppendProperty(result, "(estimated) table size",
AppendProperty( data_size + index_size + filter_size, prop_delim, kv_delim);
result,
"(estimated) table size",
data_size + index_size + filter_size,
prop_delim,
kv_delim
);
AppendProperty( AppendProperty(
result, result, "filter policy name",
"filter policy name",
filter_policy_name.empty() ? std::string("N/A") : filter_policy_name, filter_policy_name.empty() ? std::string("N/A") : filter_policy_name,
prop_delim, prop_delim, kv_delim);
kv_delim
);
return result; return result;
} }

@ -6,6 +6,9 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <inttypes.h>
#include <stdio.h>
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <string> #include <string>
@ -25,6 +28,7 @@
#include "rocksdb/slice_transform.h" #include "rocksdb/slice_transform.h"
#include "rocksdb/memtablerep.h" #include "rocksdb/memtablerep.h"
#include "table/block.h" #include "table/block.h"
#include "table/meta_blocks.h"
#include "table/block_based_table_builder.h" #include "table/block_based_table_builder.h"
#include "table/block_based_table_factory.h" #include "table/block_based_table_factory.h"
#include "table/block_based_table_reader.h" #include "table/block_based_table_reader.h"
@ -945,10 +949,7 @@ TEST(BlockBasedTableTest, BasicBlockBasedTableProperties) {
block_builder.Add(item.first, item.second); block_builder.Add(item.first, item.second);
} }
Slice content = block_builder.Finish(); Slice content = block_builder.Finish();
ASSERT_EQ( ASSERT_EQ(content.size() + kBlockTrailerSize, props.data_size);
content.size() + kBlockTrailerSize,
props.data_size
);
} }
TEST(BlockBasedTableTest, FilterPolicyNameProperties) { TEST(BlockBasedTableTest, FilterPolicyNameProperties) {
@ -957,9 +958,7 @@ TEST(BlockBasedTableTest, FilterPolicyNameProperties) {
std::vector<std::string> keys; std::vector<std::string> keys;
KVMap kvmap; KVMap kvmap;
Options options; Options options;
std::unique_ptr<const FilterPolicy> filter_policy( std::unique_ptr<const FilterPolicy> filter_policy(NewBloomFilterPolicy(10));
NewBloomFilterPolicy(10)
);
options.filter_policy = filter_policy.get(); options.filter_policy = filter_policy.get();
c.Finish(options, GetPlainInternalComparator(options.comparator), &keys, c.Finish(options, GetPlainInternalComparator(options.comparator), &keys,
@ -1031,10 +1030,8 @@ TEST(BlockBasedTableTest, NumBlockStat) {
KVMap kvmap; KVMap kvmap;
c.Finish(options, GetPlainInternalComparator(options.comparator), &ks, c.Finish(options, GetPlainInternalComparator(options.comparator), &ks,
&kvmap); &kvmap);
ASSERT_EQ( ASSERT_EQ(kvmap.size(),
kvmap.size(), c.table_reader()->GetTableProperties().num_data_blocks);
c.table_reader()->GetTableProperties().num_data_blocks
);
} }
class BlockCacheProperties { class BlockCacheProperties {
@ -1049,32 +1046,26 @@ class BlockCacheProperties {
} }
// Check if the fetched props matches the expected ones. // Check if the fetched props matches the expected ones.
void AssertEqual( void AssertEqual(int64_t index_block_cache_miss,
long index_block_cache_miss, int64_t index_block_cache_hit, int64_t data_block_cache_miss,
long index_block_cache_hit, int64_t data_block_cache_hit) const {
long data_block_cache_miss,
long data_block_cache_hit) const {
ASSERT_EQ(index_block_cache_miss, this->index_block_cache_miss); ASSERT_EQ(index_block_cache_miss, this->index_block_cache_miss);
ASSERT_EQ(index_block_cache_hit, this->index_block_cache_hit); ASSERT_EQ(index_block_cache_hit, this->index_block_cache_hit);
ASSERT_EQ(data_block_cache_miss, this->data_block_cache_miss); ASSERT_EQ(data_block_cache_miss, this->data_block_cache_miss);
ASSERT_EQ(data_block_cache_hit, this->data_block_cache_hit); ASSERT_EQ(data_block_cache_hit, this->data_block_cache_hit);
ASSERT_EQ( ASSERT_EQ(index_block_cache_miss + data_block_cache_miss,
index_block_cache_miss + data_block_cache_miss, this->block_cache_miss);
this->block_cache_miss ASSERT_EQ(index_block_cache_hit + data_block_cache_hit,
); this->block_cache_hit);
ASSERT_EQ(
index_block_cache_hit + data_block_cache_hit,
this->block_cache_hit
);
} }
private: private:
long block_cache_miss = 0; int64_t block_cache_miss = 0;
long block_cache_hit = 0; int64_t block_cache_hit = 0;
long index_block_cache_miss = 0; int64_t index_block_cache_miss = 0;
long index_block_cache_hit = 0; int64_t index_block_cache_hit = 0;
long data_block_cache_miss = 0; int64_t data_block_cache_miss = 0;
long data_block_cache_hit = 0; int64_t data_block_cache_hit = 0;
}; };
TEST(BlockBasedTableTest, BlockCacheTest) { TEST(BlockBasedTableTest, BlockCacheTest) {
@ -1104,12 +1095,8 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
{ {
BlockCacheProperties props(options.statistics.get()); BlockCacheProperties props(options.statistics.get());
// index will be added to block cache. // index will be added to block cache.
props.AssertEqual( props.AssertEqual(1, // index block miss
1, // index block miss 0, 0, 0);
0,
0,
0
);
} }
// Only index block will be accessed // Only index block will be accessed
@ -1119,24 +1106,16 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
// NOTE: to help better highlight the "detla" of each ticker, I use // NOTE: to help better highlight the "detla" of each ticker, I use
// <last_value> + <added_value> to indicate the increment of changed // <last_value> + <added_value> to indicate the increment of changed
// value; other numbers remain the same. // value; other numbers remain the same.
props.AssertEqual( props.AssertEqual(1, 0 + 1, // index block hit
1, 0, 0);
0 + 1, // index block hit
0,
0
);
} }
// Only data block will be accessed // Only data block will be accessed
{ {
iter->SeekToFirst(); iter->SeekToFirst();
BlockCacheProperties props(options.statistics.get()); BlockCacheProperties props(options.statistics.get());
props.AssertEqual( props.AssertEqual(1, 1, 0 + 1, // data block miss
1, 0);
1,
0 + 1, // data block miss
0
);
} }
// Data block will be in cache // Data block will be in cache
@ -1144,12 +1123,8 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
iter.reset(c.NewIterator()); iter.reset(c.NewIterator());
iter->SeekToFirst(); iter->SeekToFirst();
BlockCacheProperties props(options.statistics.get()); BlockCacheProperties props(options.statistics.get());
props.AssertEqual( props.AssertEqual(1, 1 + 1, /* index block hit */
1, 1, 0 + 1 /* data block hit */);
1 + 1, // index block hit
1,
0 + 1 // data block hit
);
} }
// release the iterator so that the block cache can reset correctly. // release the iterator so that the block cache can reset correctly.
iter.reset(); iter.reset();
@ -1175,12 +1150,8 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
c.Reopen(options); c.Reopen(options);
{ {
BlockCacheProperties props(options.statistics.get()); BlockCacheProperties props(options.statistics.get());
props.AssertEqual( props.AssertEqual(1, // index block miss
1, // index block miss 0, 0, 0);
0,
0,
0
);
} }
@ -1190,12 +1161,9 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
// is only 1, index block will be purged after data block is inserted. // is only 1, index block will be purged after data block is inserted.
iter.reset(c.NewIterator()); iter.reset(c.NewIterator());
BlockCacheProperties props(options.statistics.get()); BlockCacheProperties props(options.statistics.get());
props.AssertEqual( props.AssertEqual(1 + 1, // index block miss
1 + 1, // index block miss 0, 0, // data block miss
0, 0);
0, // data block miss
0
);
} }
{ {
@ -1203,12 +1171,8 @@ TEST(BlockBasedTableTest, BlockCacheTest) {
// block's cache miss. // block's cache miss.
iter->SeekToFirst(); iter->SeekToFirst();
BlockCacheProperties props(options.statistics.get()); BlockCacheProperties props(options.statistics.get());
props.AssertEqual( props.AssertEqual(2, 0, 0 + 1, // data block miss
2, 0);
0,
0 + 1, // data block miss
0
);
} }
} }
@ -1315,7 +1279,6 @@ TEST(GeneralTableTest, ApproximateOffsetOfPlain) {
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000));
} }
static void DoCompressionTest(CompressionType comp) { static void DoCompressionTest(CompressionType comp) {
@ -1359,11 +1322,9 @@ TEST(GeneralTableTest, ApproximateOffsetOfCompressed) {
valid++; valid++;
} }
for(int i =0; i < valid; i++) for (int i = 0; i < valid; i++) {
{
DoCompressionTest(compression_state[i]); DoCompressionTest(compression_state[i]);
} }
} }
TEST(Harness, Randomized) { TEST(Harness, Randomized) {
@ -1374,8 +1335,8 @@ TEST(Harness, Randomized) {
for (int num_entries = 0; num_entries < 2000; for (int num_entries = 0; num_entries < 2000;
num_entries += (num_entries < 50 ? 1 : 200)) { num_entries += (num_entries < 50 ? 1 : 200)) {
if ((num_entries % 10) == 0) { if ((num_entries % 10) == 0) {
fprintf(stderr, "case %d of %d: num_entries = %d\n", fprintf(stderr, "case %d of %d: num_entries = %d\n", (i + 1),
(i + 1), int(args.size()), num_entries); static_cast<int>(args.size()), num_entries);
} }
for (int e = 0; e < num_entries; e++) { for (int e = 0; e < num_entries; e++) {
std::string v; std::string v;

@ -14,10 +14,14 @@
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/iterator.h" #include "rocksdb/iterator.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h" #include "rocksdb/table.h"
#include "rocksdb/table_properties.h" #include "rocksdb/table_properties.h"
#include "table/block_based_table_factory.h"
#include "table/plain_table_factory.h"
#include "table/block.h" #include "table/block.h"
#include "table/block_builder.h" #include "table/block_builder.h"
#include "table/meta_blocks.h"
#include "table/format.h" #include "table/format.h"
#include "util/ldb_cmd.h" #include "util/ldb_cmd.h"
#include "util/random.h" #include "util/random.h"
@ -44,6 +48,9 @@ class SstFileReader {
private: private:
Status NewTableReader(const std::string& file_path); Status NewTableReader(const std::string& file_path);
Status SetTableOptionsByMagicNumber(uint64_t table_magic_number,
RandomAccessFile* file,
uint64_t file_size);
std::string file_name_; std::string file_name_;
uint64_t read_num_; uint64_t read_num_;
@ -54,9 +61,9 @@ class SstFileReader {
Status init_result_; Status init_result_;
unique_ptr<TableReader> table_reader_; unique_ptr<TableReader> table_reader_;
unique_ptr<RandomAccessFile> file_; unique_ptr<RandomAccessFile> file_;
// table_options_ and internal_comparator_ will also be used in // options_ and internal_comparator_ will also be used in
// ReadSequential internally (specifically, seek-related operations) // ReadSequential internally (specifically, seek-related operations)
Options table_options_; Options options_;
InternalKeyComparator internal_comparator_; InternalKeyComparator internal_comparator_;
}; };
@ -70,21 +77,68 @@ SstFileReader::SstFileReader(const std::string& file_path,
init_result_ = NewTableReader(file_name_); init_result_ = NewTableReader(file_name_);
} }
extern uint64_t kBlockBasedTableMagicNumber;
extern uint64_t kPlainTableMagicNumber;
Status SstFileReader::NewTableReader(const std::string& file_path) { Status SstFileReader::NewTableReader(const std::string& file_path) {
Status s = table_options_.env->NewRandomAccessFile(file_path, &file_, uint64_t magic_number;
soptions_); Status s =
ReadTableMagicNumber(file_path, options_, soptions_, &magic_number);
if (!s.ok()) {
return s;
}
if (magic_number == kPlainTableMagicNumber) {
soptions_.use_mmap_reads = true;
}
options_.comparator = &internal_comparator_;
s = options_.env->NewRandomAccessFile(file_path, &file_, soptions_);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
uint64_t file_size; uint64_t file_size;
table_options_.env->GetFileSize(file_path, &file_size); options_.env->GetFileSize(file_path, &file_size);
unique_ptr<TableFactory> table_factory; s = SetTableOptionsByMagicNumber(magic_number, file_.get(), file_size);
s = table_options_.table_factory->NewTableReader( if (!s.ok()) {
table_options_, soptions_, internal_comparator_, std::move(file_), return s;
file_size, &table_reader_); }
s = options_.table_factory->NewTableReader(
options_, soptions_, internal_comparator_, std::move(file_), file_size,
&table_reader_);
return s; return s;
} }
// Configures options_ so that the table factory matches the format
// identified by @table_magic_number, and prints the detected format to
// stdout.
//
// The table properties are read from @file first (its length is
// @file_size); any failure there is returned unchanged. For a plain-table
// file the fixed key length recorded in the properties is passed to the
// PlainTableFactory. Returns InvalidArgument for an unrecognized magic
// number.
Status SstFileReader::SetTableOptionsByMagicNumber(uint64_t table_magic_number,
                                                   RandomAccessFile* file,
                                                   uint64_t file_size) {
  TableProperties table_properties;
  Status s = rocksdb::ReadTableProperties(file, file_size, table_magic_number,
                                          options_.env, options_.info_log.get(),
                                          &table_properties);
  if (!s.ok()) {
    return s;
  }

  if (table_magic_number == kBlockBasedTableMagicNumber) {
    options_.table_factory = std::make_shared<BlockBasedTableFactory>();
    fprintf(stdout, "Sst file format: block-based\n");
  } else if (table_magic_number == kPlainTableMagicNumber) {
    options_.allow_mmap_reads = true;
    // NOTE(review): 2 and 0.8 are presumably the bloom bits per key and the
    // hash table ratio -- confirm against the PlainTableFactory constructor.
    options_.table_factory = std::make_shared<PlainTableFactory>(
        table_properties.fixed_key_len, 2, 0.8);
    options_.prefix_extractor = NewNoopTransform();
    fprintf(stdout, "Sst file format: plain table\n");
  } else {
    char error_msg_buffer[80];
    // uint64_t is not `unsigned long` on every platform, so widen explicitly
    // and print with %llx. snprintf always NUL-terminates within the size it
    // is given, so the full buffer size can be passed.
    snprintf(error_msg_buffer, sizeof(error_msg_buffer),
             "Unsupported table magic number --- %llx",
             static_cast<unsigned long long>(table_magic_number));
    return Status::InvalidArgument(error_msg_buffer);
  }

  return Status::OK();
}
Status SstFileReader::ReadSequential(bool print_kv, Status SstFileReader::ReadSequential(bool print_kv,
uint64_t read_num, uint64_t read_num,
bool has_from, bool has_from,

Loading…
Cancel
Save