Add the property block for the plain table

Summary:
This is the last diff that adds the property block to the plain table.
The format resembles that of the block-based table: https://github.com/facebook/rocksdb/wiki/Rocksdb-table-format

  [data block]
  [meta block 1: stats block]
  [meta block 2: future extended block]
  ...
  [meta block K: future extended block]  (we may add more meta blocks in the future)
  [metaindex block]
  [index block: we only have the placeholder here, we can add persistent index block in the future]
  [Footer: contains magic number, handle to metaindex block and index block]
  <end_of_file>

Test Plan: extended existing property block test.

Reviewers: haobo, sdong, dhruba

CC: leveldb

Differential Revision: https://reviews.facebook.net/D14523
main
Kai Liu 11 years ago
parent 5f5e5fc2e9
commit 2e9efcd6d8
  1. 258
      db/table_properties_collector_test.cc
  2. 1
      include/rocksdb/table_properties.h
  3. 4
      table/block_based_table_builder.cc
  4. 101
      table/block_based_table_reader.cc
  5. 6
      table/block_based_table_reader.h
  6. 144
      table/meta_blocks.cc
  7. 26
      table/meta_blocks.h
  8. 111
      table/plain_table_builder.cc
  9. 10
      table/plain_table_builder.h
  10. 64
      table/plain_table_reader.cc
  11. 23
      table/plain_table_reader.h
  12. 2
      table/table_properties.cc
  13. 133
      table/table_test.cc

@ -12,7 +12,9 @@
#include "db/table_properties_collector.h"
#include "rocksdb/table_properties.h"
#include "rocksdb/table.h"
#include "rocksdb/plain_table_factory.h"
#include "table/block_based_table_factory.h"
#include "table/meta_blocks.h"
#include "util/coding.h"
#include "util/testharness.h"
#include "util/testutil.h"
@ -20,8 +22,6 @@
namespace rocksdb {
class TablePropertiesTest {
private:
unique_ptr<TableReader> table_reader_;
};
// TODO(kailiu) the following classes should be moved to some more general
@ -93,22 +93,6 @@ void MakeBuilder(
options.compression));
}
void OpenTable(
const Options& options,
const std::string& contents,
std::unique_ptr<TableReader>* table_reader) {
std::unique_ptr<RandomAccessFile> file(new FakeRandomeAccessFile(contents));
auto s = options.table_factory->GetTableReader(
options,
EnvOptions(),
std::move(file),
contents.size(),
table_reader
);
ASSERT_OK(s);
}
// Collects keys that start with "A" in a table.
class RegularKeysStartWithA: public TablePropertiesCollector {
public:
@ -141,23 +125,66 @@ class RegularKeysStartWithA: public TablePropertiesCollector {
uint32_t count_ = 0;
};
TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
Options options;
extern uint64_t kBlockBasedTableMagicNumber;
extern uint64_t kPlainTableMagicNumber;
void TestCustomizedTablePropertiesCollector(
uint64_t magic_number,
bool encode_as_internal,
const Options& options) {
// make sure the entries will be inserted with order.
std::map<std::string, std::string> kvs = {
{"About", "val5"}, // starts with 'A'
{"Abstract", "val2"}, // starts with 'A'
{"Around", "val7"}, // starts with 'A'
{"Beyond", "val3"},
{"Builder", "val1"},
{"Cancel", "val4"},
{"Find", "val6"},
{"About ", "val5"}, // starts with 'A'
{"Abstract", "val2"}, // starts with 'A'
{"Around ", "val7"}, // starts with 'A'
{"Beyond ", "val3"},
{"Builder ", "val1"},
{"Cancel ", "val4"},
{"Find ", "val6"},
};
// -- Step 1: build table
std::unique_ptr<TableBuilder> builder;
std::unique_ptr<FakeWritableFile> writable;
MakeBuilder(options, &writable, &builder);
for (const auto& kv : kvs) {
if (encode_as_internal) {
InternalKey ikey(kv.first, 0, ValueType::kTypeValue);
builder->Add(ikey.Encode(), kv.second);
} else {
builder->Add(kv.first, kv.second);
}
}
ASSERT_OK(builder->Finish());
// -- Step 2: Read properties
FakeRandomeAccessFile readable(writable->contents());
TableProperties props;
Status s = ReadTableProperties(
&readable,
writable->contents().size(),
magic_number,
Env::Default(),
nullptr,
&props
);
ASSERT_OK(s);
auto user_collected = props.user_collected_properties;
ASSERT_EQ("Rocksdb", user_collected.at("TablePropertiesTest"));
uint32_t starts_with_A = 0;
Slice key(user_collected.at("Count"));
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
ASSERT_EQ(3u, starts_with_A);
}
TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
// Test properties collectors with internal keys or regular keys
// for block based table
for (bool encode_as_internal : { true, false }) {
// -- Step 1: build table
Options options;
auto collector = new RegularKeysStartWithA();
if (encode_as_internal) {
options.table_properties_collectors = {
@ -167,97 +194,114 @@ TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
options.table_properties_collectors.resize(1);
options.table_properties_collectors[0].reset(collector);
}
std::unique_ptr<TableBuilder> builder;
std::unique_ptr<FakeWritableFile> writable;
MakeBuilder(options, &writable, &builder);
for (const auto& kv : kvs) {
if (encode_as_internal) {
InternalKey ikey(kv.first, 0, ValueType::kTypeValue);
builder->Add(ikey.Encode(), kv.second);
} else {
builder->Add(kv.first, kv.second);
}
}
ASSERT_OK(builder->Finish());
// -- Step 2: Open table
std::unique_ptr<TableReader> table_reader;
OpenTable(options, writable->contents(), &table_reader);
const auto& properties =
table_reader->GetTableProperties().user_collected_properties;
ASSERT_EQ("Rocksdb", properties.at("TablePropertiesTest"));
uint32_t starts_with_A = 0;
Slice key(properties.at("Count"));
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
ASSERT_EQ(3u, starts_with_A);
TestCustomizedTablePropertiesCollector(
kBlockBasedTableMagicNumber,
encode_as_internal,
options
);
}
// test plain table
Options options;
options.table_properties_collectors.push_back(
std::make_shared<RegularKeysStartWithA>()
);
options.table_factory = std::make_shared<PlainTableFactory>(8, 8, 0);
TestCustomizedTablePropertiesCollector(
kPlainTableMagicNumber, true, options
);
}
TEST(TablePropertiesTest, InternalKeyPropertiesCollector) {
void TestInternalKeyPropertiesCollector(
uint64_t magic_number,
bool sanitized,
std::shared_ptr<TableFactory> table_factory) {
InternalKey keys[] = {
InternalKey("A", 0, ValueType::kTypeValue),
InternalKey("B", 0, ValueType::kTypeValue),
InternalKey("C", 0, ValueType::kTypeValue),
InternalKey("W", 0, ValueType::kTypeDeletion),
InternalKey("X", 0, ValueType::kTypeDeletion),
InternalKey("Y", 0, ValueType::kTypeDeletion),
InternalKey("Z", 0, ValueType::kTypeDeletion),
InternalKey("A ", 0, ValueType::kTypeValue),
InternalKey("B ", 0, ValueType::kTypeValue),
InternalKey("C ", 0, ValueType::kTypeValue),
InternalKey("W ", 0, ValueType::kTypeDeletion),
InternalKey("X ", 0, ValueType::kTypeDeletion),
InternalKey("Y ", 0, ValueType::kTypeDeletion),
InternalKey("Z ", 0, ValueType::kTypeDeletion),
};
for (bool sanitized : { false, true }) {
std::unique_ptr<TableBuilder> builder;
std::unique_ptr<FakeWritableFile> writable;
Options options;
if (sanitized) {
options.table_properties_collectors = {
std::make_shared<RegularKeysStartWithA>()
};
// with sanitization, even regular properties collector will be able to
// handle internal keys.
auto comparator = options.comparator;
// HACK: Set options.info_log to avoid writing log in
// SanitizeOptions().
options.info_log = std::make_shared<DumbLogger>();
options = SanitizeOptions(
"db", // just a place holder
nullptr, // with skip internal key comparator
nullptr, // don't care filter policy
options
);
options.comparator = comparator;
} else {
options.table_properties_collectors = {
std::make_shared<InternalKeyPropertiesCollector>()
};
}
MakeBuilder(options, &writable, &builder);
for (const auto& k : keys) {
builder->Add(k.Encode(), "val");
}
std::unique_ptr<TableBuilder> builder;
std::unique_ptr<FakeWritableFile> writable;
Options options;
options.table_factory = table_factory;
if (sanitized) {
options.table_properties_collectors = {
std::make_shared<RegularKeysStartWithA>()
};
// with sanitization, even regular properties collector will be able to
// handle internal keys.
auto comparator = options.comparator;
// HACK: Set options.info_log to avoid writing log in
// SanitizeOptions().
options.info_log = std::make_shared<DumbLogger>();
options = SanitizeOptions(
"db", // just a place holder
nullptr, // with skip internal key comparator
nullptr, // don't care filter policy
options
);
options.comparator = comparator;
} else {
options.table_properties_collectors = {
std::make_shared<InternalKeyPropertiesCollector>()
};
}
ASSERT_OK(builder->Finish());
MakeBuilder(options, &writable, &builder);
for (const auto& k : keys) {
builder->Add(k.Encode(), "val");
}
std::unique_ptr<TableReader> table_reader;
OpenTable(options, writable->contents(), &table_reader);
const auto& properties =
table_reader->GetTableProperties().user_collected_properties;
ASSERT_OK(builder->Finish());
FakeRandomeAccessFile readable(writable->contents());
TableProperties props;
Status s = ReadTableProperties(
&readable,
writable->contents().size(),
magic_number,
Env::Default(),
nullptr,
&props
);
ASSERT_OK(s);
uint64_t deleted = GetDeletedKeys(properties);
ASSERT_EQ(4u, deleted);
auto user_collected = props.user_collected_properties;
uint64_t deleted = GetDeletedKeys(user_collected);
ASSERT_EQ(4u, deleted);
if (sanitized) {
uint32_t starts_with_A = 0;
Slice key(properties.at("Count"));
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
ASSERT_EQ(1u, starts_with_A);
}
if (sanitized) {
uint32_t starts_with_A = 0;
Slice key(user_collected.at("Count"));
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
ASSERT_EQ(1u, starts_with_A);
}
}
// Runs the internal-key properties collector check three times:
//   1. block-based table, options passed through SanitizeOptions()
//   2. block-based table, options used as-is (not sanitized)
//   3. plain table, options used as-is (not sanitized)
TEST(TablePropertiesTest, InternalKeyPropertiesCollector) {
  TestInternalKeyPropertiesCollector(
      kBlockBasedTableMagicNumber,
      true /* sanitize */,
      std::make_shared<BlockBasedTableFactory>()
  );

  // This call must pass `false`: passing `true` here would simply repeat the
  // sanitized case above and leave the non-sanitized block-based path
  // untested.
  TestInternalKeyPropertiesCollector(
      kBlockBasedTableMagicNumber,
      false /* not sanitize */,
      std::make_shared<BlockBasedTableFactory>()
  );

  TestInternalKeyPropertiesCollector(
      kPlainTableMagicNumber,
      false /* not sanitize */,
      std::make_shared<PlainTableFactory>(8, 8, 0)
  );
}
} // namespace rocksdb
int main(int argc, char** argv) {

@ -64,6 +64,7 @@ struct TablePropertiesNames {
static const std::string kFilterPolicy;
};
extern const std::string kPropertiesBlock;
// `TablePropertiesCollector` provides the mechanism for users to collect
// their own interested properties. This class is essentially a collection

@ -387,7 +387,7 @@ Status BlockBasedTableBuilder::Finish() {
&properties_block_handle
);
meta_index_builer.Add(BlockBasedTable::kPropertiesBlock,
meta_index_builer.Add(kPropertiesBlock,
properties_block_handle);
} // end of properties block writing
@ -459,7 +459,5 @@ uint64_t BlockBasedTableBuilder::FileSize() const {
const std::string BlockBasedTable::kFilterBlockPrefix =
"filter.";
const std::string BlockBasedTable::kPropertiesBlock =
"rocksdb.properties";
} // namespace rocksdb

@ -21,6 +21,7 @@
#include "table/block.h"
#include "table/filter_block.h"
#include "table/format.h"
#include "table/meta_blocks.h"
#include "table/two_level_iterator.h"
#include "util/coding.h"
@ -250,10 +251,16 @@ Status BlockBasedTable::Open(const Options& options,
// Read the properties
meta_iter->Seek(kPropertiesBlock);
if (meta_iter->Valid() && meta_iter->key() == Slice(kPropertiesBlock)) {
if (meta_iter->Valid() && meta_iter->key() == kPropertiesBlock) {
s = meta_iter->status();
if (s.ok()) {
s = ReadProperties(meta_iter->value(), rep, &rep->table_properties);
s = ReadProperties(
meta_iter->value(),
rep->file.get(),
rep->options.env,
rep->options.info_log.get(),
&rep->table_properties
);
}
if (!s.ok()) {
@ -401,96 +408,6 @@ FilterBlockReader* BlockBasedTable::ReadFilter (
rep->options, block.data, block.heap_allocated);
}
// Reads the properties block referenced by `handle_value` and fills in
// `table_properties`.
//
// @handle_value     encoded BlockHandle of the properties block.
// @rep              supplies the file to read from, the Env and the info log.
// @table_properties output; must not be null.
//
// Returns InvalidArgument if the handle cannot be decoded, the error from
// ReadBlockContents() if the block cannot be read, or the iterator's status
// if walking the block fails. Malformed values of the predefined uint64
// properties are logged and skipped; they do not fail the call.
Status BlockBasedTable::ReadProperties(
    const Slice& handle_value, Rep* rep, TableProperties* table_properties) {
  assert(table_properties);

  Slice v = handle_value;
  BlockHandle handle;
  if (!handle.DecodeFrom(&v).ok()) {
    return Status::InvalidArgument("Failed to decode properties block handle");
  }

  BlockContents block_contents;
  Status s = ReadBlockContents(
      rep->file.get(),
      ReadOptions(),
      handle,
      &block_contents,
      rep->options.env,
      false
  );

  if (!s.ok()) {
    return s;
  }

  Block properties_block(block_contents);
  std::unique_ptr<Iterator> iter(
      properties_block.NewIterator(BytewiseComparator())
  );

  // All pre-defined properties of type uint64_t
  std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
    { TablePropertiesNames::kDataSize,
      &table_properties->data_size },
    { TablePropertiesNames::kIndexSize,
      &table_properties->index_size },
    { TablePropertiesNames::kFilterSize,
      &table_properties->filter_size },
    { TablePropertiesNames::kRawKeySize,
      &table_properties->raw_key_size },
    { TablePropertiesNames::kRawValueSize,
      &table_properties->raw_value_size },
    { TablePropertiesNames::kNumDataBlocks,
      &table_properties->num_data_blocks },
    { TablePropertiesNames::kNumEntries,
      &table_properties->num_entries },
  };

  std::string last_key;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    s = iter->status();
    if (!s.ok()) {
      break;
    }

    auto key = iter->key().ToString();
    // properties block is strictly sorted with no duplicate key.
    assert(
        last_key.empty() ||
        BytewiseComparator()->Compare(key, last_key) > 0
    );
    last_key = key;

    auto raw_val = iter->value();
    auto pos = predefined_uint64_properties.find(key);

    if (pos != predefined_uint64_properties.end()) {
      // handle predefined rocksdb properties
      uint64_t val;
      if (!GetVarint64(&raw_val, &val)) {
        // skip malformed value
        auto error_msg =
          "[Warning] detect malformed value in properties meta-block:"
          "\tkey: " + key + "\tval: " + raw_val.ToString();
        Log(rep->options.info_log, "%s", error_msg.c_str());
        continue;
      }
      *(pos->second) = val;
    } else if (key == TablePropertiesNames::kFilterPolicy) {
      table_properties->filter_policy_name = raw_val.ToString();
    } else {
      // handle user-collected
      table_properties->user_collected_properties.insert(
          std::make_pair(key, raw_val.ToString())
      );
    }
  }

  // The loop also ends when the iterator stops being Valid(); if that was
  // caused by an error, the in-loop check never saw it. Pick it up here so a
  // partially read properties block is not reported as success.
  if (s.ok()) {
    s = iter->status();
  }

  return s;
}
Status BlockBasedTable::GetBlock(
const BlockBasedTable* table,
const BlockHandle& handle,

@ -38,7 +38,6 @@ using std::unique_ptr;
class BlockBasedTable : public TableReader {
public:
static const std::string kFilterBlockPrefix;
static const std::string kPropertiesBlock;
// Attempt to open the table that is stored in bytes [0..file_size)
// of "file", and read the metadata entries necessary to allow
@ -142,7 +141,6 @@ class BlockBasedTable : public TableReader {
void ReadMeta(const Footer& footer);
void ReadFilter(const Slice& filter_handle_value);
static Status ReadProperties(const Slice& handle_value, Rep* rep);
// Read the meta block from sst.
static Status ReadMetaBlock(
@ -156,10 +154,6 @@ class BlockBasedTable : public TableReader {
Rep* rep,
size_t* filter_size = nullptr);
// Read the table properties from properties block.
static Status ReadProperties(
const Slice& handle_value, Rep* rep, TableProperties* properties);
static void SetupCacheKeyPrefix(Rep* rep);
explicit BlockBasedTable(Rep* rep) :

@ -8,6 +8,7 @@
#include <map>
#include "rocksdb/table_properties.h"
#include "table/block.h"
#include "table/format.h"
#include "util/coding.h"
@ -131,4 +132,147 @@ bool NotifyCollectTableCollectorsOnFinish(
return all_succeeded;
}
// Reads the properties block referenced by `handle_value` from `file` and
// fills in `table_properties`.
//
// @handle_value     encoded BlockHandle of the properties block.
// @file             file the block is read from.
// @env              passed through to ReadBlockContents().
// @logger           receives a warning for every malformed predefined value.
// @table_properties output; must not be null.
//
// Entries whose key is one of the predefined uint64 property names are
// decoded as varint64 and stored in the matching field; the filter policy
// name is stored as a string; every other entry is treated as a
// user-collected property.
//
// Returns InvalidArgument if the handle cannot be decoded, the error from
// ReadBlockContents() if the block cannot be read, or the iterator's status
// if walking the block fails. Malformed predefined values are logged and
// skipped; they do not fail the call.
Status ReadProperties(
    const Slice& handle_value,
    RandomAccessFile* file,
    Env* env,
    Logger* logger,
    TableProperties* table_properties) {
  assert(table_properties);

  Slice v = handle_value;
  BlockHandle handle;
  if (!handle.DecodeFrom(&v).ok()) {
    return Status::InvalidArgument("Failed to decode properties block handle");
  }

  BlockContents block_contents;
  Status s = ReadBlockContents(
      file,
      ReadOptions(),
      handle,
      &block_contents,
      env,
      false
  );

  if (!s.ok()) {
    return s;
  }

  Block properties_block(block_contents);
  std::unique_ptr<Iterator> iter(
      properties_block.NewIterator(BytewiseComparator())
  );

  // All pre-defined properties of type uint64_t
  std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
    { TablePropertiesNames::kDataSize, &table_properties->data_size },
    { TablePropertiesNames::kIndexSize, &table_properties->index_size },
    { TablePropertiesNames::kFilterSize, &table_properties->filter_size },
    { TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size },
    { TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size },
    { TablePropertiesNames::kNumDataBlocks,
      &table_properties->num_data_blocks },
    { TablePropertiesNames::kNumEntries, &table_properties->num_entries },
  };

  std::string last_key;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    s = iter->status();
    if (!s.ok()) {
      break;
    }

    auto key = iter->key().ToString();
    // properties block is strictly sorted with no duplicate key.
    assert(
        last_key.empty() ||
        BytewiseComparator()->Compare(key, last_key) > 0
    );
    last_key = key;

    auto raw_val = iter->value();
    auto pos = predefined_uint64_properties.find(key);

    if (pos != predefined_uint64_properties.end()) {
      // handle predefined rocksdb properties
      uint64_t val;
      if (!GetVarint64(&raw_val, &val)) {
        // skip malformed value
        auto error_msg =
          "[Warning] detect malformed value in properties meta-block:"
          "\tkey: " + key + "\tval: " + raw_val.ToString();
        Log(logger, "%s", error_msg.c_str());
        continue;
      }
      *(pos->second) = val;
    } else if (key == TablePropertiesNames::kFilterPolicy) {
      table_properties->filter_policy_name = raw_val.ToString();
    } else {
      // handle user-collected properties
      table_properties->user_collected_properties.insert(
          std::make_pair(key, raw_val.ToString())
      );
    }
  }

  // The loop also ends when the iterator stops being Valid(); if that was
  // caused by an error, the in-loop check never saw it. Pick it up here so a
  // partially read properties block is not reported as success.
  if (s.ok()) {
    s = iter->status();
  }

  return s;
}
// Reads the table properties straight from a table file, without opening a
// table reader.
//
// Steps: read the footer (constructed with `table_magic_number`), read the
// metaindex block it points to, look up the kPropertiesBlock entry, and hand
// its value to ReadProperties().
//
// @file               file to read from.
// @file_size          size of `file`, passed to ReadFooterFromFile().
// @table_magic_number magic number used to construct the Footer.
// @env, @info_log     passed through to the block-reading helpers.
// @properties         output, filled in by ReadProperties().
//
// Returns the first error from reading the footer, the metaindex block or
// the metaindex iterator; Corruption if the metaindex has no properties
// entry; otherwise the result of ReadProperties().
Status ReadTableProperties(
    RandomAccessFile* file,
    uint64_t file_size,
    uint64_t table_magic_number,
    Env* env,
    Logger* info_log,
    TableProperties* properties) {
  // -- Read metaindex block
  Footer footer(table_magic_number);
  auto s = ReadFooterFromFile(file, file_size, &footer);
  if (!s.ok()) {
    return s;
  }

  auto metaindex_handle = footer.metaindex_handle();
  BlockContents metaindex_contents;
  s = ReadBlockContents(
      file,
      ReadOptions(),
      metaindex_handle,
      &metaindex_contents,
      env,
      false
  );
  if (!s.ok()) {
    return s;
  }
  Block metaindex_block(metaindex_contents);
  std::unique_ptr<Iterator> meta_iter(
      metaindex_block.NewIterator(BytewiseComparator())
  );

  // -- Read property block
  meta_iter->Seek(kPropertiesBlock);

  // Report an iterator failure as what it is instead of folding it into the
  // generic "missing block" corruption below.
  s = meta_iter->status();
  if (!s.ok()) {
    return s;
  }

  if (meta_iter->Valid() && meta_iter->key() == kPropertiesBlock) {
    return ReadProperties(
        meta_iter->value(),
        file,
        env,
        info_log,
        properties
    );
  }

  return Status::Corruption(
      "Unable to read the property block from the plain table"
  );
}
} // namespace rocksdb

@ -15,9 +15,11 @@
namespace rocksdb {
class BlockHandle;
class BlockBuilder;
class BlockHandle;
class Env;
class Logger;
class RandomAccessFile;
struct TableProperties;
// An STL-style comparator that performs bytewise comparison
@ -49,11 +51,6 @@ class MetaIndexBuilder {
Slice Finish();
private:
// * Key: meta block name
// * Value: block handle to that meta block
struct Rep;
Rep* rep_;
// store the sorted key/handle of the metablocks.
BytewiseSortedMap meta_block_handles_;
std::unique_ptr<BlockBuilder> meta_index_block_;
@ -103,4 +100,21 @@ bool NotifyCollectTableCollectorsOnFinish(
Logger* info_log,
PropertyBlockBuilder* builder);
// Read the properties from the table.
Status ReadProperties(
const Slice& handle_value,
RandomAccessFile* file,
Env* env,
Logger* logger,
TableProperties* table_properties);
// Directly read the properties from the properties block of a plain table.
Status ReadTableProperties(
RandomAccessFile* file,
uint64_t file_size,
uint64_t table_magic_number,
Env* env,
Logger* info_log,
TableProperties* properties);
} // namespace rocksdb

@ -14,12 +14,40 @@
#include "table/block_builder.h"
#include "table/filter_block.h"
#include "table/format.h"
#include "table/meta_blocks.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/stop_watch.h"
namespace rocksdb {
namespace {
// Appends @block_contents to @file and records where the block was placed.
// @block_handle receives the value of @offset on entry as the block offset,
//   and the size of @block_contents as the block size. It is filled in before
//   the append is attempted.
// @offset is advanced by the block size only if the append succeeded.
// Returns the status of the append.
Status WriteBlock(
    const Slice& block_contents,
    WritableFile* file,
    uint64_t* offset,
    BlockHandle* block_handle) {
  const auto block_size = block_contents.size();
  block_handle->set_offset(*offset);
  block_handle->set_size(block_size);

  Status append_status = file->Append(block_contents);
  if (!append_status.ok()) {
    return append_status;
  }

  *offset += block_size;
  return append_status;
}
} // namespace
// kPlainTableMagicNumber was picked by running
// echo rocksdb.plain.table | sha1sum
// and taking the leading 64 bits.
extern const uint64_t kPlainTableMagicNumber = 0x4f3418eb7a8f13b8ull;
PlainTableBuilder::PlainTableBuilder(const Options& options,
WritableFile* file,
int user_key_size, int key_prefix_len) :
@ -28,13 +56,16 @@ PlainTableBuilder::PlainTableBuilder(const Options& options,
PutFixed32(&version, 1 | 0x80000000);
file_->Append(Slice(version));
offset_ = 4;
}
PlainTableBuilder::~PlainTableBuilder() {
// for plain table, we put all the data in a big chunk.
properties_.num_data_blocks = 1;
// emphasize that currently plain table doesn't have persistent index or
// filter block.
properties_.index_size = 0;
properties_.filter_size = 0;
}
Status PlainTableBuilder::ChangeOptions(const Options& options) {
return Status::OK();
PlainTableBuilder::~PlainTableBuilder() {
}
void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
@ -52,7 +83,17 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
file_->Append(value);
offset_ += value_size + size.length();
num_entries_++;
properties_.num_entries++;
properties_.raw_key_size += key.size();
properties_.raw_value_size += value.size();
// notify property collectors
NotifyCollectTableCollectorsOnAdd(
key,
value,
options_.table_properties_collectors,
options_.info_log.get()
);
}
Status PlainTableBuilder::status() const {
@ -62,7 +103,63 @@ Status PlainTableBuilder::status() const {
Status PlainTableBuilder::Finish() {
assert(!closed_);
closed_ = true;
return Status::OK();
properties_.data_size = offset_;
// Write the following blocks
// 1. [meta block: properties]
// 2. [metaindex block]
// 3. [footer]
MetaIndexBuilder meta_index_builer;
PropertyBlockBuilder property_block_builder;
// -- Add basic properties
property_block_builder.AddTableProperty(properties_);
// -- Add user collected properties
NotifyCollectTableCollectorsOnFinish(
options_.table_properties_collectors,
options_.info_log.get(),
&property_block_builder
);
// -- Write property block
BlockHandle property_block_handle;
auto s = WriteBlock(
property_block_builder.Finish(),
file_,
&offset_,
&property_block_handle
);
if (!s.ok()) {
return s;
}
meta_index_builer.Add(kPropertiesBlock, property_block_handle);
// -- write metaindex block
BlockHandle metaindex_block_handle;
s = WriteBlock(
meta_index_builer.Finish(),
file_,
&offset_,
&metaindex_block_handle
);
if (!s.ok()) {
return s;
}
// Write Footer
Footer footer(kPlainTableMagicNumber);
footer.set_metaindex_handle(metaindex_block_handle);
footer.set_index_handle(BlockHandle::NullBlockHandle());
std::string footer_encoding;
footer.EncodeTo(&footer_encoding);
s = file_->Append(footer_encoding);
if (s.ok()) {
offset_ += footer_encoding.size();
}
return s;
}
void PlainTableBuilder::Abandon() {
@ -70,7 +167,7 @@ void PlainTableBuilder::Abandon() {
}
uint64_t PlainTableBuilder::NumEntries() const {
return num_entries_;
return properties_.num_entries;
}
uint64_t PlainTableBuilder::FileSize() const {

@ -32,14 +32,6 @@ public:
// REQUIRES: Either Finish() or Abandon() has been called.
~PlainTableBuilder();
// Change the options used by this builder. Note: only some of the
// option fields can be changed after construction. If a field is
// not allowed to change dynamically and its value in the structure
// passed to the constructor is different from its value in the
// structure passed to this method, this method will return an error
// without changing any fields.
Status ChangeOptions(const Options& options);
// Add key,value to the table being constructed.
// REQUIRES: key is after any previously added key according to comparator.
// REQUIRES: Finish(), Abandon() have not been called
@ -72,7 +64,7 @@ private:
WritableFile* file_;
uint64_t offset_ = 0;
Status status_;
uint64_t num_entries_ = 0;
TableProperties properties_;
const size_t user_key_size_;
bool closed_ = false; // Either Finish() or Abandon() has been called.

@ -19,6 +19,7 @@
#include "table/block.h"
#include "table/filter_block.h"
#include "table/format.h"
#include "table/meta_blocks.h"
#include "table/two_level_iterator.h"
#include "util/coding.h"
@ -41,6 +42,7 @@ public:
namespace rocksdb {
extern const uint64_t kPlainTableMagicNumber;
static uint32_t getBucketId(Slice const& s, size_t prefix_len,
uint32_t num_buckets) {
return MurmurHash(s.data(), prefix_len, 397) % num_buckets;
@ -49,18 +51,16 @@ static uint32_t getBucketId(Slice const& s, size_t prefix_len,
PlainTableReader::PlainTableReader(const EnvOptions& storage_options,
uint64_t file_size, int user_key_size,
int key_prefix_len, int bloom_bits_per_key,
double hash_table_ratio) :
double hash_table_ratio,
const TableProperties& table_properties) :
hash_table_size_(0), soptions_(storage_options), file_size_(file_size),
user_key_size_(user_key_size), key_prefix_len_(key_prefix_len),
hash_table_ratio_(hash_table_ratio) {
if (bloom_bits_per_key > 0) {
filter_policy_ = NewBloomFilterPolicy(bloom_bits_per_key);
} else {
filter_policy_ = nullptr;
}
hash_table_ = nullptr;
data_start_offset_ = 0;
data_end_offset_ = file_size;
hash_table_ratio_(hash_table_ratio),
filter_policy_(bloom_bits_per_key > 0 ?
NewBloomFilterPolicy(bloom_bits_per_key) : nullptr),
table_properties_(table_properties),
data_start_offset_(0),
data_end_offset_(table_properties_.data_size) {
}
PlainTableReader::~PlainTableReader() {
@ -87,19 +87,38 @@ Status PlainTableReader::Open(const Options& options,
return Status::NotSupported("File is too large for PlainTableReader!");
}
PlainTableReader* t = new PlainTableReader(soptions, file_size,
user_key_size,
key_prefix_len,
bloom_num_bits,
hash_table_ratio);
t->file_ = std::move(file);
t->options_ = options;
Status s = t->PopulateIndex(file_size);
TableProperties table_properties;
auto s = ReadTableProperties(
file.get(),
file_size,
kPlainTableMagicNumber,
options.env,
options.info_log.get(),
&table_properties
);
if (!s.ok()) {
delete t;
return s;
}
table_reader->reset(t);
std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader(
soptions,
file_size,
user_key_size,
key_prefix_len,
bloom_num_bits,
hash_table_ratio,
table_properties
));
new_reader->file_ = std::move(file);
new_reader->options_ = options;
// -- Populate Index
s = new_reader->PopulateIndex();
if (!s.ok()) {
return s;
}
*table_reader = std::move(new_reader);
return s;
}
@ -114,7 +133,7 @@ Iterator* PlainTableReader::NewIterator(const ReadOptions& options) {
return new PlainTableIterator(this);
}
Status PlainTableReader::PopulateIndex(uint64_t file_size) {
Status PlainTableReader::PopulateIndex() {
// Get mmapped memory to file_data_.
Status s = file_->Read(0, file_size_, &file_data_, nullptr);
if (!s.ok()) {
@ -124,7 +143,6 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
version_ ^= 0x80000000;
assert(version_ == 1);
data_start_offset_ = 4;
data_end_offset_ = file_size;
Slice key_slice;
Slice key_prefix_slice;
@ -140,7 +158,7 @@ Status PlainTableReader::PopulateIndex(uint64_t file_size) {
// are in order.
std::vector<std::pair<Slice, std::string>> prefix_index_pairs;
std::string current_prefix_index;
while (pos < file_size) {
while (pos < data_end_offset_) {
uint32_t key_offset = pos;
status_ = Next(pos, &key_slice, &value_slice, pos);
key_prefix_slice = Slice(key_slice.data(), key_prefix_len_);

@ -78,16 +78,21 @@ public:
void SetupForCompaction();
TableProperties& GetTableProperties() {
return tbl_props;
return table_properties_;
}
PlainTableReader(const EnvOptions& storage_options, uint64_t file_size,
int user_key_size, int key_prefix_len, int bloom_num_bits,
double hash_table_ratio);
PlainTableReader(
const EnvOptions& storage_options,
uint64_t file_size,
int user_key_size,
int key_prefix_len,
int bloom_num_bits,
double hash_table_ratio,
const TableProperties& table_properties);
~PlainTableReader();
private:
uint32_t* hash_table_;
uint32_t* hash_table_ = nullptr;
int hash_table_size_;
std::string sub_index_;
@ -99,8 +104,6 @@ private:
Slice file_data_;
uint32_t version_;
uint32_t file_size_;
uint32_t data_start_offset_;
uint32_t data_end_offset_;
const size_t user_key_size_;
const size_t key_prefix_len_;
const double hash_table_ratio_;
@ -108,7 +111,9 @@ private:
std::string filter_str_;
Slice filter_slice_;
TableProperties tbl_props;
TableProperties table_properties_;
uint32_t data_start_offset_;
uint32_t data_end_offset_;
static const size_t kNumInternalBytes = 8;
static const uint32_t kSubIndexMask = 0x80000000;
@ -125,7 +130,7 @@ private:
// any query to the table.
// This query will populate the hash table hash_table_, the second
// level of indexes sub_index_ and bloom filter filter_slice_ if enabled.
Status PopulateIndex(uint64_t file_size);
Status PopulateIndex();
// Check bloom filter to see whether it might contain this prefix
bool MayHavePrefix(const Slice& target_prefix);

@ -105,4 +105,6 @@ const std::string TablePropertiesNames::kNumEntries =
const std::string TablePropertiesNames::kFilterPolicy =
"rocksdb.filter.policy";
extern const std::string kPropertiesBlock = "rocksdb.properties";
} // namespace rocksdb

@ -15,17 +15,22 @@
#include "db/db_statistics.h"
#include "db/memtable.h"
#include "db/write_batch_internal.h"
#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/plain_table_factory.h"
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/memtablerep.h"
#include "table/meta_blocks.h"
#include "table/block_based_table_builder.h"
#include "table/block_based_table_factory.h"
#include "table/block_based_table_reader.h"
#include "table/block_builder.h"
#include "table/block.h"
#include "table/format.h"
#include "util/random.h"
#include "util/testharness.h"
#include "util/testutil.h"
@ -743,49 +748,6 @@ class Harness {
Constructor* constructor_;
};
// Test the empty key
TEST(Harness, SimpleEmptyKey) {
std::vector<TestArgs> args = GenerateArgList();
for (unsigned int i = 0; i < args.size(); i++) {
Init(args[i]);
Random rnd(test::RandomSeed() + 1);
Add("", "v");
Test(&rnd);
}
}
TEST(Harness, SimpleSingle) {
std::vector<TestArgs> args = GenerateArgList();
for (unsigned int i = 0; i < args.size(); i++) {
Init(args[i]);
Random rnd(test::RandomSeed() + 2);
Add("abc", "v");
Test(&rnd);
}
}
TEST(Harness, SimpleMulti) {
std::vector<TestArgs> args = GenerateArgList();
for (unsigned int i = 0; i < args.size(); i++) {
Init(args[i]);
Random rnd(test::RandomSeed() + 3);
Add("abc", "v");
Add("abcd", "v");
Add("ac", "v2");
Test(&rnd);
}
}
TEST(Harness, SimpleSpecialKey) {
std::vector<TestArgs> args = GenerateArgList();
for (unsigned int i = 0; i < args.size(); i++) {
Init(args[i]);
Random rnd(test::RandomSeed() + 4);
Add("\xff\xff", "v3");
Test(&rnd);
}
}
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
bool result = (val >= low) && (val <= high);
if (!result) {
@ -801,7 +763,7 @@ class TableTest { };
// This test includes all the basic checks except those for index size and block
// size, which will be conducted in separate unit tests.
TEST(TableTest, BasicTableProperties) {
TEST(TableTest, BasicBlockedBasedTableProperties) {
BlockBasedTableConstructor c(BytewiseComparator());
c.Add("a1", "val1");
@ -845,6 +807,47 @@ TEST(TableTest, BasicTableProperties) {
);
}
extern const uint64_t kPlainTableMagicNumber;
// Builds a plain table holding one entry per lowercase letter (16-byte keys,
// 28-byte values), then reads the properties back directly from the written
// bytes and checks the basic, pre-defined properties.
TEST(TableTest, BasicPlainTableProperties) {
  // -- Build the table into an in-memory sink
  StringSink sink;
  PlainTableFactory factory(8, 8, 0);
  std::unique_ptr<TableBuilder> builder(
      factory.GetTableBuilder(Options(), &sink, kNoCompression));

  for (char letter = 'a'; letter <= 'z'; ++letter) {
    std::string entry_key(16, letter);
    std::string entry_value(28, letter + 42);
    builder->Add(entry_key, entry_value);
  }
  ASSERT_OK(builder->Finish());

  // -- Read the properties back from what was written
  StringSource source(sink.contents(), 72242);
  TableProperties props;
  auto read_status = ReadTableProperties(
      &source,
      sink.contents().size(),
      kPlainTableMagicNumber,
      Env::Default(),
      nullptr,
      &props
  );
  ASSERT_OK(read_status);

  // -- Verify the pre-defined properties
  ASSERT_EQ(0ul, props.index_size);
  ASSERT_EQ(0ul, props.filter_size);
  ASSERT_EQ(16ul * 26, props.raw_key_size);
  ASSERT_EQ(28ul * 26, props.raw_value_size);
  ASSERT_EQ(26ul, props.num_entries);
  ASSERT_EQ(1ul, props.num_data_blocks);

  // NOTE: user-collected properties and internal keys are not checked here.
}
TEST(TableTest, FilterPolicyNameProperties) {
BlockBasedTableConstructor c(BytewiseComparator());
c.Add("a1", "val1");
@ -1292,6 +1295,48 @@ TEST(MemTableTest, Simple) {
delete memtable->Unref();
}
// For every generated test configuration: a table whose only entry has an
// empty key.
TEST(Harness, SimpleEmptyKey) {
  std::vector<TestArgs> arg_list = GenerateArgList();
  for (auto& test_args : arg_list) {
    Init(test_args);
    Random rnd(test::RandomSeed() + 1);
    Add("", "v");
    Test(&rnd);
  }
}
// For every generated test configuration: a table with a single entry.
TEST(Harness, SimpleSingle) {
  std::vector<TestArgs> arg_list = GenerateArgList();
  for (auto& test_args : arg_list) {
    Init(test_args);
    Random rnd(test::RandomSeed() + 2);
    Add("abc", "v");
    Test(&rnd);
  }
}
// For every generated test configuration: a table with three entries, two of
// which share the "abc" prefix.
TEST(Harness, SimpleMulti) {
  std::vector<TestArgs> arg_list = GenerateArgList();
  for (auto& test_args : arg_list) {
    Init(test_args);
    Random rnd(test::RandomSeed() + 3);
    Add("abc", "v");
    Add("abcd", "v");
    Add("ac", "v2");
    Test(&rnd);
  }
}
// For every generated test configuration: a table whose only key is the two
// bytes 0xff 0xff.
TEST(Harness, SimpleSpecialKey) {
  std::vector<TestArgs> arg_list = GenerateArgList();
  for (auto& test_args : arg_list) {
    Init(test_args);
    Random rnd(test::RandomSeed() + 4);
    Add("\xff\xff", "v3");
    Test(&rnd);
  }
}
} // namespace rocksdb

Loading…
Cancel
Save