Make table properties shareable

Summary:
We are going to expose properties of all tables to end users through "some" db interface.
However, current design doesn't naturally fit for this need, which is because:

1. If a table presents in table cache, we cannot simply return the reference to its table properties, because the table may be destroy after compaction (and we don't want to hold the ref of the version).
2. Copy table properties is OK, but it's slow.

Thus in this diff, I change the table reader's interface to return a shared pointer (for const table properties), instead a const refernce.

Test Plan: `make check` passed

Reviewers: haobo, sdong, dhruba

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D15999
main
kailiu 11 years ago
parent 0982c38020
commit 161ab42a8a
  1. 7
      db/simple_table_db_test.cc
  2. 10
      db/table_properties_collector_test.cc
  3. 10
      table/block_based_table_reader.cc
  4. 2
      table/block_based_table_reader.h
  5. 4
      table/format.cc
  6. 50
      table/meta_blocks.cc
  7. 25
      table/meta_blocks.h
  8. 17
      table/plain_table_reader.cc
  9. 8
      table/plain_table_reader.h
  10. 3
      table/table_reader.h
  11. 24
      table/table_test.cc
  12. 26
      tools/sst_dump.cc

@ -96,7 +96,7 @@ public:
void SetupForCompaction() override;
TableProperties& GetTableProperties() override;
std::shared_ptr<const TableProperties> GetTableProperties() const override;
~SimpleTableReader();
@ -172,7 +172,7 @@ struct SimpleTableReader::Rep {
unique_ptr<RandomAccessFile> file;
uint64_t index_start_offset;
int num_entries;
TableProperties table_properties;
std::shared_ptr<TableProperties> table_properties;
const static int user_key_size = 16;
const static int offset_length = 8;
@ -215,7 +215,8 @@ Status SimpleTableReader::Open(const Options& options,
void SimpleTableReader::SetupForCompaction() {
}
TableProperties& SimpleTableReader::GetTableProperties() {
std::shared_ptr<const TableProperties> SimpleTableReader::GetTableProperties()
const {
return rep_->table_properties;
}

@ -157,7 +157,7 @@ void TestCustomizedTablePropertiesCollector(
// -- Step 2: Read properties
FakeRandomeAccessFile readable(writable->contents());
TableProperties props;
TableProperties* props;
Status s = ReadTableProperties(
&readable,
writable->contents().size(),
@ -166,9 +166,10 @@ void TestCustomizedTablePropertiesCollector(
nullptr,
&props
);
std::unique_ptr<TableProperties> props_guard(props);
ASSERT_OK(s);
auto user_collected = props.user_collected_properties;
auto user_collected = props->user_collected_properties;
ASSERT_EQ("Rocksdb", user_collected.at("TablePropertiesTest"));
@ -256,7 +257,7 @@ void TestInternalKeyPropertiesCollector(
ASSERT_OK(builder->Finish());
FakeRandomeAccessFile readable(writable->contents());
TableProperties props;
TableProperties* props;
Status s = ReadTableProperties(
&readable,
writable->contents().size(),
@ -267,7 +268,8 @@ void TestInternalKeyPropertiesCollector(
);
ASSERT_OK(s);
auto user_collected = props.user_collected_properties;
std::unique_ptr<TableProperties> props_guard(props);
auto user_collected = props->user_collected_properties;
uint64_t deleted = GetDeletedKeys(user_collected);
ASSERT_EQ(4u, deleted);

@ -62,7 +62,7 @@ struct BlockBasedTable::Rep {
unique_ptr<Block> index_block;
unique_ptr<FilterBlockReader> filter;
TableProperties table_properties;
std::shared_ptr<const TableProperties> table_properties;
};
BlockBasedTable::~BlockBasedTable() {
@ -255,9 +255,10 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
meta_iter->Seek(kPropertiesBlock);
if (meta_iter->Valid() && meta_iter->key() == kPropertiesBlock) {
s = meta_iter->status();
TableProperties* table_properties = nullptr;
if (s.ok()) {
s = ReadProperties(meta_iter->value(), rep->file.get(), rep->options.env,
rep->options.info_log.get(), &rep->table_properties);
rep->options.info_log.get(), &table_properties);
}
if (!s.ok()) {
@ -265,6 +266,8 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
"[Warning] Encountered error while reading data from properties "
"block " + s.ToString();
Log(rep->options.info_log, "%s", err_msg.c_str());
} else {
rep->table_properties.reset(table_properties);
}
}
@ -339,7 +342,8 @@ void BlockBasedTable::SetupForCompaction() {
compaction_optimized_ = true;
}
const TableProperties& BlockBasedTable::GetTableProperties() {
std::shared_ptr<const TableProperties> BlockBasedTable::GetTableProperties()
const {
return rep_->table_properties;
}

@ -86,7 +86,7 @@ class BlockBasedTable : public TableReader {
// posix_fadvise
void SetupForCompaction() override;
const TableProperties& GetTableProperties() override;
std::shared_ptr<const TableProperties> GetTableProperties() const override;
~BlockBasedTable();

@ -10,6 +10,7 @@
#include "table/format.h"
#include <string>
#include <inttypes.h>
#include "port/port.h"
#include "rocksdb/env.h"
@ -64,7 +65,8 @@ Status Footer::DecodeFrom(Slice* input) {
if (magic != table_magic_number()) {
char buffer[80];
snprintf(buffer, sizeof(buffer) - 1,
"not an sstable (bad magic number --- %lx)", magic);
"not an sstable (bad magic number --- %#" PRIx64 ")",
magic);
return Status::InvalidArgument(buffer);
}
} else {

@ -133,12 +133,9 @@ bool NotifyCollectTableCollectorsOnFinish(
return all_succeeded;
}
Status ReadProperties(
const Slice& handle_value,
RandomAccessFile* file,
Env* env,
Logger* logger,
TableProperties* table_properties) {
Status ReadProperties(const Slice& handle_value, RandomAccessFile* file,
Env* env, Logger* logger,
TableProperties** table_properties) {
assert(table_properties);
Slice v = handle_value;
@ -161,18 +158,22 @@ Status ReadProperties(
std::unique_ptr<Iterator> iter(
properties_block.NewIterator(BytewiseComparator()));
auto new_table_properties = new TableProperties();
// All pre-defined properties of type uint64_t
std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
{TablePropertiesNames::kDataSize, &table_properties->data_size},
{TablePropertiesNames::kIndexSize, &table_properties->index_size},
{TablePropertiesNames::kFilterSize, &table_properties->filter_size},
{TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size},
{TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size},
{TablePropertiesNames::kDataSize, &new_table_properties->data_size},
{TablePropertiesNames::kIndexSize, &new_table_properties->index_size},
{TablePropertiesNames::kFilterSize, &new_table_properties->filter_size},
{TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size},
{TablePropertiesNames::kRawValueSize,
&new_table_properties->raw_value_size},
{TablePropertiesNames::kNumDataBlocks,
&table_properties->num_data_blocks},
{TablePropertiesNames::kNumEntries, &table_properties->num_entries},
{TablePropertiesNames::kFormatVersion, &table_properties->format_version},
{TablePropertiesNames::kFixedKeyLen, &table_properties->fixed_key_len}};
&new_table_properties->num_data_blocks},
{TablePropertiesNames::kNumEntries, &new_table_properties->num_entries},
{TablePropertiesNames::kFormatVersion,
&new_table_properties->format_version},
{TablePropertiesNames::kFixedKeyLen,
&new_table_properties->fixed_key_len}, };
std::string last_key;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
@ -203,24 +204,25 @@ Status ReadProperties(
}
*(pos->second) = val;
} else if (key == TablePropertiesNames::kFilterPolicy) {
table_properties->filter_policy_name = raw_val.ToString();
new_table_properties->filter_policy_name = raw_val.ToString();
} else {
// handle user-collected properties
table_properties->user_collected_properties.insert(
new_table_properties->user_collected_properties.insert(
{key, raw_val.ToString()});
}
}
if (s.ok()) {
*table_properties = new_table_properties;
} else {
delete new_table_properties;
}
return s;
}
Status ReadTableProperties(
RandomAccessFile* file,
uint64_t file_size,
uint64_t table_magic_number,
Env* env,
Logger* info_log,
TableProperties* properties) {
Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size,
uint64_t table_magic_number, Env* env,
Logger* info_log, TableProperties** properties) {
// -- Read metaindex block
Footer footer(table_magic_number);
auto s = ReadFooterFromFile(file, file_size, &footer);

@ -103,21 +103,20 @@ bool NotifyCollectTableCollectorsOnFinish(
PropertyBlockBuilder* builder);
// Read the properties from the table.
Status ReadProperties(
const Slice& handle_value,
RandomAccessFile* file,
Env* env,
Logger* logger,
TableProperties* table_properties);
// @returns a status to indicate if the operation succeeded. On success,
// *table_properties will point to a heap-allocated TableProperties
// object, otherwise value of `table_properties` will not be modified.
Status ReadProperties(const Slice& handle_value, RandomAccessFile* file,
Env* env, Logger* logger,
TableProperties** table_properties);
// Directly read the properties from the properties block of a plain table.
Status ReadTableProperties(
RandomAccessFile* file,
uint64_t file_size,
uint64_t table_magic_number,
Env* env,
Logger* info_log,
TableProperties* properties);
// @returns a status to indicate if the operation succeeded. On success,
// *table_properties will point to a heap-allocated TableProperties
// object, otherwise value of `table_properties` will not be modified.
Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size,
uint64_t table_magic_number, Env* env,
Logger* info_log, TableProperties** properties);
// Read the magic number of the specified file directly. The magic number
// of a valid sst table the last 8-byte of the file.

@ -87,15 +87,15 @@ PlainTableReader::PlainTableReader(const EnvOptions& storage_options,
const InternalKeyComparator& icomparator,
uint64_t file_size, int bloom_bits_per_key,
double hash_table_ratio,
const TableProperties& table_properties)
const TableProperties* table_properties)
: soptions_(storage_options),
internal_comparator_(icomparator),
file_size_(file_size),
kHashTableRatio(hash_table_ratio),
kBloomBitsPerKey(bloom_bits_per_key),
table_properties_(table_properties),
data_end_offset_(table_properties_.data_size),
user_key_len_(table_properties.fixed_key_len) {}
data_end_offset_(table_properties_->data_size),
user_key_len_(table_properties->fixed_key_len) {}
PlainTableReader::~PlainTableReader() {
delete[] hash_table_;
@ -117,17 +117,16 @@ Status PlainTableReader::Open(const Options& options,
return Status::NotSupported("File is too large for PlainTableReader!");
}
TableProperties table_properties;
TableProperties* props = nullptr;
auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,
options.env, options.info_log.get(),
&table_properties);
options.env, options.info_log.get(), &props);
if (!s.ok()) {
return s;
}
std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader(
soptions, internal_comparator, file_size, bloom_bits_per_key,
hash_table_ratio, table_properties));
std::unique_ptr<PlainTableReader> new_reader(
new PlainTableReader(soptions, internal_comparator, file_size,
bloom_bits_per_key, hash_table_ratio, props));
new_reader->file_ = std::move(file);
new_reader->options_ = options;

@ -64,13 +64,15 @@ class PlainTableReader: public TableReader {
void SetupForCompaction();
const TableProperties& GetTableProperties() { return table_properties_; }
std::shared_ptr<const TableProperties> GetTableProperties() const {
return table_properties_;
}
PlainTableReader(const EnvOptions& storage_options,
const InternalKeyComparator& internal_comparator,
uint64_t file_size, int bloom_num_bits,
double hash_table_ratio,
const TableProperties& table_properties);
const TableProperties* table_properties);
~PlainTableReader();
private:
@ -95,7 +97,7 @@ class PlainTableReader: public TableReader {
const int kBloomBitsPerKey;
DynamicBloom* bloom_ = nullptr;
TableProperties table_properties_;
std::shared_ptr<const TableProperties> table_properties_;
const uint32_t data_start_offset_ = 0;
const uint32_t data_end_offset_;
const size_t user_key_len_;

@ -8,6 +8,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <memory>
namespace rocksdb {
@ -47,7 +48,7 @@ class TableReader {
// posix_fadvise
virtual void SetupForCompaction() = 0;
virtual const TableProperties& GetTableProperties() = 0;
virtual std::shared_ptr<const TableProperties> GetTableProperties() const = 0;
// Calls (*result_handler)(handle_context, ...) repeatedly, starting with
// the entry found after a call to Seek(key), until result_handler returns

@ -932,7 +932,7 @@ TEST(BlockBasedTableTest, BasicBlockBasedTableProperties) {
c.Finish(options, GetPlainInternalComparator(options.comparator), &keys,
&kvmap);
auto& props = c.table_reader()->GetTableProperties();
auto& props = *c.table_reader()->GetTableProperties();
ASSERT_EQ(kvmap.size(), props.num_entries);
auto raw_key_size = kvmap.size() * 2ul;
@ -963,7 +963,7 @@ TEST(BlockBasedTableTest, FilterPolicyNameProperties) {
c.Finish(options, GetPlainInternalComparator(options.comparator), &keys,
&kvmap);
auto& props = c.table_reader()->GetTableProperties();
auto& props = *c.table_reader()->GetTableProperties();
ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name);
}
@ -1005,8 +1005,7 @@ TEST(BlockBasedTableTest, IndexSizeStat) {
c.Finish(options, GetPlainInternalComparator(options.comparator), &ks,
&kvmap);
auto index_size =
c.table_reader()->GetTableProperties().index_size;
auto index_size = c.table_reader()->GetTableProperties()->index_size;
ASSERT_GT(index_size, last_index_size);
last_index_size = index_size;
}
@ -1031,7 +1030,7 @@ TEST(BlockBasedTableTest, NumBlockStat) {
c.Finish(options, GetPlainInternalComparator(options.comparator), &ks,
&kvmap);
ASSERT_EQ(kvmap.size(),
c.table_reader()->GetTableProperties().num_data_blocks);
c.table_reader()->GetTableProperties()->num_data_blocks);
}
class BlockCacheProperties {
@ -1237,18 +1236,19 @@ TEST(PlainTableTest, BasicPlainTableProperties) {
StringSource source(sink.contents(), 72242, true);
TableProperties props;
TableProperties* props = nullptr;
std::unique_ptr<TableProperties> props_guard;
auto s = ReadTableProperties(&source, sink.contents().size(),
kPlainTableMagicNumber, Env::Default(), nullptr,
&props);
ASSERT_OK(s);
ASSERT_EQ(0ul, props.index_size);
ASSERT_EQ(0ul, props.filter_size);
ASSERT_EQ(16ul * 26, props.raw_key_size);
ASSERT_EQ(28ul * 26, props.raw_value_size);
ASSERT_EQ(26ul, props.num_entries);
ASSERT_EQ(1ul, props.num_data_blocks);
ASSERT_EQ(0ul, props->index_size);
ASSERT_EQ(0ul, props->filter_size);
ASSERT_EQ(16ul * 26, props->raw_key_size);
ASSERT_EQ(28ul * 26, props->raw_value_size);
ASSERT_EQ(26ul, props->num_entries);
ASSERT_EQ(1ul, props->num_data_blocks);
}
TEST(GeneralTableTest, ApproximateOffsetOfPlain) {

@ -7,6 +7,7 @@
#include <map>
#include <string>
#include <vector>
#include <inttypes.h>
#include "db/dbformat.h"
#include "db/memtable.h"
@ -43,7 +44,8 @@ class SstFileReader {
bool has_to,
const std::string& to_key);
Status ReadTableProperties(TableProperties* table_properties);
Status ReadTableProperties(
std::shared_ptr<const TableProperties>* table_properties);
uint64_t GetReadNumber() { return read_num_; }
private:
@ -112,10 +114,11 @@ Status SstFileReader::NewTableReader(const std::string& file_path) {
Status SstFileReader::SetTableOptionsByMagicNumber(uint64_t table_magic_number,
RandomAccessFile* file,
uint64_t file_size) {
TableProperties table_properties;
TableProperties* table_properties;
Status s = rocksdb::ReadTableProperties(file, file_size, table_magic_number,
options_.env, options_.info_log.get(),
&table_properties);
std::unique_ptr<TableProperties> props_guard(table_properties);
if (!s.ok()) {
return s;
}
@ -126,13 +129,14 @@ Status SstFileReader::SetTableOptionsByMagicNumber(uint64_t table_magic_number,
} else if (table_magic_number == kPlainTableMagicNumber) {
options_.allow_mmap_reads = true;
options_.table_factory = std::make_shared<PlainTableFactory>(
table_properties.fixed_key_len, 2, 0.8);
table_properties->fixed_key_len, 2, 0.8);
options_.prefix_extractor = NewNoopTransform();
fprintf(stdout, "Sst file format: plain table\n");
} else {
char error_msg_buffer[80];
snprintf(error_msg_buffer, sizeof(error_msg_buffer) - 1,
"Unsupported table magic number --- %lx)", table_magic_number);
"Unsupported table magic number --- %#" PRIx64,
table_magic_number);
return Status::InvalidArgument(error_msg_buffer);
}
@ -192,7 +196,8 @@ Status SstFileReader::ReadSequential(bool print_kv,
return ret;
}
Status SstFileReader::ReadTableProperties(TableProperties* table_properties) {
Status SstFileReader::ReadTableProperties(
std::shared_ptr<const TableProperties>* table_properties) {
if (!table_reader_) {
return init_result_;
}
@ -335,18 +340,19 @@ int main(int argc, char** argv) {
}
}
if (show_properties) {
rocksdb::TableProperties table_properties;
std::shared_ptr<const rocksdb::TableProperties> table_properties;
st = reader.ReadTableProperties(&table_properties);
if (!st.ok()) {
fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
} else {
fprintf(stdout,
"Table Properties:\n"
"------------------------------\n"
" %s", table_properties.ToString("\n ", ": ").c_str());
"Table Properties:\n"
"------------------------------\n"
" %s",
table_properties->ToString("\n ", ": ").c_str());
fprintf(stdout, "# deleted keys: %zd\n",
rocksdb::GetDeletedKeys(
table_properties.user_collected_properties));
table_properties->user_collected_properties));
}
}
}

Loading…
Cancel
Save