From 6f4154d6939773d3bb8bc131f09f573d04c10f51 Mon Sep 17 00:00:00 2001 From: Maysam Yabandeh Date: Tue, 13 Jun 2017 10:59:22 -0700 Subject: [PATCH] record index partition properties Summary: When index/filter partitioning is enabled, the user might need to check the index block size as well as the top-level index size via sst_dump. This patch records i) the number of partitions and ii) the top-level index size, and makes them accessible through sst_dump. The number of partitions for filters is the same as that of indexes. The top-level index for filters has a similar size to the top-level index for indexes, so it is not repeated. Closes https://github.com/facebook/rocksdb/pull/2437 Differential Revision: D5224225 Pulled By: maysamyabandeh fbshipit-source-id: 5324598c75793523aef1bb7ee225a5475e95a9cb --- include/rocksdb/table_properties.h | 6 ++++++ table/block_based_table_builder.cc | 15 +++++++++++---- table/block_based_table_reader.cc | 2 +- table/index_builder.cc | 26 +++++++++++++++++++++++++- table/index_builder.h | 2 ++ table/meta_blocks.cc | 8 ++++++++ table/table_properties.cc | 12 ++++++++++++ 7 files changed, 65 insertions(+), 6 deletions(-) diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 7e8e9ad95..6559b1f3a 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -31,6 +31,8 @@ typedef std::map UserCollectedProperties; struct TablePropertiesNames { static const std::string kDataSize; static const std::string kIndexSize; + static const std::string kIndexPartitions; + static const std::string kTopLevelIndexSize; static const std::string kFilterSize; static const std::string kRawKeySize; static const std::string kRawValueSize; @@ -134,6 +136,10 @@ struct TableProperties { uint64_t data_size = 0; // the size of index block.
uint64_t index_size = 0; + // Total number of index partitions if kTwoLevelIndexSearch is used + uint64_t index_partitions = 0; + // Size of the top-level index if kTwoLevelIndexSearch is used + uint64_t top_level_index_size = 0; // the size of filter block. uint64_t filter_size = 0; // total raw key size diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index f5a2a5dd7..910a70fb2 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -249,6 +249,7 @@ struct BlockBasedTableBuilder::Rep { InternalKeySliceTransform internal_prefix_transform; std::unique_ptr index_builder; + PartitionedIndexBuilder* p_index_builder_ = nullptr; std::string last_key; const CompressionType compression_type; @@ -297,12 +298,11 @@ struct BlockBasedTableBuilder::Rep { table_options, data_block)), column_family_id(_column_family_id), column_family_name(_column_family_name) { - PartitionedIndexBuilder* p_index_builder = nullptr; if (table_options.index_type == BlockBasedTableOptions::kTwoLevelIndexSearch) { - p_index_builder = PartitionedIndexBuilder::CreateIndexBuilder( + p_index_builder_ = PartitionedIndexBuilder::CreateIndexBuilder( &internal_comparator, table_options); - index_builder.reset(p_index_builder); + index_builder.reset(p_index_builder_); } else { index_builder.reset(IndexBuilder::CreateIndexBuilder( table_options.index_type, &internal_comparator, @@ -312,7 +312,7 @@ struct BlockBasedTableBuilder::Rep { filter_builder = nullptr; } else { filter_builder.reset( - CreateFilterBlockBuilder(_ioptions, table_options, p_index_builder)); + CreateFilterBlockBuilder(_ioptions, table_options, p_index_builder_)); } for (auto& collector_factories : *int_tbl_prop_collector_factories) { @@ -721,6 +721,13 @@ Status BlockBasedTableBuilder::Finish() { } property_collectors_names += "]"; r->props.property_collectors_names = property_collectors_names; + if (r->table_options.index_type == + 
BlockBasedTableOptions::kTwoLevelIndexSearch) { + assert(r->p_index_builder_ != nullptr); + r->props.index_partitions = r->p_index_builder_->NumPartitions(); + r->props.top_level_index_size = + r->p_index_builder_->EstimateTopLevelIndexSize(r->offset); + } // Add basic properties property_block_builder.AddTableProperty(r->props); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index a72aaeafb..67a79e940 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1851,7 +1851,7 @@ Status BlockBasedTable::CreateIndexReader( } default: { std::string error_message = - "Unrecognized index type: " + ToString(rep_->index_type); + "Unrecognized index type: " + ToString(index_type_on_file); return Status::InvalidArgument(error_message.c_str()); } } diff --git a/table/index_builder.cc b/table/index_builder.cc index 8ae4527dc..fdde77bc8 100644 --- a/table/index_builder.cc +++ b/table/index_builder.cc @@ -146,14 +146,38 @@ Status PartitionedIndexBuilder::Finish( } } +// Estimate size excluding the top-level index +// It is assumed that this method is called before writing index partition +// starts size_t PartitionedIndexBuilder::EstimatedSize() const { size_t total = 0; for (auto it = entries_.begin(); it != entries_.end(); ++it) { total += it->value->EstimatedSize(); } - total += index_block_builder_.CurrentSizeEstimate(); total += sub_index_builder_ == nullptr ? 0 : sub_index_builder_->EstimatedSize(); return total; } + +// Since when this method is called we do not know the index block offsets yet, +// the top-level index does not exist. Hence we estimate the block offsets and +// create a temporary top-level index. 
+size_t PartitionedIndexBuilder::EstimateTopLevelIndexSize( + uint64_t offset) const { + BlockBuilder tmp_builder( + table_opt_.index_block_restart_interval); // tmp top-level index builder + for (auto it = entries_.begin(); it != entries_.end(); ++it) { + std::string tmp_handle_encoding; + uint64_t size = it->value->EstimatedSize(); + BlockHandle tmp_block_handle(offset, size); + tmp_block_handle.EncodeTo(&tmp_handle_encoding); + tmp_builder.Add(it->key, tmp_handle_encoding); + offset += size; + } + return tmp_builder.CurrentSizeEstimate(); +} + +size_t PartitionedIndexBuilder::NumPartitions() const { + return entries_.size(); +} } // namespace rocksdb diff --git a/table/index_builder.h b/table/index_builder.h index 14f2512b2..1871abfc5 100644 --- a/table/index_builder.h +++ b/table/index_builder.h @@ -300,6 +300,8 @@ class PartitionedIndexBuilder : public IndexBuilder { const BlockHandle& last_partition_block_handle) override; virtual size_t EstimatedSize() const override; + size_t EstimateTopLevelIndexSize(uint64_t) const; + size_t NumPartitions() const; inline bool ShouldCutFilterBlock() { // Current policy is to align the partitions of index and filters diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 5011c7b39..6af536fbc 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -67,6 +67,10 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { Add(TablePropertiesNames::kRawValueSize, props.raw_value_size); Add(TablePropertiesNames::kDataSize, props.data_size); Add(TablePropertiesNames::kIndexSize, props.index_size); + if (props.index_partitions != 0) { + Add(TablePropertiesNames::kIndexPartitions, props.index_partitions); + Add(TablePropertiesNames::kTopLevelIndexSize, props.top_level_index_size); + } Add(TablePropertiesNames::kNumEntries, props.num_entries); Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks); Add(TablePropertiesNames::kFilterSize, props.filter_size); @@ -187,6 +191,10 @@ Status 
ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, std::unordered_map predefined_uint64_properties = { {TablePropertiesNames::kDataSize, &new_table_properties->data_size}, {TablePropertiesNames::kIndexSize, &new_table_properties->index_size}, + {TablePropertiesNames::kIndexPartitions, + &new_table_properties->index_partitions}, + {TablePropertiesNames::kTopLevelIndexSize, + &new_table_properties->top_level_index_size}, {TablePropertiesNames::kFilterSize, &new_table_properties->filter_size}, {TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size}, {TablePropertiesNames::kRawValueSize, diff --git a/table/table_properties.cc b/table/table_properties.cc index bdf6eed4c..b03928e88 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -93,6 +93,12 @@ std::string TableProperties::ToString( AppendProperty(result, "data block size", data_size, prop_delim, kv_delim); AppendProperty(result, "index block size", index_size, prop_delim, kv_delim); + if (index_partitions != 0) { + AppendProperty(result, "# index partitions", index_partitions, prop_delim, + kv_delim); + AppendProperty(result, "top-level index size", top_level_index_size, prop_delim, + kv_delim); + } AppendProperty(result, "filter block size", filter_size, prop_delim, kv_delim); AppendProperty(result, "(estimated) table size", @@ -139,6 +145,8 @@ std::string TableProperties::ToString( void TableProperties::Add(const TableProperties& tp) { data_size += tp.data_size; index_size += tp.index_size; + index_partitions += tp.index_partitions; + top_level_index_size += tp.top_level_index_size; filter_size += tp.filter_size; raw_key_size += tp.raw_key_size; raw_value_size += tp.raw_value_size; @@ -150,6 +158,10 @@ const std::string TablePropertiesNames::kDataSize = "rocksdb.data.size"; const std::string TablePropertiesNames::kIndexSize = "rocksdb.index.size"; +const std::string TablePropertiesNames::kIndexPartitions = + "rocksdb.index.partitions"; +const std::string 
TablePropertiesNames::kTopLevelIndexSize = + "rocksdb.top-level.index.size"; const std::string TablePropertiesNames::kFilterSize = "rocksdb.filter.size"; const std::string TablePropertiesNames::kRawKeySize =