Extend format 3 to partitioned index/filters (#3958)

Summary:
format_version 3 changes the format of index blocks by storing user keys instead of internal keys, which saves 8 bytes per key. This patch extends that format to the top-level indexes of partitioned indexes and partitioned filters.
Closes https://github.com/facebook/rocksdb/pull/3958

Differential Revision: D8294615

Pulled By: maysamyabandeh

fbshipit-source-id: 17666cc16b8076c363972e2308e31547e835f0fe
main
Maysam Yabandeh 7 years ago committed by Facebook Github Bot
parent 5504a056f8
commit b73652169e
  1. 8
      db/db_test_util.cc
  2. 16
      table/block_based_table_reader.cc
  3. 17
      table/index_builder.cc
  4. 1
      table/index_builder.h
  5. 27
      table/partitioned_filter_block.cc
  6. 17
      table/partitioned_filter_block.h
  7. 17
      table/partitioned_filter_block_test.cc

@ -449,12 +449,14 @@ Options DBTestBase::GetOptions(
break; break;
} }
case kBlockBasedTableWithPartitionedIndexFormat3: { case kBlockBasedTableWithPartitionedIndexFormat3: {
table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; table_options.format_version = 3;
options.prefix_extractor.reset(NewNoopTransform());
// Format 3 changes the binary index format. Since partitioned index is a // Format 3 changes the binary index format. Since partitioned index is a
// super-set of simple indexes, we are also using kTwoLevelIndexSearch to // super-set of simple indexes, we are also using kTwoLevelIndexSearch to
// test this format. // test this format.
table_options.format_version = 3; table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
// The top-level index in partition filters are also affected by format 3.
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
table_options.partition_filters = true;
break; break;
} }
case kBlockBasedTableWithIndexRestartInterval: { case kBlockBasedTableWithIndexRestartInterval: {

@ -237,16 +237,18 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
return NewTwoLevelIterator( return NewTwoLevelIterator(
new BlockBasedTable::PartitionedIndexIteratorState( new BlockBasedTable::PartitionedIndexIteratorState(
table_, &partition_map_, index_key_includes_seq_), table_, &partition_map_, index_key_includes_seq_),
index_block_->NewIterator( index_block_->NewIterator(icomparator_,
icomparator_, icomparator_->user_comparator(), nullptr, true)); icomparator_->user_comparator(), nullptr,
true, nullptr, index_key_includes_seq_));
} else { } else {
auto ro = ReadOptions(); auto ro = ReadOptions();
ro.fill_cache = fill_cache; ro.fill_cache = fill_cache;
bool kIsIndex = true; bool kIsIndex = true;
return new BlockBasedTableIterator( return new BlockBasedTableIterator(
table_, ro, *icomparator_, table_, ro, *icomparator_,
index_block_->NewIterator( index_block_->NewIterator(icomparator_,
icomparator_, icomparator_->user_comparator(), nullptr, true), icomparator_->user_comparator(), nullptr,
true, nullptr, index_key_includes_seq_),
false, false,
/* prefix_extractor */ nullptr, kIsIndex, index_key_includes_seq_); /* prefix_extractor */ nullptr, kIsIndex, index_key_includes_seq_);
} }
@ -262,7 +264,7 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
BlockIter biter; BlockIter biter;
BlockHandle handle; BlockHandle handle;
index_block_->NewIterator(icomparator_, icomparator_->user_comparator(), index_block_->NewIterator(icomparator_, icomparator_->user_comparator(),
&biter, true); &biter, true, nullptr, index_key_includes_seq_);
// Index partitions are assumed to be consecuitive. Prefetch them all. // Index partitions are assumed to be consecuitive. Prefetch them all.
// Read the first block offset // Read the first block offset
biter.SeekToFirst(); biter.SeekToFirst();
@ -1308,7 +1310,9 @@ FilterBlockReader* BlockBasedTable::ReadFilter(
return new PartitionedFilterBlockReader( return new PartitionedFilterBlockReader(
rep->prefix_filtering ? prefix_extractor : nullptr, rep->prefix_filtering ? prefix_extractor : nullptr,
rep->whole_key_filtering, std::move(block), nullptr, rep->whole_key_filtering, std::move(block), nullptr,
rep->ioptions.statistics, rep->internal_comparator, this); rep->ioptions.statistics, rep->internal_comparator, this,
rep_->table_properties == nullptr ||
!rep_->table_properties->index_key_is_user_key);
} }
case Rep::FilterType::kBlockFilter: case Rep::FilterType::kBlockFilter:

@ -66,6 +66,8 @@ PartitionedIndexBuilder::PartitionedIndexBuilder(
: IndexBuilder(comparator), : IndexBuilder(comparator),
index_block_builder_(table_opt.index_block_restart_interval, index_block_builder_(table_opt.index_block_restart_interval,
table_opt.format_version), table_opt.format_version),
index_block_builder_without_seq_(table_opt.index_block_restart_interval,
table_opt.format_version),
sub_index_builder_(nullptr), sub_index_builder_(nullptr),
table_opt_(table_opt), table_opt_(table_opt),
seperator_is_key_plus_seq_(false) {} seperator_is_key_plus_seq_(false) {}
@ -149,11 +151,20 @@ Status PartitionedIndexBuilder::Finish(
std::string handle_encoding; std::string handle_encoding;
last_partition_block_handle.EncodeTo(&handle_encoding); last_partition_block_handle.EncodeTo(&handle_encoding);
index_block_builder_.Add(last_entry.key, handle_encoding); index_block_builder_.Add(last_entry.key, handle_encoding);
if (!seperator_is_key_plus_seq_) {
index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key),
handle_encoding);
}
entries_.pop_front(); entries_.pop_front();
} }
// If there is no sub_index left, then return the 2nd level index. // If there is no sub_index left, then return the 2nd level index.
if (UNLIKELY(entries_.empty())) { if (UNLIKELY(entries_.empty())) {
index_blocks->index_block_contents = index_block_builder_.Finish(); if (seperator_is_key_plus_seq_) {
index_blocks->index_block_contents = index_block_builder_.Finish();
} else {
index_blocks->index_block_contents =
index_block_builder_without_seq_.Finish();
}
return Status::OK(); return Status::OK();
} else { } else {
// Finish the next partition index in line and Incomplete() to indicate we // Finish the next partition index in line and Incomplete() to indicate we
@ -192,7 +203,9 @@ size_t PartitionedIndexBuilder::EstimateTopLevelIndexSize(
uint64_t size = it->value->EstimatedSize(); uint64_t size = it->value->EstimatedSize();
BlockHandle tmp_block_handle(offset, size); BlockHandle tmp_block_handle(offset, size);
tmp_block_handle.EncodeTo(&tmp_handle_encoding); tmp_block_handle.EncodeTo(&tmp_handle_encoding);
tmp_builder.Add(it->key, tmp_handle_encoding); tmp_builder.Add(
seperator_is_key_plus_seq_ ? it->key : ExtractUserKey(it->key),
tmp_handle_encoding);
offset += size; offset += size;
} }
return tmp_builder.CurrentSizeEstimate(); return tmp_builder.CurrentSizeEstimate();

@ -368,6 +368,7 @@ class PartitionedIndexBuilder : public IndexBuilder {
}; };
std::list<Entry> entries_; // list of partitioned indexes and their keys std::list<Entry> entries_; // list of partitioned indexes and their keys
BlockBuilder index_block_builder_; // top-level index builder BlockBuilder index_block_builder_; // top-level index builder
BlockBuilder index_block_builder_without_seq_; // same for user keys
// the active partition index builder // the active partition index builder
ShortenedIndexBuilder* sub_index_builder_; ShortenedIndexBuilder* sub_index_builder_;
// the last key in the active partition index builder // the last key in the active partition index builder

@ -24,6 +24,7 @@ PartitionedFilterBlockBuilder::PartitionedFilterBlockBuilder(
: FullFilterBlockBuilder(prefix_extractor, whole_key_filtering, : FullFilterBlockBuilder(prefix_extractor, whole_key_filtering,
filter_bits_builder), filter_bits_builder),
index_on_filter_block_builder_(index_block_restart_interval), index_on_filter_block_builder_(index_block_restart_interval),
index_on_filter_block_builder_without_seq_(index_block_restart_interval),
p_index_builder_(p_index_builder), p_index_builder_(p_index_builder),
filters_in_partition_(0), filters_in_partition_(0),
num_added_(0) { num_added_(0) {
@ -65,6 +66,10 @@ Slice PartitionedFilterBlockBuilder::Finish(
std::string handle_encoding; std::string handle_encoding;
last_partition_block_handle.EncodeTo(&handle_encoding); last_partition_block_handle.EncodeTo(&handle_encoding);
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding); index_on_filter_block_builder_.Add(last_entry.key, handle_encoding);
if (!p_index_builder_->seperator_is_key_plus_seq()) {
index_on_filter_block_builder_without_seq_.Add(
ExtractUserKey(last_entry.key), handle_encoding);
}
filters.pop_front(); filters.pop_front();
} else { } else {
MaybeCutAFilterBlock(); MaybeCutAFilterBlock();
@ -74,7 +79,11 @@ Slice PartitionedFilterBlockBuilder::Finish(
if (UNLIKELY(filters.empty())) { if (UNLIKELY(filters.empty())) {
*status = Status::OK(); *status = Status::OK();
if (finishing_filters) { if (finishing_filters) {
return index_on_filter_block_builder_.Finish(); if (p_index_builder_->seperator_is_key_plus_seq()) {
return index_on_filter_block_builder_.Finish();
} else {
return index_on_filter_block_builder_without_seq_.Finish();
}
} else { } else {
// This is the rare case where no key was added to the filter // This is the rare case where no key was added to the filter
return Slice(); return Slice();
@ -91,12 +100,13 @@ Slice PartitionedFilterBlockBuilder::Finish(
PartitionedFilterBlockReader::PartitionedFilterBlockReader( PartitionedFilterBlockReader::PartitionedFilterBlockReader(
const SliceTransform* prefix_extractor, bool _whole_key_filtering, const SliceTransform* prefix_extractor, bool _whole_key_filtering,
BlockContents&& contents, FilterBitsReader* /*filter_bits_reader*/, BlockContents&& contents, FilterBitsReader* /*filter_bits_reader*/,
Statistics* stats, const Comparator& comparator, Statistics* stats, const InternalKeyComparator comparator,
const BlockBasedTable* table) const BlockBasedTable* table, const bool index_key_includes_seq)
: FilterBlockReader(contents.data.size(), stats, _whole_key_filtering), : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
prefix_extractor_(prefix_extractor), prefix_extractor_(prefix_extractor),
comparator_(comparator), comparator_(comparator),
table_(table) { table_(table),
index_key_includes_seq_(index_key_includes_seq) {
idx_on_fltr_blk_.reset(new Block(std::move(contents), idx_on_fltr_blk_.reset(new Block(std::move(contents),
kDisableGlobalSequenceNumber, kDisableGlobalSequenceNumber,
0 /* read_amp_bytes_per_bit */, stats)); 0 /* read_amp_bytes_per_bit */, stats));
@ -113,7 +123,8 @@ PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length]; char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length];
BlockIter biter; BlockIter biter;
BlockHandle handle; BlockHandle handle;
idx_on_fltr_blk_->NewIterator(&comparator_, &comparator_, &biter, true); idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(),
&biter, true, nullptr, index_key_includes_seq_);
biter.SeekToFirst(); biter.SeekToFirst();
for (; biter.Valid(); biter.Next()) { for (; biter.Valid(); biter.Next()) {
auto input = biter.value(); auto input = biter.value();
@ -207,7 +218,8 @@ bool PartitionedFilterBlockReader::PrefixMayMatch(
Slice PartitionedFilterBlockReader::GetFilterPartitionHandle( Slice PartitionedFilterBlockReader::GetFilterPartitionHandle(
const Slice& entry) { const Slice& entry) {
BlockIter iter; BlockIter iter;
idx_on_fltr_blk_->NewIterator(&comparator_, &comparator_, &iter, true); idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(),
&iter, true, nullptr, index_key_includes_seq_);
iter.Seek(entry); iter.Seek(entry);
if (UNLIKELY(!iter.Valid())) { if (UNLIKELY(!iter.Valid())) {
return Slice(); return Slice();
@ -269,7 +281,8 @@ void PartitionedFilterBlockReader::CacheDependencies(
auto rep = table_->rep_; auto rep = table_->rep_;
BlockIter biter; BlockIter biter;
BlockHandle handle; BlockHandle handle;
idx_on_fltr_blk_->NewIterator(&comparator_, &comparator_, &biter, true); idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(),
&biter, true, nullptr, index_key_includes_seq_);
// Index partitions are assumed to be consecuitive. Prefetch them all. // Index partitions are assumed to be consecuitive. Prefetch them all.
// Read the first block offset // Read the first block offset
biter.SeekToFirst(); biter.SeekToFirst();

@ -41,6 +41,8 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
private: private:
// Filter data // Filter data
BlockBuilder index_on_filter_block_builder_; // top-level index builder BlockBuilder index_on_filter_block_builder_; // top-level index builder
BlockBuilder
index_on_filter_block_builder_without_seq_; // same for user keys
struct FilterEntry { struct FilterEntry {
std::string key; std::string key;
Slice filter; Slice filter;
@ -68,13 +70,11 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
class PartitionedFilterBlockReader : public FilterBlockReader, class PartitionedFilterBlockReader : public FilterBlockReader,
public Cleanable { public Cleanable {
public: public:
explicit PartitionedFilterBlockReader(const SliceTransform* prefix_extractor, explicit PartitionedFilterBlockReader(
bool whole_key_filtering, const SliceTransform* prefix_extractor, bool whole_key_filtering,
BlockContents&& contents, BlockContents&& contents, FilterBitsReader* filter_bits_reader,
FilterBitsReader* filter_bits_reader, Statistics* stats, const InternalKeyComparator comparator,
Statistics* stats, const BlockBasedTable* table, const bool index_key_includes_seq);
const Comparator& comparator,
const BlockBasedTable* table);
virtual ~PartitionedFilterBlockReader(); virtual ~PartitionedFilterBlockReader();
virtual bool IsBlockBased() override { return false; } virtual bool IsBlockBased() override { return false; }
@ -98,8 +98,9 @@ class PartitionedFilterBlockReader : public FilterBlockReader,
const SliceTransform* prefix_extractor_; const SliceTransform* prefix_extractor_;
std::unique_ptr<Block> idx_on_fltr_blk_; std::unique_ptr<Block> idx_on_fltr_blk_;
const Comparator& comparator_; const InternalKeyComparator comparator_;
const BlockBasedTable* table_; const BlockBasedTable* table_;
const bool index_key_includes_seq_;
std::unordered_map<uint64_t, std::unordered_map<uint64_t,
BlockBasedTable::CachableEntry<FilterBlockReader>> BlockBasedTable::CachableEntry<FilterBlockReader>>
filter_map_; filter_map_;

@ -111,7 +111,7 @@ class PartitionedFilterBlockTest : public testing::Test {
std::unique_ptr<MockedBlockBasedTable> table; std::unique_ptr<MockedBlockBasedTable> table;
PartitionedFilterBlockReader* NewReader( PartitionedFilterBlockReader* NewReader(
PartitionedFilterBlockBuilder* builder) { PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib) {
BlockHandle bh; BlockHandle bh;
Status status; Status status;
Slice slice; Slice slice;
@ -127,13 +127,14 @@ class PartitionedFilterBlockTest : public testing::Test {
ioptions, env_options, table_options_, icomp, false))); ioptions, env_options, table_options_, icomp, false)));
auto reader = new PartitionedFilterBlockReader( auto reader = new PartitionedFilterBlockReader(
nullptr, true, BlockContents(slice, false, kNoCompression), nullptr, nullptr, true, BlockContents(slice, false, kNoCompression), nullptr,
nullptr, *icomp.user_comparator(), table.get()); nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq());
return reader; return reader;
} }
void VerifyReader(PartitionedFilterBlockBuilder* builder, void VerifyReader(PartitionedFilterBlockBuilder* builder,
bool empty = false) { PartitionedIndexBuilder* pib, bool empty = false) {
std::unique_ptr<PartitionedFilterBlockReader> reader(NewReader(builder)); std::unique_ptr<PartitionedFilterBlockReader> reader(
NewReader(builder, pib));
// Querying added keys // Querying added keys
const bool no_io = true; const bool no_io = true;
for (auto key : keys) { for (auto key : keys) {
@ -182,7 +183,7 @@ class PartitionedFilterBlockTest : public testing::Test {
builder->Add(keys[i]); builder->Add(keys[i]);
CutABlock(pib.get(), keys[i]); CutABlock(pib.get(), keys[i]);
VerifyReader(builder.get()); VerifyReader(builder.get(), pib.get());
return CountNumOfIndexPartitions(pib.get()); return CountNumOfIndexPartitions(pib.get());
} }
@ -202,7 +203,7 @@ class PartitionedFilterBlockTest : public testing::Test {
builder->Add(keys[i]); builder->Add(keys[i]);
CutABlock(pib.get(), keys[i]); CutABlock(pib.get(), keys[i]);
VerifyReader(builder.get()); VerifyReader(builder.get(), pib.get());
} }
void TestBlockPerAllKeys() { void TestBlockPerAllKeys() {
@ -220,7 +221,7 @@ class PartitionedFilterBlockTest : public testing::Test {
builder->Add(keys[i]); builder->Add(keys[i]);
CutABlock(pib.get(), keys[i]); CutABlock(pib.get(), keys[i]);
VerifyReader(builder.get()); VerifyReader(builder.get(), pib.get());
} }
void CutABlock(PartitionedIndexBuilder* builder, void CutABlock(PartitionedIndexBuilder* builder,
@ -261,7 +262,7 @@ TEST_F(PartitionedFilterBlockTest, EmptyBuilder) {
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder()); std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get())); std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get()));
const bool empty = true; const bool empty = true;
VerifyReader(builder.get(), empty); VerifyReader(builder.get(), pib.get(), empty);
} }
TEST_F(PartitionedFilterBlockTest, OneBlock) { TEST_F(PartitionedFilterBlockTest, OneBlock) {

Loading…
Cancel
Save