Deallocate payload of BlockBasedTableBuilder::Rep::FilterBlockBuilder earlier for Full/PartitionedFilter (#9070)

Summary:
Note: This PR is the 1st part of a bigger PR stack (https://github.com/facebook/rocksdb/pull/9073).

Context:
Previously, the payload (i.e, filter data) within `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object is not deallocated until `BlockBasedTableBuilder` is deallocated, despite it is no longer useful after its related `filter_content` being written.
- Transferred the payload (i.e, the filter data) out of `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object
- For PartitionedFilter:
   - Unified `filters` and `filter_gc` lists into one `std::deque<FilterEntry> filters` by adding a new field `last_filter_entry_key` and storing the `std::unique_ptr filter_data` with the `Slice filter` in the same entry
   - Reset `last_filter_data` in the case where `filters` is empty, which should be as by then we would've finish using all the `Slice filter`
- Deallocated the payload by going out of scope as soon as we're done with using the `filter_content` associated with the payload
- This is an internal interface change at the level of `FilterBlockBuilder::Finish()`, which leads to touching the inherited interface in `BlockBasedFilterBlockBuilder`. But for that, the payload transferring is ignored.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9070

Test Plan: - The main focus is to catch segment fault error during `FilterBlockBuilder::Finish()` and `BlockBasedTableBuilder::Finish()` and interface mismatch. Relying on existing CI tests is enough as `assert(false)` was temporarily added to verify the new logic of transferring ownership indeed run

Reviewed By: pdillinger

Differential Revision: D31884933

Pulled By: hx235

fbshipit-source-id: f73ecfbea13788d4fc058013ace27230110b52f4
main
Hui Xiao 3 years ago committed by Facebook GitHub Bot
parent a64c8ca7a8
commit 1ababeb76a
  1. 3
      HISTORY.md
  2. 7
      table/block_based/block_based_filter_block.cc
  3. 4
      table/block_based/block_based_filter_block.h
  4. 11
      table/block_based/block_based_table_builder.cc
  5. 9
      table/block_based/filter_block.h
  6. 9
      table/block_based/full_filter_block.cc
  7. 4
      table/block_based/full_filter_block.h
  8. 26
      table/block_based/partitioned_filter_block.cc
  9. 13
      table/block_based/partitioned_filter_block.h
  10. 3
      table/block_based/partitioned_filter_block_test.cc

@ -25,6 +25,9 @@
* Add API `FSDirectory::FsyncWithDirOptions()`, which provides extra information like directory fsync reason in `DirFsyncOptions`. File system like btrfs is using that to skip directory fsync for creating a new file, or when renaming a file, fsync the target file instead of the directory, which improves the `DB::Open()` speed by ~20%.
* `DB::Open()` is not going be blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true.
### Performance Improvements
* Released some memory related to filter construction earlier in `BlockBasedTableBuilder` for `FullFilter` and `PartitionedFilter` case (#9070)
## 6.26.0 (2021-10-20)
### Bug Fixes
* Fixes a bug in directed IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets.

@ -117,9 +117,10 @@ inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
}
}
Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
Status* status) {
// In this impl we ignore BlockHandle
Slice BlockBasedFilterBlockBuilder::Finish(
const BlockHandle& /*tmp*/, Status* status,
std::unique_ptr<const char[]>* /* filter_data */) {
// In this impl we ignore BlockHandle and filter_data
*status = Status::OK();
if (!start_.empty()) {

@ -49,7 +49,9 @@ class BlockBasedFilterBlockBuilder : public FilterBlockBuilder {
return start_.empty() && filter_offsets_.empty();
}
virtual size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
virtual Slice Finish(
const BlockHandle& tmp, Status* status,
std::unique_ptr<const char[]>* filter_data = nullptr) override;
using FilterBlockBuilder::Finish;
private:

@ -1542,8 +1542,17 @@ void BlockBasedTableBuilder::WriteFilterBlock(
rep_->filter_builder->EstimateEntriesAdded();
Status s = Status::Incomplete();
while (ok() && s.IsIncomplete()) {
// filter_data is used to store the transferred filter data payload from
// FilterBlockBuilder and deallocate the payload by going out of scope.
// Otherwise, the payload will unnecessarily remain until
// BlockBasedTableBuilder is deallocated.
//
// See FilterBlockBuilder::Finish() for more on the difference in
// transferred filter data payload among different FilterBlockBuilder
// subtypes.
std::unique_ptr<const char[]> filter_data;
Slice filter_content =
rep_->filter_builder->Finish(filter_block_handle, &s);
rep_->filter_builder->Finish(filter_block_handle, &s, &filter_data);
assert(s.ok() || s.IsIncomplete());
rep_->props.filter_size += filter_content.size();
WriteRawBlock(filter_content, kNoCompression, &filter_block_handle,

@ -73,7 +73,14 @@ class FilterBlockBuilder {
assert(dont_care_status.ok());
return ret;
}
virtual Slice Finish(const BlockHandle& tmp, Status* status) = 0;
// If filter_data is not nullptr, Finish() may transfer ownership of
// underlying filter data to the caller, so that it can be freed as soon as
// possible.
virtual Slice Finish(
const BlockHandle& tmp /* only used in PartitionedFilterBlock as
last_partition_block_handle */
,
Status* status, std::unique_ptr<const char[]>* filter_data = nullptr) = 0;
};
// A FilterBlockReader is used to parse filter from SST table.

@ -101,14 +101,17 @@ void FullFilterBlockBuilder::Reset() {
last_prefix_recorded_ = false;
}
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
Status* status) {
Slice FullFilterBlockBuilder::Finish(
const BlockHandle& /*tmp*/, Status* status,
std::unique_ptr<const char[]>* filter_data) {
Reset();
// In this impl we ignore BlockHandle
*status = Status::OK();
if (any_added_) {
any_added_ = false;
return filter_bits_builder_->Finish(&filter_data_);
Slice filter_content =
filter_bits_builder_->Finish(filter_data ? filter_data : &filter_data_);
return filter_content;
}
return Slice();
}

@ -54,7 +54,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
virtual void Add(const Slice& key_without_ts) override;
virtual bool IsEmpty() const override { return !any_added_; }
virtual size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
virtual Slice Finish(
const BlockHandle& tmp, Status* status,
std::unique_ptr<const char[]>* filter_data = nullptr) override;
using FilterBlockBuilder::Finish;
protected:

@ -73,7 +73,6 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
if (!p_index_builder_->ShouldCutFilterBlock()) {
return;
}
filter_gc.push_back(std::unique_ptr<const char[]>(nullptr));
// Add the prefix of the next key before finishing the partition without
// updating last_prefix_str_. This hack, fixes a bug with format_verison=3
@ -88,9 +87,10 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
}
total_added_in_built_ += filter_bits_builder_->EstimateEntriesAdded();
Slice filter = filter_bits_builder_->Finish(&filter_gc.back());
std::unique_ptr<const char[]> filter_data;
Slice filter = filter_bits_builder_->Finish(&filter_data);
std::string& index_key = p_index_builder_->GetPartitionKey();
filters.push_back({index_key, filter});
filters.push_back({index_key, filter, std::move(filter_data)});
keys_added_to_partition_ = 0;
Reset();
}
@ -110,10 +110,10 @@ size_t PartitionedFilterBlockBuilder::EstimateEntriesAdded() {
}
Slice PartitionedFilterBlockBuilder::Finish(
const BlockHandle& last_partition_block_handle, Status* status) {
const BlockHandle& last_partition_block_handle, Status* status,
std::unique_ptr<const char[]>* filter_data) {
if (finishing_filters == true) {
// Record the handle of the last written filter block in the index
FilterEntry& last_entry = filters.front();
std::string handle_encoding;
last_partition_block_handle.EncodeTo(&handle_encoding);
std::string handle_delta_encoding;
@ -122,14 +122,13 @@ Slice PartitionedFilterBlockBuilder::Finish(
last_partition_block_handle.size() - last_encoded_handle_.size());
last_encoded_handle_ = last_partition_block_handle;
const Slice handle_delta_encoding_slice(handle_delta_encoding);
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding,
index_on_filter_block_builder_.Add(last_filter_entry_key, handle_encoding,
&handle_delta_encoding_slice);
if (!p_index_builder_->seperator_is_key_plus_seq()) {
index_on_filter_block_builder_without_seq_.Add(
ExtractUserKey(last_entry.key), handle_encoding,
ExtractUserKey(last_filter_entry_key), handle_encoding,
&handle_delta_encoding_slice);
}
filters.pop_front();
} else {
MaybeCutAFilterBlock(nullptr);
}
@ -137,6 +136,7 @@ Slice PartitionedFilterBlockBuilder::Finish(
// partitions
if (UNLIKELY(filters.empty())) {
*status = Status::OK();
last_filter_data.reset();
if (finishing_filters) {
// Simplest to just add them all at the end
total_added_in_built_ = 0;
@ -154,7 +154,15 @@ Slice PartitionedFilterBlockBuilder::Finish(
// indicate we expect more calls to Finish
*status = Status::Incomplete();
finishing_filters = true;
return filters.front().filter;
last_filter_entry_key = filters.front().key;
Slice filter = filters.front().filter;
last_filter_data = std::move(filters.front().filter_data);
if (filter_data != nullptr) {
*filter_data = std::move(last_filter_data);
}
filters.pop_front();
return filter;
}
}

@ -5,6 +5,7 @@
#pragma once
#include <deque>
#include <list>
#include <string>
#include <unordered_map>
@ -36,8 +37,9 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
void Add(const Slice& key) override;
size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& last_partition_block_handle,
Status* status) override;
virtual Slice Finish(
const BlockHandle& last_partition_block_handle, Status* status,
std::unique_ptr<const char[]>* filter_data = nullptr) override;
private:
// Filter data
@ -47,10 +49,13 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
struct FilterEntry {
std::string key;
Slice filter;
std::unique_ptr<const char[]> filter_data;
};
std::list<FilterEntry> filters; // list of partitioned indexes and their keys
std::deque<FilterEntry> filters; // list of partitioned filters and keys used
// in building the index
std::string last_filter_entry_key;
std::unique_ptr<const char[]> last_filter_data;
std::unique_ptr<IndexBuilder> value;
std::vector<std::unique_ptr<const char[]>> filter_gc;
bool finishing_filters =
false; // true if Finish is called once but not complete yet.
// The policy of when cut a filter block and Finish it

@ -136,8 +136,9 @@ class PartitionedFilterBlockTest
BlockHandle bh;
Status status;
Slice slice;
std::unique_ptr<const char[]> filter_data;
do {
slice = builder->Finish(bh, &status);
slice = builder->Finish(bh, &status, &filter_data);
bh = Write(slice);
} while (status.IsIncomplete());

Loading…
Cancel
Save