Fix memtable-only iterator regression (#10705)

Summary:
When there is a single memtable without range tombstones and no SST files in the database, DBIter should wrap the memtable iterator directly. Currently we create a merging iterator on top of the memtable iterator and have DBIter wrap around it. This causes an iterator performance regression, and this PR fixes the issue.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10705

Test Plan:
- `make check`
- Performance:
  - Set up: `./db_bench -benchmarks=filluniquerandom -write_buffer_size=$((1 << 30)) -num=10000`
  - Benchmark: `./db_bench -benchmarks=seekrandom -use_existing_db=true -avoid_flush_during_recovery=true -write_buffer_size=$((1 << 30)) -num=10000 -threads=16 -duration=60 -seek_nexts=$seek_nexts`
```
seek_nexts    main op/sec    PR #10705 op/sec    RocksDB v7.6 op/sec
0             5746568        5749033     5786180
30            2411690        3006466     2837699
1000          102556         128902      124667
```

Reviewed By: ajkr

Differential Revision: D39644221

Pulled By: cbi42

fbshipit-source-id: 8063ff611ba31b0e5670041da3927c8c54b2097d
main
Changyu Bi 2 years ago committed by Facebook GitHub Bot
parent 9e01de9066
commit 749b849a34
  1. 68
      db/arena_wrapped_db_iter.cc
  2. 2
      db/arena_wrapped_db_iter.h
  3. 3
      db/column_family.cc
  4. 28
      db/db_impl/db_impl.cc
  5. 1
      db/db_range_del_test.cc
  6. 54
      db/memtable_list.cc
  7. 6
      db/memtable_list.h
  8. 67
      db/version_set.cc
  9. 41
      table/merging_iterator.cc
  10. 26
      table/merging_iterator.h

@ -58,48 +58,60 @@ Status ArenaWrappedDBIter::Refresh() {
uint64_t cur_sv_number = cfd_->GetSuperVersionNumber(); uint64_t cur_sv_number = cfd_->GetSuperVersionNumber();
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1"); TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1");
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2"); TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2");
while (true) { auto reinit_internal_iter = [&]() {
if (sv_number_ != cur_sv_number) { Env* env = db_iter_->env();
Env* env = db_iter_->env(); db_iter_->~DBIter();
db_iter_->~DBIter(); arena_.~Arena();
arena_.~Arena(); new (&arena_) Arena();
new (&arena_) Arena();
SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_); SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_);
SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
if (read_callback_) { if (read_callback_) {
read_callback_->Refresh(latest_seq); read_callback_->Refresh(latest_seq);
} }
Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options, Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options,
sv->current, latest_seq, sv->current, latest_seq,
sv->mutable_cf_options.max_sequential_skip_in_iterations, sv->mutable_cf_options.max_sequential_skip_in_iterations,
cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_, cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_,
allow_refresh_); allow_refresh_);
InternalIterator* internal_iter = db_impl_->NewInternalIterator( InternalIterator* internal_iter = db_impl_->NewInternalIterator(
read_options_, cfd_, sv, &arena_, latest_seq, read_options_, cfd_, sv, &arena_, latest_seq,
/* allow_unprepared_value */ true, /* db_iter */ this); /* allow_unprepared_value */ true, /* db_iter */ this);
SetIterUnderDBIter(internal_iter); SetIterUnderDBIter(internal_iter);
};
while (true) {
if (sv_number_ != cur_sv_number) {
reinit_internal_iter();
break; break;
} else { } else {
SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
// Refresh range-tombstones in MemTable // Refresh range-tombstones in MemTable
if (!read_options_.ignore_range_deletions) { if (!read_options_.ignore_range_deletions) {
assert(memtable_range_tombstone_iter_ != nullptr); SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_);
if (memtable_range_tombstone_iter_ != nullptr) { auto t = sv->mem->NewRangeTombstoneIterator(
SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_); read_options_, latest_seq, false /* immutable_memtable */);
auto t = sv->mem->NewRangeTombstoneIterator( if (!t || t->empty()) {
read_options_, latest_seq, false /* immutable_memtable */); if (memtable_range_tombstone_iter_) {
delete *memtable_range_tombstone_iter_;
if (t == nullptr || t->empty()) {
*memtable_range_tombstone_iter_ = nullptr; *memtable_range_tombstone_iter_ = nullptr;
}
delete t;
} else { // current mutable memtable has range tombstones
if (!memtable_range_tombstone_iter_) {
delete t;
cfd_->ReturnThreadLocalSuperVersion(sv);
// The memtable under DBIter did not have range tombstone before
// refresh.
reinit_internal_iter();
break;
} else { } else {
delete *memtable_range_tombstone_iter_;
*memtable_range_tombstone_iter_ = new TruncatedRangeDelIterator( *memtable_range_tombstone_iter_ = new TruncatedRangeDelIterator(
std::unique_ptr<FragmentedRangeTombstoneIterator>(t), std::unique_ptr<FragmentedRangeTombstoneIterator>(t),
&cfd_->internal_comparator(), nullptr, nullptr); &cfd_->internal_comparator(), nullptr, nullptr);
} }
cfd_->ReturnThreadLocalSuperVersion(sv);
} }
cfd_->ReturnThreadLocalSuperVersion(sv);
} }
// Refresh latest sequence number // Refresh latest sequence number
db_iter_->set_sequence(latest_seq); db_iter_->set_sequence(latest_seq);

@ -107,6 +107,8 @@ class ArenaWrappedDBIter : public Iterator {
ReadCallback* read_callback_; ReadCallback* read_callback_;
bool expose_blob_index_ = false; bool expose_blob_index_ = false;
bool allow_refresh_ = true; bool allow_refresh_ = true;
// If this is nullptr, it means the mutable memtable does not contain range
// tombstone when added under this DBIter.
TruncatedRangeDelIterator** memtable_range_tombstone_iter_ = nullptr; TruncatedRangeDelIterator** memtable_range_tombstone_iter_ = nullptr;
}; };

@ -1144,7 +1144,8 @@ Status ColumnFamilyData::RangesOverlapWithMemtables(
MergeIteratorBuilder merge_iter_builder(&internal_comparator_, &arena); MergeIteratorBuilder merge_iter_builder(&internal_comparator_, &arena);
merge_iter_builder.AddIterator( merge_iter_builder.AddIterator(
super_version->mem->NewIterator(read_opts, &arena)); super_version->mem->NewIterator(read_opts, &arena));
super_version->imm->AddIterators(read_opts, &merge_iter_builder); super_version->imm->AddIterators(read_opts, &merge_iter_builder,
false /* add_range_tombstone_iter */);
ScopedArenaIterator memtable_iter(merge_iter_builder.Finish()); ScopedArenaIterator memtable_iter(merge_iter_builder.Finish());
auto read_seq = super_version->current->version_set()->LastSequence(); auto read_seq = super_version->current->version_set()->LastSequence();

@ -1803,31 +1803,31 @@ InternalIterator* DBImpl::NewInternalIterator(
&cfd->internal_comparator(), arena, &cfd->internal_comparator(), arena,
!read_options.total_order_seek && !read_options.total_order_seek &&
super_version->mutable_cf_options.prefix_extractor != nullptr); super_version->mutable_cf_options.prefix_extractor != nullptr);
// Collect iterator for mutable mem // Collect iterator for mutable memtable
merge_iter_builder.AddIterator( auto mem_iter = super_version->mem->NewIterator(read_options, arena);
super_version->mem->NewIterator(read_options, arena));
Status s; Status s;
if (!read_options.ignore_range_deletions) { if (!read_options.ignore_range_deletions) {
TruncatedRangeDelIterator* mem_tombstone_iter = nullptr;
auto range_del_iter = super_version->mem->NewRangeTombstoneIterator( auto range_del_iter = super_version->mem->NewRangeTombstoneIterator(
read_options, sequence, false /* immutable_memtable */); read_options, sequence, false /* immutable_memtable */);
if (range_del_iter == nullptr || range_del_iter->empty()) { if (range_del_iter == nullptr || range_del_iter->empty()) {
delete range_del_iter; delete range_del_iter;
merge_iter_builder.AddRangeTombstoneIterator(nullptr);
} else { } else {
merge_iter_builder.AddRangeTombstoneIterator( mem_tombstone_iter = new TruncatedRangeDelIterator(
new TruncatedRangeDelIterator( std::unique_ptr<FragmentedRangeTombstoneIterator>(range_del_iter),
std::unique_ptr<FragmentedRangeTombstoneIterator>(range_del_iter), &cfd->ioptions()->internal_comparator, nullptr /* smallest */,
&cfd->ioptions()->internal_comparator, nullptr /* smallest */, nullptr /* largest */);
nullptr /* largest */));
} }
merge_iter_builder.AddPointAndTombstoneIterator(mem_iter,
mem_tombstone_iter);
} else {
merge_iter_builder.AddIterator(mem_iter);
} }
// Collect all needed child iterators for immutable memtables // Collect all needed child iterators for immutable memtables
if (s.ok()) { if (s.ok()) {
super_version->imm->AddIterators(read_options, &merge_iter_builder); super_version->imm->AddIterators(read_options, &merge_iter_builder,
if (!read_options.ignore_range_deletions) { !read_options.ignore_range_deletions);
s = super_version->imm->AddRangeTombstoneIterators(read_options, arena,
merge_iter_builder);
}
} }
TEST_SYNC_POINT_CALLBACK("DBImpl::NewInternalIterator:StatusCallback", &s); TEST_SYNC_POINT_CALLBACK("DBImpl::NewInternalIterator:StatusCallback", &s);
if (s.ok()) { if (s.ok()) {

@ -2303,7 +2303,6 @@ TEST_F(DBRangeDelTest, TombstoneOnlyLevel) {
InternalIterator* level_iter = sv->current->TEST_GetLevelIterator( InternalIterator* level_iter = sv->current->TEST_GetLevelIterator(
read_options, &merge_iter_builder, 1 /* level */, true); read_options, &merge_iter_builder, 1 /* level */, true);
// This is needed to make LevelIterator range tombstone aware // This is needed to make LevelIterator range tombstone aware
merge_iter_builder.AddIterator(level_iter);
auto miter = merge_iter_builder.Finish(); auto miter = merge_iter_builder.Finish();
auto k = Key(3); auto k = Key(3);
IterKey target; IterKey target;

@ -210,30 +210,6 @@ Status MemTableListVersion::AddRangeTombstoneIterators(
return Status::OK(); return Status::OK();
} }
Status MemTableListVersion::AddRangeTombstoneIterators(
const ReadOptions& read_opts, Arena* /*arena*/,
MergeIteratorBuilder& builder) {
// Except for snapshot read, using kMaxSequenceNumber is OK because these
// are immutable memtables.
SequenceNumber read_seq = read_opts.snapshot != nullptr
? read_opts.snapshot->GetSequenceNumber()
: kMaxSequenceNumber;
for (auto& m : memlist_) {
auto range_del_iter = m->NewRangeTombstoneIterator(
read_opts, read_seq, true /* immutale_memtable */);
if (range_del_iter == nullptr || range_del_iter->empty()) {
delete range_del_iter;
builder.AddRangeTombstoneIterator(nullptr);
} else {
builder.AddRangeTombstoneIterator(new TruncatedRangeDelIterator(
std::unique_ptr<FragmentedRangeTombstoneIterator>(range_del_iter),
&m->GetInternalKeyComparator(), nullptr /* smallest */,
nullptr /* largest */));
}
}
return Status::OK();
}
void MemTableListVersion::AddIterators( void MemTableListVersion::AddIterators(
const ReadOptions& options, std::vector<InternalIterator*>* iterator_list, const ReadOptions& options, std::vector<InternalIterator*>* iterator_list,
Arena* arena) { Arena* arena) {
@ -242,11 +218,33 @@ void MemTableListVersion::AddIterators(
} }
} }
void MemTableListVersion::AddIterators( void MemTableListVersion::AddIterators(const ReadOptions& options,
const ReadOptions& options, MergeIteratorBuilder* merge_iter_builder) { MergeIteratorBuilder* merge_iter_builder,
bool add_range_tombstone_iter) {
for (auto& m : memlist_) { for (auto& m : memlist_) {
merge_iter_builder->AddIterator( auto mem_iter = m->NewIterator(options, merge_iter_builder->GetArena());
m->NewIterator(options, merge_iter_builder->GetArena())); if (!add_range_tombstone_iter || options.ignore_range_deletions) {
merge_iter_builder->AddIterator(mem_iter);
} else {
// Except for snapshot read, using kMaxSequenceNumber is OK because these
// are immutable memtables.
SequenceNumber read_seq = options.snapshot != nullptr
? options.snapshot->GetSequenceNumber()
: kMaxSequenceNumber;
TruncatedRangeDelIterator* mem_tombstone_iter = nullptr;
auto range_del_iter = m->NewRangeTombstoneIterator(
options, read_seq, true /* immutale_memtable */);
if (range_del_iter == nullptr || range_del_iter->empty()) {
delete range_del_iter;
} else {
mem_tombstone_iter = new TruncatedRangeDelIterator(
std::unique_ptr<FragmentedRangeTombstoneIterator>(range_del_iter),
&m->GetInternalKeyComparator(), nullptr /* smallest */,
nullptr /* largest */);
}
merge_iter_builder->AddPointAndTombstoneIterator(mem_iter,
mem_tombstone_iter);
}
} }
} }

@ -111,15 +111,13 @@ class MemTableListVersion {
Status AddRangeTombstoneIterators(const ReadOptions& read_opts, Arena* arena, Status AddRangeTombstoneIterators(const ReadOptions& read_opts, Arena* arena,
RangeDelAggregator* range_del_agg); RangeDelAggregator* range_del_agg);
Status AddRangeTombstoneIterators(const ReadOptions& read_opts, Arena* arena,
MergeIteratorBuilder& builder);
void AddIterators(const ReadOptions& options, void AddIterators(const ReadOptions& options,
std::vector<InternalIterator*>* iterator_list, std::vector<InternalIterator*>* iterator_list,
Arena* arena); Arena* arena);
void AddIterators(const ReadOptions& options, void AddIterators(const ReadOptions& options,
MergeIteratorBuilder* merge_iter_builder); MergeIteratorBuilder* merge_iter_builder,
bool add_range_tombstone_iter);
uint64_t GetTotalNumEntries() const; uint64_t GetTotalNumEntries() const;

@ -941,18 +941,18 @@ namespace {
class LevelIterator final : public InternalIterator { class LevelIterator final : public InternalIterator {
public: public:
// @param read_options Must outlive this iterator. // @param read_options Must outlive this iterator.
LevelIterator(TableCache* table_cache, const ReadOptions& read_options, LevelIterator(
const FileOptions& file_options, TableCache* table_cache, const ReadOptions& read_options,
const InternalKeyComparator& icomparator, const FileOptions& file_options, const InternalKeyComparator& icomparator,
const LevelFilesBrief* flevel, const LevelFilesBrief* flevel,
const std::shared_ptr<const SliceTransform>& prefix_extractor, const std::shared_ptr<const SliceTransform>& prefix_extractor,
bool should_sample, HistogramImpl* file_read_hist, bool should_sample, HistogramImpl* file_read_hist,
TableReaderCaller caller, bool skip_filters, int level, TableReaderCaller caller, bool skip_filters, int level,
RangeDelAggregator* range_del_agg, RangeDelAggregator* range_del_agg,
const std::vector<AtomicCompactionUnitBoundary>* const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
compaction_boundaries = nullptr, nullptr,
bool allow_unprepared_value = false, bool allow_unprepared_value = false,
MergeIteratorBuilder* merge_iter_builder = nullptr) TruncatedRangeDelIterator**** range_tombstone_iter_ptr_ = nullptr)
: table_cache_(table_cache), : table_cache_(table_cache),
read_options_(read_options), read_options_(read_options),
file_options_(file_options), file_options_(file_options),
@ -975,10 +975,8 @@ class LevelIterator final : public InternalIterator {
to_return_sentinel_(false) { to_return_sentinel_(false) {
// Empty level is not supported. // Empty level is not supported.
assert(flevel_ != nullptr && flevel_->num_files > 0); assert(flevel_ != nullptr && flevel_->num_files > 0);
if (merge_iter_builder && !read_options.ignore_range_deletions) { if (range_tombstone_iter_ptr_) {
// lazily initialize range_tombstone_iter_ together with file_iter_ *range_tombstone_iter_ptr_ = &range_tombstone_iter_;
merge_iter_builder->AddRangeTombstoneIterator(nullptr,
&range_tombstone_iter_);
} }
} }
@ -1840,14 +1838,22 @@ InternalIterator* Version::TEST_GetLevelIterator(
int level, bool allow_unprepared_value) { int level, bool allow_unprepared_value) {
auto* arena = merge_iter_builder->GetArena(); auto* arena = merge_iter_builder->GetArena();
auto* mem = arena->AllocateAligned(sizeof(LevelIterator)); auto* mem = arena->AllocateAligned(sizeof(LevelIterator));
return new (mem) LevelIterator( TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr;
auto level_iter = new (mem) LevelIterator(
cfd_->table_cache(), read_options, file_options_, cfd_->table_cache(), read_options, file_options_,
cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level), cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
mutable_cf_options_.prefix_extractor, should_sample_file_read(), mutable_cf_options_.prefix_extractor, should_sample_file_read(),
cfd_->internal_stats()->GetFileReadHist(level), cfd_->internal_stats()->GetFileReadHist(level),
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level, TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
nullptr /* range_del_agg */, nullptr /* compaction_boundaries */, nullptr /* range_del_agg */, nullptr /* compaction_boundaries */,
allow_unprepared_value, merge_iter_builder); allow_unprepared_value, &tombstone_iter_ptr);
if (read_options.ignore_range_deletions) {
merge_iter_builder->AddIterator(level_iter);
} else {
merge_iter_builder->AddPointAndTombstoneIterator(
level_iter, nullptr /* tombstone_iter */, tombstone_iter_ptr);
}
return level_iter;
} }
uint64_t VersionStorageInfo::GetEstimatedActiveKeys() const { uint64_t VersionStorageInfo::GetEstimatedActiveKeys() const {
@ -1927,10 +1933,10 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
auto* arena = merge_iter_builder->GetArena(); auto* arena = merge_iter_builder->GetArena();
if (level == 0) { if (level == 0) {
// Merge all level zero files together since they may overlap // Merge all level zero files together since they may overlap
TruncatedRangeDelIterator* iter = nullptr; TruncatedRangeDelIterator* tombstone_iter = nullptr;
for (size_t i = 0; i < storage_info_.LevelFilesBrief(0).num_files; i++) { for (size_t i = 0; i < storage_info_.LevelFilesBrief(0).num_files; i++) {
const auto& file = storage_info_.LevelFilesBrief(0).files[i]; const auto& file = storage_info_.LevelFilesBrief(0).files[i];
merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator( auto table_iter = cfd_->table_cache()->NewIterator(
read_options, soptions, cfd_->internal_comparator(), read_options, soptions, cfd_->internal_comparator(),
*file.file_metadata, /*range_del_agg=*/nullptr, *file.file_metadata, /*range_del_agg=*/nullptr,
mutable_cf_options_.prefix_extractor, nullptr, mutable_cf_options_.prefix_extractor, nullptr,
@ -1938,9 +1944,13 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
TableReaderCaller::kUserIterator, arena, TableReaderCaller::kUserIterator, arena,
/*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_, /*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_,
/*smallest_compaction_key=*/nullptr, /*smallest_compaction_key=*/nullptr,
/*largest_compaction_key=*/nullptr, allow_unprepared_value, &iter)); /*largest_compaction_key=*/nullptr, allow_unprepared_value,
if (!read_options.ignore_range_deletions) { &tombstone_iter);
merge_iter_builder->AddRangeTombstoneIterator(iter); if (read_options.ignore_range_deletions) {
merge_iter_builder->AddIterator(table_iter);
} else {
merge_iter_builder->AddPointAndTombstoneIterator(table_iter,
tombstone_iter);
} }
} }
if (should_sample) { if (should_sample) {
@ -1957,14 +1967,21 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
// walks through the non-overlapping files in the level, opening them // walks through the non-overlapping files in the level, opening them
// lazily. // lazily.
auto* mem = arena->AllocateAligned(sizeof(LevelIterator)); auto* mem = arena->AllocateAligned(sizeof(LevelIterator));
merge_iter_builder->AddIterator(new (mem) LevelIterator( TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr;
auto level_iter = new (mem) LevelIterator(
cfd_->table_cache(), read_options, soptions, cfd_->table_cache(), read_options, soptions,
cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level), cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level),
mutable_cf_options_.prefix_extractor, should_sample_file_read(), mutable_cf_options_.prefix_extractor, should_sample_file_read(),
cfd_->internal_stats()->GetFileReadHist(level), cfd_->internal_stats()->GetFileReadHist(level),
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level, TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
/*range_del_agg=*/nullptr, /*compaction_boundaries=*/nullptr, /*range_del_agg=*/nullptr, /*compaction_boundaries=*/nullptr,
allow_unprepared_value, merge_iter_builder)); allow_unprepared_value, &tombstone_iter_ptr);
if (read_options.ignore_range_deletions) {
merge_iter_builder->AddIterator(level_iter);
} else {
merge_iter_builder->AddPointAndTombstoneIterator(
level_iter, nullptr /* tombstone_iter */, tombstone_iter_ptr);
}
} }
} }

@ -1295,22 +1295,40 @@ void MergeIteratorBuilder::AddIterator(InternalIterator* iter) {
} }
} }
void MergeIteratorBuilder::AddRangeTombstoneIterator( void MergeIteratorBuilder::AddPointAndTombstoneIterator(
TruncatedRangeDelIterator* iter, TruncatedRangeDelIterator*** iter_ptr) { InternalIterator* point_iter, TruncatedRangeDelIterator* tombstone_iter,
if (!use_merging_iter) { TruncatedRangeDelIterator*** tombstone_iter_ptr) {
// tombstone_iter_ptr != nullptr means point_iter is a LevelIterator.
bool add_range_tombstone = tombstone_iter ||
!merge_iter->range_tombstone_iters_.empty() ||
tombstone_iter_ptr;
if (!use_merging_iter && (add_range_tombstone || first_iter)) {
use_merging_iter = true; use_merging_iter = true;
if (first_iter) { if (first_iter) {
merge_iter->AddIterator(first_iter); merge_iter->AddIterator(first_iter);
first_iter = nullptr; first_iter = nullptr;
} }
} }
merge_iter->AddRangeTombstoneIterator(iter); if (use_merging_iter) {
if (iter_ptr) { merge_iter->AddIterator(point_iter);
// This is needed instead of setting to &range_tombstone_iters_[i] directly if (add_range_tombstone) {
// here since the memory address of range_tombstone_iters_[i] might change // If there was a gap, fill in nullptr as empty range tombstone iterators.
// during vector resizing. while (merge_iter->range_tombstone_iters_.size() <
range_del_iter_ptrs_.emplace_back( merge_iter->children_.size() - 1) {
merge_iter->range_tombstone_iters_.size() - 1, iter_ptr); merge_iter->AddRangeTombstoneIterator(nullptr);
}
merge_iter->AddRangeTombstoneIterator(tombstone_iter);
}
if (tombstone_iter_ptr) {
// This is needed instead of setting to &range_tombstone_iters_[i]
// directly here since the memory address of range_tombstone_iters_[i]
// might change during vector resizing.
range_del_iter_ptrs_.emplace_back(
merge_iter->range_tombstone_iters_.size() - 1, tombstone_iter_ptr);
}
} else {
first_iter = point_iter;
} }
} }
@ -1323,8 +1341,7 @@ InternalIterator* MergeIteratorBuilder::Finish(ArenaWrappedDBIter* db_iter) {
for (auto& p : range_del_iter_ptrs_) { for (auto& p : range_del_iter_ptrs_) {
*(p.second) = &(merge_iter->range_tombstone_iters_[p.first]); *(p.second) = &(merge_iter->range_tombstone_iters_[p.first]);
} }
if (db_iter) { if (db_iter && !merge_iter->range_tombstone_iters_.empty()) {
assert(!merge_iter->range_tombstone_iters_.empty());
// memtable is always the first level // memtable is always the first level
db_iter->SetMemtableRangetombstoneIter( db_iter->SetMemtableRangetombstoneIter(
&merge_iter->range_tombstone_iters_.front()); &merge_iter->range_tombstone_iters_.front());

@ -38,6 +38,8 @@ extern InternalIterator* NewMergingIterator(
class MergingIterator; class MergingIterator;
// A builder class to build a merging iterator by adding iterators one by one. // A builder class to build a merging iterator by adding iterators one by one.
// User should call only one of AddIterator() or AddPointAndTombstoneIterator()
// exclusively for the same builder.
class MergeIteratorBuilder { class MergeIteratorBuilder {
public: public:
// comparator: the comparator used in merging comparator // comparator: the comparator used in merging comparator
@ -49,16 +51,20 @@ class MergeIteratorBuilder {
// Add iter to the merging iterator. // Add iter to the merging iterator.
void AddIterator(InternalIterator* iter); void AddIterator(InternalIterator* iter);
// Add a range tombstone iterator to underlying merge iterator. // Add a point key iterator and a range tombstone iterator.
// See MergingIterator::AddRangeTombstoneIterator() for more detail. // `tombstone_iter_ptr` should and only be set by LevelIterator.
// // *tombstone_iter_ptr will be set to where the merging iterator stores
// If `iter_ptr` is not nullptr, *iter_ptr will be set to where the merging // `tombstone_iter` when MergeIteratorBuilder::Finish() is called. This is
// iterator stores `iter` when MergeIteratorBuilder::Finish() is called. This // used by LevelIterator to update range tombstone iters when switching to a
// is used by level iterator to update range tombstone iters when switching to // different SST file. If a single point iterator with a nullptr range
// a different SST file. // tombstone iterator is provided, and the point iterator is not a level
void AddRangeTombstoneIterator( // iterator, then this builder will return the point iterator directly,
TruncatedRangeDelIterator* iter, // instead of creating a merging iterator on top of it. Internally, if all
TruncatedRangeDelIterator*** iter_ptr = nullptr); // point iterators are not LevelIterator, then range tombstone iterator is
// only added to the merging iter if there is a non-null `tombstone_iter`.
void AddPointAndTombstoneIterator(
InternalIterator* point_iter, TruncatedRangeDelIterator* tombstone_iter,
TruncatedRangeDelIterator*** tombstone_iter_ptr = nullptr);
// Get arena used to build the merging iterator. It is called one a child // Get arena used to build the merging iterator. It is called one a child
// iterator needs to be allocated. // iterator needs to be allocated.

Loading…
Cancel
Save