Lazily initialize RangeDelAggregator stripe map entries (#4497)

Summary:
When there are no range deletions, flush and compaction perform a binary search
on an effectively empty map every time they call ShouldDelete. This PR lazily
initializes each stripe map entry so that the binary search can be elided in
these cases.

After this PR, the total time spent in compactions is 52.541331s and the total time spent in flushes is 5.532608s; the compaction time is a significant improvement over the results after #4495.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4497

Differential Revision: D10428610

Pulled By: abhimadan

fbshipit-source-id: 6f7e1ce3698fac3ef86d1197955e6b72e0931a0f
main
Abhishek Madan 6 years ago committed by Facebook Github Bot
parent d6ec288703
commit 45f213b558
  1. 32
      db/db_range_del_test.cc
  2. 56
      db/range_del_aggregator.cc
  3. 10
      db/range_del_aggregator.h

@ -1357,6 +1357,38 @@ TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) {
db_->ReleaseSnapshot(snapshot); db_->ReleaseSnapshot(snapshot);
} }
// Checks that a key written before a snapshot is still returned by an
// iterator reading at that snapshot, even though a later DeleteRange covers
// the key and the data has been flushed.
TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) {
  const int kFileBytes = 1 << 20;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  // Write the key first, then take the snapshot, so the snapshot sees the
  // value but not the range tombstone issued afterwards.
  ASSERT_OK(Put(Key(0), "a"));
  const Snapshot* snapshot = db_->GetSnapshot();

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(10)));

  // A failed flush must fail the test here rather than let the checks below
  // run against an unflushed DB.
  ASSERT_OK(db_->Flush(FlushOptions()));

  ReadOptions read_opts;
  read_opts.snapshot = snapshot;
  auto* iter = db_->NewIterator(read_opts);

  // Exactly one entry, Key(0), is expected to be visible at the snapshot.
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(Key(0), iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  // !Valid() can mean either end-of-data or an error; make sure it is the
  // former.
  ASSERT_OK(iter->status());

  // The iterator is deleted before the snapshot it reads through is released.
  delete iter;
  db_->ReleaseSnapshot(snapshot);
}
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
} // namespace rocksdb } // namespace rocksdb

@ -441,11 +441,9 @@ RangeDelAggregator::RangeDelAggregator(const InternalKeyComparator& icmp,
void RangeDelAggregator::InitRep(const std::vector<SequenceNumber>& snapshots) { void RangeDelAggregator::InitRep(const std::vector<SequenceNumber>& snapshots) {
assert(rep_ == nullptr); assert(rep_ == nullptr);
rep_.reset(new Rep()); rep_.reset(new Rep());
for (auto snapshot : snapshots) { rep_->snapshots_ = snapshots;
rep_->stripe_map_.emplace(snapshot, NewRangeDelMap());
}
// Data newer than any snapshot falls in this catch-all stripe // Data newer than any snapshot falls in this catch-all stripe
rep_->stripe_map_.emplace(kMaxSequenceNumber, NewRangeDelMap()); rep_->snapshots_.emplace_back(kMaxSequenceNumber);
rep_->pinned_iters_mgr_.StartPinning(); rep_->pinned_iters_mgr_.StartPinning();
} }
@ -474,11 +472,11 @@ bool RangeDelAggregator::ShouldDeleteImpl(const ParsedInternalKey& parsed,
RangeDelPositioningMode mode) { RangeDelPositioningMode mode) {
assert(IsValueType(parsed.type)); assert(IsValueType(parsed.type));
assert(rep_ != nullptr); assert(rep_ != nullptr);
auto& tombstone_map = GetRangeDelMap(parsed.sequence); auto* tombstone_map = GetRangeDelMapIfExists(parsed.sequence);
if (tombstone_map.IsEmpty()) { if (tombstone_map == nullptr || tombstone_map->IsEmpty()) {
return false; return false;
} }
return tombstone_map.ShouldDelete(parsed, mode); return tombstone_map->ShouldDelete(parsed, mode);
} }
bool RangeDelAggregator::IsRangeOverlapped(const Slice& start, bool RangeDelAggregator::IsRangeOverlapped(const Slice& start,
@ -492,7 +490,7 @@ bool RangeDelAggregator::IsRangeOverlapped(const Slice& start,
ParsedInternalKey start_ikey(start, kMaxSequenceNumber, kMaxValue); ParsedInternalKey start_ikey(start, kMaxSequenceNumber, kMaxValue);
ParsedInternalKey end_ikey(end, 0, static_cast<ValueType>(0)); ParsedInternalKey end_ikey(end, 0, static_cast<ValueType>(0));
for (const auto& stripe : rep_->stripe_map_) { for (const auto& stripe : rep_->stripe_map_) {
if (stripe.second->IsRangeOverlapped(start_ikey, end_ikey)) { if (stripe.second.first->IsRangeOverlapped(start_ikey, end_ikey)) {
return true; return true;
} }
} }
@ -587,24 +585,42 @@ void RangeDelAggregator::InvalidateRangeDelMapPositions() {
return; return;
} }
for (auto& stripe : rep_->stripe_map_) { for (auto& stripe : rep_->stripe_map_) {
stripe.second->InvalidatePosition(); stripe.second.first->InvalidatePosition();
} }
} }
RangeDelMap& RangeDelAggregator::GetRangeDelMap(SequenceNumber seq) { RangeDelMap* RangeDelAggregator::GetRangeDelMapIfExists(SequenceNumber seq) {
assert(rep_ != nullptr); assert(rep_ != nullptr);
// The stripe includes seqnum for the snapshot above and excludes seqnum for // The stripe includes seqnum for the snapshot above and excludes seqnum for
// the snapshot below. // the snapshot below.
StripeMap::iterator iter; if (rep_->stripe_map_.empty()) {
if (seq > 0) { return nullptr;
// upper_bound() checks strict inequality so need to subtract one
iter = rep_->stripe_map_.upper_bound(seq - 1);
} else {
iter = rep_->stripe_map_.begin();
} }
StripeMap::iterator iter = rep_->stripe_map_.lower_bound(seq);
if (iter == rep_->stripe_map_.end()) {
return nullptr;
}
size_t snapshot_idx = iter->second.second;
if (snapshot_idx > 0 && seq <= rep_->snapshots_[snapshot_idx - 1]) {
return nullptr;
}
return iter->second.first.get();
}
RangeDelMap& RangeDelAggregator::GetRangeDelMap(SequenceNumber seq) {
assert(rep_ != nullptr);
// The stripe includes seqnum for the snapshot above and excludes seqnum for
// the snapshot below.
std::vector<SequenceNumber>::iterator iter =
std::lower_bound(rep_->snapshots_.begin(), rep_->snapshots_.end(), seq);
// catch-all stripe justifies this assertion in either of above cases // catch-all stripe justifies this assertion in either of above cases
assert(iter != rep_->stripe_map_.end()); assert(iter != rep_->snapshots_.end());
return *iter->second; if (rep_->stripe_map_.find(*iter) == rep_->stripe_map_.end()) {
rep_->stripe_map_.emplace(
*iter,
std::make_pair(NewRangeDelMap(), iter - rep_->snapshots_.begin()));
}
return *rep_->stripe_map_[*iter].first;
} }
bool RangeDelAggregator::IsEmpty() { bool RangeDelAggregator::IsEmpty() {
@ -612,7 +628,7 @@ bool RangeDelAggregator::IsEmpty() {
return true; return true;
} }
for (const auto& stripe : rep_->stripe_map_) { for (const auto& stripe : rep_->stripe_map_) {
if (!stripe.second->IsEmpty()) { if (!stripe.second.first->IsEmpty()) {
return false; return false;
} }
} }
@ -696,7 +712,7 @@ std::unique_ptr<RangeDelIterator> RangeDelAggregator::NewIterator() {
new MergingRangeDelIter(icmp_.user_comparator())); new MergingRangeDelIter(icmp_.user_comparator()));
if (rep_ != nullptr) { if (rep_ != nullptr) {
for (const auto& stripe : rep_->stripe_map_) { for (const auto& stripe : rep_->stripe_map_) {
iter->AddIterator(stripe.second->NewIterator()); iter->AddIterator(stripe.second.first->NewIterator());
} }
} }
return std::move(iter); return std::move(iter);

@ -200,10 +200,15 @@ class RangeDelAggregator {
private: private:
// Maps snapshot seqnum -> map of tombstones that fall in that stripe, i.e., // Maps snapshot seqnum -> map of tombstones that fall in that stripe, i.e.,
// their seqnums are greater than the next smaller snapshot's seqnum. // their seqnums are greater than the next smaller snapshot's seqnum, and the
typedef std::map<SequenceNumber, std::unique_ptr<RangeDelMap>> StripeMap; // corresponding index into the list of snapshots. Each entry is lazily
// initialized.
typedef std::map<SequenceNumber,
std::pair<std::unique_ptr<RangeDelMap>, size_t>>
StripeMap;
struct Rep { struct Rep {
std::vector<SequenceNumber> snapshots_;
StripeMap stripe_map_; StripeMap stripe_map_;
PinnedIteratorsManager pinned_iters_mgr_; PinnedIteratorsManager pinned_iters_mgr_;
std::list<std::string> pinned_slices_; std::list<std::string> pinned_slices_;
@ -215,6 +220,7 @@ class RangeDelAggregator {
void InitRep(const std::vector<SequenceNumber>& snapshots); void InitRep(const std::vector<SequenceNumber>& snapshots);
std::unique_ptr<RangeDelMap> NewRangeDelMap(); std::unique_ptr<RangeDelMap> NewRangeDelMap();
RangeDelMap* GetRangeDelMapIfExists(SequenceNumber seq);
RangeDelMap& GetRangeDelMap(SequenceNumber seq); RangeDelMap& GetRangeDelMap(SequenceNumber seq);
SequenceNumber upper_bound_; SequenceNumber upper_bound_;

Loading…
Cancel
Save