diff --git a/HISTORY.md b/HISTORY.md index f24b33d15..d9e08b413 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -16,6 +16,7 @@ * Fixed a bug that DisableManualCompaction may assert when disable an unscheduled manual compaction. * Fixed a potential timer crash when open close DB concurrently. * Fixed a race condition for `alive_log_files_` in non-two-write-queues mode. The race is between the write_thread_ in WriteToWAL() and another thread executing `FindObsoleteFiles()`. The race condition will be caught if `__glibcxx_requires_nonempty` is enabled. +* Fixed a bug that `Iterator::Refresh()` reads stale keys after DeleteRange() performed. ### Public API changes * Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect. diff --git a/db/arena_wrapped_db_iter.cc b/db/arena_wrapped_db_iter.cc index 520588afe..20d4655be 100644 --- a/db/arena_wrapped_db_iter.cc +++ b/db/arena_wrapped_db_iter.cc @@ -58,30 +58,55 @@ Status ArenaWrappedDBIter::Refresh() { uint64_t cur_sv_number = cfd_->GetSuperVersionNumber(); TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1"); TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2"); - if (sv_number_ != cur_sv_number) { - Env* env = db_iter_->env(); - db_iter_->~DBIter(); - arena_.~Arena(); - new (&arena_) Arena(); + while (true) { + if (sv_number_ != cur_sv_number) { + Env* env = db_iter_->env(); + db_iter_->~DBIter(); + arena_.~Arena(); + new (&arena_) Arena(); - SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_); - SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); - if (read_callback_) { - read_callback_->Refresh(latest_seq); - } - Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options, - sv->current, latest_seq, - sv->mutable_cf_options.max_sequential_skip_in_iterations, - cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_, - allow_refresh_); + SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_); + SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); + if (read_callback_) { + read_callback_->Refresh(latest_seq); + } + Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options, + sv->current, latest_seq, + sv->mutable_cf_options.max_sequential_skip_in_iterations, + cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_, + allow_refresh_); - InternalIterator* internal_iter = db_impl_->NewInternalIterator( - read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(), - latest_seq, /* allow_unprepared_value */ true); - SetIterUnderDBIter(internal_iter); - } else { - db_iter_->set_sequence(db_impl_->GetLatestSequenceNumber()); - db_iter_->set_valid(false); + InternalIterator* internal_iter = db_impl_->NewInternalIterator( + read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(), + latest_seq, /* allow_unprepared_value */ true); + SetIterUnderDBIter(internal_iter); + break; + } else { + SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); + // Refresh range-tombstones in MemTable + if (!read_options_.ignore_range_deletions) { + SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_); + ReadRangeDelAggregator* range_del_agg = + db_iter_->GetRangeDelAggregator(); + std::unique_ptr range_del_iter; + range_del_iter.reset( + sv->mem->NewRangeTombstoneIterator(read_options_, latest_seq)); + range_del_agg->AddTombstones(std::move(range_del_iter)); + cfd_->ReturnThreadLocalSuperVersion(sv); + } + // Refresh latest sequence number + db_iter_->set_sequence(latest_seq); + db_iter_->set_valid(false); + // Check again if the latest super version number is changed + uint64_t latest_sv_number = cfd_->GetSuperVersionNumber(); + if (latest_sv_number != cur_sv_number) { + // If the super version number is changed after refreshing, + // fallback to Re-Init the InternalIterator + cur_sv_number = latest_sv_number; + continue; + } + break; + } } return Status::OK(); } diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index 683b701f0..845114339 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -1724,6 +1724,34 @@ TEST_F(DBRangeDelTest, OverlappedKeys) { ASSERT_EQ(0, NumTableFilesAtLevel(1)); } +TEST_F(DBRangeDelTest, IteratorRefresh) { + // Refreshing an iterator after a range tombstone is added should cause the + // deleted range of keys to disappear. + for (bool sv_changed : {false, true}) { + ASSERT_OK(db_->Put(WriteOptions(), "key1", "value1")); + ASSERT_OK(db_->Put(WriteOptions(), "key2", "value2")); + + auto* iter = db_->NewIterator(ReadOptions()); + ASSERT_OK(iter->status()); + + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + "key2", "key3")); + + if (sv_changed) { + ASSERT_OK(db_->Flush(FlushOptions())); + } + + ASSERT_OK(iter->Refresh()); + ASSERT_OK(iter->status()); + iter->SeekToFirst(); + ASSERT_EQ("key1", iter->key()); + iter->Next(); + ASSERT_FALSE(iter->Valid()); + + delete iter; + } +} + #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE