Snapshot release triggered compaction without multiple tombstones (#8357)

Summary:
This is a duplicate of https://github.com/facebook/rocksdb/issues/4948 by mzhaom to fix tests after rebase.

This change is a follow-up to https://github.com/facebook/rocksdb/issues/4927, which made this possible by allowing tombstone dropping/seqnum zeroing optimizations on the last key in the compaction. Now the `largest_seqno != 0` condition suffices to prevent snapshot release triggered compaction from entering an infinite loop.

The issues caused by the extraneous condition `level_and_file.second->num_deletions > 1` are:

- files could have `largest_seqno > 0` forever making it impossible to tell they cannot contain any covering keys
- it doesn't trigger compaction when there are many overwritten keys. Some MyRocks use case actually doesn't use Delete but instead calls Put with empty value to "delete" keys, so we'd like to be able to trigger compaction in this case too.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8357

Test Plan: - make check

Reviewed By: jay-zhuang

Differential Revision: D28855340

Pulled By: ajkr

fbshipit-source-id: a261b51eecafec492499e6d01e8e43112f801798
main
Andrew Kryczka 3 years ago committed by Facebook GitHub Bot
parent 799cf37cb1
commit 9167ece586
  1. 1
      HISTORY.md
  2. 3
      db/version_set.cc
  3. 12
      utilities/blob_db/blob_db_test.cc
  4. 18
      utilities/transactions/write_prepared_transaction_test.cc

@ -2,6 +2,7 @@
## Unreleased
### Behavior Changes
* Added API comments clarifying safe usage of Disable/EnableManualCompaction and EventListener callbacks for compaction.
* Obsolete keys in the bottommost level that were preserved for a snapshot will now be cleaned upon snapshot release in all cases. This form of compaction (snapshot release triggered compaction) previously had an artificial limitation that multiple tombstones needed to be present.
## 6.21.0 (2021-05-21)
### Bug Fixes

@ -3046,8 +3046,7 @@ void VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction() {
bottommost_files_mark_threshold_ = kMaxSequenceNumber;
for (auto& level_and_file : bottommost_files_) {
if (!level_and_file.second->being_compacted &&
level_and_file.second->fd.largest_seqno != 0 &&
level_and_file.second->num_deletions > 1) {
level_and_file.second->fd.largest_seqno != 0) {
// largest_seqno might be nonzero due to containing the final key in an
// earlier compaction, whose seqnum we didn't zero out. Multiple deletions
// ensures the file really contains deleted or overwritten keys.

@ -571,7 +571,6 @@ TEST_F(BlobDBTest, EnableDisableCompressionGC) {
Random rnd(301);
BlobDBOptions bdb_options;
bdb_options.min_blob_size = 0;
bdb_options.enable_garbage_collection = true;
bdb_options.garbage_collection_cutoff = 1.0;
bdb_options.disable_background_tasks = true;
bdb_options.compression = kSnappyCompression;
@ -600,6 +599,11 @@ TEST_F(BlobDBTest, EnableDisableCompressionGC) {
ASSERT_EQ(2, blob_files.size());
ASSERT_EQ(kNoCompression, blob_files[1]->GetCompressionType());
// Enable GC. If we do it earlier the snapshot release triggered compaction
// may compact files and trigger GC before we can verify there are two files.
bdb_options.enable_garbage_collection = true;
Reopen(bdb_options);
// Trigger compaction
ASSERT_OK(blob_db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
blob_db_impl()->TEST_DeleteObsoleteFiles();
@ -638,7 +642,6 @@ TEST_F(BlobDBTest, ChangeCompressionGC) {
Random rnd(301);
BlobDBOptions bdb_options;
bdb_options.min_blob_size = 0;
bdb_options.enable_garbage_collection = true;
bdb_options.garbage_collection_cutoff = 1.0;
bdb_options.disable_background_tasks = true;
bdb_options.compression = kLZ4Compression;
@ -668,6 +671,11 @@ TEST_F(BlobDBTest, ChangeCompressionGC) {
ASSERT_EQ(2, blob_files.size());
ASSERT_EQ(kSnappyCompression, blob_files[1]->GetCompressionType());
// Enable GC. If we do it earlier the snapshot release triggered compaction
// may compact files and trigger GC before we can verify there are two files.
bdb_options.enable_garbage_collection = true;
Reopen(bdb_options);
ASSERT_OK(blob_db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
VerifyDB(data);

@ -2587,6 +2587,7 @@ TEST_P(WritePreparedTransactionTest, ReleaseSnapshotDuringCompaction) {
const size_t snapshot_cache_bits = 7; // same as default
const size_t commit_cache_bits = 0; // minimum commit cache
UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
options.disable_auto_compactions = true;
ASSERT_OK(ReOpen());
ASSERT_OK(db->Put(WriteOptions(), "key1", "value1_1"));
@ -2606,7 +2607,13 @@ TEST_P(WritePreparedTransactionTest, ReleaseSnapshotDuringCompaction) {
VerifyKeys({{"key1", "value1_1"}}, snapshot2);
// Add a flush to avoid compaction to fallback to trivial move.
// The callback might be called twice, record the calling state to
// prevent double calling.
bool callback_finished = false;
auto callback = [&](void*) {
if (callback_finished) {
return;
}
// Release snapshot1 after CompactionIterator init.
// CompactionIterator need to figure out the earliest snapshot
// that can see key1:value1_2 is kMaxSequenceNumber, not
@ -2615,6 +2622,7 @@ TEST_P(WritePreparedTransactionTest, ReleaseSnapshotDuringCompaction) {
// Add some keys to advance max_evicted_seq.
ASSERT_OK(db->Put(WriteOptions(), "key3", "value3"));
ASSERT_OK(db->Put(WriteOptions(), "key4", "value4"));
callback_finished = true;
};
SyncPoint::GetInstance()->SetCallBack("CompactionIterator:AfterInit",
callback);
@ -2636,6 +2644,7 @@ TEST_P(WritePreparedTransactionTest, ReleaseSnapshotDuringCompaction2) {
const size_t snapshot_cache_bits = 7; // same as default
const size_t commit_cache_bits = 0; // minimum commit cache
UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
options.disable_auto_compactions = true;
ASSERT_OK(ReOpen());
ASSERT_OK(db->Put(WriteOptions(), "key1", "value1"));
@ -2686,6 +2695,7 @@ TEST_P(WritePreparedTransactionTest, ReleaseSnapshotDuringCompaction3) {
const size_t snapshot_cache_bits = 7; // same as default
const size_t commit_cache_bits = 1; // commit cache size = 2
UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
options.disable_auto_compactions = true;
ASSERT_OK(ReOpen());
// Add a dummy key to evict v2 commit cache, but keep v1 commit cache.
@ -2715,11 +2725,18 @@ TEST_P(WritePreparedTransactionTest, ReleaseSnapshotDuringCompaction3) {
add_dummy();
auto* s2 = db->GetSnapshot();
// The callback might be called twice, record the calling state to
// prevent double calling.
bool callback_finished = false;
auto callback = [&](void*) {
if (callback_finished) {
return;
}
db->ReleaseSnapshot(s1);
// Add some dummy entries to trigger s1 being cleanup from old_commit_map.
add_dummy();
add_dummy();
callback_finished = true;
};
SyncPoint::GetInstance()->SetCallBack("CompactionIterator:AfterInit",
callback);
@ -2737,6 +2754,7 @@ TEST_P(WritePreparedTransactionTest, ReleaseEarliestSnapshotDuringCompaction) {
const size_t snapshot_cache_bits = 7; // same as default
const size_t commit_cache_bits = 0; // minimum commit cache
UpdateTransactionDBOptions(snapshot_cache_bits, commit_cache_bits);
options.disable_auto_compactions = true;
ASSERT_OK(ReOpen());
ASSERT_OK(db->Put(WriteOptions(), "key1", "value1"));

Loading…
Cancel
Save