diff --git a/db/compaction_iteration_stats.h b/db/compaction_iteration_stats.h index 52a666e4e..ddb534622 100644 --- a/db/compaction_iteration_stats.h +++ b/db/compaction_iteration_stats.h @@ -16,6 +16,8 @@ struct CompactionIterationStats { int64_t num_record_drop_obsolete = 0; int64_t num_record_drop_range_del = 0; int64_t num_range_del_drop_obsolete = 0; + // Deletions obsoleted before bottom level due to file gap optimization. + int64_t num_optimized_del_drop_obsolete = 0; uint64_t total_filter_time = 0; // Input statistics diff --git a/db/compaction_iterator.cc b/db/compaction_iterator.cc index 08ae19734..547260292 100644 --- a/db/compaction_iterator.cc +++ b/db/compaction_iterator.cc @@ -111,6 +111,7 @@ void CompactionIterator::ResetRecordCounts() { iter_stats_.num_record_drop_obsolete = 0; iter_stats_.num_record_drop_range_del = 0; iter_stats_.num_range_del_drop_obsolete = 0; + iter_stats_.num_optimized_del_drop_obsolete = 0; } void CompactionIterator::SeekToFirst() { @@ -426,6 +427,9 @@ void CompactionIterator::NextFromInput() { // Can compact out this SingleDelete. ++iter_stats_.num_record_drop_obsolete; ++iter_stats_.num_single_del_fallthru; + if (!bottommost_level_) { + ++iter_stats_.num_optimized_del_drop_obsolete; + } } else { // Output SingleDelete valid_ = true; @@ -467,6 +471,9 @@ void CompactionIterator::NextFromInput() { // Note: Dropping this Delete will not affect TransactionDB // write-conflict checking since it is earlier than any snapshot. ++iter_stats_.num_record_drop_obsolete; + if (!bottommost_level_) { + ++iter_stats_.num_optimized_del_drop_obsolete; + } input_->Next(); } else if (ikey_.type == kTypeMerge) { if (!merge_helper_->HasOperator()) { diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 75f5ab6c8..1d023ca45 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -1014,6 +1014,10 @@ void CompactionJob::RecordDroppedKeys( RecordTick(stats_, COMPACTION_RANGE_DEL_DROP_OBSOLETE, c_iter_stats.num_range_del_drop_obsolete); } + if (c_iter_stats.num_optimized_del_drop_obsolete > 0) { + RecordTick(stats_, COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE, + c_iter_stats.num_optimized_del_drop_obsolete); + } } Status CompactionJob::FinishCompactionOutputFile( diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index bca188a98..ca77d5b93 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -2684,6 +2684,46 @@ TEST_P(DBCompactionTestWithParam, IntraL0CompactionDoesNotObsoleteDeletions) { ASSERT_TRUE(db_->Get(roptions, Key(0), &result).IsNotFound()); } +TEST_F(DBCompactionTest, OptimizedDeletionObsoleting) { + // Deletions can be dropped when compacted to non-last level if they fall + // outside the lower-level files' key-ranges. + const int kNumL0Files = 4; + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = kNumL0Files; + options.statistics = rocksdb::CreateDBStatistics(); + DestroyAndReopen(options); + + // put key 1 and 3 in separate L1, L2 files. + // So key 0, 2, and 4+ fall outside these levels' key-ranges. + for (int level = 2; level >= 1; --level) { + for (int i = 0; i < 2; ++i) { + Put(Key(2 * i + 1), "val"); + Flush(); + } + MoveFilesToLevel(level); + ASSERT_EQ(2, NumTableFilesAtLevel(level)); + } + + // Delete keys in range [1, 4]. These L0 files will be compacted with L1: + // - Tombstones for keys 2 and 4 can be dropped early. + // - Tombstones for keys 1 and 3 must be kept due to L2 files' key-ranges. + for (int i = 0; i < kNumL0Files; ++i) { + Put(Key(0), "val"); // sentinel to prevent trivial move + Delete(Key(i + 1)); + Flush(); + } + dbfull()->TEST_WaitForCompact(); + + for (int i = 0; i < kNumL0Files; ++i) { + std::string value; + ASSERT_TRUE(db_->Get(ReadOptions(), Key(i + 1), &value).IsNotFound()); + } + ASSERT_EQ(2, options.statistics->getTickerCount( + COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE)); + ASSERT_EQ(2, + options.statistics->getTickerCount(COMPACTION_KEY_DROP_OBSOLETE)); +} + INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam, ::testing::Values(std::make_tuple(1, true), std::make_tuple(1, false), diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index b4629358e..731ff7809 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -105,8 +105,9 @@ enum Tickers : uint32_t { COMPACTION_KEY_DROP_OBSOLETE, // The key is obsolete. COMPACTION_KEY_DROP_RANGE_DEL, // key was covered by a range tombstone. COMPACTION_KEY_DROP_USER, // user compaction function has dropped the key. - COMPACTION_RANGE_DEL_DROP_OBSOLETE, // all keys in range were deleted. + // Deletions obsoleted before bottom level due to file gap optimization. + COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE, // Number of keys written to the database via the Put and Write call's NUMBER_KEYS_WRITTEN, @@ -264,7 +265,9 @@ const std::vector> TickersNameMap = { {COMPACTION_KEY_DROP_RANGE_DEL, "rocksdb.compaction.key.drop.range_del"}, {COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user"}, {COMPACTION_RANGE_DEL_DROP_OBSOLETE, - "rocksdb.compaction.range_del.drop.obsolete"}, + "rocksdb.compaction.range_del.drop.obsolete"}, + {COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE, + "rocksdb.compaction.optimized.del.drop.obsolete"}, {NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"}, {NUMBER_KEYS_READ, "rocksdb.number.keys.read"}, {NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"},