From b6d8e3674119c2d15f0d1143720871603683c502 Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Wed, 4 Nov 2020 10:43:17 -0800 Subject: [PATCH] Compute NeedCompact() after table builder Finish() (#7627) Summary: In `BuildTable()`, we call `builder->Finish()` before evaluating `builder->NeedCompact()`. However, we call `builder->NeedCompact()` before `builder->Finish()` in compaction job. This can be wrong because the table properties collectors may rely on the success of `Finish()` to provide correct result for `NeedCompact()`. Test plan (on devserver): make check Pull Request resolved: https://github.com/facebook/rocksdb/pull/7627 Reviewed By: ajkr Differential Revision: D24728741 Pulled By: riversand963 fbshipit-source-id: 5a0dce244e14eb1106c4f87021e6bebca82b486e --- HISTORY.md | 1 + db/compaction/compaction_job.cc | 2 +- db/db_table_properties_test.cc | 48 +++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index b4fc6df1b..7dae6eac4 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -11,6 +11,7 @@ * Reverted a behavior change silently introduced in 6.14.2, in which the effects of the `ignore_unknown_options` flag (used in option parsing/loading functions) changed. * Reverted a behavior change silently introduced in 6.14, in which options parsing/loading functions began returning `NotFound` instead of `InvalidArgument` for option names not available in the present version. * Fixed MultiGet bugs it doesn't return valid data with user defined timestamp. +* Fixed a potential bug caused by evaluating `TableBuilder::NeedCompact()` before `TableBuilder::Finish()` in compaction job. For example, the `NeedCompact()` method of `CompactOnDeletionCollector` returned by built-in `CompactOnDeletionCollectorFactory` requires `BlockBasedTable::Finish()` to return the correct result. The bug can cause a compaction-generated file not to be marked for future compaction based on deletion ratio. ### Public API Change * Deprecate `BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache` and `BlockBasedTableOptions::pin_top_level_index_and_filter`. These options still take effect until users migrate to the replacement APIs in `BlockBasedTableOptions::metadata_cache_options`. Migration guidance can be found in the API comments on the deprecated options. diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 6485c3de2..edf0cbdc6 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1439,7 +1439,6 @@ Status CompactionJob::FinishCompactionOutputFile( ExtractInternalKeyFooter(meta->smallest.Encode()) != PackSequenceAndType(0, kTypeRangeDeletion)); } - meta->marked_for_compaction = sub_compact->builder->NeedCompact(); } const uint64_t current_entries = sub_compact->builder->NumEntries(); if (s.ok()) { @@ -1454,6 +1453,7 @@ Status CompactionJob::FinishCompactionOutputFile( const uint64_t current_bytes = sub_compact->builder->FileSize(); if (s.ok()) { meta->fd.file_size = current_bytes; + meta->marked_for_compaction = sub_compact->builder->NeedCompact(); } sub_compact->current_output()->finished = true; sub_compact->total_bytes += current_bytes; diff --git a/db/db_table_properties_test.cc b/db/db_table_properties_test.cc index 2a0a3c11b..89f36fda5 100644 --- a/db/db_table_properties_test.cc +++ b/db/db_table_properties_test.cc @@ -421,6 +421,54 @@ TEST_P(DBTablePropertiesTest, DeletionTriggeredCompactionMarking) { ASSERT_LT(0, opts.statistics->getTickerCount(COMPACT_READ_BYTES_MARKED)); } +TEST_P(DBTablePropertiesTest, RatioBasedDeletionTriggeredCompactionMarking) { + constexpr int kNumKeys = 1000; + constexpr int kWindowSize = 0; + constexpr int kNumDelsTrigger = 0; + constexpr double kDeletionRatio = 0.1; + std::shared_ptr compact_on_del = + NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger, + kDeletionRatio); + + Options opts = CurrentOptions(); + opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + opts.table_properties_collector_factories.emplace_back(compact_on_del); + + Reopen(opts); + + // Add an L2 file to prevent tombstones from dropping due to obsolescence + // during flush + Put(Key(0), "val"); + Flush(); + MoveFilesToLevel(2); + + auto* listener = new DeletionTriggeredCompactionTestListener(); + opts.listeners.emplace_back(listener); + Reopen(opts); + + // Generate one L0 with kNumKeys Put. + for (int i = 0; i < kNumKeys; ++i) { + ASSERT_OK(Put(Key(i), "not important")); + } + ASSERT_OK(Flush()); + + // Generate another L0 with kNumKeys Delete. + // This file, due to deletion ratio, will trigger compaction: 2@0 files to L1. + // The resulting L1 file has only one tombstone for user key 'Key(0)'. + // Again, due to deletion ratio, a compaction will be triggered: 1@1 + 1@2 + // files to L2. However, the resulting file is empty because the tombstone + // and value are both dropped. + for (int i = 0; i < kNumKeys; ++i) { + ASSERT_OK(Delete(Key(i))); + } + ASSERT_OK(Flush()); + + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + for (int i = 0; i < 3; ++i) { + ASSERT_EQ(0, NumTableFilesAtLevel(i)); + } +} + INSTANTIATE_TEST_CASE_P( DBTablePropertiesTest, DBTablePropertiesTest,