From 1663f77d2abde0f50197498ae0af3af0a853835d Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Thu, 20 Oct 2022 17:11:38 -0700 Subject: [PATCH] Fix no internal time recorded for small preclude_last_level (#10829) Summary: When the `preclude_last_level_data_seconds` or `preserve_internal_time_seconds` is smaller than 100 (seconds), no seqno->time information was recorded. Also make sure all data will be compacted to the last level even if there's no write to record the time information. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10829 Test Plan: added unittest Reviewed By: siying Differential Revision: D40443934 Pulled By: jay-zhuang fbshipit-source-id: 2ecf1361daf9f3e5c3385aee6dc924fa59e2813a --- db/compaction/compaction_iterator.cc | 2 +- db/compaction/tiered_compaction_test.cc | 54 +++++++++++++++++++++++++ db/db_impl/db_impl.cc | 5 ++- 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/db/compaction/compaction_iterator.cc b/db/compaction/compaction_iterator.cc index 7f2bed465..b7238d857 100644 --- a/db/compaction/compaction_iterator.cc +++ b/db/compaction/compaction_iterator.cc @@ -1104,7 +1104,7 @@ void CompactionIterator::DecideOutputLevel() { // if the key is newer than the cutoff sequence or within the earliest // snapshot, it should output to the penultimate level. - if (ikey_.sequence >= preclude_last_level_min_seqno_ || + if (ikey_.sequence > preclude_last_level_min_seqno_ || ikey_.sequence > earliest_snapshot_) { output_to_penultimate_level_ = true; } diff --git a/db/compaction/tiered_compaction_test.cc b/db/compaction/tiered_compaction_test.cc index 864d281cd..867ad0676 100644 --- a/db/compaction/tiered_compaction_test.cc +++ b/db/compaction/tiered_compaction_test.cc @@ -1434,6 +1434,60 @@ TEST_F(PrecludeLastLevelTest, MigrationFromPreserveTimePartial) { Close(); } +TEST_F(PrecludeLastLevelTest, SmallPrecludeTime) { + const int kNumTrigger = 4; + const int kNumLevels = 7; + const int kNumKeys = 100; + + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleUniversal; + options.preclude_last_level_data_seconds = 60; + options.preserve_internal_time_seconds = 0; + options.env = mock_env_.get(); + options.level0_file_num_compaction_trigger = kNumTrigger; + options.num_levels = kNumLevels; + options.last_level_temperature = Temperature::kCold; + DestroyAndReopen(options); + + Random rnd(301); + + dbfull()->TEST_WaitForPeridicTaskRun([&] { + mock_clock_->MockSleepForSeconds(static_cast(rnd.Uniform(10) + 1)); + }); + + for (int i = 0; i < kNumKeys; i++) { + ASSERT_OK(Put(Key(i), rnd.RandomString(100))); + dbfull()->TEST_WaitForPeridicTaskRun([&] { + mock_clock_->MockSleepForSeconds(static_cast(rnd.Uniform(2))); + }); + } + ASSERT_OK(Flush()); + + TablePropertiesCollection tables_props; + ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); + ASSERT_EQ(tables_props.size(), 1); + ASSERT_FALSE(tables_props.begin()->second->seqno_to_time_mapping.empty()); + SeqnoToTimeMapping tp_mapping; + ASSERT_OK( + tp_mapping.Add(tables_props.begin()->second->seqno_to_time_mapping)); + ASSERT_OK(tp_mapping.Sort()); + ASSERT_FALSE(tp_mapping.Empty()); + auto seqs = tp_mapping.TEST_GetInternalMapping(); + ASSERT_FALSE(seqs.empty()); + + // Wait more than preclude_last_level time, then make sure all the data is + // compacted to the last level even there's no write (no seqno -> time + // information was flushed to any SST). + mock_clock_->MockSleepForSeconds(100); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel()); + ASSERT_EQ(GetSstSizeHelper(Temperature::kUnknown), 0); + ASSERT_GT(GetSstSizeHelper(Temperature::kCold), 0); + + Close(); +} + #endif // !defined(ROCKSDB_LITE) } // namespace ROCKSDB_NAMESPACE diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index f34cdfc15..d0b6f085b 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -865,8 +865,11 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker() { uint64_t seqno_time_cadence = 0; if (min_time_duration != std::numeric_limits::max()) { + // round up to 1 when the time_duration is smaller than + // kMaxSeqnoTimePairsPerCF seqno_time_cadence = - min_time_duration / SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF; + (min_time_duration + SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF - 1) / + SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF; } Status s;