diff --git a/HISTORY.md b/HISTORY.md index de1d720ad..45cf403ad 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -15,6 +15,9 @@ * Fixed a bug in LockWAL() leading to re-locking mutex (#11020). * Fixed a heap use after free bug in async scan prefetching when the scan thread and another thread try to read and load the same seek block into cache. +### New Features +* When an SstPartitionerFactory is configured, CompactRange() now automatically selects for compaction any files overlapping a partition boundary that is in the compaction range, even if no actual entries are in the requested compaction range. With this feature, manual compaction can be used to (re-)establish SST partition points when SstPartitioner changes, without a full compaction. + ## 7.9.0 (11/21/2022) ### Performance Improvements * Fixed an iterator performance regression for delete range users when scanning through a consecutive sequence of range tombstones (#10877). diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index ed9a5a7ae..0227c06e4 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -1026,6 +1026,70 @@ TEST_F(DBCompactionTest, CompactionSstPartitioner) { ASSERT_EQ("B", Get("bbbb1")); } +TEST_F(DBCompactionTest, CompactionSstPartitionWithManualCompaction) { + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleLevel; + options.level0_file_num_compaction_trigger = 3; + + DestroyAndReopen(options); + + // create first file and flush to l0 + ASSERT_OK(Put("000015", "A")); + ASSERT_OK(Put("000025", "B")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + + // create second file and flush to l0 + ASSERT_OK(Put("000015", "A2")); + ASSERT_OK(Put("000025", "B2")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + + // CONTROL 1: compact without partitioner + CompactRangeOptions compact_options; + compact_options.bottommost_level_compaction = + BottommostLevelCompaction::kForceOptimized; + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + + // Check (compacted but no partitioning yet) + std::vector files; + dbfull()->GetLiveFilesMetaData(&files); + ASSERT_EQ(1, files.size()); + + // Install partitioner + std::shared_ptr factory( + NewSstPartitionerFixedPrefixFactory(5)); + options.sst_partitioner_factory = factory; + Reopen(options); + + // CONTROL 2: request compaction on range with no partition boundary and no + // overlap with actual entries + Slice from("000017"); + Slice to("000019"); + ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to)); + + // Check (no partitioning yet) + files.clear(); + dbfull()->GetLiveFilesMetaData(&files); + ASSERT_EQ(1, files.size()); + ASSERT_EQ("A2", Get("000015")); + ASSERT_EQ("B2", Get("000025")); + + // TEST: request compaction overlapping with partition boundary but no + // actual entries + // NOTE: `to` is INCLUSIVE + from = Slice("000019"); + to = Slice("000020"); + ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to)); + + // Check (must be partitioned) + files.clear(); + dbfull()->GetLiveFilesMetaData(&files); + ASSERT_EQ(2, files.size()); + ASSERT_EQ("A2", Get("000015")); + ASSERT_EQ("B2", Get("000025")); +} + TEST_F(DBCompactionTest, CompactionSstPartitionerNonTrivial) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleLevel; diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 31e2d07ce..095336f64 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1087,6 +1087,22 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options, { SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); Version* current_version = super_version->current; + + // Might need to query the partitioner + SstPartitionerFactory* partitioner_factory = + current_version->cfd()->ioptions()->sst_partitioner_factory.get(); + std::unique_ptr partitioner; + if (partitioner_factory && begin != nullptr && end != nullptr) { + SstPartitioner::Context context; + context.is_full_compaction = false; + context.is_manual_compaction = true; + context.output_level = /*unknown*/ -1; + // Small lies about compaction range + context.smallest_user_key = *begin; + context.largest_user_key = *end; + partitioner = partitioner_factory->CreatePartitioner(context); + } + ReadOptions ro; ro.total_order_seek = true; bool overlap; @@ -1094,14 +1110,50 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options, level < current_version->storage_info()->num_non_empty_levels(); level++) { overlap = true; + + // Whether to look at specific keys within files for overlap with + // compaction range, other than largest and smallest keys of the file + // known in Version metadata. + bool check_overlap_within_file = false; if (begin != nullptr && end != nullptr) { + // Typically checking overlap within files in this case + check_overlap_within_file = true; + // WART: Not known why we don't check within file in one-sided bound + // cases + if (partitioner) { + // Especially if the partitioner is new, the manual compaction + // might be used to enforce the partitioning. Checking overlap + // within files might miss cases where compaction is needed to + // partition the files, as in this example: + // * File has two keys "001" and "111" + // * Compaction range is ["011", "101") + // * Partition boundary at "100" + // In cases like this, file-level overlap with the compaction + // range is sufficient to force any partitioning that is needed + // within the compaction range. + // + // But if there's no partitioning boundary within the compaction + // range, we can be sure there's no need to fix partitioning + // within that range, thus safe to check overlap within file. + // + // Use a hypothetical trivial move query to check for partition + // boundary in range. (NOTE: in defiance of all conventions, + // `begin` and `end` here are both INCLUSIVE bounds, which makes + // this analogy to CanDoTrivialMove() accurate even when `end` is + // the first key in a partition.) + if (!partitioner->CanDoTrivialMove(*begin, *end)) { + check_overlap_within_file = false; + } + } + } + if (check_overlap_within_file) { Status status = current_version->OverlapWithLevelIterator( ro, file_options_, *begin, *end, level, &overlap); if (!status.ok()) { - overlap = current_version->storage_info()->OverlapInLevel( - level, begin, end); + check_overlap_within_file = false; } - } else { + } + if (!check_overlap_within_file) { overlap = current_version->storage_info()->OverlapInLevel(level, begin, end); }