diff --git a/HISTORY.md b/HISTORY.md index 25135c053..171b68fdc 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -64,6 +64,9 @@ ### Performance Improvements * When compiled with folly (Meta-internal integration; experimental in open source build), improve the locking performance (CPU efficiency) of LRUCache by using folly DistributedMutex in place of standard mutex. +### Performance Improvements +* Rather than doing a total sort of all files in a level, SortFileByOverlappingRatio() now only finds the top 50 files based on score. This can improve write throughput for the use cases where data is loaded in increasing key order and there are a lot of files in one LSM-tree, where applying compaction results is the bottleneck. + ## 7.3.0 (05/20/2022) ### Bug Fixes * Fixed a bug where manual flush would block forever even though flush options had wait=false. diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 7da018aad..c65ec1eec 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -1349,6 +1349,61 @@ TEST_F(CompactionPickerTest, CompactionPriRoundRobin) { } } +TEST_F(CompactionPickerTest, CompactionPriMinOverlappingManyFiles) { + NewVersionStorage(6, kCompactionStyleLevel); + ioptions_.compaction_pri = kMinOverlappingRatio; + mutable_cf_options_.max_bytes_for_level_base = 15000000; + mutable_cf_options_.max_bytes_for_level_multiplier = 10; + + // Files 7 and 8 each overlap one equally sized level-3 file, but file 8 is + // larger, so it has the smaller overlapping ratio and will be picked. + Add(2, 13U, "010", "011", + 6100U); // Overlaps with a large file. Not picked + Add(2, 14U, "020", "021", + 6100U); // Overlaps with a large file. Not picked + Add(2, 15U, "030", "031", + 6100U); // Overlaps with a large file. Not picked + Add(2, 16U, "040", "041", + 6100U); // Overlaps with a large file. Not picked + Add(2, 17U, "050", "051", + 6100U); // Overlaps with a large file. 
Not picked + Add(2, 18U, "060", "061", + 6100U); // Overlaps with a large file. Not picked + Add(2, 19U, "070", "071", + 6100U); // Overlaps with a large file. Not picked + Add(2, 20U, "080", "081", + 6100U); // Overlaps with a large file. Not picked + + Add(2, 6U, "150", "167", 60000000U); // Overlaps with file 26, 27 + Add(2, 7U, "168", "169", 60000000U); // Overlaps with file 27 + Add(2, 8U, "201", "300", 61000000U); // Overlaps with file 28, but the file + // itself is larger. Should be picked. + Add(2, 9U, "610", "611", + 6100U); // Overlaps with a large file. Not picked + Add(2, 10U, "620", "621", + 6100U); // Overlaps with a large file. Not picked + Add(2, 11U, "630", "631", + 6100U); // Overlaps with a large file. Not picked + Add(2, 12U, "640", "641", + 6100U); // Overlaps with a large file. Not picked + + Add(3, 31U, "001", "100", 260000000U); + Add(3, 26U, "160", "165", 260000000U); + Add(3, 27U, "166", "170", 260000000U); + Add(3, 28U, "180", "400", 260000000U); + Add(3, 29U, "401", "500", 260000000U); + Add(3, 30U, "601", "700", 260000000U); + UpdateVersionStorageInfo(); + + std::unique_ptr compaction(level_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(1U, compaction->num_input_files(0)); + // Picking file 8 because its overlapping ratio is the smallest. 
+ ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber()); +} + // This test exhibits the bug where we don't properly reset parent_index in // PickCompaction() TEST_F(CompactionPickerTest, ParentIndexResetBug) { diff --git a/db/version_set.cc b/db/version_set.cc index 162dd00ee..66d55d4f7 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -3189,11 +3189,15 @@ void SortFileByOverlappingRatio( ttl_boost_score; } - std::sort(temp->begin(), temp->end(), - [&](const Fsize& f1, const Fsize& f2) -> bool { - return file_to_order[f1.file->fd.GetNumber()] < - file_to_order[f2.file->fd.GetNumber()]; - }); + size_t num_to_sort = temp->size() > VersionStorageInfo::kNumberFilesToSort + ? VersionStorageInfo::kNumberFilesToSort + : temp->size(); + + std::partial_sort(temp->begin(), temp->begin() + num_to_sort, temp->end(), + [&](const Fsize& f1, const Fsize& f2) -> bool { + return file_to_order[f1.file->fd.GetNumber()] < + file_to_order[f2.file->fd.GetNumber()]; + }); } void SortFileByRoundRobin(const InternalKeyComparator& icmp,