diff --git a/db/version_set.cc b/db/version_set.cc index a28b186e1..09429ae0b 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2982,22 +2982,31 @@ void VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC( uint64_t sum_total_blob_bytes = oldest_meta->GetTotalBlobBytes(); uint64_t sum_garbage_blob_bytes = oldest_meta->GetGarbageBlobBytes(); - while (count < blob_files_.size()) { + assert(cutoff_count <= blob_files_.size()); + + for (; count < cutoff_count; ++count) { const auto& meta = blob_files_[count]; assert(meta); if (!meta->GetLinkedSsts().empty()) { + // Found the beginning of the next batch of blob files break; } - if (++count > cutoff_count) { - return; - } - sum_total_blob_bytes += meta->GetTotalBlobBytes(); sum_garbage_blob_bytes += meta->GetGarbageBlobBytes(); } + if (count < blob_files_.size()) { + const auto& meta = blob_files_[count]; + assert(meta); + + if (meta->GetLinkedSsts().empty()) { + // Some files in the oldest batch are not eligible for GC + return; + } + } + if (sum_garbage_blob_bytes < blob_garbage_collection_force_threshold * sum_total_blob_bytes) { return; diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 341dd4d64..422afaf49 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -540,7 +540,131 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCEmpty) { ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); } -TEST_F(VersionStorageInfoTest, ForcedBlobGC) { +TEST_F(VersionStorageInfoTest, ForcedBlobGCSingleBatch) { + // Test the edge case when all blob files are part of the oldest batch. + // We have one L0 SST file #1, and four blob files #10, #11, #12, and #13. + // The oldest blob file used by SST #1 is blob file #10. + + constexpr int level = 0; + + constexpr uint64_t sst = 1; + + constexpr uint64_t first_blob = 10; + constexpr uint64_t second_blob = 11; + constexpr uint64_t third_blob = 12; + constexpr uint64_t fourth_blob = 13; + + { + constexpr char smallest[] = "bar1"; + constexpr char largest[] = "foo1"; + constexpr uint64_t file_size = 1000; + + Add(level, sst, smallest, largest, file_size, first_blob); + } + + { + constexpr uint64_t total_blob_count = 10; + constexpr uint64_t total_blob_bytes = 100000; + constexpr uint64_t garbage_blob_count = 2; + constexpr uint64_t garbage_blob_bytes = 15000; + + AddBlob(first_blob, total_blob_count, total_blob_bytes, + BlobFileMetaData::LinkedSsts{sst}, garbage_blob_count, + garbage_blob_bytes); + } + + { + constexpr uint64_t total_blob_count = 4; + constexpr uint64_t total_blob_bytes = 400000; + constexpr uint64_t garbage_blob_count = 3; + constexpr uint64_t garbage_blob_bytes = 235000; + + AddBlob(second_blob, total_blob_count, total_blob_bytes, + BlobFileMetaData::LinkedSsts{}, garbage_blob_count, + garbage_blob_bytes); + } + + { + constexpr uint64_t total_blob_count = 20; + constexpr uint64_t total_blob_bytes = 1000000; + constexpr uint64_t garbage_blob_count = 8; + constexpr uint64_t garbage_blob_bytes = 400000; + + AddBlob(third_blob, total_blob_count, total_blob_bytes, + BlobFileMetaData::LinkedSsts{}, garbage_blob_count, + garbage_blob_bytes); + } + + { + constexpr uint64_t total_blob_count = 128; + constexpr uint64_t total_blob_bytes = 1000000; + constexpr uint64_t garbage_blob_count = 67; + constexpr uint64_t garbage_blob_bytes = 600000; + + AddBlob(fourth_blob, total_blob_count, total_blob_bytes, + BlobFileMetaData::LinkedSsts{}, garbage_blob_count, + garbage_blob_bytes); + } + + UpdateVersionStorageInfo(); + + assert(vstorage_.num_levels() > 0); + const auto& level_files = vstorage_.LevelFiles(level); + + assert(level_files.size() == 1); + assert(level_files[0] && level_files[0]->fd.GetNumber() == sst); + + // No blob files eligible for GC due to the age cutoff + + { + constexpr double age_cutoff = 0.1; + constexpr double force_threshold = 0.0; + vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + + ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); + } + + // Part of the oldest batch of blob files (specifically, #12 and #13) is + // ineligible for GC due to the age cutoff + + { + constexpr double age_cutoff = 0.5; + constexpr double force_threshold = 0.0; + vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + + ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); + } + + // Oldest batch is eligible based on age cutoff but its overall garbage ratio + // is below threshold + + { + constexpr double age_cutoff = 1.0; + constexpr double force_threshold = 0.6; + vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + + ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); + } + + // Oldest batch is eligible based on age cutoff and its overall garbage ratio + // meets threshold + + { + constexpr double age_cutoff = 1.0; + constexpr double force_threshold = 0.5; + vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + + auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC(); + ASSERT_EQ(ssts_to_be_compacted.size(), 1); + + const autovector> + expected_ssts_to_be_compacted{{level, level_files[0]}}; + + ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]); + } +} + +TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) { // Add three L0 SSTs (1, 2, and 3) and four blob files (10, 11, 12, and 13). // The first two SSTs have the same oldest blob file, namely, the very oldest // one (10), while the third SST's oldest blob file reference points to the @@ -695,6 +819,46 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGC) { ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]); ASSERT_EQ(ssts_to_be_compacted[1], expected_ssts_to_be_compacted[1]); } + + // Now try the last two cases again with a greater than necessary age cutoff + + // Oldest batch is eligible based on age cutoff but its overall garbage ratio + // is below threshold + + { + constexpr double age_cutoff = 0.75; + constexpr double force_threshold = 0.6; + vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + + ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); + } + + // Oldest batch is eligible based on age cutoff and its overall garbage ratio + // meets threshold + + { + constexpr double age_cutoff = 0.75; + constexpr double force_threshold = 0.5; + vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + + auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC(); + ASSERT_EQ(ssts_to_be_compacted.size(), 2); + + std::sort(ssts_to_be_compacted.begin(), ssts_to_be_compacted.end(), + [](const std::pair& lhs, + const std::pair& rhs) { + assert(lhs.second); + assert(rhs.second); + return lhs.second->fd.GetNumber() < rhs.second->fd.GetNumber(); + }); + + const autovector> + expected_ssts_to_be_compacted{{level, level_files[0]}, + {level, level_files[1]}}; + + ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]); + ASSERT_EQ(ssts_to_be_compacted[1], expected_ssts_to_be_compacted[1]); + } } class VersionStorageInfoTimestampTest : public VersionStorageInfoTestBase {