From 7b1d6c438a71bab04680e066c101eabdac14540a Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Wed, 9 Sep 2020 10:23:52 -0700 Subject: [PATCH] Fix the handling of the case when a blob file with a lower number gets added in VersionBuilder (#7349) Summary: When multiple background jobs are generating blob files in parallel, it is actually possible for a blob file to be added with a file number that is lower than the highest one in the base version. (This is a harmless race condition.) The patch fixes the handling of this case and adds a unit test. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7349 Test Plan: `make check` Reviewed By: riversand963 Differential Revision: D23542453 Pulled By: ltamasi fbshipit-source-id: 4ff6f3654bc58c391d10b9870e1cc40b5e3fa8e4 --- db/version_builder.cc | 9 ++-- db/version_builder_test.cc | 96 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 4 deletions(-) diff --git a/db/version_builder.cc b/db/version_builder.cc index aecf44623..da500d5e0 100644 --- a/db/version_builder.cc +++ b/db/version_builder.cc @@ -770,10 +770,11 @@ class VersionBuilder::Rep { ++base_it; } else if (delta_blob_file_number < base_blob_file_number) { - // Note: blob file numbers are strictly increasing over time and - // once blob files get marked obsolete, they never reappear. Thus, - // this case is not possible. - assert(false); + const auto& delta = delta_it->second; + + auto meta = CreateMetaDataForNewBlobFile(delta); + + AddBlobFileIfNeeded(vstorage, meta, &found_first_non_empty); ++delta_it; } else { diff --git a/db/version_builder_test.cc b/db/version_builder_test.cc index 86d811580..5759151d7 100644 --- a/db/version_builder_test.cc +++ b/db/version_builder_test.cc @@ -1028,6 +1028,102 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { UnrefFilesInVersion(&new_vstorage); } +TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) { + // When multiple background jobs (flushes/compactions) are executing in + // parallel, it is possible for the VersionEdit adding blob file K to be + // applied *after* the VersionEdit adding blob file N (for N > K). This test + // case makes sure this is handled correctly. + + // Add blob file #4 (referenced by table file #3) to base version. + constexpr uint64_t base_table_file_number = 3; + constexpr uint64_t base_blob_file_number = 4; + constexpr uint64_t base_total_blob_count = 100; + constexpr uint64_t base_total_blob_bytes = 1 << 20; + + constexpr char checksum_method[] = "SHA1"; + constexpr char checksum_value[] = "\xfa\xce\xb0\x0c"; + constexpr uint64_t garbage_blob_count = 0; + constexpr uint64_t garbage_blob_bytes = 0; + + AddDummyFile(base_table_file_number, base_blob_file_number); + AddBlob(base_blob_file_number, base_total_blob_count, base_total_blob_bytes, + checksum_method, checksum_value, + BlobFileMetaData::LinkedSsts{base_table_file_number}, + garbage_blob_count, garbage_blob_bytes); + + EnvOptions env_options; + constexpr TableCache* table_cache = nullptr; + constexpr VersionSet* version_set = nullptr; + + VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, + version_set); + + VersionEdit edit; + + // Add blob file #2 (referenced by table file #1). + constexpr int level = 0; + constexpr uint64_t table_file_number = 1; + constexpr uint32_t path_id = 0; + constexpr uint64_t file_size = 1 << 12; + constexpr char smallest[] = "key1"; + constexpr char largest[] = "key987"; + constexpr SequenceNumber smallest_seqno = 0; + constexpr SequenceNumber largest_seqno = 0; + constexpr bool marked_for_compaction = false; + + constexpr uint64_t blob_file_number = 2; + static_assert(blob_file_number < base_blob_file_number, + "Added blob file should have a smaller file number"); + + constexpr uint64_t total_blob_count = 234; + constexpr uint64_t total_blob_bytes = 1 << 22; + + edit.AddFile(level, table_file_number, path_id, file_size, + GetInternalKey(smallest), GetInternalKey(largest), + smallest_seqno, largest_seqno, marked_for_compaction, + blob_file_number, kUnknownOldestAncesterTime, + kUnknownFileCreationTime, checksum_value, checksum_method); + edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, + checksum_method, checksum_value); + + ASSERT_OK(builder.Apply(&edit)); + + constexpr bool force_consistency_checks = true; + VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, + kCompactionStyleLevel, &vstorage_, + force_consistency_checks); + + ASSERT_OK(builder.SaveTo(&new_vstorage)); + + const auto& new_blob_files = new_vstorage.GetBlobFiles(); + ASSERT_EQ(new_blob_files.size(), 2); + + const auto base_meta = + GetBlobFileMetaData(new_blob_files, base_blob_file_number); + + ASSERT_NE(base_meta, nullptr); + ASSERT_EQ(base_meta->GetBlobFileNumber(), base_blob_file_number); + ASSERT_EQ(base_meta->GetTotalBlobCount(), base_total_blob_count); + ASSERT_EQ(base_meta->GetTotalBlobBytes(), base_total_blob_bytes); + ASSERT_EQ(base_meta->GetGarbageBlobCount(), garbage_blob_count); + ASSERT_EQ(base_meta->GetGarbageBlobBytes(), garbage_blob_bytes); + ASSERT_EQ(base_meta->GetChecksumMethod(), checksum_method); + ASSERT_EQ(base_meta->GetChecksumValue(), checksum_value); + + const auto added_meta = GetBlobFileMetaData(new_blob_files, blob_file_number); + + ASSERT_NE(added_meta, nullptr); + ASSERT_EQ(added_meta->GetBlobFileNumber(), blob_file_number); + ASSERT_EQ(added_meta->GetTotalBlobCount(), total_blob_count); + ASSERT_EQ(added_meta->GetTotalBlobBytes(), total_blob_bytes); + ASSERT_EQ(added_meta->GetGarbageBlobCount(), garbage_blob_count); + ASSERT_EQ(added_meta->GetGarbageBlobBytes(), garbage_blob_bytes); + ASSERT_EQ(added_meta->GetChecksumMethod(), checksum_method); + ASSERT_EQ(added_meta->GetChecksumValue(), checksum_value); + + UnrefFilesInVersion(&new_vstorage); +} + TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { // Initialize base version. The first table file points to a valid blob file // in this version; the second one does not refer to any blob files.