From 4d60cbc629ce119a57e2b1257f2f5a83604ad7e5 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Sat, 10 Dec 2022 15:07:42 -0800 Subject: [PATCH] Use VersionBuilder for CF import ordering/validation (#11028) Summary: Besides the existing ordering and validation, more is coming to VersionBuilder/VersionStorageInfo, like migration of epoch_numbers from older RocksDB versions. We should start using those common classes for importing CFs, instead of duplicating their ordering, validation, and migration logic. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11028 Test Plan: rely on existing tests Reviewed By: hx235 Differential Revision: D41865427 Pulled By: ajkr fbshipit-source-id: 873f5cd87b8902a2380c3b71373ce0b0db3a0c50 --- db/import_column_family_job.cc | 99 +++++++++++++++------------------ db/import_column_family_test.cc | 7 +-- 2 files changed, 49 insertions(+), 57 deletions(-) diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index 34985666a..ae342cc0b 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -45,40 +45,6 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number, auto num_files = files_to_import_.size(); if (num_files == 0) { return Status::InvalidArgument("The list of files is empty"); - } else if (num_files > 1) { - // Verify that passed files don't have overlapping ranges in any particular - // level. - int min_level = 1; // Check for overlaps in Level 1 and above.
- int max_level = -1; - for (const auto& file_metadata : metadata_) { - if (file_metadata.level > max_level) { - max_level = file_metadata.level; - } - } - for (int level = min_level; level <= max_level; ++level) { - autovector sorted_files; - for (size_t i = 0; i < num_files; i++) { - if (metadata_[i].level == level) { - sorted_files.push_back(&files_to_import_[i]); - } - } - - std::sort( - sorted_files.begin(), sorted_files.end(), - [this](const IngestedFileInfo* info1, const IngestedFileInfo* info2) { - return cfd_->internal_comparator().Compare( - info1->smallest_internal_key, - info2->smallest_internal_key) < 0; - }); - - for (size_t i = 0; i + 1 < sorted_files.size(); i++) { - if (cfd_->internal_comparator().Compare( - sorted_files[i]->largest_internal_key, - sorted_files[i + 1]->smallest_internal_key) >= 0) { - return Status::InvalidArgument("Files have overlapping ranges"); - } - } - } } for (const auto& f : files_to_import_) { @@ -143,9 +109,6 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number, // REQUIRES: we have become the only writer by entering both write_thread_ and // nonmem_write_thread_ Status ImportColumnFamilyJob::Run() { - Status status; - edit_.SetColumnFamily(cfd_->GetID()); - // We use the import time as the ancester time. This is the time the data - // is written to the database.
int64_t temp_current_time = 0; @@ -156,27 +119,57 @@ Status ImportColumnFamilyJob::Run() { static_cast(temp_current_time); } - for (size_t i = 0; i < files_to_import_.size(); ++i) { + VersionBuilder version_builder( + cfd_->current()->version_set()->file_options(), cfd_->ioptions(), + cfd_->table_cache(), cfd_->current()->storage_info(), + cfd_->current()->version_set(), + cfd_->GetFileMetadataCacheReservationManager()); + VersionStorageInfo vstorage( + &cfd_->internal_comparator(), cfd_->user_comparator(), + cfd_->NumberLevels(), cfd_->ioptions()->compaction_style, + nullptr /* src_vstorage */, cfd_->ioptions()->force_consistency_checks); + Status s; + for (size_t i = 0; s.ok() && i < files_to_import_.size(); ++i) { const auto& f = files_to_import_[i]; const auto& file_metadata = metadata_[i]; - edit_.AddFile(file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(), - f.fd.GetFileSize(), f.smallest_internal_key, - f.largest_internal_key, file_metadata.smallest_seqno, - file_metadata.largest_seqno, false, file_metadata.temperature, - kInvalidBlobFileNumber, oldest_ancester_time, current_time, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, - f.unique_id); - - // If incoming sequence number is higher, update local sequence number.
- if (file_metadata.largest_seqno > versions_->LastSequence()) { - versions_->SetLastAllocatedSequence(file_metadata.largest_seqno); - versions_->SetLastPublishedSequence(file_metadata.largest_seqno); - versions_->SetLastSequence(file_metadata.largest_seqno); + VersionEdit version_edit; + version_edit.AddFile( + file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(), + f.fd.GetFileSize(), f.smallest_internal_key, f.largest_internal_key, + file_metadata.smallest_seqno, file_metadata.largest_seqno, false, + file_metadata.temperature, kInvalidBlobFileNumber, oldest_ancester_time, + current_time, kUnknownFileChecksum, kUnknownFileChecksumFuncName, + f.unique_id); + s = version_builder.Apply(&version_edit); + } + if (s.ok()) { + s = version_builder.SaveTo(&vstorage); + } + if (s.ok()) { + edit_.SetColumnFamily(cfd_->GetID()); + + for (int level = 0; level < vstorage.num_levels(); level++) { + for (FileMetaData* file_meta : vstorage.LevelFiles(level)) { + edit_.AddFile(level, *file_meta); + // If incoming sequence number is higher, update local sequence number.
+ if (file_meta->fd.largest_seqno > versions_->LastSequence()) { + versions_->SetLastAllocatedSequence(file_meta->fd.largest_seqno); + versions_->SetLastPublishedSequence(file_meta->fd.largest_seqno); + versions_->SetLastSequence(file_meta->fd.largest_seqno); + } + } } } - - return status; + for (int level = 0; level < vstorage.num_levels(); level++) { + for (FileMetaData* file_meta : vstorage.LevelFiles(level)) { + file_meta->refs--; + if (file_meta->refs <= 0) { + delete file_meta; + } + } + } + return s; } void ImportColumnFamilyJob::Cleanup(const Status& status) { diff --git a/db/import_column_family_test.cc b/db/import_column_family_test.cc index 2847ea8da..0c07ee2a8 100644 --- a/db/import_column_family_test.cc +++ b/db/import_column_family_test.cc @@ -556,10 +556,9 @@ TEST_F(ImportColumnFamilyTest, ImportColumnFamilyNegativeTest) { LiveFileMetaDataInit(file2_sst_name, sst_files_dir_, 1, 10, 19)); metadata.db_comparator_name = options.comparator->Name(); - ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", - ImportColumnFamilyOptions(), - metadata, &import_cfh_), - Status::InvalidArgument("Files have overlapping ranges")); + ASSERT_NOK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", + ImportColumnFamilyOptions(), + metadata, &import_cfh_)); ASSERT_EQ(import_cfh_, nullptr); }