From 70aa9421533e560de99f9385b725b64002e93237 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 10 Oct 2017 13:07:00 -0700 Subject: [PATCH] fix file numbers after repair Summary: The file numbers assigned post-repair were sometimes smaller than older files' numbers due to `LogAndApply` saving the wrong next file number in the manifest. - Mark the highest file seen during repair as used before `LogAndApply` so the correct next file number will be stored. - Renamed `MarkFileNumberUsedDuringRecovery` to `MarkFileNumberUsed` since now it's used during repair in addition to during recovery - Added `TEST_Current_Next_FileNo` to expose the next file number for the unit test. Closes https://github.com/facebook/rocksdb/pull/2988 Differential Revision: D6018083 Pulled By: ajkr fbshipit-source-id: 3f25cbf74439cb8f16dd12af90b67f9f9f75e718 --- db/db_impl.h | 3 +++ db/db_impl_debug.cc | 4 ++++ db/db_impl_open.cc | 4 ++-- db/repair.cc | 2 ++ db/repair_test.cc | 17 +++++++++++++++++ db/version_set.cc | 10 +++++----- db/version_set.h | 4 ++-- 7 files changed, 35 insertions(+), 9 deletions(-) diff --git a/db/db_impl.h b/db/db_impl.h index 79017e52d..067a2f591 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -371,6 +371,9 @@ class DBImpl : public DB { // Return the current manifest file no. uint64_t TEST_Current_Manifest_FileNo(); + // Returns the number that'll be assigned to the next file that's created. + uint64_t TEST_Current_Next_FileNo(); + // get total level0 file size. Only for testing. uint64_t TEST_GetLevel0TotalSize(); diff --git a/db/db_impl_debug.cc b/db/db_impl_debug.cc index e7f689121..b20a31719 100644 --- a/db/db_impl_debug.cc +++ b/db/db_impl_debug.cc @@ -60,6 +60,10 @@ uint64_t DBImpl::TEST_Current_Manifest_FileNo() { return versions_->manifest_file_number(); } +uint64_t DBImpl::TEST_Current_Next_FileNo() { + return versions_->current_next_file_number(); +} + Status DBImpl::TEST_CompactRange(int level, const Slice* begin, const Slice* end, ColumnFamilyHandle* column_family, diff --git a/db/db_impl_open.cc b/db/db_impl_open.cc index f78adb722..8981f35ea 100644 --- a/db/db_impl_open.cc +++ b/db/db_impl_open.cc @@ -498,7 +498,7 @@ Status DBImpl::RecoverLogFiles(const std::vector& log_numbers, // The previous incarnation may not have written any MANIFEST // records after allocating this log number. So we manually // update the file number allocation counter in VersionSet. - versions_->MarkFileNumberUsedDuringRecovery(log_number); + versions_->MarkFileNumberUsed(log_number); // Open the log file std::string fname = LogFileName(immutable_db_options_.wal_dir, log_number); @@ -817,7 +817,7 @@ Status DBImpl::RecoverLogFiles(const std::vector& log_numbers, // not actually used. that is because VersionSet assumes // VersionSet::next_file_number_ always to be strictly greater than any // log number - versions_->MarkFileNumberUsedDuringRecovery(max_log_number + 1); + versions_->MarkFileNumberUsed(max_log_number + 1); status = versions_->LogAndApply( cfd, *cfd->GetLatestMutableCFOptions(), edit, &mutex_); if (!status.ok()) { diff --git a/db/repair.cc b/db/repair.cc index 8e21bead0..6b58c195c 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -569,6 +569,8 @@ class Repairer { table->meta.largest, table->min_sequence, table->max_sequence, table->meta.marked_for_compaction); } + assert(next_file_number_ > 0); + vset_.MarkFileNumberUsed(next_file_number_ - 1); mutex_.Lock(); Status status = vset_.LogAndApply( cfd, *cfd->GetLatestMutableCFOptions(), &edit, &mutex_, diff --git a/db/repair_test.cc b/db/repair_test.cc index 1cc86664a..ab00a96b4 100644 --- a/db/repair_test.cc +++ b/db/repair_test.cc @@ -108,6 +108,23 @@ TEST_F(RepairTest, IncompleteManifest) { ASSERT_EQ(Get("key2"), "val2"); } +TEST_F(RepairTest, PostRepairSstFileNumbering) { + // Verify after a DB is repaired, new files will be assigned higher numbers + // than old files. + Put("key", "val"); + Flush(); + Put("key2", "val2"); + Flush(); + uint64_t pre_repair_file_num = dbfull()->TEST_Current_Next_FileNo(); + Close(); + + ASSERT_OK(RepairDB(dbname_, CurrentOptions())); + + Reopen(CurrentOptions()); + uint64_t post_repair_file_num = dbfull()->TEST_Current_Next_FileNo(); + ASSERT_GE(post_repair_file_num, pre_repair_file_num); +} + TEST_F(RepairTest, LostSst) { // Delete one of the SST files but preserve the manifest that refers to it, // then verify the DB is still usable for the intact SST. diff --git a/db/version_set.cc b/db/version_set.cc index 281c6fc71..100d5a786 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2928,8 +2928,8 @@ Status VersionSet::Recover( column_family_set_->UpdateMaxColumnFamily(max_column_family); - MarkFileNumberUsedDuringRecovery(previous_log_number); - MarkFileNumberUsedDuringRecovery(log_number); + MarkFileNumberUsed(previous_log_number); + MarkFileNumberUsed(log_number); } // there were some column families in the MANIFEST that weren't specified @@ -3373,9 +3373,9 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname, } #endif // ROCKSDB_LITE -void VersionSet::MarkFileNumberUsedDuringRecovery(uint64_t number) { - // only called during recovery which is single threaded, so this works because - // there can't be concurrent calls +void VersionSet::MarkFileNumberUsed(uint64_t number) { + // only called during recovery and repair which are single threaded, so this + // works because there can't be concurrent calls if (next_file_number_.load(std::memory_order_relaxed) <= number) { next_file_number_.store(number + 1, std::memory_order_relaxed); } diff --git a/db/version_set.h b/db/version_set.h index 0ad1bf76f..5b990d6ff 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -726,8 +726,8 @@ class VersionSet { } // Mark the specified file number as used. - // REQUIRED: this is only called during single-threaded recovery - void MarkFileNumberUsedDuringRecovery(uint64_t number); + // REQUIRED: this is only called during single-threaded recovery or repair. + void MarkFileNumberUsed(uint64_t number); // Return the log file number for the log file that is currently // being compacted, or zero if there is no such log file.