diff --git a/HISTORY.md b/HISTORY.md index a78861fc2..4d8a72484 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -18,6 +18,7 @@ * Fixed a regression in scan for async_io. During seek, valid buffers were getting cleared causing a regression. * Tiered Storage: fixed excessive keys written to penultimate level in non-debug builds. * Fixed a bug that multi-level FIFO compaction deletes one file in non-L0 even when `CompactionOptionsFIFO::max_table_files_size` is no exceeded since #10348 or 7.8.0. +* Fixed a bug caused by `DB::SyncWAL()` affecting `track_and_verify_wals_in_manifest`. Without the fix, application may see "open error: Corruption: Missing WAL with log number" while trying to open the db. The corruption is a false alarm but prevents DB open (#10892). ### New Features * Add basic support for user-defined timestamp to Merge (#10819). diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index 058df4da7..9834ff4b2 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -315,6 +315,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, } log_write_mutex_.Unlock(); mutex_.Unlock(); + TEST_SYNC_POINT_CALLBACK("FindObsoleteFiles::PostMutexUnlock", nullptr); log_write_mutex_.Lock(); while (!logs_.empty() && logs_.front().number < min_log_number) { auto& log = logs_.front(); @@ -360,6 +361,8 @@ void DBImpl::DeleteObsoleteFileImpl(int job_id, const std::string& fname, } TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl:AfterDeletion", &file_deletion_status); + TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl:AfterDeletion2", + const_cast(&fname)); if (file_deletion_status.ok()) { ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[JOB %d] Delete %s type=%d #%" PRIu64 " -- %s\n", job_id, diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 5b5ec76af..6246cc70f 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -1599,6 +1599,90 @@ TEST_F(DBWALTest, RaceInstallFlushResultsWithWalObsoletion) { delete db1; } +TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL) { + Options options = CurrentOptions(); + options.track_and_verify_wals_in_manifest = true; + // Set a small max_manifest_file_size to force manifest creation + // in SyncWAL() for tet purpose + options.max_manifest_file_size = 170; + DestroyAndReopen(options); + + // Accumulate memtable m1 and create the 1st wal (i.e, 4.log) + ASSERT_OK(Put(Key(1), "")); + ASSERT_OK(Put(Key(2), "")); + ASSERT_OK(Put(Key(3), "")); + + const std::string wal_file_path = db_->GetName() + "/000004.log"; + + // Coerce the following sequence of events: + // (1) Flush() marks 4.log to be obsoleted, 8.log to be the latest (i.e, + // active) log and release the lock + // (2) SyncWAL() proceeds with the lock. It + // creates a new manifest and syncs all the inactive wals before the latest + // (i.e, active log), which is 4.log. SyncWAL() is not aware of the fact + // that 4.log has marked as to be obseleted. + // Prior to the fix, such wal sync will then add a WAL addition record of + // 4.log to the new manifest without any special treatment. + // (3) BackgroundFlush() will eventually purge 4.log. + bool sync_point_called = false; + bool new_manifest_created = false; + SyncPoint::GetInstance()->SetCallBack( + "FindObsoleteFiles::PostMutexUnlock", [&](void*) { + ASSERT_OK(env_->FileExists(wal_file_path)); + + SyncPoint::GetInstance()->SetCallBack( + "VersionSet::ProcessManifestWrites:BeforeNewManifest", + [&](void*) { new_manifest_created = true; }); + + ASSERT_OK(db_->SyncWAL()); + ASSERT_TRUE(new_manifest_created); + + sync_point_called = true; + }); + + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::DeleteObsoleteFileImpl:AfterDeletion2", [&](void* arg) { + std::string* file_name = (std::string*)arg; + if (*file_name == wal_file_path) { + TEST_SYNC_POINT( + "DBWALTest::" + "FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::" + "PostDeleteWAL"); + } + }); + SyncPoint::GetInstance()->LoadDependency( + {{"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::" + "PostDeleteWAL", + "DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::" + "PreConfrimWALDeleted"}}); + + SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_OK(Flush()); + ASSERT_TRUE(sync_point_called); + + TEST_SYNC_POINT( + "DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::" + "PreConfrimWALDeleted"); + + // BackgroundFlush() purged 4.log + // because the memtable associated with the WAL was flushed and new WAL was + // created (i.e, 8.log) + ASSERT_TRUE(env_->FileExists(wal_file_path).IsNotFound()); + + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->DisableProcessing(); + + // To verify the corruption of "Missing WAL with log number: 4" under + // `options.track_and_verify_wals_in_manifest = true` is fixed. + // + // Before the fix, `db_->SyncWAL()` will sync and record WAL addtion of the + // obseleted WAL 4.log in a new manifest without any special treament. + // This will result in missing-wal corruption in DB::Reopen(). + Status s = TryReopen(options); + EXPECT_OK(s); +} + // Test scope: // - We expect to open data store under all circumstances // - We expect only data upto the point where the first error was encountered diff --git a/db/version_set.cc b/db/version_set.cc index 082e55217..3b9df2aec 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -4967,6 +4967,8 @@ Status VersionSet::ProcessManifestWrites( if (!descriptor_log_ || manifest_file_size_ > db_options_->max_manifest_file_size) { TEST_SYNC_POINT("VersionSet::ProcessManifestWrites:BeforeNewManifest"); + TEST_SYNC_POINT_CALLBACK( + "VersionSet::ProcessManifestWrites:BeforeNewManifest", nullptr); new_descriptor_log = true; } else { pending_manifest_file_number_ = manifest_file_number_; @@ -5099,6 +5101,7 @@ Status VersionSet::ProcessManifestWrites( break; } } + if (s.ok()) { io_s = SyncManifest(db_options_, descriptor_log_->file()); manifest_io_status = io_s; @@ -5506,7 +5509,8 @@ Status VersionSet::GetCurrentManifestPath(const std::string& dbname, Status VersionSet::Recover( const std::vector& column_families, bool read_only, std::string* db_id, bool no_error_if_files_missing) { - // Read "CURRENT" file, which contains a pointer to the current manifest file + // Read "CURRENT" file, which contains a pointer to the current manifest + // file std::string manifest_path; Status s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path, &manifest_file_number_); @@ -6037,6 +6041,22 @@ Status VersionSet::WriteCurrentStateToManifest( } } + // New manifest should rollover the WAL deletion record from previous + // manifest. Otherwise, when an addition record of a deleted WAL gets added to + // this new manifest later (which can happens in e.g, SyncWAL()), this new + // manifest creates an illusion that such WAL hasn't been deleted. + VersionEdit wal_deletions; + wal_deletions.DeleteWalsBefore(min_log_number_to_keep()); + std::string wal_deletions_record; + if (!wal_deletions.EncodeTo(&wal_deletions_record)) { + return Status::Corruption("Unable to Encode VersionEdit: " + + wal_deletions.DebugString(true)); + } + io_s = log->AddRecord(wal_deletions_record); + if (!io_s.ok()) { + return io_s; + } + for (auto cfd : *column_family_set_) { assert(cfd); diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java index 422bed40c..488dbafe8 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -1425,7 +1425,7 @@ public class RocksDBTest { try (final RocksDB db = RocksDB.open(options, dbPath)) { final RocksDB.LiveFiles livefiles = db.getLiveFiles(true); assertThat(livefiles).isNotNull(); - assertThat(livefiles.manifestFileSize).isEqualTo(59); + assertThat(livefiles.manifestFileSize).isEqualTo(66); assertThat(livefiles.files.size()).isEqualTo(3); assertThat(livefiles.files.get(0)).isEqualTo("/CURRENT"); assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000005");