From a00cffaf6921fa1ff7193a3d060b42025d80a474 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 1 Jun 2022 11:02:27 -0700 Subject: [PATCH] Reduce risk of backup or checkpoint missing a WAL file (#10083) Summary: We recently saw a case in crash test in which a WAL file in the middle of the list of live WALs was not included in the backup, so the DB was not openable due to missing WAL. We are not sure why, but this change should at least turn that into a backup-time failure by ensuring all the WAL files expected by the manifest (according to VersionSet) are included in `GetSortedWalFiles()` (used by `GetLiveFilesStorageInfo()`, `BackupEngine`, and `Checkpoint`) Related: to maximize the effectiveness of track_and_verify_wals_in_manifest with GetSortedWalFiles() during checkpoint/backup, we will now sync WAL in GetLiveFilesStorageInfo() when track_and_verify_wals_in_manifest=true. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10083 Test Plan: added new unit test for the check in GetSortedWalFiles() Reviewed By: ajkr Differential Revision: D36791608 Pulled By: pdillinger fbshipit-source-id: a27bcf0213fc7ab177760fede50d4375d579afa6 --- HISTORY.md | 3 ++- db/db_filesnapshot.cc | 41 ++++++++++++++++++++++++++++++++- db/db_wal_test.cc | 53 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 762e57f05..51af876ea 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,13 +6,14 @@ * Add two-phase commit support to C API. * Add `rocksdb_transaction_get_writebatch_wi` and `rocksdb_transaction_rebuild_from_writebatch` to C API. * Add SingleDelete for DB in C API -* Add User Defined Timestamp in C API. +* Add User Defined Timestamp in C API. * `rocksdb_comparator_with_ts_create` to create timestamp aware comparator * Put, Get, Delete, SingleDelete, MultiGet APIs has corresponding timestamp aware APIs with suffix `with_ts` * And Add C API's for Transaction, SstFileWriter, Compaction as mentioned [here](https://github.com/facebook/rocksdb/wiki/User-defined-Timestamp-(Experimental)) ### New Features * Add FileSystem::ReadAsync API in io_tracing +* Add an extra sanity check in `GetSortedWalFiles()` (also used by `GetLiveFilesStorageInfo()`, `BackupEngine`, and `Checkpoint`) to reduce risk of successfully created backup or checkpoint failing to open because of missing WAL file. ### Behavior changes * DB::Open(), DB::OpenAsSecondary() will fail if a Logger cannot be created (#9984) diff --git a/db/db_filesnapshot.cc b/db/db_filesnapshot.cc index e30071341..515abb728 100644 --- a/db/db_filesnapshot.cc +++ b/db/db_filesnapshot.cc @@ -124,6 +124,9 @@ Status DBImpl::GetLiveFiles(std::vector& ret, } Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { + // Record tracked WALs as a (minimum) cross-check for directory scan + std::vector required_by_manifest; + // If caller disabled deletions, this function should return files that are // guaranteed not to be deleted until deletions are re-enabled. We need to // wait for pending purges to finish since WalManager doesn't know which @@ -137,6 +140,13 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { while (pending_purge_obsolete_files_ > 0 || bg_purge_scheduled_ > 0) { bg_cv_.Wait(); } + + // Record tracked WALs as a (minimum) cross-check for directory scan + const auto& manifest_wals = versions_->GetWalSet().GetWals(); + required_by_manifest.reserve(manifest_wals.size()); + for (const auto& wal : manifest_wals) { + required_by_manifest.push_back(wal.first); + } } Status s = wal_manager_.GetSortedWalFiles(files); @@ -150,6 +160,29 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { assert(deletions_disabled.IsNotSupported()); } + if (s.ok()) { + // Verify includes those required by manifest (one sorted list is superset + // of the other) + auto required = required_by_manifest.begin(); + auto included = files.begin(); + + while (required != required_by_manifest.end()) { + if (included == files.end() || *required < (*included)->LogNumber()) { + // FAIL - did not find + return Status::Corruption( + "WAL file " + std::to_string(*required) + + " required by manifest but not in directory list"); + } + if (*required == (*included)->LogNumber()) { + ++required; + ++included; + } else { + assert(*required > (*included)->LogNumber()); + ++included; + } + } + } + return s; } @@ -349,7 +382,13 @@ Status DBImpl::GetLiveFilesStorageInfo( TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:SavedLiveFiles2"); if (s.ok()) { - s = FlushWAL(false /* sync */); + // To maximize the effectiveness of track_and_verify_wals_in_manifest, + // sync WAL when it is enabled. + s = FlushWAL( + immutable_db_options_.track_and_verify_wals_in_manifest /* sync */); + if (s.IsNotSupported()) { // read-only DB or similar + s = Status::OK(); + } } TEST_SYNC_POINT("CheckpointImpl::CreateCustomCheckpoint:AfterGetLive1"); diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 10668b9f9..19c1b334f 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -2165,6 +2165,59 @@ TEST_F(DBWALTest, ReadOnlyRecoveryNoTruncate) { #endif // ROCKSDB_FALLOCATE_PRESENT #endif // ROCKSDB_PLATFORM_POSIX +TEST_F(DBWALTest, WalInManifestButNotInSortedWals) { + Options options = CurrentOptions(); + options.track_and_verify_wals_in_manifest = true; + options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; + + // Build a way to make wal files selectively go missing + bool wals_go_missing = false; + struct MissingWalFs : public FileSystemWrapper { + MissingWalFs(const std::shared_ptr& t, + bool* _wals_go_missing_flag) + : FileSystemWrapper(t), wals_go_missing_flag(_wals_go_missing_flag) {} + bool* wals_go_missing_flag; + IOStatus GetChildren(const std::string& dir, const IOOptions& io_opts, + std::vector* r, + IODebugContext* dbg) override { + IOStatus s = target_->GetChildren(dir, io_opts, r, dbg); + if (s.ok() && *wals_go_missing_flag) { + for (size_t i = 0; i < r->size();) { + if (EndsWith(r->at(i), ".log")) { + r->erase(r->begin() + i); + } else { + ++i; + } + } + } + return s; + } + const char* Name() const override { return "MissingWalFs"; } + }; + auto my_fs = + std::make_shared(env_->GetFileSystem(), &wals_go_missing); + std::unique_ptr my_env(NewCompositeEnv(my_fs)); + options.env = my_env.get(); + + CreateAndReopenWithCF({"blah"}, options); + + // Currently necessary to get a WAL tracked in manifest; see + // https://github.com/facebook/rocksdb/issues/10080 + ASSERT_OK(Put(0, "x", "y")); + ASSERT_OK(db_->SyncWAL()); + ASSERT_OK(Put(1, "x", "y")); + ASSERT_OK(db_->SyncWAL()); + ASSERT_OK(Flush(1)); + + ASSERT_FALSE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty()); + std::vector> wals; + ASSERT_OK(db_->GetSortedWalFiles(wals)); + wals_go_missing = true; + ASSERT_NOK(db_->GetSortedWalFiles(wals)); + wals_go_missing = false; + Close(); +} + #endif // ROCKSDB_LITE TEST_F(DBWALTest, WalTermTest) {