diff --git a/HISTORY.md b/HISTORY.md index 762e57f05..51af876ea 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,13 +6,14 @@ * Add two-phase commit support to C API. * Add `rocksdb_transaction_get_writebatch_wi` and `rocksdb_transaction_rebuild_from_writebatch` to C API. * Add SingleDelete for DB in C API -* Add User Defined Timestamp in C API. +* Add User Defined Timestamp in C API. * `rocksdb_comparator_with_ts_create` to create timestamp aware comparator * Put, Get, Delete, SingleDelete, MultiGet APIs has corresponding timestamp aware APIs with suffix `with_ts` * And Add C API's for Transaction, SstFileWriter, Compaction as mentioned [here](https://github.com/facebook/rocksdb/wiki/User-defined-Timestamp-(Experimental)) ### New Features * Add FileSystem::ReadAsync API in io_tracing +* Add an extra sanity check in `GetSortedWalFiles()` (also used by `GetLiveFilesStorageInfo()`, `BackupEngine`, and `Checkpoint`) to reduce risk of successfully created backup or checkpoint failing to open because of missing WAL file. ### Behavior changes * DB::Open(), DB::OpenAsSecondary() will fail if a Logger cannot be created (#9984) diff --git a/db/db_filesnapshot.cc b/db/db_filesnapshot.cc index e30071341..515abb728 100644 --- a/db/db_filesnapshot.cc +++ b/db/db_filesnapshot.cc @@ -124,6 +124,9 @@ Status DBImpl::GetLiveFiles(std::vector& ret, } Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { + // Record tracked WALs as a (minimum) cross-check for directory scan + std::vector required_by_manifest; + // If caller disabled deletions, this function should return files that are // guaranteed not to be deleted until deletions are re-enabled. We need to // wait for pending purges to finish since WalManager doesn't know which @@ -137,6 +140,13 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { while (pending_purge_obsolete_files_ > 0 || bg_purge_scheduled_ > 0) { bg_cv_.Wait(); } + + // Record tracked WALs as a (minimum) cross-check for directory scan + const auto& manifest_wals = versions_->GetWalSet().GetWals(); + required_by_manifest.reserve(manifest_wals.size()); + for (const auto& wal : manifest_wals) { + required_by_manifest.push_back(wal.first); + } } Status s = wal_manager_.GetSortedWalFiles(files); @@ -150,6 +160,29 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { assert(deletions_disabled.IsNotSupported()); } + if (s.ok()) { + // Verify includes those required by manifest (one sorted list is superset + // of the other) + auto required = required_by_manifest.begin(); + auto included = files.begin(); + + while (required != required_by_manifest.end()) { + if (included == files.end() || *required < (*included)->LogNumber()) { + // FAIL - did not find + return Status::Corruption( + "WAL file " + std::to_string(*required) + + " required by manifest but not in directory list"); + } + if (*required == (*included)->LogNumber()) { + ++required; + ++included; + } else { + assert(*required > (*included)->LogNumber()); + ++included; + } + } + } + return s; } @@ -349,7 +382,13 @@ Status DBImpl::GetLiveFilesStorageInfo( TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:SavedLiveFiles2"); if (s.ok()) { - s = FlushWAL(false /* sync */); + // To maximize the effectiveness of track_and_verify_wals_in_manifest, + // sync WAL when it is enabled. + s = FlushWAL( + immutable_db_options_.track_and_verify_wals_in_manifest /* sync */); + if (s.IsNotSupported()) { // read-only DB or similar + s = Status::OK(); + } } TEST_SYNC_POINT("CheckpointImpl::CreateCustomCheckpoint:AfterGetLive1"); diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 10668b9f9..19c1b334f 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -2165,6 +2165,59 @@ TEST_F(DBWALTest, ReadOnlyRecoveryNoTruncate) { #endif // ROCKSDB_FALLOCATE_PRESENT #endif // ROCKSDB_PLATFORM_POSIX +TEST_F(DBWALTest, WalInManifestButNotInSortedWals) { + Options options = CurrentOptions(); + options.track_and_verify_wals_in_manifest = true; + options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; + + // Build a way to make wal files selectively go missing + bool wals_go_missing = false; + struct MissingWalFs : public FileSystemWrapper { + MissingWalFs(const std::shared_ptr& t, + bool* _wals_go_missing_flag) + : FileSystemWrapper(t), wals_go_missing_flag(_wals_go_missing_flag) {} + bool* wals_go_missing_flag; + IOStatus GetChildren(const std::string& dir, const IOOptions& io_opts, + std::vector* r, + IODebugContext* dbg) override { + IOStatus s = target_->GetChildren(dir, io_opts, r, dbg); + if (s.ok() && *wals_go_missing_flag) { + for (size_t i = 0; i < r->size();) { + if (EndsWith(r->at(i), ".log")) { + r->erase(r->begin() + i); + } else { + ++i; + } + } + } + return s; + } + const char* Name() const override { return "MissingWalFs"; } + }; + auto my_fs = + std::make_shared(env_->GetFileSystem(), &wals_go_missing); + std::unique_ptr my_env(NewCompositeEnv(my_fs)); + options.env = my_env.get(); + + CreateAndReopenWithCF({"blah"}, options); + + // Currently necessary to get a WAL tracked in manifest; see + // https://github.com/facebook/rocksdb/issues/10080 + ASSERT_OK(Put(0, "x", "y")); + ASSERT_OK(db_->SyncWAL()); + ASSERT_OK(Put(1, "x", "y")); + ASSERT_OK(db_->SyncWAL()); + ASSERT_OK(Flush(1)); + + ASSERT_FALSE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty()); + std::vector> wals; + ASSERT_OK(db_->GetSortedWalFiles(wals)); + wals_go_missing = true; + ASSERT_NOK(db_->GetSortedWalFiles(wals)); + wals_go_missing = false; + Close(); +} + #endif // ROCKSDB_LITE TEST_F(DBWALTest, WalTermTest) {