From db03172d08d85472fe70d9f6a0a901f6ae090821 Mon Sep 17 00:00:00 2001 From: mrambacher Date: Mon, 7 Dec 2020 20:09:55 -0800 Subject: [PATCH] Change ErrorHandler methods to return const Status& (#7539) Summary: This change eliminates the need for a lot of the PermitUncheckedError calls on return from ErrorHandler methods. The calls are no longer needed as the status is returned as a reference rather than a copy. Additionally, this means that the originating status (recovery_error_, bg_error_) is not cleared implicitly as a result of calling one of these methods. For this class, I do not know if the proper behavior should be to call PermitUncheckedError in the destructor or if the checked state should be cleared when the status is cleared. I did tests both ways. Without the code in the destructor, the status will need to be cleared in at least some of the places where it is set to OK. When running tests, I found no instances where this class was destructed with a non-OK, non-checked Status. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7539 Reviewed By: anand1976 Differential Revision: D25340565 Pulled By: pdillinger fbshipit-source-id: 1730c035c81a475875ea745226112030ec25136c --- db/compaction/compaction_job.cc | 4 +- db/db_impl/db_impl.cc | 3 +- db/db_impl/db_impl_compaction_flush.cc | 73 +++++++++----------------- db/db_impl/db_impl_debug.cc | 2 +- db/db_impl/db_impl_write.cc | 18 ++----- db/error_handler.cc | 46 +++++++++------- db/error_handler.h | 20 ++++--- 7 files changed, 68 insertions(+), 98 deletions(-) diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index a517a2015..94182c50d 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1555,9 +1555,7 @@ Status CompactionJob::FinishCompactionOutputFile( "CompactionJob::FinishCompactionOutputFile:" "MaxAllowedSpaceReached"); InstrumentedMutexLock l(db_mutex_); - // Should handle return error? - db_error_handler_->SetBGError(s, BackgroundErrorReason::kCompaction) - .PermitUncheckedError(); + db_error_handler_->SetBGError(s, BackgroundErrorReason::kCompaction); } } #endif diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 0a7722692..b43dfe23d 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -4607,8 +4607,7 @@ Status DBImpl::IngestExternalFiles( // TODO: distinguish between MANIFEST write and CURRENT renaming const IOStatus& io_s = versions_->io_status(); // Should handle return error? - error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite); } // Resume writes to the DB diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index ee5756cf5..7255f6d30 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -130,12 +130,10 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) { } if (!io_s.ok()) { if (total_log_size_ > 0) { - error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush); } else { // If the WAL is empty, we use different error reason - error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL); } TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Failed"); return io_s; @@ -192,8 +190,7 @@ Status DBImpl::FlushMemTableToOutputFile( s = io_s; if (!io_s.ok() && !io_s.IsShutdownInProgress() && !io_s.IsColumnFamilyDropped()) { - error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush); } } else { TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Skip"); @@ -253,31 +250,23 @@ Status DBImpl::FlushMemTableToOutputFile( // be pessimistic and try write to a new MANIFEST. // TODO: distinguish between MANIFEST write and CURRENT renaming if (!versions_->io_status().ok()) { - // Should handle return error? if (total_log_size_ > 0) { // If the WAL is empty, we use different error reason - error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, + BackgroundErrorReason::kManifestWrite); } else { - error_handler_ - .SetBGError(io_s, BackgroundErrorReason::kManifestWriteNoWAL) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, + BackgroundErrorReason::kManifestWriteNoWAL); } } else if (total_log_size_ > 0) { - // Should handle return error? - error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush); } else { // If the WAL is empty, we use different error reason - // Should handle return error? - error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL); } } else { Status new_bg_error = s; - // Should handle return error? - error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush) - .PermitUncheckedError(); + error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush); } } else { // If we got here, then we decided not to care about the i_os status (either @@ -302,9 +291,7 @@ Status DBImpl::FlushMemTableToOutputFile( TEST_SYNC_POINT_CALLBACK( "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached", &new_bg_error); - // Should handle this error? - error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush) - .PermitUncheckedError(); + error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush); } } #endif // ROCKSDB_LITE @@ -645,9 +632,8 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( error_handler_.GetBGError().ok()) { Status new_bg_error = Status::SpaceLimit("Max allowed space was reached"); - // Should Handle this error? - error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush) - .PermitUncheckedError(); + error_handler_.SetBGError(new_bg_error, + BackgroundErrorReason::kFlush); } } } @@ -664,31 +650,23 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( // be pessimistic and try write to a new MANIFEST. // TODO: distinguish between MANIFEST write and CURRENT renaming if (!versions_->io_status().ok()) { - // Should handle return error? if (total_log_size_ > 0) { // If the WAL is empty, we use different error reason - error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, + BackgroundErrorReason::kManifestWrite); } else { - error_handler_ - .SetBGError(io_s, BackgroundErrorReason::kManifestWriteNoWAL) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, + BackgroundErrorReason::kManifestWriteNoWAL); } } else if (total_log_size_ > 0) { - // Should Handle this error? - error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush); } else { // If the WAL is empty, we use different error reason - // Should Handle this error? - error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL); } } else { Status new_bg_error = s; - // Should Handle this error? - error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush) - .PermitUncheckedError(); + error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush); } } @@ -1266,11 +1244,9 @@ Status DBImpl::CompactFilesImpl( job_context->job_id, status.ToString().c_str()); IOStatus io_s = compaction_job.io_status(); if (!io_s.ok()) { - error_handler_.SetBGError(io_s, BackgroundErrorReason::kCompaction) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, BackgroundErrorReason::kCompaction); } else { - error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction) - .PermitUncheckedError(); + error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction); } } @@ -3121,10 +3097,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, auto err_reason = versions_->io_status().ok() ? BackgroundErrorReason::kCompaction : BackgroundErrorReason::kManifestWrite; - error_handler_.SetBGError(io_s, err_reason).PermitUncheckedError(); + error_handler_.SetBGError(io_s, err_reason); } else { - error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction) - .PermitUncheckedError(); + error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction); } if (c != nullptr && !is_manual && !error_handler_.IsBGWorkStopped()) { // Put this cfd back in the compaction queue so we can retry after some diff --git a/db/db_impl/db_impl_debug.cc b/db/db_impl/db_impl_debug.cc index c21c9fa8f..4825fb6ef 100644 --- a/db/db_impl/db_impl_debug.cc +++ b/db/db_impl/db_impl_debug.cc @@ -170,7 +170,7 @@ Status DBImpl::TEST_WaitForCompact(bool wait_unscheduled) { while ((bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ || bg_flush_scheduled_ || (wait_unscheduled && unscheduled_compactions_)) && - (error_handler_.GetBGError() == Status::OK())) { + (error_handler_.GetBGError().ok())) { bg_cv_.Wait(); } return error_handler_.GetBGError(); diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 02ef5671e..1a459c36a 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -850,8 +850,7 @@ void DBImpl::WriteStatusCheckOnLocked(const Status& status) { if (immutable_db_options_.paranoid_checks && !status.ok() && !status.IsBusy() && !status.IsIncomplete()) { // Maybe change the return status to void? - error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback) - .PermitUncheckedError(); + error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback); } } @@ -863,8 +862,7 @@ void DBImpl::WriteStatusCheck(const Status& status) { !status.IsBusy() && !status.IsIncomplete()) { mutex_.Lock(); // Maybe change the return status to void? - error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback) - .PermitUncheckedError(); + error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback); mutex_.Unlock(); } } @@ -877,8 +875,7 @@ void DBImpl::IOStatusCheck(const IOStatus& io_status) { io_status.IsIOFenced()) { mutex_.Lock(); // Maybe change the return status to void? - error_handler_.SetBGError(io_status, BackgroundErrorReason::kWriteCallback) - .PermitUncheckedError(); + error_handler_.SetBGError(io_status, BackgroundErrorReason::kWriteCallback); mutex_.Unlock(); } } @@ -1810,15 +1807,10 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) { } // We may have lost data from the WritableFileBuffer in-memory buffer for // the current log, so treat it as a fatal error and set bg_error - // Should handle return error? if (!io_s.ok()) { - // Should handle return error? - error_handler_.SetBGError(io_s, BackgroundErrorReason::kMemTable) - .PermitUncheckedError(); + error_handler_.SetBGError(io_s, BackgroundErrorReason::kMemTable); } else { - // Should handle return error? - error_handler_.SetBGError(s, BackgroundErrorReason::kMemTable) - .PermitUncheckedError(); + error_handler_.SetBGError(s, BackgroundErrorReason::kMemTable); } // Read back bg_error in order to get the right severity s = error_handler_.GetBGError(); diff --git a/db/error_handler.cc b/db/error_handler.cc index f2aabde5f..f121519f4 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -267,10 +267,11 @@ void ErrorHandler::CancelErrorRecovery() { // This can also get called as part of a recovery operation. In that case, we // also track the error separately in recovery_error_ so we can tell in the // end whether recovery succeeded or not -Status ErrorHandler::SetBGError(const Status& bg_err, BackgroundErrorReason reason) { +const Status& ErrorHandler::SetBGError(const Status& bg_err, + BackgroundErrorReason reason) { db_mutex_->AssertHeld(); if (bg_err.ok()) { - return Status::OK(); + return bg_err; } bool paranoid = db_options_.paranoid_checks; @@ -363,11 +364,11 @@ Status ErrorHandler::SetBGError(const Status& bg_err, BackgroundErrorReason reas // c) all other errors are mapped to hard error. // 3) for other cases, SetBGError(const Status& bg_err, BackgroundErrorReason // reason) will be called to handle other error cases. -Status ErrorHandler::SetBGError(const IOStatus& bg_io_err, - BackgroundErrorReason reason) { +const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err, + BackgroundErrorReason reason) { db_mutex_->AssertHeld(); if (bg_io_err.ok()) { - return Status::OK(); + return bg_io_err; } ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s", bg_io_err.ToString().c_str()); @@ -382,7 +383,6 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err, } Status new_bg_io_err = bg_io_err; - Status s; DBRecoverContext context; if (bg_io_err.GetDataLoss()) { // First, data loss is treated as unrecoverable error. So it can directly @@ -393,8 +393,8 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err, if (recovery_in_prog_ && recovery_error_.ok()) { recovery_error_ = bg_err; } - EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s, - db_mutex_, &auto_recovery); + EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, + &bg_err, db_mutex_, &auto_recovery); recover_context_ = context; return bg_error_; } else if (bg_io_err.GetRetryable()) { @@ -405,8 +405,9 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err, // soft error. In other cases, treat the retryable IO error as hard // error. bool auto_recovery = false; - EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s, - db_mutex_, &auto_recovery); + EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, + &new_bg_io_err, db_mutex_, + &auto_recovery); if (BackgroundErrorReason::kCompaction == reason) { Status bg_err(new_bg_io_err, Status::Severity::kSoftError); if (bg_err.severity() > bg_error_.severity()) { @@ -446,12 +447,11 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err, return StartRecoverFromRetryableBGIOError(bg_io_err); } } else { - s = SetBGError(new_bg_io_err, reason); + return SetBGError(new_bg_io_err, reason); } - return s; } -Status ErrorHandler::OverrideNoSpaceError(Status bg_error, +Status ErrorHandler::OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery) { #ifndef ROCKSDB_LITE if (bg_error.severity() >= Status::Severity::kFatalError) { @@ -507,7 +507,11 @@ Status ErrorHandler::ClearBGError() { // Signal that recovery succeeded if (recovery_error_.ok()) { Status old_bg_error = bg_error_; + // Clear and check the recovery IO and BG error bg_error_ = Status::OK(); + recovery_io_error_ = IOStatus::OK(); + bg_error_.PermitUncheckedError(); + recovery_io_error_.PermitUncheckedError(); recovery_in_prog_ = false; soft_error_no_bg_work_ = false; EventHelpers::NotifyOnErrorRecoveryCompleted(db_options_.listeners, @@ -557,6 +561,7 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) { // during the recovery process. While recovering, the only operations that // can generate background errors should be the flush operations recovery_error_ = Status::OK(); + recovery_error_.PermitUncheckedError(); Status s = db_->ResumeImpl(recover_context_); if (s.ok()) { soft_error_no_bg_work_ = false; @@ -578,13 +583,15 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) { #endif } -Status ErrorHandler::StartRecoverFromRetryableBGIOError(IOStatus io_error) { +const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( + const IOStatus& io_error) { #ifndef ROCKSDB_LITE db_mutex_->AssertHeld(); - if (bg_error_.ok() || io_error.ok()) { - return Status::OK(); - } - if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) { + if (bg_error_.ok()) { + return bg_error_; + } else if (io_error.ok()) { + return io_error; + } else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) { // Auto resume BG error is not enabled, directly return bg_error_. return bg_error_; } @@ -603,7 +610,7 @@ Status ErrorHandler::StartRecoverFromRetryableBGIOError(IOStatus io_error) { new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this)); if (recovery_io_error_.ok() && recovery_error_.ok()) { - return Status::OK(); + return recovery_error_; } else { TEST_SYNC_POINT("StartRecoverRetryableBGIOError:RecoverFail"); return bg_error_; @@ -668,6 +675,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverSuccess"); Status old_bg_error = bg_error_; bg_error_ = Status::OK(); + bg_error_.PermitUncheckedError(); EventHelpers::NotifyOnErrorRecoveryCompleted(db_options_.listeners, old_bg_error, db_mutex_); recovery_in_prog_ = false; diff --git a/db/error_handler.h b/db/error_handler.h index 084434101..acd09514e 100644 --- a/db/error_handler.h +++ b/db/error_handler.h @@ -31,17 +31,14 @@ class ErrorHandler { InstrumentedMutex* db_mutex) : db_(db), db_options_(db_options), - bg_error_(Status::OK()), - recovery_error_(Status::OK()), - recovery_io_error_(IOStatus::OK()), cv_(db_mutex), end_recovery_(false), recovery_thread_(nullptr), db_mutex_(db_mutex), auto_recovery_(false), recovery_in_prog_(false), - soft_error_no_bg_work_(false) {} - ~ErrorHandler() { + soft_error_no_bg_work_(false) { + // Clear the checked flag for uninitialized errors bg_error_.PermitUncheckedError(); recovery_error_.PermitUncheckedError(); recovery_io_error_.PermitUncheckedError(); @@ -53,13 +50,14 @@ class ErrorHandler { Status::Code code, Status::SubCode subcode); - Status SetBGError(const Status& bg_err, BackgroundErrorReason reason); + const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason); - Status SetBGError(const IOStatus& bg_io_err, BackgroundErrorReason reason); + const Status& SetBGError(const IOStatus& bg_io_err, + BackgroundErrorReason reason); - Status GetBGError() { return bg_error_; } + Status GetBGError() const { return bg_error_; } - Status GetRecoveryError() { return recovery_error_; } + Status GetRecoveryError() const { return recovery_error_; } Status ClearBGError(); @@ -110,9 +108,9 @@ class ErrorHandler { // Used to store the context for recover, such as flush reason. DBRecoverContext recover_context_; - Status OverrideNoSpaceError(Status bg_error, bool* auto_recovery); + Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery); void RecoverFromNoSpace(); - Status StartRecoverFromRetryableBGIOError(IOStatus io_error); + const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error); void RecoverFromRetryableBGIOError(); };