diff --git a/db/error_handler.cc b/db/error_handler.cc
index dc8f6a0eb..925dd87ff 100644
--- a/db/error_handler.cc
+++ b/db/error_handler.cc
@@ -406,10 +406,7 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
     // it can directly overwrite any existing bg_error_.
     bool auto_recovery = false;
     Status bg_err(new_bg_io_err, Status::Severity::kUnrecoverableError);
-    bg_error_ = bg_err;
-    if (recovery_in_prog_ && recovery_error_.ok()) {
-      recovery_error_ = bg_err;
-    }
+    CheckAndSetRecoveryAndBGError(bg_err);
     if (bg_error_stats_ != nullptr) {
       RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT);
       RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT);
@@ -468,24 +465,14 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
       // to many small memtable being generated during auto resume, the flush
       // reason is set to kErrorRecoveryRetryFlush.
       Status bg_err(new_bg_io_err, Status::Severity::kSoftError);
-      if (recovery_in_prog_ && recovery_error_.ok()) {
-        recovery_error_ = bg_err;
-      }
-      if (bg_err.severity() > bg_error_.severity()) {
-        bg_error_ = bg_err;
-      }
+      CheckAndSetRecoveryAndBGError(bg_err);
       soft_error_no_bg_work_ = true;
       context.flush_reason = FlushReason::kErrorRecoveryRetryFlush;
       recover_context_ = context;
       return StartRecoverFromRetryableBGIOError(bg_io_err);
     } else {
       Status bg_err(new_bg_io_err, Status::Severity::kHardError);
-      if (recovery_in_prog_ && recovery_error_.ok()) {
-        recovery_error_ = bg_err;
-      }
-      if (bg_err.severity() > bg_error_.severity()) {
-        bg_error_ = bg_err;
-      }
+      CheckAndSetRecoveryAndBGError(bg_err);
       recover_context_ = context;
       return StartRecoverFromRetryableBGIOError(bg_io_err);
     }
@@ -663,7 +650,6 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
   if (recovery_io_error_.ok() && recovery_error_.ok()) {
     return recovery_error_;
   } else {
-    TEST_SYNC_POINT("StartRecoverRetryableBGIOError:RecoverFail");
     return bg_error_;
   }
 #else
@@ -677,7 +663,6 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
 void ErrorHandler::RecoverFromRetryableBGIOError() {
 #ifndef ROCKSDB_LITE
   TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart");
-  TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart1");
   InstrumentedMutexLock l(db_mutex_);
   if (end_recovery_) {
     return;
@@ -697,8 +682,6 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
     recovery_error_ = Status::OK();
     retry_count++;
     Status s = db_->ResumeImpl(context);
-    TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterResume0");
-    TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterResume1");
     if (bg_error_stats_ != nullptr) {
       RecordTick(bg_error_stats_.get(),
                  ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT);
@@ -707,7 +690,6 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
         bg_error_.severity() >= Status::Severity::kFatalError) {
       // If DB shutdown in progress or the error severity is higher than
       // Hard Error, stop auto resume and returns.
-      TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverFail0");
       recovery_in_prog_ = false;
       if (bg_error_stats_ != nullptr) {
         RecordInHistogram(bg_error_stats_.get(),
@@ -725,7 +707,6 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
       TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait1");
       int64_t wait_until = db_options_.clock->NowMicros() + wait_interval;
       cv_.TimedWait(wait_until);
-      TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterWait0");
     } else {
       // There are three possibility: 1) recover_io_error is set during resume
       // and the error is not retryable, 2) recover is successful, 3) other
@@ -751,7 +732,6 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
         }
         return;
       } else {
-        TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverFail1");
         // In this case: 1) recovery_io_error is more serious or not retryable
         // 2) other Non IO recovery_error happens. The auto recovery stops.
         recovery_in_prog_ = false;
@@ -776,6 +756,16 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
 #endif
 }
 
+void ErrorHandler::CheckAndSetRecoveryAndBGError(const Status& bg_err) {
+  if (recovery_in_prog_ && recovery_error_.ok()) {
+    recovery_error_ = bg_err;
+  }
+  if (bg_err.severity() > bg_error_.severity()) {
+    bg_error_ = bg_err;
+  }
+  return;
+}
+
 void ErrorHandler::EndAutoRecovery() {
   db_mutex_->AssertHeld();
   if (!end_recovery_) {
diff --git a/db/error_handler.h b/db/error_handler.h
index ab1169bc9..3e05ee6cc 100644
--- a/db/error_handler.h
+++ b/db/error_handler.h
@@ -116,6 +116,10 @@ class ErrorHandler {
   void RecoverFromNoSpace();
   const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
   void RecoverFromRetryableBGIOError();
+  // First, if recovery is in progress and recovery_error_ is OK, set
+  // recovery_error_ to bg_err. Second, if bg_err is more severe than the
+  // current bg_error_, overwrite bg_error_ with it.
+  void CheckAndSetRecoveryAndBGError(const Status& bg_err);
 };
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc
index d676142f7..3dff3ee36 100644
--- a/db/error_handler_fs_test.cc
+++ b/db/error_handler_fs_test.cc
@@ -487,6 +487,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWALAtomicWriteRetryableError) {
   Destroy(options);
 }
 
+// The flush error is injected before we finish the table build.
 TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) {
   std::shared_ptr<ErrorHandlerFSListener> listener(
       new ErrorHandlerFSListener());
@@ -542,6 +543,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) {
   Destroy(options);
 }
 
+// The retryable IO error is injected before we sync the table.
 TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError2) {
   std::shared_ptr<ErrorHandlerFSListener> listener(
       new ErrorHandlerFSListener());
@@ -585,6 +587,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError2) {
   Destroy(options);
 }
 
+// The retryable IO error is injected before we close the table file.
 TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError3) {
   std::shared_ptr<ErrorHandlerFSListener> listener(
       new ErrorHandlerFSListener());
@@ -1874,6 +1877,8 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover2) {
   Destroy(options);
 }
+// Auto resume from the flush retryable IO error. Activate the FS before the
+// first resume. Resume is successful.
 TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover1) {
   // Activate the FS before the first resume
   std::shared_ptr<ErrorHandlerFSListener> listener(
       new ErrorHandlerFSListener());
@@ -1914,6 +1919,8 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover1) {
   Destroy(options);
 }
+// Auto resume from the flush retryable IO error and set the retry limit count.
+// Never activate the FS, and auto resume should fail in the end.
 TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) {
   // Fail all the resume and let user to resume
   std::shared_ptr<ErrorHandlerFSListener> listener(
       new ErrorHandlerFSListener());
@@ -1963,6 +1970,8 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) {
   Destroy(options);
 }
+// Auto resume from the flush retryable IO error and set the retry limit count.
+// Fail the first resume and let the second resume be successful.
 TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableErrorAutoRecover) {
   // Fail the first resume and let the second resume be successful
   std::shared_ptr<ErrorHandlerFSListener> listener(
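Below is a minimal, self-contained C++ sketch of the promotion rule that the new CheckAndSetRecoveryAndBGError() helper consolidates: the error of an in-progress recovery is recorded only if none has been recorded yet, and bg_error_ is overwritten only by a strictly more severe status. This is not RocksDB code; ToyStatus and ToyErrorHandler are simplified stand-ins invented for illustration in place of rocksdb::Status and rocksdb::ErrorHandler.

#include <cassert>

// Severity values ordered from least to most severe, mirroring the ordering
// assumed by the "bg_err.severity() > bg_error_.severity()" comparison.
enum class Severity { kNoError = 0, kSoftError, kHardError, kFatalError, kUnrecoverableError };

struct ToyStatus {
  Severity severity = Severity::kNoError;
  bool ok() const { return severity == Severity::kNoError; }
};

struct ToyErrorHandler {
  ToyStatus bg_error_;
  ToyStatus recovery_error_;
  bool recovery_in_prog_ = false;

  // Same shape as the helper added in this patch: record the error for an
  // ongoing recovery only once, then promote bg_error_ if bg_err is worse.
  void CheckAndSetRecoveryAndBGError(const ToyStatus& bg_err) {
    if (recovery_in_prog_ && recovery_error_.ok()) {
      recovery_error_ = bg_err;
    }
    if (bg_err.severity > bg_error_.severity) {
      bg_error_ = bg_err;
    }
  }
};

int main() {
  ToyErrorHandler handler;
  handler.recovery_in_prog_ = true;

  handler.CheckAndSetRecoveryAndBGError(ToyStatus{Severity::kHardError});
  assert(handler.bg_error_.severity == Severity::kHardError);
  assert(handler.recovery_error_.severity == Severity::kHardError);

  // A later, softer error neither demotes bg_error_ nor replaces the
  // already-recorded recovery error.
  handler.CheckAndSetRecoveryAndBGError(ToyStatus{Severity::kSoftError});
  assert(handler.bg_error_.severity == Severity::kHardError);
  assert(handler.recovery_error_.severity == Severity::kHardError);
  return 0;
}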