|
|
@ -406,10 +406,7 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err, |
|
|
|
// it can directly overwrite any existing bg_error_.
|
|
|
|
// it can directly overwrite any existing bg_error_.
|
|
|
|
bool auto_recovery = false; |
|
|
|
bool auto_recovery = false; |
|
|
|
Status bg_err(new_bg_io_err, Status::Severity::kUnrecoverableError); |
|
|
|
Status bg_err(new_bg_io_err, Status::Severity::kUnrecoverableError); |
|
|
|
bg_error_ = bg_err; |
|
|
|
CheckAndSetRecoveryAndBGError(bg_err); |
|
|
|
if (recovery_in_prog_ && recovery_error_.ok()) { |
|
|
|
|
|
|
|
recovery_error_ = bg_err; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (bg_error_stats_ != nullptr) { |
|
|
|
if (bg_error_stats_ != nullptr) { |
|
|
|
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT); |
|
|
|
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT); |
|
|
|
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT); |
|
|
|
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT); |
|
|
@ -468,24 +465,14 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err, |
|
|
|
// to many small memtable being generated during auto resume, the flush
|
|
|
|
// to many small memtable being generated during auto resume, the flush
|
|
|
|
// reason is set to kErrorRecoveryRetryFlush.
|
|
|
|
// reason is set to kErrorRecoveryRetryFlush.
|
|
|
|
Status bg_err(new_bg_io_err, Status::Severity::kSoftError); |
|
|
|
Status bg_err(new_bg_io_err, Status::Severity::kSoftError); |
|
|
|
if (recovery_in_prog_ && recovery_error_.ok()) { |
|
|
|
CheckAndSetRecoveryAndBGError(bg_err); |
|
|
|
recovery_error_ = bg_err; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (bg_err.severity() > bg_error_.severity()) { |
|
|
|
|
|
|
|
bg_error_ = bg_err; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
soft_error_no_bg_work_ = true; |
|
|
|
soft_error_no_bg_work_ = true; |
|
|
|
context.flush_reason = FlushReason::kErrorRecoveryRetryFlush; |
|
|
|
context.flush_reason = FlushReason::kErrorRecoveryRetryFlush; |
|
|
|
recover_context_ = context; |
|
|
|
recover_context_ = context; |
|
|
|
return StartRecoverFromRetryableBGIOError(bg_io_err); |
|
|
|
return StartRecoverFromRetryableBGIOError(bg_io_err); |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
Status bg_err(new_bg_io_err, Status::Severity::kHardError); |
|
|
|
Status bg_err(new_bg_io_err, Status::Severity::kHardError); |
|
|
|
if (recovery_in_prog_ && recovery_error_.ok()) { |
|
|
|
CheckAndSetRecoveryAndBGError(bg_err); |
|
|
|
recovery_error_ = bg_err; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (bg_err.severity() > bg_error_.severity()) { |
|
|
|
|
|
|
|
bg_error_ = bg_err; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
recover_context_ = context; |
|
|
|
recover_context_ = context; |
|
|
|
return StartRecoverFromRetryableBGIOError(bg_io_err); |
|
|
|
return StartRecoverFromRetryableBGIOError(bg_io_err); |
|
|
|
} |
|
|
|
} |
|
|
@ -663,7 +650,6 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( |
|
|
|
if (recovery_io_error_.ok() && recovery_error_.ok()) { |
|
|
|
if (recovery_io_error_.ok() && recovery_error_.ok()) { |
|
|
|
return recovery_error_; |
|
|
|
return recovery_error_; |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
TEST_SYNC_POINT("StartRecoverRetryableBGIOError:RecoverFail"); |
|
|
|
|
|
|
|
return bg_error_; |
|
|
|
return bg_error_; |
|
|
|
} |
|
|
|
} |
|
|
|
#else |
|
|
|
#else |
|
|
@ -677,7 +663,6 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( |
|
|
|
void ErrorHandler::RecoverFromRetryableBGIOError() { |
|
|
|
void ErrorHandler::RecoverFromRetryableBGIOError() { |
|
|
|
#ifndef ROCKSDB_LITE |
|
|
|
#ifndef ROCKSDB_LITE |
|
|
|
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart"); |
|
|
|
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart"); |
|
|
|
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart1"); |
|
|
|
|
|
|
|
InstrumentedMutexLock l(db_mutex_); |
|
|
|
InstrumentedMutexLock l(db_mutex_); |
|
|
|
if (end_recovery_) { |
|
|
|
if (end_recovery_) { |
|
|
|
return; |
|
|
|
return; |
|
|
@ -697,8 +682,6 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { |
|
|
|
recovery_error_ = Status::OK(); |
|
|
|
recovery_error_ = Status::OK(); |
|
|
|
retry_count++; |
|
|
|
retry_count++; |
|
|
|
Status s = db_->ResumeImpl(context); |
|
|
|
Status s = db_->ResumeImpl(context); |
|
|
|
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterResume0"); |
|
|
|
|
|
|
|
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterResume1"); |
|
|
|
|
|
|
|
if (bg_error_stats_ != nullptr) { |
|
|
|
if (bg_error_stats_ != nullptr) { |
|
|
|
RecordTick(bg_error_stats_.get(), |
|
|
|
RecordTick(bg_error_stats_.get(), |
|
|
|
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT); |
|
|
|
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT); |
|
|
@ -707,7 +690,6 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { |
|
|
|
bg_error_.severity() >= Status::Severity::kFatalError) { |
|
|
|
bg_error_.severity() >= Status::Severity::kFatalError) { |
|
|
|
// If DB shutdown in progress or the error severity is higher than
|
|
|
|
// If DB shutdown in progress or the error severity is higher than
|
|
|
|
// Hard Error, stop auto resume and returns.
|
|
|
|
// Hard Error, stop auto resume and returns.
|
|
|
|
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverFail0"); |
|
|
|
|
|
|
|
recovery_in_prog_ = false; |
|
|
|
recovery_in_prog_ = false; |
|
|
|
if (bg_error_stats_ != nullptr) { |
|
|
|
if (bg_error_stats_ != nullptr) { |
|
|
|
RecordInHistogram(bg_error_stats_.get(), |
|
|
|
RecordInHistogram(bg_error_stats_.get(), |
|
|
@ -725,7 +707,6 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { |
|
|
|
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait1"); |
|
|
|
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait1"); |
|
|
|
int64_t wait_until = db_options_.clock->NowMicros() + wait_interval; |
|
|
|
int64_t wait_until = db_options_.clock->NowMicros() + wait_interval; |
|
|
|
cv_.TimedWait(wait_until); |
|
|
|
cv_.TimedWait(wait_until); |
|
|
|
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterWait0"); |
|
|
|
|
|
|
|
} else { |
|
|
|
} else { |
|
|
|
// There are three possibility: 1) recover_io_error is set during resume
|
|
|
|
// There are three possibility: 1) recover_io_error is set during resume
|
|
|
|
// and the error is not retryable, 2) recover is successful, 3) other
|
|
|
|
// and the error is not retryable, 2) recover is successful, 3) other
|
|
|
@ -751,7 +732,6 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { |
|
|
|
} |
|
|
|
} |
|
|
|
return; |
|
|
|
return; |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverFail1"); |
|
|
|
|
|
|
|
// In this case: 1) recovery_io_error is more serious or not retryable
|
|
|
|
// In this case: 1) recovery_io_error is more serious or not retryable
|
|
|
|
// 2) other Non IO recovery_error happens. The auto recovery stops.
|
|
|
|
// 2) other Non IO recovery_error happens. The auto recovery stops.
|
|
|
|
recovery_in_prog_ = false; |
|
|
|
recovery_in_prog_ = false; |
|
|
@ -776,6 +756,16 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void ErrorHandler::CheckAndSetRecoveryAndBGError(const Status& bg_err) { |
|
|
|
|
|
|
|
if (recovery_in_prog_ && recovery_error_.ok()) { |
|
|
|
|
|
|
|
recovery_error_ = bg_err; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (bg_err.severity() > bg_error_.severity()) { |
|
|
|
|
|
|
|
bg_error_ = bg_err; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
return; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
void ErrorHandler::EndAutoRecovery() { |
|
|
|
void ErrorHandler::EndAutoRecovery() { |
|
|
|
db_mutex_->AssertHeld(); |
|
|
|
db_mutex_->AssertHeld(); |
|
|
|
if (!end_recovery_) { |
|
|
|
if (!end_recovery_) { |
|
|
|