Change ErrorHandler methods to return const Status& (#7539)

Summary:
This change eliminates the need for a lot of the PermitUncheckedError calls on return from ErrorHandler methods.  The calls are no longer needed as the status is returned as a reference rather than a copy.  Additionally, this means that the originating status (recovery_error_, bg_error_) is not cleared implicitly as a result of calling one of these methods.

For this class, I do not know if the proper behavior should be to call PermitUncheckedError in the destructor or if the checked state should be cleared when the status is cleared.  I did tests both ways.  Without the code in the destructor, the status will need to be cleared in at least some of the places where it is set to OK.  When running tests, I found no instances where this class was destructed with a non-OK, non-checked Status.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7539

Reviewed By: anand1976

Differential Revision: D25340565

Pulled By: pdillinger

fbshipit-source-id: 1730c035c81a475875ea745226112030ec25136c
main
mrambacher 4 years ago committed by Facebook GitHub Bot
parent 8a06fe278f
commit db03172d08
  1. 4
      db/compaction/compaction_job.cc
  2. 3
      db/db_impl/db_impl.cc
  3. 73
      db/db_impl/db_impl_compaction_flush.cc
  4. 2
      db/db_impl/db_impl_debug.cc
  5. 18
      db/db_impl/db_impl_write.cc
  6. 44
      db/error_handler.cc
  7. 20
      db/error_handler.h

@ -1555,9 +1555,7 @@ Status CompactionJob::FinishCompactionOutputFile(
"CompactionJob::FinishCompactionOutputFile:"
"MaxAllowedSpaceReached");
InstrumentedMutexLock l(db_mutex_);
// Should handle return error?
db_error_handler_->SetBGError(s, BackgroundErrorReason::kCompaction)
.PermitUncheckedError();
db_error_handler_->SetBGError(s, BackgroundErrorReason::kCompaction);
}
}
#endif

@ -4607,8 +4607,7 @@ Status DBImpl::IngestExternalFiles(
// TODO: distinguish between MANIFEST write and CURRENT renaming
const IOStatus& io_s = versions_->io_status();
// Should handle return error?
error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
.PermitUncheckedError();
error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite);
}
// Resume writes to the DB

@ -130,12 +130,10 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) {
}
if (!io_s.ok()) {
if (total_log_size_ > 0) {
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush)
.PermitUncheckedError();
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
} else {
// If the WAL is empty, we use different error reason
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL)
.PermitUncheckedError();
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
}
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Failed");
return io_s;
@ -192,8 +190,7 @@ Status DBImpl::FlushMemTableToOutputFile(
s = io_s;
if (!io_s.ok() && !io_s.IsShutdownInProgress() &&
!io_s.IsColumnFamilyDropped()) {
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush)
.PermitUncheckedError();
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
}
} else {
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Skip");
@ -253,31 +250,23 @@ Status DBImpl::FlushMemTableToOutputFile(
// be pessimistic and try write to a new MANIFEST.
// TODO: distinguish between MANIFEST write and CURRENT renaming
if (!versions_->io_status().ok()) {
// Should handle return error?
if (total_log_size_ > 0) {
// If the WAL is empty, we use different error reason
error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
.PermitUncheckedError();
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWrite);
} else {
error_handler_
.SetBGError(io_s, BackgroundErrorReason::kManifestWriteNoWAL)
.PermitUncheckedError();
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWriteNoWAL);
}
} else if (total_log_size_ > 0) {
// Should handle return error?
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush)
.PermitUncheckedError();
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
} else {
// If the WAL is empty, we use different error reason
// Should handle return error?
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL)
.PermitUncheckedError();
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
}
} else {
Status new_bg_error = s;
// Should handle return error?
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush)
.PermitUncheckedError();
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
}
} else {
// If we got here, then we decided not to care about the i_os status (either
@ -302,9 +291,7 @@ Status DBImpl::FlushMemTableToOutputFile(
TEST_SYNC_POINT_CALLBACK(
"DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached",
&new_bg_error);
// Should handle this error?
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush)
.PermitUncheckedError();
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
}
}
#endif // ROCKSDB_LITE
@ -645,9 +632,8 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
error_handler_.GetBGError().ok()) {
Status new_bg_error =
Status::SpaceLimit("Max allowed space was reached");
// Should Handle this error?
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush)
.PermitUncheckedError();
error_handler_.SetBGError(new_bg_error,
BackgroundErrorReason::kFlush);
}
}
}
@ -664,31 +650,23 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
// be pessimistic and try write to a new MANIFEST.
// TODO: distinguish between MANIFEST write and CURRENT renaming
if (!versions_->io_status().ok()) {
// Should handle return error?
if (total_log_size_ > 0) {
// If the WAL is empty, we use different error reason
error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
.PermitUncheckedError();
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWrite);
} else {
error_handler_
.SetBGError(io_s, BackgroundErrorReason::kManifestWriteNoWAL)
.PermitUncheckedError();
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWriteNoWAL);
}
} else if (total_log_size_ > 0) {
// Should Handle this error?
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush)
.PermitUncheckedError();
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
} else {
// If the WAL is empty, we use different error reason
// Should Handle this error?
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL)
.PermitUncheckedError();
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
}
} else {
Status new_bg_error = s;
// Should Handle this error?
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush)
.PermitUncheckedError();
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
}
}
@ -1266,11 +1244,9 @@ Status DBImpl::CompactFilesImpl(
job_context->job_id, status.ToString().c_str());
IOStatus io_s = compaction_job.io_status();
if (!io_s.ok()) {
error_handler_.SetBGError(io_s, BackgroundErrorReason::kCompaction)
.PermitUncheckedError();
error_handler_.SetBGError(io_s, BackgroundErrorReason::kCompaction);
} else {
error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction)
.PermitUncheckedError();
error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction);
}
}
@ -3121,10 +3097,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
auto err_reason = versions_->io_status().ok()
? BackgroundErrorReason::kCompaction
: BackgroundErrorReason::kManifestWrite;
error_handler_.SetBGError(io_s, err_reason).PermitUncheckedError();
error_handler_.SetBGError(io_s, err_reason);
} else {
error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction)
.PermitUncheckedError();
error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction);
}
if (c != nullptr && !is_manual && !error_handler_.IsBGWorkStopped()) {
// Put this cfd back in the compaction queue so we can retry after some

@ -170,7 +170,7 @@ Status DBImpl::TEST_WaitForCompact(bool wait_unscheduled) {
while ((bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
bg_flush_scheduled_ ||
(wait_unscheduled && unscheduled_compactions_)) &&
(error_handler_.GetBGError() == Status::OK())) {
(error_handler_.GetBGError().ok())) {
bg_cv_.Wait();
}
return error_handler_.GetBGError();

@ -850,8 +850,7 @@ void DBImpl::WriteStatusCheckOnLocked(const Status& status) {
if (immutable_db_options_.paranoid_checks && !status.ok() &&
!status.IsBusy() && !status.IsIncomplete()) {
// Maybe change the return status to void?
error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback)
.PermitUncheckedError();
error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback);
}
}
@ -863,8 +862,7 @@ void DBImpl::WriteStatusCheck(const Status& status) {
!status.IsBusy() && !status.IsIncomplete()) {
mutex_.Lock();
// Maybe change the return status to void?
error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback)
.PermitUncheckedError();
error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback);
mutex_.Unlock();
}
}
@ -877,8 +875,7 @@ void DBImpl::IOStatusCheck(const IOStatus& io_status) {
io_status.IsIOFenced()) {
mutex_.Lock();
// Maybe change the return status to void?
error_handler_.SetBGError(io_status, BackgroundErrorReason::kWriteCallback)
.PermitUncheckedError();
error_handler_.SetBGError(io_status, BackgroundErrorReason::kWriteCallback);
mutex_.Unlock();
}
}
@ -1810,15 +1807,10 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
}
// We may have lost data from the WritableFileBuffer in-memory buffer for
// the current log, so treat it as a fatal error and set bg_error
// Should handle return error?
if (!io_s.ok()) {
// Should handle return error?
error_handler_.SetBGError(io_s, BackgroundErrorReason::kMemTable)
.PermitUncheckedError();
error_handler_.SetBGError(io_s, BackgroundErrorReason::kMemTable);
} else {
// Should handle return error?
error_handler_.SetBGError(s, BackgroundErrorReason::kMemTable)
.PermitUncheckedError();
error_handler_.SetBGError(s, BackgroundErrorReason::kMemTable);
}
// Read back bg_error in order to get the right severity
s = error_handler_.GetBGError();

@ -267,10 +267,11 @@ void ErrorHandler::CancelErrorRecovery() {
// This can also get called as part of a recovery operation. In that case, we
// also track the error separately in recovery_error_ so we can tell in the
// end whether recovery succeeded or not
Status ErrorHandler::SetBGError(const Status& bg_err, BackgroundErrorReason reason) {
const Status& ErrorHandler::SetBGError(const Status& bg_err,
BackgroundErrorReason reason) {
db_mutex_->AssertHeld();
if (bg_err.ok()) {
return Status::OK();
return bg_err;
}
bool paranoid = db_options_.paranoid_checks;
@ -363,11 +364,11 @@ Status ErrorHandler::SetBGError(const Status& bg_err, BackgroundErrorReason reas
// c) all other errors are mapped to hard error.
// 3) for other cases, SetBGError(const Status& bg_err, BackgroundErrorReason
// reason) will be called to handle other error cases.
Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
BackgroundErrorReason reason) {
db_mutex_->AssertHeld();
if (bg_io_err.ok()) {
return Status::OK();
return bg_io_err;
}
ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s",
bg_io_err.ToString().c_str());
@ -382,7 +383,6 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
}
Status new_bg_io_err = bg_io_err;
Status s;
DBRecoverContext context;
if (bg_io_err.GetDataLoss()) {
// First, data loss is treated as unrecoverable error. So it can directly
@ -393,8 +393,8 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
if (recovery_in_prog_ && recovery_error_.ok()) {
recovery_error_ = bg_err;
}
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
db_mutex_, &auto_recovery);
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
&bg_err, db_mutex_, &auto_recovery);
recover_context_ = context;
return bg_error_;
} else if (bg_io_err.GetRetryable()) {
@ -405,8 +405,9 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
// soft error. In other cases, treat the retryable IO error as hard
// error.
bool auto_recovery = false;
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
db_mutex_, &auto_recovery);
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
&new_bg_io_err, db_mutex_,
&auto_recovery);
if (BackgroundErrorReason::kCompaction == reason) {
Status bg_err(new_bg_io_err, Status::Severity::kSoftError);
if (bg_err.severity() > bg_error_.severity()) {
@ -446,12 +447,11 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
return StartRecoverFromRetryableBGIOError(bg_io_err);
}
} else {
s = SetBGError(new_bg_io_err, reason);
return SetBGError(new_bg_io_err, reason);
}
return s;
}
Status ErrorHandler::OverrideNoSpaceError(Status bg_error,
Status ErrorHandler::OverrideNoSpaceError(const Status& bg_error,
bool* auto_recovery) {
#ifndef ROCKSDB_LITE
if (bg_error.severity() >= Status::Severity::kFatalError) {
@ -507,7 +507,11 @@ Status ErrorHandler::ClearBGError() {
// Signal that recovery succeeded
if (recovery_error_.ok()) {
Status old_bg_error = bg_error_;
// Clear and check the recovery IO and BG error
bg_error_ = Status::OK();
recovery_io_error_ = IOStatus::OK();
bg_error_.PermitUncheckedError();
recovery_io_error_.PermitUncheckedError();
recovery_in_prog_ = false;
soft_error_no_bg_work_ = false;
EventHelpers::NotifyOnErrorRecoveryCompleted(db_options_.listeners,
@ -557,6 +561,7 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) {
// during the recovery process. While recovering, the only operations that
// can generate background errors should be the flush operations
recovery_error_ = Status::OK();
recovery_error_.PermitUncheckedError();
Status s = db_->ResumeImpl(recover_context_);
if (s.ok()) {
soft_error_no_bg_work_ = false;
@ -578,13 +583,15 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) {
#endif
}
Status ErrorHandler::StartRecoverFromRetryableBGIOError(IOStatus io_error) {
const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
const IOStatus& io_error) {
#ifndef ROCKSDB_LITE
db_mutex_->AssertHeld();
if (bg_error_.ok() || io_error.ok()) {
return Status::OK();
}
if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
if (bg_error_.ok()) {
return bg_error_;
} else if (io_error.ok()) {
return io_error;
} else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
// Auto resume BG error is not enabled, directly return bg_error_.
return bg_error_;
}
@ -603,7 +610,7 @@ Status ErrorHandler::StartRecoverFromRetryableBGIOError(IOStatus io_error) {
new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this));
if (recovery_io_error_.ok() && recovery_error_.ok()) {
return Status::OK();
return recovery_error_;
} else {
TEST_SYNC_POINT("StartRecoverRetryableBGIOError:RecoverFail");
return bg_error_;
@ -668,6 +675,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverSuccess");
Status old_bg_error = bg_error_;
bg_error_ = Status::OK();
bg_error_.PermitUncheckedError();
EventHelpers::NotifyOnErrorRecoveryCompleted(db_options_.listeners,
old_bg_error, db_mutex_);
recovery_in_prog_ = false;

@ -31,17 +31,14 @@ class ErrorHandler {
InstrumentedMutex* db_mutex)
: db_(db),
db_options_(db_options),
bg_error_(Status::OK()),
recovery_error_(Status::OK()),
recovery_io_error_(IOStatus::OK()),
cv_(db_mutex),
end_recovery_(false),
recovery_thread_(nullptr),
db_mutex_(db_mutex),
auto_recovery_(false),
recovery_in_prog_(false),
soft_error_no_bg_work_(false) {}
~ErrorHandler() {
soft_error_no_bg_work_(false) {
// Clear the checked flag for uninitialized errors
bg_error_.PermitUncheckedError();
recovery_error_.PermitUncheckedError();
recovery_io_error_.PermitUncheckedError();
@ -53,13 +50,14 @@ class ErrorHandler {
Status::Code code,
Status::SubCode subcode);
Status SetBGError(const Status& bg_err, BackgroundErrorReason reason);
const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);
Status SetBGError(const IOStatus& bg_io_err, BackgroundErrorReason reason);
const Status& SetBGError(const IOStatus& bg_io_err,
BackgroundErrorReason reason);
Status GetBGError() { return bg_error_; }
Status GetBGError() const { return bg_error_; }
Status GetRecoveryError() { return recovery_error_; }
Status GetRecoveryError() const { return recovery_error_; }
Status ClearBGError();
@ -110,9 +108,9 @@ class ErrorHandler {
// Used to store the context for recover, such as flush reason.
DBRecoverContext recover_context_;
Status OverrideNoSpaceError(Status bg_error, bool* auto_recovery);
Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
void RecoverFromNoSpace();
Status StartRecoverFromRetryableBGIOError(IOStatus io_error);
const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
void RecoverFromRetryableBGIOError();
};

Loading…
Cancel
Save