diff --git a/HISTORY.md b/HISTORY.md index db3bb9f3d..9ebbb1a66 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,7 @@ # Rocksdb Change Log ## Unreleased +### Behavior Changes +* When retryable IO error occurs during compaction, it is mapped to soft error and set the BG error. However, auto resume is not called to clean the soft error since compaction will reschedule by itself. In this change, When retryable IO error occurs during compaction, BG error is not set. User will be informed the error via EventHelper. ## 6.17.0 (01/15/2021) ### Behavior Changes diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 71ef9505e..98d95927a 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -951,6 +951,9 @@ class DBImpl : public DB { // is only for the special test of CancelledCompactions Status TEST_WaitForCompact(bool waitUnscheduled = false); + // Get the background error status + Status TEST_GetBGError(); + // Return the maximum overlapping data (in bytes) at next level for any // file at a level >= 1. int64_t TEST_MaxNextLevelOverlappingBytes( diff --git a/db/db_impl/db_impl_debug.cc b/db/db_impl/db_impl_debug.cc index 44108d52e..e590607c6 100644 --- a/db/db_impl/db_impl_debug.cc +++ b/db/db_impl/db_impl_debug.cc @@ -177,6 +177,11 @@ Status DBImpl::TEST_WaitForCompact(bool wait_unscheduled) { return error_handler_.GetBGError(); } +Status DBImpl::TEST_GetBGError() { + InstrumentedMutexLock l(&mutex_); + return error_handler_.GetBGError(); +} + void DBImpl::TEST_LockMutex() { mutex_.Lock(); } void DBImpl::TEST_UnlockMutex() { mutex_.Unlock(); } diff --git a/db/error_handler.cc b/db/error_handler.cc index 221ca6d1a..f8d878e7c 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -417,11 +417,11 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err, &new_bg_io_err, db_mutex_, &auto_recovery); if (BackgroundErrorReason::kCompaction == reason) { - Status bg_err(new_bg_io_err, Status::Severity::kSoftError); - if (bg_err.severity() > bg_error_.severity()) { - bg_error_ = bg_err; - } - recover_context_ = context; + // We map the retryable IO error during compaction to soft error. Since + // compaction can reschedule by itself. We will not set the BG error in + // this case + // TODO: a better way to set or clean the retryable IO error which + // happens during compaction SST file write. return bg_error_; } else if (BackgroundErrorReason::kFlushNoWAL == reason || BackgroundErrorReason::kManifestWriteNoWAL == reason) { diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc index f344c7d16..bcb061bfd 100644 --- a/db/error_handler_fs_test.cc +++ b/db/error_handler_fs_test.cc @@ -892,15 +892,17 @@ TEST_F(DBErrorHandlingFSTest, CompactionWriteRetryableError) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionJob::OpenCompactionOutputFile", [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:Finish", + [&](void*) { CancelAllBackgroundWork(dbfull()); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put(Key(1), "val")); s = Flush(); ASSERT_OK(s); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - + s = dbfull()->TEST_GetBGError(); + ASSERT_OK(s); fault_fs_->SetFilesystemActive(true); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); @@ -940,14 +942,17 @@ TEST_F(DBErrorHandlingFSTest, CompactionWriteFileScopeError) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionJob::OpenCompactionOutputFile", [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:Finish", + [&](void*) { CancelAllBackgroundWork(dbfull()); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put(Key(1), "val")); s = Flush(); ASSERT_OK(s); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); + s = dbfull()->TEST_GetBGError(); + ASSERT_OK(s); fault_fs_->SetFilesystemActive(true); SyncPoint::GetInstance()->ClearAllCallBacks(); @@ -2198,8 +2203,7 @@ TEST_F(DBErrorHandlingFSTest, CompactionWriteRetryableErrorAutoRecover) { ASSERT_OK(s); s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - + ASSERT_OK(s); TEST_SYNC_POINT("CompactionWriteRetryableErrorAutoRecover0"); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing();