From 29e8f6a698acf0d00d88f21a2692fcdd974a74e5 Mon Sep 17 00:00:00 2001
From: Zhichao Cao
Date: Wed, 2 Dec 2020 18:22:05 -0800
Subject: [PATCH] Add kManifestWriteNoWAL to BackgroundErrorReason to handle
 Flush IO Error when WAL is disabled (#7693)

Summary:
In the current code base, every MANIFEST write that hits an IO error is reported with reason BackgroundErrorReason::kManifestWrite, which is mapped to kHardError if the IO error is retryable. However, if the system does not use the WAL, all retryable IO errors should instead be mapped to kSoftError. This PR handles that special case by adding kManifestWriteNoWAL to BackgroundErrorReason.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7693

Test Plan: make check; add new test cases to error_handler_fs_test

Reviewed By: anand1976

Differential Revision: D25066204

Pulled By: zhichao-cao

fbshipit-source-id: d59553896c2eac3fb37c05238544d2b265379462
---
 db/db_impl/db_impl_compaction_flush.cc |  24 ++++--
 db/error_handler.cc                    |  45 ++++++++++-
 db/error_handler_fs_test.cc            | 100 +++++++++++++++++++++++++
 include/rocksdb/listener.h             |   5 ++
 4 files changed, 167 insertions(+), 7 deletions(-)

diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc
index a36d81b97..be8018acf 100644
--- a/db/db_impl/db_impl_compaction_flush.cc
+++ b/db/db_impl/db_impl_compaction_flush.cc
@@ -254,8 +254,15 @@ Status DBImpl::FlushMemTableToOutputFile(
       // TODO: distinguish between MANIFEST write and CURRENT renaming
       if (!versions_->io_status().ok()) {
         // Should handle return error?
-        error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
-            .PermitUncheckedError();
+        if (total_log_size_ > 0) {
+          // If the WAL is empty, we use different error reason
+          error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
+              .PermitUncheckedError();
+        } else {
+          error_handler_
+              .SetBGError(io_s, BackgroundErrorReason::kManifestWriteNoWAL)
+              .PermitUncheckedError();
+        }
       } else if (total_log_size_ > 0) {
         // Should handle return error?
         error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush)
@@ -656,9 +663,16 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
       // be pessimistic and try write to a new MANIFEST.
       // TODO: distinguish between MANIFEST write and CURRENT renaming
       if (!versions_->io_status().ok()) {
-        // Should Handle this error?
-        error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
-            .PermitUncheckedError();
+        // Should handle return error?
+        if (total_log_size_ > 0) {
+          // If the WAL is empty, we use different error reason
+          error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
+              .PermitUncheckedError();
+        } else {
+          error_handler_
+              .SetBGError(io_s, BackgroundErrorReason::kManifestWriteNoWAL)
+              .PermitUncheckedError();
+        }
       } else if (total_log_size_ > 0) {
         // Should Handle this error?
         error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush)
diff --git a/db/error_handler.cc b/db/error_handler.cc
index 7aa4aa826..26b4b909d 100644
--- a/db/error_handler.cc
+++ b/db/error_handler.cc
@@ -111,6 +111,23 @@ std::map<std::tuple<BackgroundErrorReason, Status::Code, Status::SubCode, bool>,
                           Status::Code::kIOError, Status::SubCode::kIOFenced,
                           false),
           Status::Severity::kFatalError},
+        // Errors during MANIFEST write when WAL is disabled
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
+                         Status::Code::kIOError, Status::SubCode::kNoSpace,
+                         true),
+         Status::Severity::kHardError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
+                         Status::Code::kIOError, Status::SubCode::kNoSpace,
+                         false),
+         Status::Severity::kHardError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
+                         Status::Code::kIOError, Status::SubCode::kIOFenced,
+                         true),
+         Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
+                         Status::Code::kIOError, Status::SubCode::kIOFenced,
+                         false),
+         Status::Severity::kFatalError},
 };
@@ -175,6 +192,12 @@ std::map<std::tuple<BackgroundErrorReason, Status::Code, bool>,
         {std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
                          Status::Code::kIOError, false),
          Status::Severity::kNoError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
+                         Status::Code::kIOError, true),
+         Status::Severity::kFatalError},
+        {std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
+                         Status::Code::kIOError, false),
+         Status::Severity::kFatalError},
 };

 std::map<std::tuple<BackgroundErrorReason, bool>, Status::Severity>
@@ -324,6 +347,22 @@ Status ErrorHandler::SetBGError(const Status& bg_err, BackgroundErrorReason reas
   return bg_error_;
 }

+// This is the main function for looking at IO related error during the
+// background operations. The main logic is:
+// 1) if the error is caused by data loss, the error is mapped to
+//    unrecoverable error. Application/user must take action to handle
+//    this situation.
+// 2) if the error is a Retryable IO error, auto resume will be called and the
+//    auto resume can be controlled by resume count and resume interval
+//    options. There are three sub-cases:
+//    a) if the error happens during compaction, it is mapped to a soft error.
+//       the compaction thread will reschedule a new compaction.
+//    b) if the error happens during flush and also WAL is empty, it is mapped
+//       to a soft error. Note that, it includes the case that IO error happens
+//       in SST or manifest write during flush.
+//    c) all other errors are mapped to hard error.
+// 3) for other cases, SetBGError(const Status& bg_err, BackgroundErrorReason
+//    reason) will be called to handle other error cases.
 Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
                                 BackgroundErrorReason reason) {
   db_mutex_->AssertHeld();
@@ -336,7 +375,8 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
   if (recovery_in_prog_ && recovery_io_error_.ok()) {
     recovery_io_error_ = bg_io_err;
   }
-  if (BackgroundErrorReason::kManifestWrite == reason) {
+  if (BackgroundErrorReason::kManifestWrite == reason ||
+      BackgroundErrorReason::kManifestWriteNoWAL == reason) {
     // Always returns ok
     db_->DisableFileDeletionsWithLock().PermitUncheckedError();
   }
@@ -374,7 +414,8 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
     }
     recover_context_ = context;
     return bg_error_;
-  } else if (BackgroundErrorReason::kFlushNoWAL == reason) {
+  } else if (BackgroundErrorReason::kFlushNoWAL == reason ||
+             BackgroundErrorReason::kManifestWriteNoWAL == reason) {
     // When the BG Retryable IO error reason is flush without WAL,
     // We map it to a soft error. At the same time, all the background work
     // should be stopped except the BG work from recovery. Therefore, we
diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc
index 34c2f4cbb..c17cac290 100644
--- a/db/error_handler_fs_test.cc
+++ b/db/error_handler_fs_test.cc
@@ -453,6 +453,51 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableError) {
   Close();
 }

+TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableError) {
+  std::shared_ptr<ErrorHandlerFSListener> listener(
+      new ErrorHandlerFSListener());
+  Options options = GetDefaultOptions();
+  options.env = fault_env_.get();
+  options.create_if_missing = true;
+  options.listeners.emplace_back(listener);
+  options.max_bgerror_resume_count = 0;
+  Status s;
+  std::string old_manifest;
+  std::string new_manifest;
+
+  listener->EnableAutoRecovery(false);
+  DestroyAndReopen(options);
+  old_manifest = GetManifestNameFromLiveFiles();
+
+  IOStatus error_msg = IOStatus::IOError("Retryable IO Error");
+  error_msg.SetRetryable(true);
+
+  WriteOptions wo = WriteOptions();
+  wo.disableWAL = true;
+  ASSERT_OK(Put(Key(0), "val", wo));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Put(Key(1), "val", wo));
+  SyncPoint::GetInstance()->SetCallBack(
+      "VersionSet::LogAndApply:WriteManifest",
+      [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
+  SyncPoint::GetInstance()->EnableProcessing();
+  s = Flush();
+  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+  SyncPoint::GetInstance()->DisableProcessing();
+  fault_fs_->SetFilesystemActive(true);
+  s = dbfull()->Resume();
+  ASSERT_EQ(s, Status::OK());
+
+  new_manifest = GetManifestNameFromLiveFiles();
+  ASSERT_NE(new_manifest, old_manifest);
+
+  Reopen(options);
+  ASSERT_EQ("val", Get(Key(0)));
+  ASSERT_EQ("val", Get(Key(1)));
+  Close();
+}
+
 TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) {
   std::shared_ptr<ErrorHandlerFSListener> listener(
       new ErrorHandlerFSListener());
@@ -1789,6 +1834,61 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableErrorAutoRecover) {
   Close();
 }

+TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableErrorAutoRecover) {
+  // Fail the first resume and let the second resume be successful
+  std::shared_ptr<ErrorHandlerFSListener> listener(
+      new ErrorHandlerFSListener());
+  Options options = GetDefaultOptions();
+  options.env = fault_env_.get();
+  options.create_if_missing = true;
+  options.listeners.emplace_back(listener);
+  options.max_bgerror_resume_count = 2;
+  options.bgerror_resume_retry_interval = 100000;  // 0.1 second
+  Status s;
+  std::string old_manifest;
+  std::string new_manifest;
+
+  listener->EnableAutoRecovery(false);
+  DestroyAndReopen(options);
+  old_manifest = GetManifestNameFromLiveFiles();
+
+  IOStatus error_msg = IOStatus::IOError("Retryable IO Error");
+  error_msg.SetRetryable(true);
+
+  WriteOptions wo = WriteOptions();
+  wo.disableWAL = true;
+  ASSERT_OK(Put(Key(0), "val", wo));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Put(Key(1), "val", wo));
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
+      {{"RecoverFromRetryableBGIOError:BeforeStart",
+        "ManifestWriteNoWALRetryableErrorAutoRecover:0"},
+       {"ManifestWriteNoWALRetryableErrorAutoRecover:1",
+        "RecoverFromRetryableBGIOError:BeforeWait1"},
+       {"RecoverFromRetryableBGIOError:RecoverSuccess",
+        "ManifestWriteNoWALRetryableErrorAutoRecover:2"}});
+  SyncPoint::GetInstance()->SetCallBack(
+      "VersionSet::LogAndApply:WriteManifest",
+      [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
+  SyncPoint::GetInstance()->EnableProcessing();
+  s = Flush();
+  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
+  TEST_SYNC_POINT("ManifestWriteNoWALRetryableErrorAutoRecover:0");
+  fault_fs_->SetFilesystemActive(true);
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
+  TEST_SYNC_POINT("ManifestWriteNoWALRetryableErrorAutoRecover:1");
+  TEST_SYNC_POINT("ManifestWriteNoWALRetryableErrorAutoRecover:2");
+  SyncPoint::GetInstance()->DisableProcessing();
+
+  new_manifest = GetManifestNameFromLiveFiles();
+  ASSERT_NE(new_manifest, old_manifest);
+
+  Reopen(options);
+  ASSERT_EQ("val", Get(Key(0)));
+  ASSERT_EQ("val", Get(Key(1)));
+  Close();
+}
+
 TEST_F(DBErrorHandlingFSTest,
        CompactionManifestWriteRetryableErrorAutoRecover) {
   std::shared_ptr<ErrorHandlerFSListener> listener(
diff --git a/include/rocksdb/listener.h b/include/rocksdb/listener.h
index e90a8707a..ca1766195 100644
--- a/include/rocksdb/listener.h
+++ b/include/rocksdb/listener.h
@@ -120,6 +120,10 @@ enum class FlushReason : int {
   kErrorRecoveryRetryFlush = 0xc,
 };

+// TODO: In the future, BackgroundErrorReason will only be used to indicate
+// why the BG Error is happening (e.g., flush, compaction). We may introduce
+// other data structure to indicate other essential information such as
+// the file type (e.g., Manifest, SST) and special context.
 enum class BackgroundErrorReason {
   kFlush,
   kCompaction,
@@ -127,6 +131,7 @@ enum class BackgroundErrorReason {
   kMemTable,
   kManifestWrite,
   kFlushNoWAL,
+  kManifestWriteNoWAL,
 };

 enum class WriteStallCondition {
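
To make the behavior this patch introduces easier to follow, the standalone C++ sketch below models the severity classification described in the summary as a small lookup table. It is illustrative only: the names Reason, Severity, kToySeverityMap, and Classify are made-up stand-ins rather than RocksDB APIs, and the real ErrorHandler additionally distinguishes sub-codes such as kNoSpace and kIOFenced, treats data-loss errors as unrecoverable, and drives the auto-resume logic.

// toy_severity_map.cc -- illustrative only, not part of RocksDB.
#include <iostream>
#include <map>
#include <tuple>

enum class Reason { kFlush, kManifestWrite, kFlushNoWAL, kManifestWriteNoWAL };
enum class Severity { kSoftError, kHardError, kFatalError };

// Key: (background error reason, error is retryable). Mirrors the idea of the
// severity maps in error_handler.cc, reduced to the cases this patch touches.
static const std::map<std::tuple<Reason, bool>, Severity> kToySeverityMap = {
    // Retryable MANIFEST-write failure while a WAL is in use: hard error,
    // writes must stop until recovery.
    {std::make_tuple(Reason::kManifestWrite, true), Severity::kHardError},
    // Same failure with the WAL disabled: soft error, auto-resume can retry
    // the flush while the memtable data is still in memory.
    {std::make_tuple(Reason::kManifestWriteNoWAL, true), Severity::kSoftError},
    // Flush without WAL was already treated as a soft error before this patch.
    {std::make_tuple(Reason::kFlushNoWAL, true), Severity::kSoftError},
    {std::make_tuple(Reason::kFlush, true), Severity::kHardError},
};

// Look up the severity; anything unlisted is treated as fatal in this toy model.
Severity Classify(Reason reason, bool retryable) {
  auto it = kToySeverityMap.find(std::make_tuple(reason, retryable));
  return it == kToySeverityMap.end() ? Severity::kFatalError : it->second;
}

int main() {
  bool soft =
      Classify(Reason::kManifestWriteNoWAL, true) == Severity::kSoftError;
  std::cout << "retryable MANIFEST write error with WAL disabled is soft: "
            << std::boolalpha << soft << "\n";
  return 0;
}

Under these assumptions, the table reflects the summary above: the only behavioral change is that a retryable IO error on a MANIFEST write during flush is downgraded from a hard to a soft error when total_log_size_ is zero, i.e. when no live WAL data depends on the failed write, so the flush can simply be retried by auto-resume.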