|
|
|
@ -1827,56 +1827,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover2) { |
|
|
|
|
Destroy(options); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover1) { |
|
|
|
|
// Fail the first resume and make the second resume successful
|
|
|
|
|
std::shared_ptr<ErrorHandlerFSListener> listener( |
|
|
|
|
new ErrorHandlerFSListener()); |
|
|
|
|
Options options = GetDefaultOptions(); |
|
|
|
|
options.env = fault_env_.get(); |
|
|
|
|
options.create_if_missing = true; |
|
|
|
|
options.listeners.emplace_back(listener); |
|
|
|
|
options.max_bgerror_resume_count = 2; |
|
|
|
|
options.bgerror_resume_retry_interval = 100000; // 0.1 second
|
|
|
|
|
Status s; |
|
|
|
|
|
|
|
|
|
listener->EnableAutoRecovery(false); |
|
|
|
|
DestroyAndReopen(options); |
|
|
|
|
|
|
|
|
|
IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); |
|
|
|
|
error_msg.SetRetryable(true); |
|
|
|
|
|
|
|
|
|
ASSERT_OK(Put(Key(1), "val1")); |
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( |
|
|
|
|
{{"RecoverFromRetryableBGIOError:BeforeWait0", |
|
|
|
|
"FLushWritRetryableeErrorAutoRecover1:0"}, |
|
|
|
|
{"FLushWritRetryableeErrorAutoRecover1:1", |
|
|
|
|
"RecoverFromRetryableBGIOError:BeforeWait1"}, |
|
|
|
|
{"RecoverFromRetryableBGIOError:RecoverSuccess", |
|
|
|
|
"FLushWritRetryableeErrorAutoRecover1:2"}}); |
|
|
|
|
SyncPoint::GetInstance()->SetCallBack( |
|
|
|
|
"BuildTable:BeforeFinishBuildTable", |
|
|
|
|
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); |
|
|
|
|
SyncPoint::GetInstance()->EnableProcessing(); |
|
|
|
|
s = Flush(); |
|
|
|
|
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover1:0"); |
|
|
|
|
fault_fs_->SetFilesystemActive(true); |
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover1:1"); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover1:2"); |
|
|
|
|
SyncPoint::GetInstance()->DisableProcessing(); |
|
|
|
|
|
|
|
|
|
ASSERT_EQ("val1", Get(Key(1))); |
|
|
|
|
Reopen(options); |
|
|
|
|
ASSERT_EQ("val1", Get(Key(1))); |
|
|
|
|
ASSERT_OK(Put(Key(2), "val2")); |
|
|
|
|
ASSERT_OK(Flush()); |
|
|
|
|
ASSERT_EQ("val2", Get(Key(2))); |
|
|
|
|
|
|
|
|
|
Destroy(options); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) { |
|
|
|
|
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover1) { |
|
|
|
|
// Activate the FS before the first resume
|
|
|
|
|
std::shared_ptr<ErrorHandlerFSListener> listener( |
|
|
|
|
new ErrorHandlerFSListener()); |
|
|
|
@ -1916,7 +1867,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) { |
|
|
|
|
Destroy(options); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover3) { |
|
|
|
|
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) { |
|
|
|
|
// Fail all the resume and let user to resume
|
|
|
|
|
std::shared_ptr<ErrorHandlerFSListener> listener( |
|
|
|
|
new ErrorHandlerFSListener()); |
|
|
|
@ -1936,18 +1887,18 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover3) { |
|
|
|
|
|
|
|
|
|
ASSERT_OK(Put(Key(1), "val1")); |
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( |
|
|
|
|
{{"FLushWritRetryableeErrorAutoRecover3:0", |
|
|
|
|
{{"FLushWritRetryableeErrorAutoRecover2:0", |
|
|
|
|
"RecoverFromRetryableBGIOError:BeforeStart"}, |
|
|
|
|
{"RecoverFromRetryableBGIOError:LoopOut", |
|
|
|
|
"FLushWritRetryableeErrorAutoRecover3:1"}}); |
|
|
|
|
"FLushWritRetryableeErrorAutoRecover2:1"}}); |
|
|
|
|
SyncPoint::GetInstance()->SetCallBack( |
|
|
|
|
"BuildTable:BeforeFinishBuildTable", |
|
|
|
|
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); |
|
|
|
|
SyncPoint::GetInstance()->EnableProcessing(); |
|
|
|
|
s = Flush(); |
|
|
|
|
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover3:0"); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover3:1"); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover2:0"); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover2:1"); |
|
|
|
|
fault_fs_->SetFilesystemActive(true); |
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); |
|
|
|
|
SyncPoint::GetInstance()->DisableProcessing(); |
|
|
|
@ -1965,173 +1916,6 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover3) { |
|
|
|
|
Destroy(options); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover4) { |
|
|
|
|
// Fail the first resume and does not do resume second time because
|
|
|
|
|
// the IO error severity is Fatal Error and not Retryable.
|
|
|
|
|
std::shared_ptr<ErrorHandlerFSListener> listener( |
|
|
|
|
new ErrorHandlerFSListener()); |
|
|
|
|
Options options = GetDefaultOptions(); |
|
|
|
|
options.env = fault_env_.get(); |
|
|
|
|
options.create_if_missing = true; |
|
|
|
|
options.listeners.emplace_back(listener); |
|
|
|
|
options.max_bgerror_resume_count = 2; |
|
|
|
|
options.bgerror_resume_retry_interval = 10; // 0.1 second
|
|
|
|
|
Status s; |
|
|
|
|
|
|
|
|
|
listener->EnableAutoRecovery(false); |
|
|
|
|
DestroyAndReopen(options); |
|
|
|
|
|
|
|
|
|
IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); |
|
|
|
|
error_msg.SetRetryable(true); |
|
|
|
|
IOStatus nr_msg = IOStatus::IOError("No Retryable Fatal IO Error"); |
|
|
|
|
nr_msg.SetRetryable(false); |
|
|
|
|
|
|
|
|
|
ASSERT_OK(Put(Key(1), "val1")); |
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( |
|
|
|
|
{{"RecoverFromRetryableBGIOError:BeforeStart", |
|
|
|
|
"FLushWritRetryableeErrorAutoRecover4:0"}, |
|
|
|
|
{"FLushWritRetryableeErrorAutoRecover4:2", |
|
|
|
|
"RecoverFromRetryableBGIOError:RecoverFail0"}}); |
|
|
|
|
SyncPoint::GetInstance()->SetCallBack( |
|
|
|
|
"BuildTable:BeforeFinishBuildTable", |
|
|
|
|
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); |
|
|
|
|
SyncPoint::GetInstance()->SetCallBack( |
|
|
|
|
"RecoverFromRetryableBGIOError:BeforeResume1", |
|
|
|
|
[&](void*) { fault_fs_->SetFilesystemActive(false, nr_msg); }); |
|
|
|
|
|
|
|
|
|
SyncPoint::GetInstance()->EnableProcessing(); |
|
|
|
|
s = Flush(); |
|
|
|
|
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover4:0"); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover4:2"); |
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); |
|
|
|
|
SyncPoint::GetInstance()->DisableProcessing(); |
|
|
|
|
fault_fs_->SetFilesystemActive(true); |
|
|
|
|
// Even the FS is recoverd, due to the Fatal Error in bg_error_ the resume
|
|
|
|
|
// and flush will all fail.
|
|
|
|
|
ASSERT_EQ("val1", Get(Key(1))); |
|
|
|
|
ASSERT_NOK(dbfull()->Resume()); |
|
|
|
|
ASSERT_EQ("val1", Get(Key(1))); |
|
|
|
|
ASSERT_OK(Put(Key(2), "val2")); |
|
|
|
|
ASSERT_NOK(Flush()); |
|
|
|
|
ASSERT_EQ("NOT_FOUND", Get(Key(2))); |
|
|
|
|
|
|
|
|
|
Reopen(options); |
|
|
|
|
ASSERT_EQ("val1", Get(Key(1))); |
|
|
|
|
ASSERT_OK(Put(Key(2), "val2")); |
|
|
|
|
ASSERT_OK(Flush()); |
|
|
|
|
ASSERT_EQ("val2", Get(Key(2))); |
|
|
|
|
|
|
|
|
|
Destroy(options); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover5) { |
|
|
|
|
// During the resume, call DB->CLose, make sure the resume thread exist
|
|
|
|
|
// before close continues. Due to the shutdown, the resume is not successful
|
|
|
|
|
// and the FS does not become active, so close status is still IO error
|
|
|
|
|
std::shared_ptr<ErrorHandlerFSListener> listener( |
|
|
|
|
new ErrorHandlerFSListener()); |
|
|
|
|
Options options = GetDefaultOptions(); |
|
|
|
|
options.env = fault_env_.get(); |
|
|
|
|
options.create_if_missing = true; |
|
|
|
|
options.listeners.emplace_back(listener); |
|
|
|
|
options.max_bgerror_resume_count = 2; |
|
|
|
|
options.bgerror_resume_retry_interval = 10; // 0.1 second
|
|
|
|
|
Status s; |
|
|
|
|
|
|
|
|
|
listener->EnableAutoRecovery(false); |
|
|
|
|
DestroyAndReopen(options); |
|
|
|
|
|
|
|
|
|
IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); |
|
|
|
|
error_msg.SetRetryable(true); |
|
|
|
|
|
|
|
|
|
ASSERT_OK(Put(Key(1), "val1")); |
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( |
|
|
|
|
{{"RecoverFromRetryableBGIOError:BeforeStart", |
|
|
|
|
"FLushWritRetryableeErrorAutoRecover5:0"}}); |
|
|
|
|
SyncPoint::GetInstance()->SetCallBack( |
|
|
|
|
"BuildTable:BeforeFinishBuildTable", |
|
|
|
|
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); |
|
|
|
|
SyncPoint::GetInstance()->EnableProcessing(); |
|
|
|
|
s = Flush(); |
|
|
|
|
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover5:0"); |
|
|
|
|
// The first resume will cause recovery_error and its severity is the
|
|
|
|
|
// Fatal error
|
|
|
|
|
s = dbfull()->Close(); |
|
|
|
|
ASSERT_NOK(s); |
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); |
|
|
|
|
SyncPoint::GetInstance()->DisableProcessing(); |
|
|
|
|
fault_fs_->SetFilesystemActive(true); |
|
|
|
|
|
|
|
|
|
Reopen(options); |
|
|
|
|
ASSERT_NE("val1", Get(Key(1))); |
|
|
|
|
ASSERT_OK(Put(Key(2), "val2")); |
|
|
|
|
s = Flush(); |
|
|
|
|
ASSERT_OK(s); |
|
|
|
|
ASSERT_EQ("val2", Get(Key(2))); |
|
|
|
|
|
|
|
|
|
Destroy(options); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover6) { |
|
|
|
|
// During the resume, call DB->CLose, make sure the resume thread exist
|
|
|
|
|
// before close continues. Due to the shutdown, the resume is not successful
|
|
|
|
|
// and the FS does not become active, so close status is still IO error
|
|
|
|
|
std::shared_ptr<ErrorHandlerFSListener> listener( |
|
|
|
|
new ErrorHandlerFSListener()); |
|
|
|
|
Options options = GetDefaultOptions(); |
|
|
|
|
options.env = fault_env_.get(); |
|
|
|
|
options.create_if_missing = true; |
|
|
|
|
options.listeners.emplace_back(listener); |
|
|
|
|
options.max_bgerror_resume_count = 2; |
|
|
|
|
options.bgerror_resume_retry_interval = 10; // 0.1 second
|
|
|
|
|
Status s; |
|
|
|
|
|
|
|
|
|
listener->EnableAutoRecovery(false); |
|
|
|
|
DestroyAndReopen(options); |
|
|
|
|
|
|
|
|
|
IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); |
|
|
|
|
error_msg.SetRetryable(true); |
|
|
|
|
|
|
|
|
|
ASSERT_OK(Put(Key(1), "val1")); |
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( |
|
|
|
|
{{"FLushWritRetryableeErrorAutoRecover6:0", |
|
|
|
|
"RecoverFromRetryableBGIOError:BeforeStart"}, |
|
|
|
|
{"RecoverFromRetryableBGIOError:BeforeWait0", |
|
|
|
|
"FLushWritRetryableeErrorAutoRecover6:1"}, |
|
|
|
|
{"FLushWritRetryableeErrorAutoRecover6:2", |
|
|
|
|
"RecoverFromRetryableBGIOError:BeforeWait1"}, |
|
|
|
|
{"RecoverFromRetryableBGIOError:AfterWait0", |
|
|
|
|
"FLushWritRetryableeErrorAutoRecover6:3"}}); |
|
|
|
|
SyncPoint::GetInstance()->SetCallBack( |
|
|
|
|
"BuildTable:BeforeFinishBuildTable", |
|
|
|
|
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); |
|
|
|
|
SyncPoint::GetInstance()->EnableProcessing(); |
|
|
|
|
s = Flush(); |
|
|
|
|
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:0"); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:1"); |
|
|
|
|
fault_fs_->SetFilesystemActive(true); |
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:2"); |
|
|
|
|
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:3"); |
|
|
|
|
// The first resume will cause recovery_error and its severity is the
|
|
|
|
|
// Fatal error
|
|
|
|
|
s = dbfull()->Close(); |
|
|
|
|
ASSERT_OK(s); |
|
|
|
|
SyncPoint::GetInstance()->DisableProcessing(); |
|
|
|
|
|
|
|
|
|
Reopen(options); |
|
|
|
|
ASSERT_EQ("val1", Get(Key(1))); |
|
|
|
|
ASSERT_OK(Put(Key(2), "val2")); |
|
|
|
|
s = Flush(); |
|
|
|
|
ASSERT_OK(s); |
|
|
|
|
ASSERT_EQ("val2", Get(Key(2))); |
|
|
|
|
|
|
|
|
|
Destroy(options); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableErrorAutoRecover) { |
|
|
|
|
// Fail the first resume and let the second resume be successful
|
|
|
|
|
std::shared_ptr<ErrorHandlerFSListener> listener( |
|
|
|
|