Allow DB resume after background errors (#3997)
Summary: Currently, if RocksDB encounters errors during a write operation (user-requested or BG operations), it sets DBImpl::bg_error_ and fails subsequent writes. This PR allows the DB to be resumed for certain classes of errors. It consists of 3 parts:
1. Introduce Status::Severity in rocksdb::Status to indicate whether a given error can be recovered from or not.
2. Refactor the error handling code so that setting bg_error_ and deciding on severity is in one place.
3. Provide an API for the user to clear the error and resume the DB instance.
This whole change is broken up into multiple PRs. Initially, we only allow clearing the error for Status::NoSpace() errors during background flush/compaction. Subsequent PRs will expand this to include more errors and foreground operations such as Put(), and implement a polling mechanism for out-of-space errors.
Closes https://github.com/facebook/rocksdb/pull/3997
Differential Revision: D8653831
Pulled By: anand1976
fbshipit-source-id: 6dc835c76122443a7668497c0226b4f072bc6afd
main
parent
26d67e357e
commit
52d4c9b7f6
@ -0,0 +1,170 @@ |
|||||||
|
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
//
|
||||||
|
#include "db/error_handler.h" |
||||||
|
#include "db/event_helpers.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// Maps to help decide the severity of an error based on the
|
||||||
|
// BackgroundErrorReason, Code, SubCode and whether db_options.paranoid_checks
|
||||||
|
// is set or not. There are 3 maps, going from most specific to least specific
|
||||||
|
// (i.e from all 4 fields in a tuple to only the BackgroundErrorReason and
|
||||||
|
// paranoid_checks). The less specific map serves as a catch all in case we miss
|
||||||
|
// a specific error code or subcode.
|
||||||
|
std::map<std::tuple<BackgroundErrorReason, Status::Code, Status::SubCode, bool>, |
||||||
|
Status::Severity> |
||||||
|
ErrorSeverityMap = { |
||||||
|
// Errors during BG compaction
|
||||||
|
{std::make_tuple(BackgroundErrorReason::kCompaction, |
||||||
|
Status::Code::kIOError, Status::SubCode::kNoSpace, |
||||||
|
true), |
||||||
|
Status::Severity::kSoftError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kCompaction, |
||||||
|
Status::Code::kIOError, Status::SubCode::kNoSpace, |
||||||
|
false), |
||||||
|
Status::Severity::kNoError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kCompaction, |
||||||
|
Status::Code::kIOError, Status::SubCode::kSpaceLimit, |
||||||
|
true), |
||||||
|
Status::Severity::kHardError}, |
||||||
|
// Errors during BG flush
|
||||||
|
{std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, |
||||||
|
Status::SubCode::kNoSpace, true), |
||||||
|
Status::Severity::kSoftError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, |
||||||
|
Status::SubCode::kNoSpace, false), |
||||||
|
Status::Severity::kNoError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, |
||||||
|
Status::SubCode::kSpaceLimit, true), |
||||||
|
Status::Severity::kHardError}, |
||||||
|
// Errors during Write
|
||||||
|
{std::make_tuple(BackgroundErrorReason::kWriteCallback, |
||||||
|
Status::Code::kIOError, Status::SubCode::kNoSpace, |
||||||
|
true), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kWriteCallback, |
||||||
|
Status::Code::kIOError, Status::SubCode::kNoSpace, |
||||||
|
false), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
}; |
||||||
|
|
||||||
|
std::map<std::tuple<BackgroundErrorReason, Status::Code, bool>, Status::Severity> |
||||||
|
DefaultErrorSeverityMap = { |
||||||
|
// Errors during BG compaction
|
||||||
|
{std::make_tuple(BackgroundErrorReason::kCompaction, |
||||||
|
Status::Code::kCorruption, true), |
||||||
|
Status::Severity::kUnrecoverableError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kCompaction, |
||||||
|
Status::Code::kCorruption, false), |
||||||
|
Status::Severity::kNoError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kCompaction, |
||||||
|
Status::Code::kIOError, true), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kCompaction, |
||||||
|
Status::Code::kIOError, false), |
||||||
|
Status::Severity::kNoError}, |
||||||
|
// Errors during BG flush
|
||||||
|
{std::make_tuple(BackgroundErrorReason::kFlush, |
||||||
|
Status::Code::kCorruption, true), |
||||||
|
Status::Severity::kUnrecoverableError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kFlush, |
||||||
|
Status::Code::kCorruption, false), |
||||||
|
Status::Severity::kNoError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kFlush, |
||||||
|
Status::Code::kIOError, true), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kFlush, |
||||||
|
Status::Code::kIOError, false), |
||||||
|
Status::Severity::kNoError}, |
||||||
|
// Errors during Write
|
||||||
|
{std::make_tuple(BackgroundErrorReason::kWriteCallback, |
||||||
|
Status::Code::kCorruption, true), |
||||||
|
Status::Severity::kUnrecoverableError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kWriteCallback, |
||||||
|
Status::Code::kCorruption, false), |
||||||
|
Status::Severity::kNoError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kWriteCallback, |
||||||
|
Status::Code::kIOError, true), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kWriteCallback, |
||||||
|
Status::Code::kIOError, false), |
||||||
|
Status::Severity::kNoError}, |
||||||
|
}; |
||||||
|
|
||||||
|
std::map<std::tuple<BackgroundErrorReason, bool>, Status::Severity> |
||||||
|
DefaultReasonMap = { |
||||||
|
// Errors during BG compaction
|
||||||
|
{std::make_tuple(BackgroundErrorReason::kCompaction, true), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kCompaction, false), |
||||||
|
Status::Severity::kNoError}, |
||||||
|
// Errors during BG flush
|
||||||
|
{std::make_tuple(BackgroundErrorReason::kFlush, true), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kFlush, false), |
||||||
|
Status::Severity::kNoError}, |
||||||
|
// Errors during Write
|
||||||
|
{std::make_tuple(BackgroundErrorReason::kWriteCallback, true), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kWriteCallback, false), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
// Errors during Memtable update
|
||||||
|
{std::make_tuple(BackgroundErrorReason::kMemTable, true), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
{std::make_tuple(BackgroundErrorReason::kMemTable, false), |
||||||
|
Status::Severity::kFatalError}, |
||||||
|
}; |
||||||
|
|
||||||
|
// Classifies bg_err by severity and, if it is more severe than the error
// currently stored, records it as the background error.
//
// The severity is looked up in ErrorSeverityMap, then DefaultErrorSeverityMap,
// then DefaultReasonMap; the first table with a matching key wins. If none
// match, the severity stays at kFatalError.
//
// Must be called with the DB mutex held (asserted below). Returns OK when
// bg_err is OK; otherwise returns the stored background error after any
// update.
Status ErrorHandler::SetBGError(const Status& bg_err,
                                BackgroundErrorReason reason) {
  db_mutex_->AssertHeld();

  // A successful status never changes the stored error.
  if (bg_err.ok()) {
    return Status::OK();
  }

  const bool paranoid = db_options_.paranoid_checks;

  // Severity used when no table has an entry for this error.
  Status::Severity sev = Status::Severity::kFatalError;

  const auto exact = ErrorSeverityMap.find(
      std::make_tuple(reason, bg_err.code(), bg_err.subcode(), paranoid));
  if (exact != ErrorSeverityMap.end()) {
    sev = exact->second;
  } else {
    const auto by_code = DefaultErrorSeverityMap.find(
        std::make_tuple(reason, bg_err.code(), paranoid));
    if (by_code != DefaultErrorSeverityMap.end()) {
      sev = by_code->second;
    } else {
      const auto by_reason =
          DefaultReasonMap.find(std::make_tuple(reason, paranoid));
      if (by_reason != DefaultReasonMap.end()) {
        sev = by_reason->second;
      }
    }
  }

  // The listeners receive a pointer to this status, so it is re-checked
  // after the notification before being stored.
  Status s(bg_err, sev);
  if (!s.ok()) {
    EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
                                          db_mutex_);
    // Only replace the stored error with a strictly more severe one.
    if (!s.ok() && (s.severity() > bg_error_.severity())) {
      bg_error_ = s;
    }
  }

  return bg_error_;
}
||||||
|
|
||||||
|
} |
@ -0,0 +1,52 @@ |
|||||||
|
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include "monitoring/instrumented_mutex.h" |
||||||
|
#include "options/db_options.h" |
||||||
|
#include "rocksdb/listener.h" |
||||||
|
#include "rocksdb/status.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class ErrorHandler { |
||||||
|
public: |
||||||
|
ErrorHandler(const ImmutableDBOptions& db_options, |
||||||
|
InstrumentedMutex* db_mutex) |
||||||
|
: db_options_(db_options), |
||||||
|
bg_error_(Status::OK()), |
||||||
|
db_mutex_(db_mutex) |
||||||
|
{} |
||||||
|
~ErrorHandler() {} |
||||||
|
|
||||||
|
Status::Severity GetErrorSeverity(BackgroundErrorReason reason, |
||||||
|
Status::Code code, Status::SubCode subcode); |
||||||
|
|
||||||
|
Status SetBGError(const Status& bg_err, BackgroundErrorReason reason); |
||||||
|
|
||||||
|
Status GetBGError() |
||||||
|
{ |
||||||
|
return bg_error_; |
||||||
|
} |
||||||
|
|
||||||
|
void ClearBGError() { |
||||||
|
bg_error_ = Status::OK(); |
||||||
|
} |
||||||
|
|
||||||
|
bool IsDBStopped() { |
||||||
|
return !bg_error_.ok(); |
||||||
|
} |
||||||
|
|
||||||
|
bool IsBGWorkStopped() { |
||||||
|
return !bg_error_.ok(); |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
const ImmutableDBOptions& db_options_; |
||||||
|
Status bg_error_; |
||||||
|
InstrumentedMutex* db_mutex_; |
||||||
|
}; |
||||||
|
|
||||||
|
} |
@ -0,0 +1,138 @@ |
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
#include "db/db_test_util.h" |
||||||
|
#include "port/stack_trace.h" |
||||||
|
#include "rocksdb/perf_context.h" |
||||||
|
#include "util/fault_injection_test_env.h" |
||||||
|
#if !defined(ROCKSDB_LITE) |
||||||
|
#include "util/sync_point.h" |
||||||
|
#endif |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class DBErrorHandlingTest : public DBTestBase { |
||||||
|
public: |
||||||
|
DBErrorHandlingTest() : DBTestBase("/db_error_handling_test") {} |
||||||
|
}; |
||||||
|
|
||||||
|
class DBErrorHandlingEnv : public EnvWrapper { |
||||||
|
public: |
||||||
|
DBErrorHandlingEnv() : EnvWrapper(Env::Default()), |
||||||
|
trig_no_space(false), trig_io_error(false) {} |
||||||
|
|
||||||
|
void SetTrigNoSpace() {trig_no_space = true;} |
||||||
|
void SetTrigIoError() {trig_io_error = true;} |
||||||
|
private: |
||||||
|
bool trig_no_space; |
||||||
|
bool trig_io_error; |
||||||
|
}; |
||||||
|
|
||||||
|
// Injects an out-of-space error when a flush job starts and checks that the
// flush reports a soft error, and that Resume() succeeds once the filesystem
// is active again.
TEST_F(DBErrorHandlingTest, FLushWriteError) {
  std::unique_ptr<FaultInjectionTestEnv> fault_env(
      new FaultInjectionTestEnv(Env::Default()));
  Options options = GetDefaultOptions();
  options.create_if_missing = true;
  options.env = fault_env.get();
  Status s;
  DestroyAndReopen(options);

  // Fail fast if the setup write itself does not succeed.
  ASSERT_EQ(Put(Key(0), "va;"), Status::OK());
  SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) {
    fault_env->SetFilesystemActive(false, Status::NoSpace("Out of space"));
  });
  SyncPoint::GetInstance()->EnableProcessing();
  s = Flush();
  ASSERT_EQ(s.severity(), rocksdb::Status::Severity::kSoftError);

  // Disarm the callback before lifting the fault; otherwise any flush that
  // starts after this point would deactivate the filesystem again.
  SyncPoint::GetInstance()->DisableProcessing();
  fault_env->SetFilesystemActive(true);
  s = dbfull()->Resume();
  ASSERT_EQ(s, Status::OK());

  Destroy(options);
}
||||||
|
|
||||||
|
// Injects an out-of-space error when a background compaction is about to
// run and checks that waiting for compaction reports a soft error, and that
// Resume() succeeds once the filesystem is active again.
TEST_F(DBErrorHandlingTest, CompactionWriteError) {
  std::unique_ptr<FaultInjectionTestEnv> fault_env(
      new FaultInjectionTestEnv(Env::Default()));
  Options options = GetDefaultOptions();
  options.create_if_missing = true;
  // Two level-0 files are enough to trigger a compaction.
  options.level0_file_num_compaction_trigger = 2;
  options.env = fault_env.get();
  Status s;
  DestroyAndReopen(options);

  // First level-0 file. Fail fast if the setup writes do not succeed.
  ASSERT_EQ(Put(Key(0), "va;"), Status::OK());
  ASSERT_EQ(Put(Key(2), "va;"), Status::OK());
  s = Flush();
  ASSERT_EQ(s, Status::OK());

  SyncPoint::GetInstance()->LoadDependency(
      {{"FlushMemTableFinished", "BackgroundCallCompaction:0"}});
  SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0", [&](void*) {
        fault_env->SetFilesystemActive(false, Status::NoSpace("Out of space"));
      });
  SyncPoint::GetInstance()->EnableProcessing();

  // Second level-0 file.
  ASSERT_EQ(Put(Key(1), "val"), Status::OK());
  s = Flush();
  ASSERT_EQ(s, Status::OK());

  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s.severity(), rocksdb::Status::Severity::kSoftError);

  // Disarm the callback before lifting the fault; otherwise any compaction
  // that runs after this point would deactivate the filesystem again.
  SyncPoint::GetInstance()->DisableProcessing();
  fault_env->SetFilesystemActive(true);
  s = dbfull()->Resume();
  ASSERT_EQ(s, Status::OK());
  Destroy(options);
}
||||||
|
|
||||||
|
// Injects a corruption error when a background compaction is about to run
// and checks that waiting for compaction reports an unrecoverable error,
// and that Resume() then fails.
TEST_F(DBErrorHandlingTest, CorruptionError) {
  std::unique_ptr<FaultInjectionTestEnv> fault_env(
      new FaultInjectionTestEnv(Env::Default()));
  Options options = GetDefaultOptions();
  options.create_if_missing = true;
  // Two level-0 files are enough to trigger a compaction.
  options.level0_file_num_compaction_trigger = 2;
  options.env = fault_env.get();
  Status s;
  DestroyAndReopen(options);

  // First level-0 file. Fail fast if the setup writes do not succeed.
  ASSERT_EQ(Put(Key(0), "va;"), Status::OK());
  ASSERT_EQ(Put(Key(2), "va;"), Status::OK());
  s = Flush();
  ASSERT_EQ(s, Status::OK());

  SyncPoint::GetInstance()->LoadDependency(
      {{"FlushMemTableFinished", "BackgroundCallCompaction:0"}});
  SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0", [&](void*) {
        fault_env->SetFilesystemActive(false, Status::Corruption("Corruption"));
      });
  SyncPoint::GetInstance()->EnableProcessing();

  // Second level-0 file.
  ASSERT_EQ(Put(Key(1), "val"), Status::OK());
  s = Flush();
  ASSERT_EQ(s, Status::OK());

  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s.severity(), rocksdb::Status::Severity::kUnrecoverableError);

  // Resume() is expected to fail even with the filesystem active again.
  fault_env->SetFilesystemActive(true);
  s = dbfull()->Resume();
  ASSERT_NE(s, Status::OK());
  Destroy(options);
}
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) { |
||||||
|
rocksdb::port::InstallStackTraceHandler(); |
||||||
|
::testing::InitGoogleTest(&argc, argv); |
||||||
|
return RUN_ALL_TESTS(); |
||||||
|
} |
Loading…
Reference in new issue