Stress test to inject read failures in DB reopen (#8476)

Summary:
Inject read failures during DB reopen, just as we already do for metadata writes and data writes.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8476

Test Plan: Ran some manual tests and made sure the injected failures are triggered.

Reviewed By: anand1976

Differential Revision: D29507283

fbshipit-source-id: d04da0163973447041038bd87701686a417c4e0c
main
sdong 3 years ago committed by Facebook GitHub Bot
parent 1ae026c400
commit f33611d5e9
  1. 5
      db_stress_tool/db_stress_gflags.cc
  2. 1
      db_stress_tool/db_stress_shared_state.h
  3. 11
      db_stress_tool/db_stress_test_base.cc
  4. 2
      db_stress_tool/db_stress_tool.cc
  5. 5
      tools/db_crashtest.py
  6. 54
      utilities/fault_injection_fs.cc
  7. 32
      utilities/fault_injection_fs.h

@ -825,7 +825,10 @@ DEFINE_string(secondary_cache_uri, "",
"Full URI for creating a customized secondary cache object");
#endif // ROCKSDB_LITE
DEFINE_int32(open_write_fault_one_in, 0,
"On non-zero, enables fault injection on file write "
"On non-zero, enables fault injection on file writes "
"during DB reopen.");
// Read-side counterpart of open_write_fault_one_in. The default of 0 leaves
// read fault injection during DB reopen disabled.
DEFINE_int32(open_read_fault_one_in, 0,
"On non-zero, enables fault injection on file reads "
"during DB reopen.");
DEFINE_int32(injest_error_severity, 1,
"The severity of the injested IO Error. 1 is soft error (e.g. "

@ -32,6 +32,7 @@ DECLARE_int32(read_fault_one_in);
DECLARE_int32(write_fault_one_in);
DECLARE_int32(open_metadata_write_fault_one_in);
DECLARE_int32(open_write_fault_one_in);
DECLARE_int32(open_read_fault_one_in);
DECLARE_int32(injest_error_severity);

@ -2476,13 +2476,15 @@ void StressTest::Open() {
// TODO: transaction DB is not yet covered by this fault test either.
bool ingest_meta_error = false;
bool ingest_write_error = false;
bool ingest_read_error = false;
if ((FLAGS_open_metadata_write_fault_one_in ||
FLAGS_open_write_fault_one_in) &&
FLAGS_open_write_fault_one_in || FLAGS_open_read_fault_one_in) &&
fault_fs_guard
->FileExists(FLAGS_db + "/CURRENT", IOOptions(), nullptr)
.ok()) {
ingest_meta_error = FLAGS_open_metadata_write_fault_one_in;
ingest_write_error = FLAGS_open_write_fault_one_in;
ingest_read_error = FLAGS_open_read_fault_one_in;
if (ingest_meta_error) {
fault_fs_guard->EnableMetadataWriteErrorInjection();
fault_fs_guard->SetRandomMetadataWriteError(
@ -2496,6 +2498,9 @@ void StressTest::Open() {
IOStatus::IOError("Injected Open Error"),
/*inject_for_all_file_types=*/true, /*types=*/{});
}
if (ingest_read_error) {
fault_fs_guard->SetRandomReadError(FLAGS_open_read_fault_one_in);
}
}
while (true) {
#endif // NDEBUG
@ -2529,10 +2534,11 @@ void StressTest::Open() {
}
#ifndef NDEBUG
if (ingest_meta_error || ingest_write_error) {
if (ingest_meta_error || ingest_write_error || ingest_read_error) {
fault_fs_guard->SetFilesystemDirectWritable(true);
fault_fs_guard->DisableMetadataWriteErrorInjection();
fault_fs_guard->DisableWriteErrorInjection();
fault_fs_guard->SetRandomReadError(0);
if (s.ok()) {
// Ingested errors might happen in background compactions. We
// wait for all compactions to finish to make sure DB is in
@ -2549,6 +2555,7 @@ void StressTest::Open() {
// up.
ingest_meta_error = false;
ingest_write_error = false;
ingest_read_error = false;
Random rand(static_cast<uint32_t>(FLAGS_seed));
if (rand.OneIn(2)) {

@ -87,7 +87,7 @@ int db_stress_tool(int argc, char** argv) {
#ifndef NDEBUG
if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection ||
FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in ||
FLAGS_open_write_fault_one_in) {
FLAGS_open_write_fault_one_in || FLAGS_open_read_fault_one_in) {
FaultInjectionTestFS* fs =
new FaultInjectionTestFS(raw_env->GetFileSystem());
fault_fs_guard.reset(fs);

@ -139,8 +139,9 @@ default_params = {
"max_key_len": 3,
"key_len_percent_dist": "1,30,69",
"read_fault_one_in": lambda: random.choice([0, 1000]),
"open_metadata_write_fault_one_in": lambda: random.choice([0, 8]),
"open_write_fault_one_in": lambda: random.choice([0, 16]),
"open_metadata_write_fault_one_in": lambda: random.choice([0, 0, 8]),
"open_write_fault_one_in": lambda: random.choice([0, 0, 16]),
"open_read_fault_one_in": lambda: random.choice([0, 0, 32]),
"sync_fault_injection": False,
"get_property_one_in": 1000000,
"paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),

@ -324,8 +324,12 @@ IOStatus TestFSRandomAccessFile::Read(uint64_t offset, size_t n,
}
IOStatus s = target_->Read(offset, n, options, result, scratch, dbg);
if (s.ok()) {
s = fs_->InjectError(FaultInjectionTestFS::ErrorOperation::kRead, result,
use_direct_io(), scratch);
s = fs_->InjectThreadSpecificReadError(
FaultInjectionTestFS::ErrorOperation::kRead, result, use_direct_io(),
scratch);
}
if (s.ok() && fs_->ShouldInjectRandomReadError()) {
return IOStatus::IOError("Injected read error");
}
return s;
}
@ -337,6 +341,27 @@ size_t TestFSRandomAccessFile::GetUniqueId(char* id, size_t max_size) const {
return target_->GetUniqueId(id, max_size);
}
}
// Sequential read that forwards to the wrapped file. Only when the underlying
// read succeeded may the call be turned into an injected IOError, as decided
// by FaultInjectionTestFS::ShouldInjectRandomReadError(). A genuine failure
// from the wrapped file is returned unchanged.
IOStatus TestFSSequentialFile::Read(size_t n, const IOOptions& options,
                                    Slice* result, char* scratch,
                                    IODebugContext* dbg) {
  IOStatus io_s = target()->Read(n, options, result, scratch, dbg);
  if (!io_s.ok()) {
    // Real error: do not consult the injection dice at all.
    return io_s;
  }
  if (fs_->ShouldInjectRandomReadError()) {
    return IOStatus::IOError("Injected seq read error");
  }
  return io_s;
}
// Positioned read that forwards to the wrapped file. As with Read(), an
// injected IOError can only replace a read that actually succeeded; a genuine
// failure from the wrapped file is returned unchanged.
IOStatus TestFSSequentialFile::PositionedRead(uint64_t offset, size_t n,
                                              const IOOptions& options,
                                              Slice* result, char* scratch,
                                              IODebugContext* dbg) {
  IOStatus io_s =
      target()->PositionedRead(offset, n, options, result, scratch, dbg);
  if (!io_s.ok()) {
    // Real error: do not consult the injection dice at all.
    return io_s;
  }
  if (fs_->ShouldInjectRandomReadError()) {
    return IOStatus::IOError("Injected seq positioned read error");
  }
  return io_s;
}
IOStatus FaultInjectionTestFS::NewDirectory(
const std::string& name, const IOOptions& options,
@ -474,7 +499,11 @@ IOStatus FaultInjectionTestFS::NewRandomAccessFile(
if (!IsFilesystemActive()) {
return GetError();
}
IOStatus io_s = InjectError(ErrorOperation::kOpen, nullptr, false, nullptr);
if (ShouldInjectRandomReadError()) {
return IOStatus::IOError("Injected error when open random access file");
}
IOStatus io_s = InjectThreadSpecificReadError(ErrorOperation::kOpen, nullptr,
false, nullptr);
if (io_s.ok()) {
io_s = target()->NewRandomAccessFile(fname, file_opts, result, dbg);
}
@ -484,6 +513,23 @@ IOStatus FaultInjectionTestFS::NewRandomAccessFile(
return io_s;
}
// Opens `fname` for sequential reading through the wrapped file system and
// hands back a TestFSSequentialFile so that later reads can have errors
// injected.
//
// Returns GetError() if the file system is currently inactive, an injected
// IOError if the random read-error dice say so (checked before the target
// file system is touched), and otherwise the status of the underlying open.
IOStatus FaultInjectionTestFS::NewSequentialFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSSequentialFile>* result, IODebugContext* dbg) {
  if (!IsFilesystemActive()) {
    return GetError();
  }
  if (ShouldInjectRandomReadError()) {
    return IOStatus::IOError("Injected read error when creating seq file");
  }

  IOStatus open_status =
      target()->NewSequentialFile(fname, file_opts, result, dbg);
  if (!open_status.ok()) {
    return open_status;
  }
  // Re-wrap the freshly opened file; the wrapper takes over ownership of the
  // raw pointer released from `result`.
  FSSequentialFile* raw_file = result->release();
  result->reset(new TestFSSequentialFile(raw_file, this));
  return open_status;
}
IOStatus FaultInjectionTestFS::DeleteFile(const std::string& f,
const IOOptions& options,
IODebugContext* dbg) {
@ -642,7 +688,7 @@ void FaultInjectionTestFS::UntrackFile(const std::string& f) {
open_files_.erase(f);
}
IOStatus FaultInjectionTestFS::InjectError(ErrorOperation op,
IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError(ErrorOperation op,
Slice* result,
bool direct_io,
char* scratch) {

@ -150,6 +150,21 @@ class TestFSRandomAccessFile : public FSRandomAccessFile {
FaultInjectionTestFS* fs_;
};
// Sequential-file wrapper created by FaultInjectionTestFS::NewSequentialFile.
// It overrides Read() and PositionedRead() so that the owning fault-injection
// file system can fail reads at random.
class TestFSSequentialFile : public FSSequentialFileWrapper {
 public:
  // `file` is forwarded to the wrapper base class and is also adopted by
  // target_guard_, which deletes it when this object is destroyed.
  // `fault_fs` is stored as a plain pointer and is consulted on every read.
  explicit TestFSSequentialFile(FSSequentialFile* file,
                                FaultInjectionTestFS* fault_fs)
      : FSSequentialFileWrapper(file), target_guard_(file), fs_(fault_fs) {}

  IOStatus Read(size_t n, const IOOptions& options, Slice* result,
                char* scratch, IODebugContext* dbg) override;

  IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
                          Slice* result, char* scratch,
                          IODebugContext* dbg) override;

 private:
  // Owns the wrapped file for the lifetime of this object.
  std::unique_ptr<FSSequentialFile> target_guard_;
  // File system that decides whether a read error should be injected.
  FaultInjectionTestFS* fs_;
};
class TestFSDirectory : public FSDirectory {
public:
explicit TestFSDirectory(FaultInjectionTestFS* fs, std::string dirname,
@ -178,6 +193,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
write_error_rand_(0),
write_error_one_in_(0),
metadata_write_error_one_in_(0),
read_error_one_in_(0),
ingest_data_corruption_before_write_(false),
fail_get_file_unique_id_(false) {}
virtual ~FaultInjectionTestFS() { error_.PermitUncheckedError(); }
@ -207,6 +223,9 @@ class FaultInjectionTestFS : public FileSystemWrapper {
const FileOptions& file_opts,
std::unique_ptr<FSRandomAccessFile>* result,
IODebugContext* dbg) override;
IOStatus NewSequentialFile(const std::string& f, const FileOptions& file_opts,
std::unique_ptr<FSSequentialFile>* r,
IODebugContext* dbg) override;
virtual IOStatus DeleteFile(const std::string& f, const IOOptions& options,
IODebugContext* dbg) override;
@ -381,6 +400,13 @@ class FaultInjectionTestFS : public FileSystemWrapper {
MutexLock l(&mutex_);
metadata_write_error_one_in_ = one_in;
}
// Sets the random read-error rate. A non-zero `one_in` enables injection;
// passing 0 disables it.
void SetRandomReadError(int one_in) { read_error_one_in_.store(one_in); }
// Returns true when a random read error should be injected for the current
// operation. Injection is off while the configured rate is 0; otherwise the
// calling thread's Random instance is asked for a one-in-`rate` draw.
bool ShouldInjectRandomReadError() {
// Load the atomic exactly once. Another thread may call
// SetRandomReadError(0) at any time; re-reading the value for the OneIn()
// argument could observe 0 after the non-zero check had already passed.
// NOTE(review): OneIn(0) presumably reduces to a modulo by zero -- confirm
// against Random's implementation.
const int one_in = read_error_one_in();
return one_in != 0 && Random::GetTLSInstance()->OneIn(one_in);
}
// Inject a write error with randomized parameters and the predefined
// error type. Only the allowed file types will inject the write error
@ -393,7 +419,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
// corruption in the contents of scratch, or truncation of slice
// are the types of error with equal probability. For OPEN,
// it's always an IOError.
IOStatus InjectError(ErrorOperation op, Slice* slice,
IOStatus InjectThreadSpecificReadError(ErrorOperation op, Slice* slice,
bool direct_io, char* scratch);
// Get the count of how many times we injected since the previous call
@ -420,7 +446,6 @@ class FaultInjectionTestFS : public FileSystemWrapper {
MutexLock l(&mutex_);
enable_write_error_injection_ = true;
}
void EnableMetadataWriteErrorInjection() {
MutexLock l(&mutex_);
enable_metadata_write_error_injection_ = true;
@ -444,6 +469,8 @@ class FaultInjectionTestFS : public FileSystemWrapper {
enable_metadata_write_error_injection_ = false;
}
// Current random read-error rate; 0 means read-error injection is disabled.
int read_error_one_in() const {
return read_error_one_in_.load(std::memory_order_seq_cst);
}
// We capture a backtrace every time a fault is injected, for debugging
// purposes. This call prints the backtrace to stderr and frees the
// saved callstack
@ -494,6 +521,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
Random write_error_rand_;
int write_error_one_in_;
int metadata_write_error_one_in_;
std::atomic<int> read_error_one_in_;
bool inject_for_all_file_types_;
std::vector<FileType> write_error_allowed_types_;
bool ingest_data_corruption_before_write_;

Loading…
Cancel
Save