diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index a91c1993e..2dd9dab85 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -349,7 +349,9 @@ class NonBatchedOpsStressTest : public StressTest { // Grab mutex so multiple thread don't try to print the // stack trace at the same time MutexLock l(thread->shared->GetMutex()); - fprintf(stderr, "Didn't get expected error from MultiGet\n"); + fprintf(stderr, "Didn't get expected error from MultiGet. \n"); + fprintf(stderr, "num_keys %zu Expected %d errors, seen %d\n", num_keys, + error_count, stat_nok); fprintf(stderr, "Callstack that injected the fault\n"); fault_fs_guard->PrintFaultBacktrace(); std::terminate(); diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 3a96079bd..973198ca8 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -140,7 +140,7 @@ default_params = { "continuous_verification_interval" : 0, "max_key_len": 3, "key_len_percent_dist": "1,30,69", - "read_fault_one_in": lambda: random.choice([0, 1000]), + "read_fault_one_in": lambda: random.choice([0, 32, 1000]), "open_metadata_write_fault_one_in": lambda: random.choice([0, 0, 8]), "open_write_fault_one_in": lambda: random.choice([0, 0, 16]), "open_read_fault_one_in": lambda: random.choice([0, 0, 32]), diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc index 45399f24f..7aae89479 100644 --- a/utilities/fault_injection_fs.cc +++ b/utilities/fault_injection_fs.cc @@ -26,6 +26,7 @@ #include "util/coding.h" #include "util/crc32c.h" #include "util/random.h" +#include "util/string_util.h" #include "util/xxhash.h" namespace ROCKSDB_NAMESPACE { @@ -340,7 +341,7 @@ IOStatus TestFSRandomAccessFile::Read(uint64_t offset, size_t n, if (s.ok()) { s = fs_->InjectThreadSpecificReadError( FaultInjectionTestFS::ErrorOperation::kRead, result, use_direct_io(), - scratch); + scratch, /*need_count_increase=*/true, 
/*fault_injected=*/nullptr); } if (s.ok() && fs_->ShouldInjectRandomReadError()) { return IOStatus::IOError("Injected read error"); @@ -355,19 +356,25 @@ IOStatus TestFSRandomAccessFile::MultiRead(FSReadRequest* reqs, size_t num_reqs, return fs_->GetError(); } IOStatus s = target_->MultiRead(reqs, num_reqs, options, dbg); + bool injected_error = false; for (size_t i = 0; i < num_reqs; i++) { if (!reqs[i].status.ok()) { // Already seeing an error. break; } + bool this_injected_error; reqs[i].status = fs_->InjectThreadSpecificReadError( - FaultInjectionTestFS::ErrorOperation::kRead, &reqs[i].result, - use_direct_io(), reqs[i].scratch); + FaultInjectionTestFS::ErrorOperation::kMultiReadSingleReq, + &(reqs[i].result), use_direct_io(), reqs[i].scratch, + /*need_count_increase=*/true, + /*fault_injected=*/&this_injected_error); + injected_error |= this_injected_error; } if (s.ok()) { s = fs_->InjectThreadSpecificReadError( - FaultInjectionTestFS::ErrorOperation::kRead, nullptr, use_direct_io(), - nullptr); + FaultInjectionTestFS::ErrorOperation::kMultiRead, nullptr, + use_direct_io(), nullptr, /*need_count_increase=*/!injected_error, + /*fault_injected=*/nullptr); } if (s.ok() && fs_->ShouldInjectRandomReadError()) { return IOStatus::IOError("Injected read error"); @@ -550,7 +557,9 @@ IOStatus FaultInjectionTestFS::NewRandomAccessFile( return IOStatus::IOError("Injected error when open random access file"); } IOStatus io_s = InjectThreadSpecificReadError(ErrorOperation::kOpen, nullptr, - false, nullptr); + false, nullptr, + /*need_count_increase=*/true, + /*fault_injected=*/nullptr); if (io_s.ok()) { io_s = target()->NewRandomAccessFile(fname, file_opts, result, dbg); } @@ -759,8 +768,11 @@ void FaultInjectionTestFS::UntrackFile(const std::string& f) { } IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError( - ErrorOperation /*op*/, Slice* /*result*/, bool /*direct_io*/, - char* /*scratch*/) { + ErrorOperation op, Slice* result, bool direct_io, char* /*scratch*/, 
+ bool need_count_increase, bool* fault_injected) { + bool dummy_bool; + bool& ret_fault_injected = fault_injected ? *fault_injected : dummy_bool; + ret_fault_injected = false; ErrorContext* ctx = static_cast<ErrorContext*>(thread_local_error_->Get()); if (ctx == nullptr || !ctx->enable_error_injection || !ctx->one_in) { @@ -768,12 +780,47 @@ IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError( } if (ctx->rand.OneIn(ctx->one_in)) { - ctx->count++; + if (ctx->count == 0) { + ctx->message = ""; + } + if (need_count_increase) { + ctx->count++; + } if (ctx->callstack) { free(ctx->callstack); } ctx->callstack = port::SaveStack(&ctx->frames); - return IOStatus::IOError(); + + if (op != ErrorOperation::kMultiReadSingleReq) { + // Likely non-per read status code for MultiRead + ctx->message += "error; "; + ret_fault_injected = true; + return IOStatus::IOError(); + } else if (Random::GetTLSInstance()->OneIn(8)) { + assert(result); + // For a small chance, set the failure to status but turn the + // result to be empty, which is supposed to be caught for a check. + *result = Slice(); + ctx->message += "inject empty result; "; + ret_fault_injected = true; + } else if (!direct_io && Random::GetTLSInstance()->OneIn(7)) { + assert(result); + // With direct I/O, many extra bytes might be read so corrupting + // one byte might not cause checksum mismatch. Skip checksum + // corruption injection. + // For a small chance, set the failure to status but corrupt the + // result in a way that checksum checking is supposed to fail. + // Corrupt the last byte, which is supposed to be a checksum byte + // It would work for CRC. Not 100% sure for xxhash and will adjust + // if it is not the case. 
+ const_cast<char*>(result->data())[result->size() - 1]++; + ctx->message += "corrupt last byte; "; + ret_fault_injected = true; + } else { + ctx->message += "error result multiget single; "; + ret_fault_injected = true; + return IOStatus::IOError(); + } } return IOStatus::OK(); } @@ -835,6 +882,7 @@ void FaultInjectionTestFS::PrintFaultBacktrace() { return; } fprintf(stderr, "Injected error type = %d\n", ctx->type); + fprintf(stderr, "Message: %s\n", ctx->message.c_str()); port::PrintAndFreeStack(ctx->callstack, ctx->frames); ctx->callstack = nullptr; #endif diff --git a/utilities/fault_injection_fs.h b/utilities/fault_injection_fs.h index 2ed2b5c01..42077aad4 100644 --- a/utilities/fault_injection_fs.h +++ b/utilities/fault_injection_fs.h @@ -370,6 +370,8 @@ class FaultInjectionTestFS : public FileSystemWrapper { // Specify what the operation, so we can inject the right type of error enum ErrorOperation : char { kRead = 0, + kMultiReadSingleReq = 1, + kMultiRead = 2, kOpen, }; @@ -440,8 +442,12 @@ class FaultInjectionTestFS : public FileSystemWrapper { // corruption in the contents of scratch, or truncation of slice // are the types of error with equal probability. For OPEN, // its always an IOError. + // fault_injected returns whether a fault is injected. It is needed + // because some fault is injected with IOStatus to be OK. IOStatus InjectThreadSpecificReadError(ErrorOperation op, Slice* slice, - bool direct_io, char* scratch); + bool direct_io, char* scratch, + bool need_count_increase, + bool* fault_injected); // Get the count of how many times we injected since the previous call int GetAndResetErrorCount() { @@ -525,6 +531,7 @@ class FaultInjectionTestFS : public FileSystemWrapper { int count; bool enable_error_injection; void* callstack; + std::string message; int frames; ErrorType type;