From 9e7b7e2c08a035805de264a57ad309f47b63aaed Mon Sep 17 00:00:00 2001 From: anand76 Date: Fri, 24 Apr 2020 13:03:08 -0700 Subject: [PATCH] Silence false alarms in db_stress fault injection (#6741) Summary: False alarms are caused by codepaths that intentionally swallow IO errors. Tests: make crash_test Pull Request resolved: https://github.com/facebook/rocksdb/pull/6741 Reviewed By: ltamasi Differential Revision: D21181138 Pulled By: anand1976 fbshipit-source-id: 5ccfbc68eb192033488de6269e59c00f2c65ce00 --- db_stress_tool/db_stress_shared_state.cc | 6 +++--- db_stress_tool/db_stress_shared_state.h | 14 +++++++------- db_stress_tool/no_batched_ops_stress.cc | 8 ++++---- file/file_prefetch_buffer.cc | 11 ++++++++++- table/block_based/block_based_filter_block.cc | 2 ++ table/block_based/block_based_table_reader.cc | 10 +++++----- table/block_based/full_filter_block.cc | 6 ++++-- table/block_based/partitioned_filter_block.cc | 19 ++++++------------- table/block_based/partitioned_index_reader.cc | 4 ++++ test_util/fault_injection_test_fs.cc | 10 ++++++---- test_util/fault_injection_test_fs.h | 8 ++++++++ test_util/testharness.h | 15 ++++++++++++++- 12 files changed, 73 insertions(+), 40 deletions(-) diff --git a/db_stress_tool/db_stress_shared_state.cc b/db_stress_tool/db_stress_shared_state.cc index d64a4edd4..bc1363504 100644 --- a/db_stress_tool/db_stress_shared_state.cc +++ b/db_stress_tool/db_stress_shared_state.cc @@ -16,12 +16,12 @@ const uint32_t SharedState::UNKNOWN_SENTINEL = 0xfffffffe; const uint32_t SharedState::DELETION_SENTINEL = 0xffffffff; #if defined(ROCKSDB_SUPPORT_THREAD_LOCAL) #if defined(OS_SOLARIS) -__thread bool SharedState::filter_read_error; +__thread bool SharedState::ignore_read_error; #else -thread_local bool SharedState::filter_read_error; +thread_local bool SharedState::ignore_read_error; #endif // OS_SOLARIS #else -bool SharedState::filter_read_error; +bool SharedState::ignore_read_error; #endif // ROCKSDB_SUPPORT_THREAD_LOCAL } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS diff --git a/db_stress_tool/db_stress_shared_state.h b/db_stress_tool/db_stress_shared_state.h index e7a3853eb..86310f82f 100644 --- a/db_stress_tool/db_stress_shared_state.h +++ b/db_stress_tool/db_stress_shared_state.h @@ -48,12 +48,12 @@ class SharedState { // for those calls #if defined(ROCKSDB_SUPPORT_THREAD_LOCAL) #if defined(OS_SOLARIS) - static __thread bool filter_read_error; + static __thread bool ignore_read_error; #else - static thread_local bool filter_read_error; + static thread_local bool ignore_read_error; #endif // OS_SOLARIS #else - static bool filter_read_error; + static bool ignore_read_error; #endif // ROCKSDB_SUPPORT_THREAD_LOCAL SharedState(Env* env, StressTest* stress_test) @@ -192,8 +192,8 @@ class SharedState { } #ifndef NDEBUG if (FLAGS_read_fault_one_in) { - SyncPoint::GetInstance()->SetCallBack("FilterReadError", - FilterReadErrorCallback); + SyncPoint::GetInstance()->SetCallBack("FaultInjectionIgnoreError", + IgnoreReadErrorCallback); SyncPoint::GetInstance()->EnableProcessing(); } #endif // NDEBUG @@ -362,8 +362,8 @@ class SharedState { } private: - static void FilterReadErrorCallback(void*) { - filter_read_error = true; + static void IgnoreReadErrorCallback(void*) { + ignore_read_error = true; } port::Mutex mu_; diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 4fc8872bf..3dd749141 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -152,7 +152,7 @@ class NonBatchedOpsStressTest : public StressTest { #ifndef NDEBUG if (fault_fs_guard) { fault_fs_guard->EnableErrorInjection(); - SharedState::filter_read_error = false; + SharedState::ignore_read_error = false; } #endif // NDEBUG Status s = db_->Get(read_opts, cfh, key, &from_db); @@ -164,7 +164,7 @@ class NonBatchedOpsStressTest : public StressTest { if (s.ok()) { #ifndef NDEBUG if (fault_fs_guard) { - if (error_count && !SharedState::filter_read_error) { + if (error_count && !SharedState::ignore_read_error) { // Grab mutex so multiple thread don't try to print the // stack trace at the same time MutexLock l(thread->shared->GetMutex()); @@ -272,7 +272,7 @@ class NonBatchedOpsStressTest : public StressTest { #ifndef NDEBUG if (fault_fs_guard) { fault_fs_guard->EnableErrorInjection(); - SharedState::filter_read_error = false; + SharedState::ignore_read_error = false; } #endif // NDEBUG db_->MultiGet(read_opts, cfh, num_keys, keys.data(), values.data(), @@ -291,7 +291,7 @@ class NonBatchedOpsStressTest : public StressTest { } #ifndef NDEBUG - if (fault_fs_guard && error_count && !SharedState::filter_read_error) { + if (fault_fs_guard && error_count && !SharedState::ignore_read_error) { int stat_nok = 0; for (const auto& s : statuses) { if (!s.ok() && !s.IsNotFound()) { diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index 098d6beb7..f688a964f 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -17,6 +17,7 @@ #include "monitoring/iostats_context_imp.h" #include "port/port.h" #include "test_util/sync_point.h" +#include "test_util/testharness.h" #include "util/random.h" #include "util/rate_limiter.h" @@ -86,9 +87,17 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader, } Slice result; + size_t read_len = static_cast(roundup_len - chunk_len); s = reader->Read(rounddown_offset + chunk_len, - static_cast(roundup_len - chunk_len), &result, + read_len, &result, buffer_.BufferStart() + chunk_len, nullptr, for_compaction); +#ifndef NDEBUG + if (!s.ok() || result.size() < read_len) { + // Fake an IO error to force db_stress fault injection to ignore + // truncated read errors + IGNORE_STATUS_IF_ERROR(Status::IOError()); + } +#endif if (s.ok()) { buffer_offset_ = rounddown_offset; buffer_.Size(static_cast(chunk_len) + result.size()); diff --git a/table/block_based/block_based_filter_block.cc b/table/block_based/block_based_filter_block.cc index de3f5cb13..9301d09c3 100644 --- a/table/block_based/block_based_filter_block.cc +++ b/table/block_based/block_based_filter_block.cc @@ -14,6 +14,7 @@ #include "monitoring/perf_context_imp.h" #include "rocksdb/filter_policy.h" #include "table/block_based/block_based_table_reader.h" +#include "test_util/testharness.h" #include "util/coding.h" #include "util/string_util.h" @@ -184,6 +185,7 @@ std::unique_ptr BlockBasedFilterBlockReader::Create( use_cache, nullptr /* get_context */, lookup_context, &filter_block); if (!s.ok()) { + IGNORE_STATUS_IF_ERROR(s); return std::unique_ptr(); } diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index b685a450f..7fca143e7 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -52,6 +52,7 @@ #include "table/sst_file_writer_collectors.h" #include "table/two_level_iterator.h" +#include "test_util/testharness.h" #include "monitoring/perf_context_imp.h" #include "port/lang.h" #include "test_util/sync_point.h" @@ -728,6 +729,7 @@ Status BlockBasedTable::PrefetchTail( nullptr, 0, 0, true /* enable */, true /* track_min_offset */)); s = (*prefetch_buffer)->Prefetch(file, prefetch_off, prefetch_len); } + return s; } @@ -788,10 +790,12 @@ Status BlockBasedTable::ReadPropertiesBlock( nullptr /* ret_block_handle */, nullptr /* ret_block_contents */, false /* compression_type_missing */, nullptr /* memory_allocator */); } + IGNORE_STATUS_IF_ERROR(s); if (s.IsCorruption()) { s = TryReadPropertiesWithGlobalSeqno(prefetch_buffer, meta_iter->value(), &table_properties); + IGNORE_STATUS_IF_ERROR(s); } std::unique_ptr props_guard; if (table_properties != nullptr) { @@ -890,6 +894,7 @@ Status BlockBasedTable::ReadRangeDelBlock( rep_->ioptions.info_log, "Encountered error while reading data from range del block %s", s.ToString().c_str()); + IGNORE_STATUS_IF_ERROR(s); } else { rep_->fragmented_range_dels = std::make_shared(std::move(iter), @@ -994,11 +999,6 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( auto filter = new_table->CreateFilterBlockReader( prefetch_buffer, use_cache, prefetch_filter, pin_filter, lookup_context); -#ifndef NDEBUG - if (rep_->filter_type != Rep::FilterType::kNoFilter && !filter) { - TEST_SYNC_POINT("FilterReadError"); - } -#endif if (filter) { // Refer to the comment above about paritioned indexes always being cached if (prefetch_all) { diff --git a/table/block_based/full_filter_block.cc b/table/block_based/full_filter_block.cc index 93baa70d0..74b6d185b 100644 --- a/table/block_based/full_filter_block.cc +++ b/table/block_based/full_filter_block.cc @@ -11,6 +11,7 @@ #include "port/port.h" #include "rocksdb/filter_policy.h" #include "table/block_based/block_based_table_reader.h" +#include "test_util/testharness.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { @@ -132,6 +133,7 @@ std::unique_ptr FullFilterBlockReader::Create( use_cache, nullptr /* get_context */, lookup_context, &filter_block); if (!s.ok()) { + IGNORE_STATUS_IF_ERROR(s); return std::unique_ptr(); } @@ -164,7 +166,7 @@ bool FullFilterBlockReader::MayMatch( const Status s = GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block); if (!s.ok()) { - TEST_SYNC_POINT("FilterReadError"); + IGNORE_STATUS_IF_ERROR(s); return true; } @@ -222,7 +224,7 @@ void FullFilterBlockReader::MayMatch( const Status s = GetOrReadFilterBlock(no_io, range->begin()->get_context, lookup_context, &filter_block); if (!s.ok()) { - TEST_SYNC_POINT("FilterReadError"); + IGNORE_STATUS_IF_ERROR(s); return; } diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 04ce2c439..2b0631377 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -13,6 +13,7 @@ #include "rocksdb/filter_policy.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_reader.h" +#include "test_util/testharness.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { @@ -143,6 +144,7 @@ std::unique_ptr PartitionedFilterBlockReader::Create( use_cache, nullptr /* get_context */, lookup_context, &filter_block); if (!s.ok()) { + IGNORE_STATUS_IF_ERROR(s); return std::unique_ptr(); } @@ -254,7 +256,7 @@ bool PartitionedFilterBlockReader::MayMatch( Status s = GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block); if (UNLIKELY(!s.ok())) { - TEST_SYNC_POINT("FilterReadError"); + IGNORE_STATUS_IF_ERROR(s); return true; } @@ -272,7 +274,7 @@ bool PartitionedFilterBlockReader::MayMatch( no_io, get_context, lookup_context, &filter_partition_block); if (UNLIKELY(!s.ok())) { - TEST_SYNC_POINT("FilterReadError"); + IGNORE_STATUS_IF_ERROR(s); return true; } @@ -312,7 +314,7 @@ void PartitionedFilterBlockReader::CacheDependencies(bool pin) { "Error retrieving top-level filter block while trying to " "cache filter partitions: %s", s.ToString().c_str()); - TEST_SYNC_POINT("FilterReadError"); + IGNORE_STATUS_IF_ERROR(s); return; } @@ -343,11 +345,6 @@ void PartitionedFilterBlockReader::CacheDependencies(bool pin) { prefetch_buffer.reset(new FilePrefetchBuffer()); s = prefetch_buffer->Prefetch(rep->file.get(), prefetch_off, static_cast(prefetch_len)); -#ifndef NDEBUG - if (!s.ok()) { - TEST_SYNC_POINT("FilterReadError"); - } -#endif // After prefetch, read the partitions one by one ReadOptions read_options; @@ -370,11 +367,7 @@ void PartitionedFilterBlockReader::CacheDependencies(bool pin) { } } } -#ifndef NDEBUG - if (!s.ok()) { - TEST_SYNC_POINT("FilterReadError"); - } -#endif + IGNORE_STATUS_IF_ERROR(s); } } diff --git a/table/block_based/partitioned_index_reader.cc b/table/block_based/partitioned_index_reader.cc index c22ec4364..ba4d3f97b 100644 --- a/table/block_based/partitioned_index_reader.cc +++ b/table/block_based/partitioned_index_reader.cc @@ -8,6 +8,7 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "table/block_based/partitioned_index_reader.h" #include "table/block_based/partitioned_index_iterator.h" +#include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { Status PartitionIndexReader::Create( @@ -116,6 +117,7 @@ void PartitionIndexReader::CacheDependencies(bool pin) { "Error retrieving top-level index block while trying to " "cache index partitions: %s", s.ToString().c_str()); + IGNORE_STATUS_IF_ERROR(s); return; } @@ -162,6 +164,8 @@ void PartitionIndexReader::CacheDependencies(bool pin) { &block, BlockType::kIndex, /*get_context=*/nullptr, &lookup_context, /*contents=*/nullptr); + IGNORE_STATUS_IF_ERROR(s); + assert(s.ok() || block.GetValue() == nullptr); if (s.ok() && block.GetValue() != nullptr) { if (block.IsCached()) { diff --git a/test_util/fault_injection_test_fs.cc b/test_util/fault_injection_test_fs.cc index 619731319..0381e5020 100644 --- a/test_util/fault_injection_test_fs.cc +++ b/test_util/fault_injection_test_fs.cc @@ -473,13 +473,14 @@ IOStatus FaultInjectionTestFS::InjectError(ErrorOperation op, switch (op) { case kRead: { - uint32_t type = ctx->rand.Uniform(3); + ErrorType type = + static_cast(ctx->rand.Uniform(ErrorType::kErrorTypeMax)); switch (type) { // Inject IO error - case 0: + case ErrorType::kErrorTypeStatus: return IOStatus::IOError(); // Inject random corruption - case 1: + case ErrorType::kErrorTypeCorruption: { if (result->data() == scratch) { uint64_t offset = ctx->rand.Uniform((uint32_t)result->size()); @@ -496,7 +497,7 @@ IOStatus FaultInjectionTestFS::InjectError(ErrorOperation op, } } // Truncate the result - case 2: + case ErrorType::kErrorTypeTruncated: { assert(result->size() > 0); uint64_t offset = ctx->rand.Uniform((uint32_t)result->size()); @@ -525,6 +526,7 @@ void FaultInjectionTestFS::PrintFaultBacktrace() { if (ctx == nullptr) { return; } + fprintf(stderr, "Injected error type = %d\n", ctx->type); port::PrintAndFreeStack(ctx->callstack, ctx->frames); ctx->callstack = nullptr; #endif diff --git a/test_util/fault_injection_test_fs.h b/test_util/fault_injection_test_fs.h index d761a866e..7a7b1fc36 100644 --- a/test_util/fault_injection_test_fs.h +++ b/test_util/fault_injection_test_fs.h @@ -355,6 +355,13 @@ class FaultInjectionTestFS : public FileSystemWrapper { // to underlying FS for writable files IOStatus error_; + enum ErrorType : int { + kErrorTypeStatus = 0, + kErrorTypeCorruption, + kErrorTypeTruncated, + kErrorTypeMax + }; + struct ErrorContext { Random rand; int one_in; @@ -362,6 +369,7 @@ class FaultInjectionTestFS : public FileSystemWrapper { bool enable_error_injection; void* callstack; int frames; + ErrorType type; explicit ErrorContext(uint32_t seed) : rand(seed), diff --git a/test_util/testharness.h b/test_util/testharness.h index 11f40ff83..acaa4b7cc 100644 --- a/test_util/testharness.h +++ b/test_util/testharness.h @@ -42,6 +42,19 @@ int RandomSeed(); #define EXPECT_OK(s) \ EXPECT_PRED_FORMAT1(ROCKSDB_NAMESPACE::test::AssertStatus, s) #define EXPECT_NOK(s) EXPECT_FALSE((s).ok()) - } // namespace test + +// Callback sync point for any read IO errors that should be ignored by +// the fault injection framework +#ifdef NDEBUG +// Disable in release mode +#define IGNORE_STATUS_IF_ERROR(_status_) +#else +#define IGNORE_STATUS_IF_ERROR(_status_) \ +{ \ + if (!_status_.ok()) { \ + TEST_SYNC_POINT("FaultInjectionIgnoreError"); \ + } \ +} +#endif // NDEBUG } // namespace ROCKSDB_NAMESPACE