From cde69a7cfd8a11593812ef5a8e835ee963007cd9 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 28 Apr 2021 10:57:11 -0700 Subject: [PATCH] db_stress to add --open_metadata_write_fault_one_in (#8235) Summary: DB Stress to add --open_metadata_write_fault_one_in which would randomly fail in some file metadata modification operations during DB Open, including file creation, close, renaming and directory sync. Some operations can fail before and after the operations take place. If DB open fails, db_stress would retry without the failure ingestion, and DB is expected to open successfully. This option is enabled in crash test in half of the time. Some follow up changes would allow write failures in open time, and ingesting those failures in non-DB open cases. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8235 Test Plan: Run stress tests for a while and see failures got triggered. This can reproduce the bug fixed by https://github.com/facebook/rocksdb/pull/8192 and a similar one that fails when fsyncing parent directory. Reviewed By: anand1976 Differential Revision: D28010944 fbshipit-source-id: 36a96da4dc3633e5f7680cef3ea0a900fcdb5558 --- db_stress_tool/db_stress_env_wrapper.h | 4 +- db_stress_tool/db_stress_gflags.cc | 4 + db_stress_tool/db_stress_shared_state.h | 1 + db_stress_tool/db_stress_test_base.cc | 94 +++++++++++---- db_stress_tool/db_stress_tool.cc | 2 +- tools/db_crashtest.py | 1 + utilities/fault_injection_fs.cc | 153 +++++++++++++++++++----- utilities/fault_injection_fs.h | 23 ++++ 8 files changed, 230 insertions(+), 52 deletions(-) diff --git a/db_stress_tool/db_stress_env_wrapper.h b/db_stress_tool/db_stress_env_wrapper.h index 484071f10..f517a489b 100644 --- a/db_stress_tool/db_stress_env_wrapper.h +++ b/db_stress_tool/db_stress_env_wrapper.h @@ -28,7 +28,9 @@ class DbStressEnvWrapper : public EnvWrapper { f.find(".restore") != std::string::npos) { return target()->DeleteFile(f); } - return Status::OK(); + // Rename the file instead of deletion to keep the history, and + // at the same time it is not visible to RocksDB. + return target()->RenameFile(f, f + "_renamed_"); } // If true, all manifest files will not be delted in DeleteFile(). diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 873dca59c..6325314d9 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -808,4 +808,8 @@ DEFINE_uint64(user_timestamp_size, 0, "Number of bytes for a user-defined timestamp. Currently, only " "8-byte is supported"); +DEFINE_int32(open_metadata_write_fault_one_in, 0, + "On non-zero, enables fault injection on file metadata write " + "during DB reopen."); + #endif // GFLAGS diff --git a/db_stress_tool/db_stress_shared_state.h b/db_stress_tool/db_stress_shared_state.h index 03583db7a..03bc0784c 100644 --- a/db_stress_tool/db_stress_shared_state.h +++ b/db_stress_tool/db_stress_shared_state.h @@ -30,6 +30,7 @@ DECLARE_int32(compaction_thread_pool_adjust_interval); DECLARE_int32(continuous_verification_interval); DECLARE_int32(read_fault_one_in); DECLARE_int32(write_fault_one_in); +DECLARE_int32(open_metadata_write_fault_one_in); namespace ROCKSDB_NAMESPACE { class StressTest; diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 1df4aa4de..6f8da9ba4 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -2104,6 +2104,9 @@ void StressTest::PrintEnv() const { static_cast(FLAGS_level_compaction_dynamic_level_bytes)); fprintf(stdout, "Read fault one in : %d\n", FLAGS_read_fault_one_in); fprintf(stdout, "Write fault one in : %d\n", FLAGS_write_fault_one_in); + fprintf(stdout, "Open metadata write fault one in:\n"); + fprintf(stdout, " %d\n", + FLAGS_open_metadata_write_fault_one_in); fprintf(stdout, "Sync fault injection : %d\n", FLAGS_sync_fault_injection); fprintf(stdout, "Best efforts recovery : %d\n", static_cast(FLAGS_best_efforts_recovery)); @@ -2409,33 +2412,78 @@ void StressTest::Open() { new DbStressListener(FLAGS_db, options_.db_paths, cf_descriptors)); options_.create_missing_column_families = true; if (!FLAGS_use_txn) { +#ifndef NDEBUG + // Determine whether we need to ingest file metadata write failures + // during DB reopen. If it does, enable it. + // Only ingest metadata error if it is reopening, as initial open + // failure doesn't need to be handled. + // TODO cover transaction DB is not covered in this fault test too. + bool ingest_meta_error = + FLAGS_open_metadata_write_fault_one_in && + fault_fs_guard + ->FileExists(FLAGS_db + "/CURRENT", IOOptions(), nullptr) + .ok(); + if (ingest_meta_error) { + fault_fs_guard->EnableMetadataWriteErrorInjection(); + fault_fs_guard->SetRandomMetadataWriteError( + FLAGS_open_metadata_write_fault_one_in); + } + while (true) { +#endif // NDEBUG #ifndef ROCKSDB_LITE - // StackableDB-based BlobDB - if (FLAGS_use_blob_db) { - blob_db::BlobDBOptions blob_db_options; - blob_db_options.min_blob_size = FLAGS_blob_db_min_blob_size; - blob_db_options.bytes_per_sync = FLAGS_blob_db_bytes_per_sync; - blob_db_options.blob_file_size = FLAGS_blob_db_file_size; - blob_db_options.enable_garbage_collection = FLAGS_blob_db_enable_gc; - blob_db_options.garbage_collection_cutoff = FLAGS_blob_db_gc_cutoff; - - blob_db::BlobDB* blob_db = nullptr; - s = blob_db::BlobDB::Open(options_, blob_db_options, FLAGS_db, - cf_descriptors, &column_families_, &blob_db); - if (s.ok()) { - db_ = blob_db; - } - } else + // StackableDB-based BlobDB + if (FLAGS_use_blob_db) { + blob_db::BlobDBOptions blob_db_options; + blob_db_options.min_blob_size = FLAGS_blob_db_min_blob_size; + blob_db_options.bytes_per_sync = FLAGS_blob_db_bytes_per_sync; + blob_db_options.blob_file_size = FLAGS_blob_db_file_size; + blob_db_options.enable_garbage_collection = FLAGS_blob_db_enable_gc; + blob_db_options.garbage_collection_cutoff = FLAGS_blob_db_gc_cutoff; + + blob_db::BlobDB* blob_db = nullptr; + s = blob_db::BlobDB::Open(options_, blob_db_options, FLAGS_db, + cf_descriptors, &column_families_, + &blob_db); + if (s.ok()) { + db_ = blob_db; + } + } else #endif // !ROCKSDB_LITE - { - if (db_preload_finished_.load() && FLAGS_read_only) { - s = DB::OpenForReadOnly(DBOptions(options_), FLAGS_db, cf_descriptors, - &column_families_, &db_); - } else { - s = DB::Open(DBOptions(options_), FLAGS_db, cf_descriptors, - &column_families_, &db_); + { + if (db_preload_finished_.load() && FLAGS_read_only) { + s = DB::OpenForReadOnly(DBOptions(options_), FLAGS_db, + cf_descriptors, &column_families_, &db_); + } else { + s = DB::Open(DBOptions(options_), FLAGS_db, cf_descriptors, + &column_families_, &db_); + } + } + +#ifndef NDEBUG + if (ingest_meta_error) { + fault_fs_guard->DisableMetadataWriteErrorInjection(); + if (!s.ok()) { + // After failure to opening a DB due to IO error, retry should + // successfully open the DB with correct data if no IO error shows + // up. + ingest_meta_error = false; + + Random rand(static_cast(FLAGS_seed)); + if (rand.OneIn(2)) { + fault_fs_guard->DeleteFilesCreatedAfterLastDirSync(IOOptions(), + nullptr); + } + if (rand.OneIn(3)) { + fault_fs_guard->DropUnsyncedFileData(); + } else if (rand.OneIn(2)) { + fault_fs_guard->DropRandomUnsyncedFileData(&rand); + } + continue; + } } + break; } +#endif // NDEBUG } else { #ifndef ROCKSDB_LITE TransactionDBOptions txn_db_options; diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc index 04a7bb8cc..e7c36384f 100644 --- a/db_stress_tool/db_stress_tool.cc +++ b/db_stress_tool/db_stress_tool.cc @@ -98,7 +98,7 @@ int db_stress_tool(int argc, char** argv) { #ifndef NDEBUG if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection || - FLAGS_write_fault_one_in) { + FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in) { FaultInjectionTestFS* fs = new FaultInjectionTestFS(raw_env->GetFileSystem()); fault_fs_guard.reset(fs); diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index ae37f9706..a9556508d 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -137,6 +137,7 @@ default_params = { "max_key_len": 3, "key_len_percent_dist": "1,30,69", "read_fault_one_in": lambda: random.choice([0, 1000]), + "open_metadata_write_fault_one_in": lambda: random.choice([0, 8]), "sync_fault_injection": False, "get_property_one_in": 1000000, "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]), diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc index 27509ab45..90c403690 100644 --- a/utilities/fault_injection_fs.cc +++ b/utilities/fault_injection_fs.cc @@ -87,8 +87,21 @@ IOStatus TestFSDirectory::Fsync(const IOOptions& options, IODebugContext* dbg) { if (!fs_->IsFilesystemActive()) { return fs_->GetError(); } + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } fs_->SyncDir(dirname_); - return dir_->Fsync(options, dbg); + IOStatus s = dir_->Fsync(options, dbg); + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + return s; } TestFSWritableFile::TestFSWritableFile(const std::string& fname, @@ -159,6 +172,12 @@ IOStatus TestFSWritableFile::Close(const IOOptions& options, if (!fs_->IsFilesystemActive()) { return fs_->GetError(); } + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } writable_file_opened_ = false; IOStatus io_s; io_s = target_->Append(state_.buffer_, options, dbg); @@ -170,6 +189,10 @@ IOStatus TestFSWritableFile::Close(const IOOptions& options, } if (io_s.ok()) { fs_->WritableFileClosed(state_); + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } } return io_s; } @@ -294,6 +317,12 @@ IOStatus FaultInjectionTestFS::NewWritableFile( if (!IsFilesystemActive()) { return GetError(); } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } if (IsFilesystemDirectWritable()) { return target()->NewWritableFile(fname, file_opts, result, dbg); } @@ -305,11 +334,19 @@ IOStatus FaultInjectionTestFS::NewWritableFile( // WritableFileWriter* file is opened // again then it will be truncated - so forget our saved state. UntrackFile(fname); - MutexLock l(&mutex_); - open_files_.insert(fname); - auto dir_and_name = TestFSGetDirAndName(fname); - auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list.insert(dir_and_name.second); + { + MutexLock l(&mutex_); + open_files_.insert(fname); + auto dir_and_name = TestFSGetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } } return io_s; } @@ -323,6 +360,12 @@ IOStatus FaultInjectionTestFS::ReopenWritableFile( if (IsFilesystemDirectWritable()) { return target()->ReopenWritableFile(fname, file_opts, result, dbg); } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } IOStatus io_s = target()->ReopenWritableFile(fname, file_opts, result, dbg); if (io_s.ok()) { result->reset( @@ -330,11 +373,19 @@ IOStatus FaultInjectionTestFS::ReopenWritableFile( // WritableFileWriter* file is opened // again then it will be truncated - so forget our saved state. UntrackFile(fname); - MutexLock l(&mutex_); - open_files_.insert(fname); - auto dir_and_name = TestFSGetDirAndName(fname); - auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list.insert(dir_and_name.second); + { + MutexLock l(&mutex_); + open_files_.insert(fname); + auto dir_and_name = TestFSGetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } } return io_s; } @@ -348,17 +399,31 @@ IOStatus FaultInjectionTestFS::NewRandomRWFile( if (IsFilesystemDirectWritable()) { return target()->NewRandomRWFile(fname, file_opts, result, dbg); } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } IOStatus io_s = target()->NewRandomRWFile(fname, file_opts, result, dbg); if (io_s.ok()) { result->reset(new TestFSRandomRWFile(fname, std::move(*result), this)); // WritableFileWriter* file is opened // again then it will be truncated - so forget our saved state. UntrackFile(fname); - MutexLock l(&mutex_); - open_files_.insert(fname); - auto dir_and_name = TestFSGetDirAndName(fname); - auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list.insert(dir_and_name.second); + { + MutexLock l(&mutex_); + open_files_.insert(fname); + auto dir_and_name = TestFSGetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } } return io_s; } @@ -385,9 +450,21 @@ IOStatus FaultInjectionTestFS::DeleteFile(const std::string& f, if (!IsFilesystemActive()) { return GetError(); } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } IOStatus io_s = FileSystemWrapper::DeleteFile(f, options, dbg); if (io_s.ok()) { UntrackFile(f); + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } } return io_s; } @@ -399,21 +476,33 @@ IOStatus FaultInjectionTestFS::RenameFile(const std::string& s, if (!IsFilesystemActive()) { return GetError(); } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } IOStatus io_s = FileSystemWrapper::RenameFile(s, t, options, dbg); if (io_s.ok()) { - MutexLock l(&mutex_); - if (db_file_state_.find(s) != db_file_state_.end()) { - db_file_state_[t] = db_file_state_[s]; - db_file_state_.erase(s); - } + { + MutexLock l(&mutex_); + if (db_file_state_.find(s) != db_file_state_.end()) { + db_file_state_[t] = db_file_state_[s]; + db_file_state_.erase(s); + } - auto sdn = TestFSGetDirAndName(s); - auto tdn = TestFSGetDirAndName(t); - if (dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) { - auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; - assert(tlist.find(tdn.second) == tlist.end()); - tlist.insert(tdn.second); + auto sdn = TestFSGetDirAndName(s); + auto tdn = TestFSGetDirAndName(t); + if (dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) { + auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; + assert(tlist.find(tdn.second) == tlist.end()); + tlist.insert(tdn.second); + } + } + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; } } @@ -618,6 +707,16 @@ IOStatus FaultInjectionTestFS::InjectWriteError(const std::string& file_name) { return IOStatus::OK(); } +IOStatus FaultInjectionTestFS::InjectMetadataWriteError() { + MutexLock l(&mutex_); + if (!enable_metadata_write_error_injection_ || + !metadata_write_error_one_in_ || + !write_error_rand_.OneIn(metadata_write_error_one_in_)) { + return IOStatus::OK(); + } + return IOStatus::IOError(); +} + void FaultInjectionTestFS::PrintFaultBacktrace() { #if defined(OS_LINUX) ErrorContext* ctx = diff --git a/utilities/fault_injection_fs.h b/utilities/fault_injection_fs.h index e131224c6..2b46c1f18 100644 --- a/utilities/fault_injection_fs.h +++ b/utilities/fault_injection_fs.h @@ -174,7 +174,10 @@ class FaultInjectionTestFS : public FileSystemWrapper { filesystem_writable_(false), thread_local_error_(new ThreadLocalPtr(DeleteThreadLocalErrorContext)), enable_write_error_injection_(false), + enable_metadata_write_error_injection_(false), write_error_rand_(0), + write_error_one_in_(0), + metadata_write_error_one_in_(0), ingest_data_corruption_before_write_(false) {} virtual ~FaultInjectionTestFS() { error_.PermitUncheckedError(); } @@ -361,10 +364,18 @@ class FaultInjectionTestFS : public FileSystemWrapper { write_error_allowed_types_ = types; } + void SetRandomMetadataWriteError(int one_in) { + MutexLock l(&mutex_); + metadata_write_error_one_in_ = one_in; + } + // Inject an write error with randomlized parameter and the predefined // error type. Only the allowed file types will inject the write error IOStatus InjectWriteError(const std::string& file_name); + // Ingest error to metadata operations. + IOStatus InjectMetadataWriteError(); + // Inject an error. For a READ operation, a status of IOError(), a // corruption in the contents of scratch, or truncation of slice // are the types of error with equal probability. For OPEN, @@ -397,6 +408,11 @@ class FaultInjectionTestFS : public FileSystemWrapper { enable_write_error_injection_ = true; } + void EnableMetadataWriteErrorInjection() { + MutexLock l(&mutex_); + enable_metadata_write_error_injection_ = true; + } + void DisableWriteErrorInjection() { MutexLock l(&mutex_); enable_write_error_injection_ = false; @@ -410,6 +426,11 @@ class FaultInjectionTestFS : public FileSystemWrapper { } } + void DisableMetadataWriteErrorInjection() { + MutexLock l(&mutex_); + enable_metadata_write_error_injection_ = false; + } + // We capture a backtrace every time a fault is injected, for debugging // purposes. This call prints the backtrace to stderr and frees the // saved callstack @@ -456,8 +477,10 @@ class FaultInjectionTestFS : public FileSystemWrapper { std::unique_ptr thread_local_error_; bool enable_write_error_injection_; + bool enable_metadata_write_error_injection_; Random write_error_rand_; int write_error_one_in_; + int metadata_write_error_one_in_; std::vector write_error_allowed_types_; bool ingest_data_corruption_before_write_; ChecksumType checksum_handoff_func_tpye_;