diff --git a/db_stress_tool/db_stress_env_wrapper.h b/db_stress_tool/db_stress_env_wrapper.h index 484071f10..f517a489b 100644 --- a/db_stress_tool/db_stress_env_wrapper.h +++ b/db_stress_tool/db_stress_env_wrapper.h @@ -28,7 +28,9 @@ class DbStressEnvWrapper : public EnvWrapper { f.find(".restore") != std::string::npos) { return target()->DeleteFile(f); } - return Status::OK(); + // Rename the file instead of deleting it to keep the history, while + // at the same time making it invisible to RocksDB. + return target()->RenameFile(f, f + "_renamed_"); } // If true, all manifest files will not be delted in DeleteFile(). diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 873dca59c..6325314d9 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -808,4 +808,8 @@ DEFINE_uint64(user_timestamp_size, 0, "Number of bytes for a user-defined timestamp. Currently, only " "8-byte is supported"); +DEFINE_int32(open_metadata_write_fault_one_in, 0, + "On non-zero, enables fault injection on file metadata write " + "during DB reopen."); + #endif // GFLAGS diff --git a/db_stress_tool/db_stress_shared_state.h b/db_stress_tool/db_stress_shared_state.h index 03583db7a..03bc0784c 100644 --- a/db_stress_tool/db_stress_shared_state.h +++ b/db_stress_tool/db_stress_shared_state.h @@ -30,6 +30,7 @@ DECLARE_int32(compaction_thread_pool_adjust_interval); DECLARE_int32(continuous_verification_interval); DECLARE_int32(read_fault_one_in); DECLARE_int32(write_fault_one_in); +DECLARE_int32(open_metadata_write_fault_one_in); namespace ROCKSDB_NAMESPACE { class StressTest; diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 1df4aa4de..6f8da9ba4 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -2104,6 +2104,9 @@ void StressTest::PrintEnv() const { static_cast(FLAGS_level_compaction_dynamic_level_bytes)); fprintf(stdout, "Read fault one
in : %d\n", FLAGS_read_fault_one_in); fprintf(stdout, "Write fault one in : %d\n", FLAGS_write_fault_one_in); + fprintf(stdout, "Open metadata write fault one in:\n"); + fprintf(stdout, " %d\n", + FLAGS_open_metadata_write_fault_one_in); fprintf(stdout, "Sync fault injection : %d\n", FLAGS_sync_fault_injection); fprintf(stdout, "Best efforts recovery : %d\n", static_cast(FLAGS_best_efforts_recovery)); @@ -2409,33 +2412,78 @@ void StressTest::Open() { new DbStressListener(FLAGS_db, options_.db_paths, cf_descriptors)); options_.create_missing_column_families = true; if (!FLAGS_use_txn) { +#ifndef NDEBUG + // Determine whether we need to inject file metadata write failures + // during DB reopen. If so, enable the injection. + // Only inject metadata errors when reopening an existing DB, as an + // initial open failure doesn't need to be handled. + // TODO: transaction DB is not covered by this fault test yet. + bool ingest_meta_error = + FLAGS_open_metadata_write_fault_one_in && + fault_fs_guard + ->FileExists(FLAGS_db + "/CURRENT", IOOptions(), nullptr) + .ok(); + if (ingest_meta_error) { + fault_fs_guard->EnableMetadataWriteErrorInjection(); + fault_fs_guard->SetRandomMetadataWriteError( + FLAGS_open_metadata_write_fault_one_in); + } + while (true) { +#endif // NDEBUG #ifndef ROCKSDB_LITE - // StackableDB-based BlobDB - if (FLAGS_use_blob_db) { - blob_db::BlobDBOptions blob_db_options; - blob_db_options.min_blob_size = FLAGS_blob_db_min_blob_size; - blob_db_options.bytes_per_sync = FLAGS_blob_db_bytes_per_sync; - blob_db_options.blob_file_size = FLAGS_blob_db_file_size; - blob_db_options.enable_garbage_collection = FLAGS_blob_db_enable_gc; - blob_db_options.garbage_collection_cutoff = FLAGS_blob_db_gc_cutoff; - - blob_db::BlobDB* blob_db = nullptr; - s = blob_db::BlobDB::Open(options_, blob_db_options, FLAGS_db, - cf_descriptors, &column_families_, &blob_db); - if (s.ok()) { - db_ = blob_db; - } - } else + // StackableDB-based BlobDB + if (FLAGS_use_blob_db) { +
blob_db::BlobDBOptions blob_db_options; + blob_db_options.min_blob_size = FLAGS_blob_db_min_blob_size; + blob_db_options.bytes_per_sync = FLAGS_blob_db_bytes_per_sync; + blob_db_options.blob_file_size = FLAGS_blob_db_file_size; + blob_db_options.enable_garbage_collection = FLAGS_blob_db_enable_gc; + blob_db_options.garbage_collection_cutoff = FLAGS_blob_db_gc_cutoff; + + blob_db::BlobDB* blob_db = nullptr; + s = blob_db::BlobDB::Open(options_, blob_db_options, FLAGS_db, + cf_descriptors, &column_families_, + &blob_db); + if (s.ok()) { + db_ = blob_db; + } + } else #endif // !ROCKSDB_LITE - { - if (db_preload_finished_.load() && FLAGS_read_only) { - s = DB::OpenForReadOnly(DBOptions(options_), FLAGS_db, cf_descriptors, - &column_families_, &db_); - } else { - s = DB::Open(DBOptions(options_), FLAGS_db, cf_descriptors, - &column_families_, &db_); + { + if (db_preload_finished_.load() && FLAGS_read_only) { + s = DB::OpenForReadOnly(DBOptions(options_), FLAGS_db, + cf_descriptors, &column_families_, &db_); + } else { + s = DB::Open(DBOptions(options_), FLAGS_db, cf_descriptors, + &column_families_, &db_); + } + } + +#ifndef NDEBUG + if (ingest_meta_error) { + fault_fs_guard->DisableMetadataWriteErrorInjection(); + if (!s.ok()) { + // After failing to open a DB due to an IO error, a retry should + // successfully open the DB with correct data if no IO error shows + // up.
+ ingest_meta_error = false; + + Random rand(static_cast(FLAGS_seed)); + if (rand.OneIn(2)) { + fault_fs_guard->DeleteFilesCreatedAfterLastDirSync(IOOptions(), + nullptr); + } + if (rand.OneIn(3)) { + fault_fs_guard->DropUnsyncedFileData(); + } else if (rand.OneIn(2)) { + fault_fs_guard->DropRandomUnsyncedFileData(&rand); + } + continue; + } } + break; } +#endif // NDEBUG } else { #ifndef ROCKSDB_LITE TransactionDBOptions txn_db_options; diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc index 04a7bb8cc..e7c36384f 100644 --- a/db_stress_tool/db_stress_tool.cc +++ b/db_stress_tool/db_stress_tool.cc @@ -98,7 +98,7 @@ int db_stress_tool(int argc, char** argv) { #ifndef NDEBUG if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection || - FLAGS_write_fault_one_in) { + FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in) { FaultInjectionTestFS* fs = new FaultInjectionTestFS(raw_env->GetFileSystem()); fault_fs_guard.reset(fs); diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index ae37f9706..a9556508d 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -137,6 +137,7 @@ default_params = { "max_key_len": 3, "key_len_percent_dist": "1,30,69", "read_fault_one_in": lambda: random.choice([0, 1000]), + "open_metadata_write_fault_one_in": lambda: random.choice([0, 8]), "sync_fault_injection": False, "get_property_one_in": 1000000, "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]), diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc index 27509ab45..90c403690 100644 --- a/utilities/fault_injection_fs.cc +++ b/utilities/fault_injection_fs.cc @@ -87,8 +87,21 @@ IOStatus TestFSDirectory::Fsync(const IOOptions& options, IODebugContext* dbg) { if (!fs_->IsFilesystemActive()) { return fs_->GetError(); } + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } fs_->SyncDir(dirname_); - return dir_->Fsync(options, dbg); + IOStatus s = 
dir_->Fsync(options, dbg); + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + return s; } TestFSWritableFile::TestFSWritableFile(const std::string& fname, @@ -159,6 +172,12 @@ IOStatus TestFSWritableFile::Close(const IOOptions& options, if (!fs_->IsFilesystemActive()) { return fs_->GetError(); } + { + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } writable_file_opened_ = false; IOStatus io_s; io_s = target_->Append(state_.buffer_, options, dbg); @@ -170,6 +189,10 @@ IOStatus TestFSWritableFile::Close(const IOOptions& options, } if (io_s.ok()) { fs_->WritableFileClosed(state_); + IOStatus in_s = fs_->InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } } return io_s; } @@ -294,6 +317,12 @@ IOStatus FaultInjectionTestFS::NewWritableFile( if (!IsFilesystemActive()) { return GetError(); } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } if (IsFilesystemDirectWritable()) { return target()->NewWritableFile(fname, file_opts, result, dbg); } @@ -305,11 +334,19 @@ IOStatus FaultInjectionTestFS::NewWritableFile( // WritableFileWriter* file is opened // again then it will be truncated - so forget our saved state. 
UntrackFile(fname); - MutexLock l(&mutex_); - open_files_.insert(fname); - auto dir_and_name = TestFSGetDirAndName(fname); - auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list.insert(dir_and_name.second); + { + MutexLock l(&mutex_); + open_files_.insert(fname); + auto dir_and_name = TestFSGetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } } return io_s; } @@ -323,6 +360,12 @@ IOStatus FaultInjectionTestFS::ReopenWritableFile( if (IsFilesystemDirectWritable()) { return target()->ReopenWritableFile(fname, file_opts, result, dbg); } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } IOStatus io_s = target()->ReopenWritableFile(fname, file_opts, result, dbg); if (io_s.ok()) { result->reset( @@ -330,11 +373,19 @@ IOStatus FaultInjectionTestFS::ReopenWritableFile( // WritableFileWriter* file is opened // again then it will be truncated - so forget our saved state. 
UntrackFile(fname); - MutexLock l(&mutex_); - open_files_.insert(fname); - auto dir_and_name = TestFSGetDirAndName(fname); - auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list.insert(dir_and_name.second); + { + MutexLock l(&mutex_); + open_files_.insert(fname); + auto dir_and_name = TestFSGetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } } return io_s; } @@ -348,17 +399,31 @@ IOStatus FaultInjectionTestFS::NewRandomRWFile( if (IsFilesystemDirectWritable()) { return target()->NewRandomRWFile(fname, file_opts, result, dbg); } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } IOStatus io_s = target()->NewRandomRWFile(fname, file_opts, result, dbg); if (io_s.ok()) { result->reset(new TestFSRandomRWFile(fname, std::move(*result), this)); // WritableFileWriter* file is opened // again then it will be truncated - so forget our saved state. 
UntrackFile(fname); - MutexLock l(&mutex_); - open_files_.insert(fname); - auto dir_and_name = TestFSGetDirAndName(fname); - auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list.insert(dir_and_name.second); + { + MutexLock l(&mutex_); + open_files_.insert(fname); + auto dir_and_name = TestFSGetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } } return io_s; } @@ -385,9 +450,21 @@ IOStatus FaultInjectionTestFS::DeleteFile(const std::string& f, if (!IsFilesystemActive()) { return GetError(); } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } IOStatus io_s = FileSystemWrapper::DeleteFile(f, options, dbg); if (io_s.ok()) { UntrackFile(f); + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } } return io_s; } @@ -399,21 +476,33 @@ IOStatus FaultInjectionTestFS::RenameFile(const std::string& s, if (!IsFilesystemActive()) { return GetError(); } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } IOStatus io_s = FileSystemWrapper::RenameFile(s, t, options, dbg); if (io_s.ok()) { - MutexLock l(&mutex_); - if (db_file_state_.find(s) != db_file_state_.end()) { - db_file_state_[t] = db_file_state_[s]; - db_file_state_.erase(s); - } + { + MutexLock l(&mutex_); + if (db_file_state_.find(s) != db_file_state_.end()) { + db_file_state_[t] = db_file_state_[s]; + db_file_state_.erase(s); + } - auto sdn = TestFSGetDirAndName(s); - auto tdn = TestFSGetDirAndName(t); - if (dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) { - auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; - assert(tlist.find(tdn.second) == tlist.end()); - tlist.insert(tdn.second); + auto sdn = TestFSGetDirAndName(s); + auto tdn = TestFSGetDirAndName(t); + if 
(dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) { + auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; + assert(tlist.find(tdn.second) == tlist.end()); + tlist.insert(tdn.second); + } + } + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; } } @@ -618,6 +707,16 @@ IOStatus FaultInjectionTestFS::InjectWriteError(const std::string& file_name) { return IOStatus::OK(); } +IOStatus FaultInjectionTestFS::InjectMetadataWriteError() { + MutexLock l(&mutex_); + if (!enable_metadata_write_error_injection_ || + !metadata_write_error_one_in_ || + !write_error_rand_.OneIn(metadata_write_error_one_in_)) { + return IOStatus::OK(); + } + return IOStatus::IOError(); +} + void FaultInjectionTestFS::PrintFaultBacktrace() { #if defined(OS_LINUX) ErrorContext* ctx = diff --git a/utilities/fault_injection_fs.h b/utilities/fault_injection_fs.h index e131224c6..2b46c1f18 100644 --- a/utilities/fault_injection_fs.h +++ b/utilities/fault_injection_fs.h @@ -174,7 +174,10 @@ class FaultInjectionTestFS : public FileSystemWrapper { filesystem_writable_(false), thread_local_error_(new ThreadLocalPtr(DeleteThreadLocalErrorContext)), enable_write_error_injection_(false), + enable_metadata_write_error_injection_(false), write_error_rand_(0), + write_error_one_in_(0), + metadata_write_error_one_in_(0), ingest_data_corruption_before_write_(false) {} virtual ~FaultInjectionTestFS() { error_.PermitUncheckedError(); } @@ -361,10 +364,18 @@ class FaultInjectionTestFS : public FileSystemWrapper { write_error_allowed_types_ = types; } + void SetRandomMetadataWriteError(int one_in) { + MutexLock l(&mutex_); + metadata_write_error_one_in_ = one_in; + } + // Inject an write error with randomlized parameter and the predefined // error type. Only the allowed file types will inject the write error IOStatus InjectWriteError(const std::string& file_name); + // Inject an error into metadata write operations.
+ IOStatus InjectMetadataWriteError(); + // Inject an error. For a READ operation, a status of IOError(), a // corruption in the contents of scratch, or truncation of slice // are the types of error with equal probability. For OPEN, @@ -397,6 +408,11 @@ class FaultInjectionTestFS : public FileSystemWrapper { enable_write_error_injection_ = true; } + void EnableMetadataWriteErrorInjection() { + MutexLock l(&mutex_); + enable_metadata_write_error_injection_ = true; + } + void DisableWriteErrorInjection() { MutexLock l(&mutex_); enable_write_error_injection_ = false; @@ -410,6 +426,11 @@ class FaultInjectionTestFS : public FileSystemWrapper { } } + void DisableMetadataWriteErrorInjection() { + MutexLock l(&mutex_); + enable_metadata_write_error_injection_ = false; + } + // We capture a backtrace every time a fault is injected, for debugging // purposes. This call prints the backtrace to stderr and frees the // saved callstack @@ -456,8 +477,10 @@ class FaultInjectionTestFS : public FileSystemWrapper { std::unique_ptr thread_local_error_; bool enable_write_error_injection_; + bool enable_metadata_write_error_injection_; Random write_error_rand_; int write_error_one_in_; + int metadata_write_error_one_in_; std::vector write_error_allowed_types_; bool ingest_data_corruption_before_write_; ChecksumType checksum_handoff_func_tpye_;