From b1a53db327ec38e4417667e03ff6793092ebc7c5 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 7 Jul 2021 16:20:40 -0700 Subject: [PATCH] =?UTF-8?q?FaultInjectionTestFS::DeleteFilesCreatedAfterLa?= =?UTF-8?q?stDirSync()=20to=20recover=E2=80=A6=20(#8501)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: … small overwritten files. If a file is overwritten with renamed and the parent path is not synced, FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync() will delete the file. However, RocksDB relies on file renaming to be atomic no matter whether the parent directory is synced or not, and the current behavior breaks the assumption and caused some false positive: https://github.com/facebook/rocksdb/pull/8489 Since the atomic renaming is used in CURRENT files, to fix the problem, in FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync(), we recover the state of overwritten file if the file is small. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8501 Test Plan: Run stress test for a while and see it doesn't break. Reviewed By: anand1976 Differential Revision: D29594384 fbshipit-source-id: 589b5c2f0a9d2aca53752d7bdb0231efa5b3ae92 --- db_stress_tool/db_stress_test_base.cc | 6 ---- utilities/fault_injection_fs.cc | 48 ++++++++++++++++++++++----- utilities/fault_injection_fs.h | 6 +++- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index ec5dfeb77..5d76b5787 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -2558,16 +2558,10 @@ void StressTest::Open() { ingest_read_error = false; Random rand(static_cast(FLAGS_seed)); -// TODO: This is disabled for now as deleting the CURRENT file after open -// failure causes the DB to not be reopened again due to the presence of -// WAL files, which DB::Open considers to be a corruption. Re-enable once -// we figure out a proper fix -#if 0 if (rand.OneIn(2)) { fault_fs_guard->DeleteFilesCreatedAfterLastDirSync(IOOptions(), nullptr); } -#endif if (rand.OneIn(3)) { fault_fs_guard->DropUnsyncedFileData(); } else if (rand.OneIn(2)) { diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc index 1903965b4..392a1146c 100644 --- a/utilities/fault_injection_fs.cc +++ b/utilities/fault_injection_fs.cc @@ -30,6 +30,8 @@ namespace ROCKSDB_NAMESPACE { +const std::string kNewFileNoOverwrite = ""; + // Assume a filename, and not a directory name like "/foo/bar/" std::string TestFSGetDirName(const std::string filename) { size_t found = filename.find_last_of("/\\"); @@ -415,7 +417,10 @@ IOStatus FaultInjectionTestFS::NewWritableFile( open_files_.insert(fname); auto dir_and_name = TestFSGetDirAndName(fname); auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list.insert(dir_and_name.second); + // The new file could overwrite an old one. Here we simplify + // the implementation by assuming no file of this name after + // dropping unsynced files. + list[dir_and_name.second] = kNewFileNoOverwrite; } { IOStatus in_s = InjectMetadataWriteError(); @@ -454,7 +459,7 @@ IOStatus FaultInjectionTestFS::ReopenWritableFile( open_files_.insert(fname); auto dir_and_name = TestFSGetDirAndName(fname); auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list.insert(dir_and_name.second); + list[dir_and_name.second] = kNewFileNoOverwrite; } { IOStatus in_s = InjectMetadataWriteError(); @@ -492,7 +497,9 @@ IOStatus FaultInjectionTestFS::NewRandomRWFile( open_files_.insert(fname); auto dir_and_name = TestFSGetDirAndName(fname); auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list.insert(dir_and_name.second); + // It could be overwriting an old file, but we simplify the + // implementation by ignoring it. + list[dir_and_name.second] = kNewFileNoOverwrite; } { IOStatus in_s = InjectMetadataWriteError(); @@ -579,6 +586,19 @@ IOStatus FaultInjectionTestFS::RenameFile(const std::string& s, return in_s; } } + + // We preserve contents of overwritten files up to a size threshold. + // We could keep previous file in another name, but we need to worry about + // garbage collect the those files. We do it if it is needed later. + // We ignore I/O errors here for simplicity. + std::string previous_contents = kNewFileNoOverwrite; + if (target()->FileExists(t, IOOptions(), nullptr).ok()) { + uint64_t file_size; + if (target()->GetFileSize(t, IOOptions(), &file_size, nullptr).ok() && + file_size < 1024) { + ReadFileToString(target(), t, &previous_contents).PermitUncheckedError(); + } + } IOStatus io_s = FileSystemWrapper::RenameFile(s, t, options, dbg); if (io_s.ok()) { @@ -594,7 +614,7 @@ IOStatus FaultInjectionTestFS::RenameFile(const std::string& s, if (dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) { auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; assert(tlist.find(tdn.second) == tlist.end()); - tlist.insert(tdn.second); + tlist[tdn.second] = previous_contents; } } IOStatus in_s = InjectMetadataWriteError(); @@ -665,7 +685,7 @@ IOStatus FaultInjectionTestFS::DropRandomUnsyncedFileData(Random* rnd) { IOStatus FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync( const IOOptions& options, IODebugContext* dbg) { // Because DeleteFile access this container make a copy to avoid deadlock - std::map> map_copy; + std::map> map_copy; { MutexLock l(&mutex_); map_copy.insert(dir_to_new_files_since_last_sync_.begin(), @@ -673,10 +693,20 @@ IOStatus FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync( } for (auto& pair : map_copy) { - for (std::string name : pair.second) { - IOStatus io_s = DeleteFile(pair.first + "/" + name, options, dbg); - if (!io_s.ok()) { - return io_s; + for (auto& file_pair : pair.second) { + if (file_pair.second == kNewFileNoOverwrite) { + IOStatus io_s = + DeleteFile(pair.first + "/" + file_pair.first, options, dbg); + if (!io_s.ok()) { + return io_s; + } + } else { + IOStatus io_s = + WriteStringToFile(target(), file_pair.second, + pair.first + "/" + file_pair.first, true); + if (!io_s.ok()) { + return io_s; + } } } } diff --git a/utilities/fault_injection_fs.h b/utilities/fault_injection_fs.h index 4fc707b5d..eb3be049e 100644 --- a/utilities/fault_injection_fs.h +++ b/utilities/fault_injection_fs.h @@ -480,7 +480,11 @@ class FaultInjectionTestFS : public FileSystemWrapper { port::Mutex mutex_; std::map db_file_state_; std::set open_files_; - std::unordered_map> + // directory -> (file name -> file contents to recover) + // When data is recovered from unsyned parent directory, the files with + // empty file contents to recover is deleted. Those with non-empty ones + // will be recovered to content accordingly. + std::unordered_map> dir_to_new_files_since_last_sync_; bool filesystem_active_; // Record flushes, syncs, writes bool filesystem_writable_; // Bypass FaultInjectionTestFS and go directly