Use FaultInjectionTestFS in transaction_test, clarify Close() APIs (#11499)

Summary:
... instead of race-condition-laden FaultInjectionTestEnv. See https://app.circleci.com/pipelines/github/facebook/rocksdb/27912/workflows/4c63e5a8-597e-439d-8c7e-82308056af02/jobs/609648 and similar PR https://github.com/facebook/rocksdb/issues/11271

Had to fix the semantics of FaultInjectionTestFS Close() operations to allow a non-OK Close() to fulfill the obligation to close before destruction. To me, this is the obvious choice of Close contract, because what is the caller supposed to do if Close() fails and they still have an obligation to successfully close before object destruction? Call Close() in an infinite loop? Leak the object? I have added API comments to the Env and Filesystem Close() functions to clarify the contracts.

Note that `DB::Close()` has one exception to this kind of Close contract, but it is clearly described in API comments and it is really only for catching programming mistakes, not for dealing with exogenous errors.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11499

Test Plan: watch CI

Reviewed By: jowlyzhang

Differential Revision: D46375708

Pulled By: pdillinger

fbshipit-source-id: 03d4d8251e5df50a82ecd139f7e83f613015fe40
oxigraph-main
Peter Dillinger 2 years ago committed by Facebook GitHub Bot
parent 7521478b43
commit 05a1d52e77
  1. 31
      include/rocksdb/env.h
  2. 24
      include/rocksdb/file_system.h
  3. 2
      utilities/fault_injection_fs.cc
  4. 15
      utilities/transactions/transaction_test.cc
  5. 23
      utilities/transactions/transaction_test.h

@ -875,6 +875,9 @@ class WritableFile {
WritableFile(const WritableFile&) = delete;
void operator=(const WritableFile&) = delete;
// For cases when Close() hasn't been called, many derived classes of
// WritableFile will need to call Close() non-virtually in their destructor,
// and ignore the result, to ensure resources are released.
virtual ~WritableFile();
// Append data to the end of the file
@ -938,6 +941,12 @@ class WritableFile {
// size due to whole pages writes. The behavior is undefined if called
// with other writes to follow.
virtual Status Truncate(uint64_t /*size*/) { return Status::OK(); }
// The caller should call Close() before destroying the WritableFile to
// surface any errors associated with finishing writes to the file.
// The file is considered closed regardless of return status.
// (However, implementations must also clean up properly in the destructor
// even if Close() is not called.)
virtual Status Close() = 0;
virtual Status Flush() = 0;
virtual Status Sync() = 0; // sync data
@ -1084,6 +1093,9 @@ class RandomRWFile {
RandomRWFile(const RandomRWFile&) = delete;
RandomRWFile& operator=(const RandomRWFile&) = delete;
// For cases when Close() hasn't been called, many derived classes of
// RandomRWFile will need to call Close() non-virtually in their destructor,
// and ignore the result, to ensure resources are released.
virtual ~RandomRWFile() {}
// Indicates if the class makes use of direct I/O
@ -1115,6 +1127,11 @@ class RandomRWFile {
virtual Status Fsync() { return Sync(); }
// The caller should call Close() before destroying the RandomRWFile to
// surface any errors associated with finishing writes to the file.
// The file is considered closed regardless of return status.
// (However, implementations must also clean up properly in the destructor
// even if Close() is not called.)
virtual Status Close() = 0;
// If you're adding methods here, remember to add them to
@ -1147,10 +1164,14 @@ class MemoryMappedFileBuffer {
// filesystem operations that can be executed on directories.
class Directory {
public:
// Many derived classes of Directory will need to call Close() in their
// destructor, when not called already, to ensure resources are released.
virtual ~Directory() {}
// Fsync directory. Can be called concurrently from multiple threads.
virtual Status Fsync() = 0;
// Close directory.
// Calling Close() before destroying a Directory is recommended to surface
// any errors associated with finishing writes (in case of future features).
// The directory is considered closed regardless of return status.
virtual Status Close() { return Status::NotSupported("Close"); }
virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const {
@ -1188,9 +1209,11 @@ class Logger {
virtual ~Logger();
// Close the log file. Must be called before destructor. If the return
// status is NotSupported(), it means the implementation does cleanup in
// the destructor
// Because Logger is typically a shared object, Close() may or may not be
// called before the object is destroyed, but is recommended to reveal any
// final errors in finishing outstanding writes. No other functions are
// supported after calling Close(), and the Logger is considered closed
// regardless of return status.
virtual Status Close();
// Write a header to the log file with the specified format

@ -955,6 +955,9 @@ class FSWritableFile {
write_hint_(Env::WLTH_NOT_SET),
strict_bytes_per_sync_(options.strict_bytes_per_sync) {}
// For cases when Close() hasn't been called, many derived classes of
// FSWritableFile will need to call Close() non-virtually in their destructor,
// and ignore the result, to ensure resources are released.
virtual ~FSWritableFile() {}
// Append data to the end of the file
@ -1028,6 +1031,12 @@ class FSWritableFile {
IODebugContext* /*dbg*/) {
return IOStatus::OK();
}
// The caller should call Close() before destroying the FSWritableFile to
// surface any errors associated with finishing writes to the file.
// The file is considered closed regardless of return status.
// (However, implementations must also clean up properly in the destructor
// even if Close() is not called.)
virtual IOStatus Close(const IOOptions& /*options*/,
IODebugContext* /*dbg*/) = 0;
@ -1185,6 +1194,9 @@ class FSRandomRWFile {
public:
FSRandomRWFile() {}
// For cases when Close() hasn't been called, many derived classes of
// FSRandomRWFile will need to call Close() non-virtually in their destructor,
// and ignore the result, to ensure resources are released.
virtual ~FSRandomRWFile() {}
// Indicates if the class makes use of direct I/O
@ -1220,6 +1232,11 @@ class FSRandomRWFile {
return Sync(options, dbg);
}
// The caller should call Close() before destroying the FSRandomRWFile to
// surface any errors associated with finishing writes to the file.
// The file is considered closed regardless of return status.
// (However, implementations must also clean up properly in the destructor
// even if Close() is not called.)
virtual IOStatus Close(const IOOptions& options, IODebugContext* dbg) = 0;
// EXPERIMENTAL
@ -1263,6 +1280,9 @@ class FSMemoryMappedFileBuffer {
// filesystem operations that can be executed on directories.
class FSDirectory {
public:
// For cases when Close() hasn't been called, many derived classes of
// FSDirectory will need to call Close() non-virtually in their destructor,
// and ignore the result, to ensure resources are released.
virtual ~FSDirectory() {}
// Fsync directory. Can be called concurrently from multiple threads.
virtual IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) = 0;
@ -1276,7 +1296,9 @@ class FSDirectory {
return Fsync(options, dbg);
}
// Close directory
// Calling Close() before destroying a FSDirectory is recommended to surface
// any errors associated with finishing writes (in case of future features).
// The directory is considered closed regardless of return status.
virtual IOStatus Close(const IOOptions& /*options*/,
IODebugContext* /*dbg*/) {
return IOStatus::NotSupported("Close");

@ -242,6 +242,7 @@ IOStatus TestFSWritableFile::PositionedAppend(
IOStatus TestFSWritableFile::Close(const IOOptions& options,
IODebugContext* dbg) {
MutexLock l(&mutex_);
fs_->WritableFileClosed(state_);
if (!fs_->IsFilesystemActive()) {
return fs_->GetError();
}
@ -263,7 +264,6 @@ IOStatus TestFSWritableFile::Close(const IOOptions& options,
io_s = target_->Close(options, dbg);
}
if (io_s.ok()) {
fs_->WritableFileClosed(state_);
IOStatus in_s = fs_->InjectMetadataWriteError();
if (!in_s.ok()) {
return in_s;

@ -25,7 +25,6 @@
#include "test_util/transaction_test_util.h"
#include "util/random.h"
#include "util/string_util.h"
#include "utilities/fault_injection_env.h"
#include "utilities/merge_operators.h"
#include "utilities/merge_operators/string_append/stringappend.h"
#include "utilities/transactions/pessimistic_transaction_db.h"
@ -1600,7 +1599,7 @@ TEST_P(TransactionStressTest, TwoPhaseLongPrepareTest) {
if (i % 29 == 0) {
// crash
env->SetFilesystemActive(false);
fault_fs->SetFilesystemActive(false);
reinterpret_cast<PessimisticTransactionDB*>(db)->TEST_Crash();
ReOpenNoDelete();
} else if (i % 37 == 0) {
@ -1668,7 +1667,7 @@ TEST_P(TransactionTest, TwoPhaseSequenceTest) {
delete txn;
// kill and reopen
env->SetFilesystemActive(false);
fault_fs->SetFilesystemActive(false);
ReOpenNoDelete();
assert(db != nullptr);
@ -1705,7 +1704,7 @@ TEST_P(TransactionTest, TwoPhaseDoubleRecoveryTest) {
delete txn;
// kill and reopen
env->SetFilesystemActive(false);
fault_fs->SetFilesystemActive(false);
reinterpret_cast<PessimisticTransactionDB*>(db)->TEST_Crash();
ReOpenNoDelete();
@ -1738,7 +1737,7 @@ TEST_P(TransactionTest, TwoPhaseDoubleRecoveryTest) {
delete txn;
// kill and reopen
env->SetFilesystemActive(false);
fault_fs->SetFilesystemActive(false);
ASSERT_OK(ReOpenNoDelete());
assert(db != nullptr);
@ -2090,7 +2089,7 @@ TEST_P(TransactionTest, TwoPhaseOutOfOrderDelete) {
ASSERT_OK(db->FlushWAL(false));
// kill and reopen
env->SetFilesystemActive(false);
fault_fs->SetFilesystemActive(false);
reinterpret_cast<PessimisticTransactionDB*>(db)->TEST_Crash();
ASSERT_OK(ReOpenNoDelete());
assert(db != nullptr);
@ -6367,11 +6366,11 @@ TEST_P(TransactionTest, DoubleCrashInRecovery) {
// Corrupt the last log file in the middle, so that it is not corrupted
// in the tail.
std::string file_content;
ASSERT_OK(ReadFileToString(env, fname, &file_content));
ASSERT_OK(ReadFileToString(env.get(), fname, &file_content));
file_content[400] = 'h';
file_content[401] = 'a';
ASSERT_OK(env->DeleteFile(fname));
ASSERT_OK(WriteStringToFile(env, file_content, fname, true));
ASSERT_OK(WriteStringToFile(env.get(), file_content, fname, true));
// Recover from corruption
std::vector<ColumnFamilyHandle*> handles;

@ -25,7 +25,7 @@
#include "test_util/transaction_test_util.h"
#include "util/random.h"
#include "util/string_util.h"
#include "utilities/fault_injection_env.h"
#include "utilities/fault_injection_fs.h"
#include "utilities/merge_operators.h"
#include "utilities/merge_operators/string_append/stringappend.h"
#include "utilities/transactions/pessimistic_transaction_db.h"
@ -42,7 +42,8 @@ class TransactionTestBase : public ::testing::Test {
public:
TransactionDB* db;
SpecialEnv special_env;
FaultInjectionTestEnv* env;
std::shared_ptr<FaultInjectionTestFS> fault_fs;
std::unique_ptr<Env> env;
std::string dbname;
Options options;
@ -63,8 +64,9 @@ class TransactionTestBase : public ::testing::Test {
options.level0_file_num_compaction_trigger = 2;
options.merge_operator = MergeOperators::CreateFromStringId("stringappend");
special_env.skip_fsync_ = true;
env = new FaultInjectionTestEnv(&special_env);
options.env = env;
fault_fs.reset(new FaultInjectionTestFS(FileSystem::Default()));
env.reset(new CompositeEnvWrapper(&special_env, fault_fs));
options.env = env.get();
options.two_write_queues = two_write_queue;
dbname = test::PerThreadDBPath("transaction_testdb");
@ -101,15 +103,14 @@ class TransactionTestBase : public ::testing::Test {
} else {
fprintf(stdout, "db is still in %s\n", dbname.c_str());
}
delete env;
}
Status ReOpenNoDelete() {
delete db;
db = nullptr;
env->AssertNoOpenFile();
env->DropUnsyncedFileData();
env->ResetState();
fault_fs->AssertNoOpenFile();
fault_fs->DropUnsyncedFileData();
fault_fs->ResetState();
Status s;
if (use_stackable_db_ == false) {
s = TransactionDB::Open(options, txn_db_options, dbname, &db);
@ -128,9 +129,9 @@ class TransactionTestBase : public ::testing::Test {
handles->clear();
delete db;
db = nullptr;
env->AssertNoOpenFile();
env->DropUnsyncedFileData();
env->ResetState();
fault_fs->AssertNoOpenFile();
fault_fs->DropUnsyncedFileData();
fault_fs->ResetState();
Status s;
if (use_stackable_db_ == false) {
s = TransactionDB::Open(options, txn_db_options, dbname, cfs, handles,

Loading…
Cancel
Save