diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 4dcc921d6..b605d0bcd 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1217,10 +1217,6 @@ void StressTest::TestCompactFiles(ThreadState* /* thread */, Status StressTest::TestBackupRestore( ThreadState* thread, const std::vector& rand_column_families, const std::vector& rand_keys) { - // Note the column families chosen by `rand_column_families` cannot be - // dropped while the locks for `rand_keys` are held. So we should not have - // to worry about accessing those column families throughout this function. - assert(rand_column_families.size() == rand_keys.size()); std::string backup_dir = FLAGS_db + "/.backup" + ToString(thread->tid); std::string restore_dir = FLAGS_db + "/.restore" + ToString(thread->tid); BackupableDBOptions backup_opts(backup_dir); @@ -1249,32 +1245,60 @@ Status StressTest::TestBackupRestore( } } BackupEngine* backup_engine = nullptr; + std::string from = "a backup/restore operation"; Status s = BackupEngine::Open(db_stress_env, backup_opts, &backup_engine); + if (!s.ok()) { + from = "BackupEngine::Open"; + } if (s.ok()) { s = backup_engine->CreateNewBackup(db_); + if (!s.ok()) { + from = "BackupEngine::CreateNewBackup"; + } } if (s.ok()) { delete backup_engine; backup_engine = nullptr; s = BackupEngine::Open(db_stress_env, backup_opts, &backup_engine); + if (!s.ok()) { + from = "BackupEngine::Open (again)"; + } } std::vector backup_info; if (s.ok()) { backup_engine->GetBackupInfo(&backup_info); if (backup_info.empty()) { s = Status::NotFound("no backups found"); + from = "BackupEngine::GetBackupInfo"; } } if (s.ok() && thread->rand.OneIn(2)) { s = backup_engine->VerifyBackup( backup_info.front().backup_id, thread->rand.OneIn(2) /* verify_with_checksum */); + if (!s.ok()) { + from = "BackupEngine::VerifyBackup"; + } } + bool from_latest = false; if (s.ok()) { int count = static_cast(backup_info.size()); - s = backup_engine->RestoreDBFromBackup( - RestoreOptions(), backup_info[thread->rand.Uniform(count)].backup_id, - restore_dir /* db_dir */, restore_dir /* wal_dir */); + if (count > 1) { + s = backup_engine->RestoreDBFromBackup( + RestoreOptions(), backup_info[thread->rand.Uniform(count)].backup_id, + restore_dir /* db_dir */, restore_dir /* wal_dir */); + if (!s.ok()) { + from = "BackupEngine::RestoreDBFromBackup"; + } + } else { + from_latest = true; + s = backup_engine->RestoreDBFromLatestBackup(RestoreOptions(), + restore_dir /* db_dir */, + restore_dir /* wal_dir */); + if (!s.ok()) { + from = "BackupEngine::RestoreDBFromLatestBackup"; + } + } } if (s.ok()) { uint32_t to_keep = 0; @@ -1283,6 +1307,9 @@ Status StressTest::TestBackupRestore( to_keep = thread->rand.Uniform(3); } s = backup_engine->PurgeOldBackups(to_keep); + if (!s.ok()) { + from = "BackupEngine::PurgeOldBackups"; + } } DB* restored_db = nullptr; std::vector restored_cf_handles; @@ -1300,11 +1327,19 @@ Status StressTest::TestBackupRestore( } s = DB::Open(DBOptions(restore_options), restore_dir, cf_descriptors, &restored_cf_handles, &restored_db); + if (!s.ok()) { + from = "DB::Open in backup/restore"; + } } - // for simplicity, currently only verifies existence/non-existence of a few - // keys - for (size_t i = 0; s.ok() && i < rand_column_families.size(); ++i) { - std::string key_str = Key(rand_keys[i]); + // Note the column families chosen by `rand_column_families` cannot be + // dropped while the locks for `rand_keys` are held. So we should not have + // to worry about accessing those column families throughout this function. + // + // For simplicity, currently only verifies existence/non-existence of a + // single key + for (size_t i = 0; from_latest && s.ok() && i < rand_column_families.size(); + ++i) { + std::string key_str = Key(rand_keys[0]); Slice key = key_str; std::string restored_value; Status get_status = restored_db->Get( @@ -1321,6 +1356,9 @@ Status StressTest::TestBackupRestore( } } else { s = get_status; + if (!s.ok()) { + from = "DB::Get in backup/restore"; + } } } if (backup_engine != nullptr) { @@ -1335,7 +1373,7 @@ Status StressTest::TestBackupRestore( restored_db = nullptr; } if (!s.ok()) { - fprintf(stderr, "A backup/restore operation failed with: %s\n", + fprintf(stderr, "Failure in %s with: %s\n", from.c_str(), s.ToString().c_str()); } return s; diff --git a/env/fs_posix.cc b/env/fs_posix.cc index b85c76464..b4311c859 100644 --- a/env/fs_posix.cc +++ b/env/fs_posix.cc @@ -159,6 +159,7 @@ class PosixFileSystem : public FileSystem { #endif // !ROCKSDB_LITE #if !defined(OS_MACOSX) && !defined(OS_OPENBSD) && !defined(OS_SOLARIS) flags |= O_DIRECT; + TEST_SYNC_POINT_CALLBACK("NewSequentialFile:O_DIRECT", &flags); #endif } diff --git a/test_util/sync_point.cc b/test_util/sync_point.cc index afdda872b..16eb4e553 100644 --- a/test_util/sync_point.cc +++ b/test_util/sync_point.cc @@ -83,6 +83,11 @@ void SetupSyncPointsToMockDirectIO() { int* val = static_cast(arg); *val &= ~O_DIRECT; }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "NewSequentialFile:O_DIRECT", [&](void* arg) { + int* val = static_cast(arg); + *val &= ~O_DIRECT; + }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); #endif } diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index f509daa94..2e2ecc1fc 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -29,6 +29,9 @@ expected_values_file = tempfile.NamedTemporaryFile() default_params = { "acquire_snapshot_one_in": 10000, + "backup_max_size": 100 * 1024 * 1024, + # Consider larger number when backups considered more stable + "backup_one_in": 100000, "block_size": 16384, "bloom_bits": lambda: random.choice([random.randint(0,19), random.lognormvariate(2.3, 1.3)]), diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index c787e3056..0d36b4271 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -1182,7 +1182,9 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( new_backup->SetSequenceNumber(sequence_number); } } - ROCKS_LOG_INFO(options_.info_log, "add files for backup done, wait finish."); + ROCKS_LOG_INFO(options_.info_log, + "add files for backup done (%s), wait finish.", + s.ok() ? "OK" : "not OK"); Status item_status; for (auto& item : backup_items_to_finish) { item.result.wait(); @@ -1198,7 +1200,7 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( result.custom_checksum_hex, result.checksum_func_name, result.db_id, result.db_session_id)); } - if (!item_status.ok()) { + if (s.ok() && !item_status.ok()) { s = item_status; } }