diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 5c2ff989f..4224a0193 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -174,6 +174,7 @@ DECLARE_bool(use_txn); DECLARE_uint64(txn_write_policy); DECLARE_bool(unordered_write); DECLARE_int32(backup_one_in); +DECLARE_uint64(backup_max_size); DECLARE_int32(checkpoint_one_in); DECLARE_int32(ingest_external_file_one_in); DECLARE_int32(ingest_external_file_width); diff --git a/db_stress_tool/db_stress_env_wrapper.h b/db_stress_tool/db_stress_env_wrapper.h index 411150697..484071f10 100644 --- a/db_stress_tool/db_stress_env_wrapper.h +++ b/db_stress_tool/db_stress_env_wrapper.h @@ -20,10 +20,12 @@ class DbStressEnvWrapper : public EnvWrapper { // We determine whether it is a manifest file by searching a strong, // so that there will be false positive if the directory path contains the // keyword but it is unlikely. - // Checkpoint directory needs to be exempted. + // Checkpoint, backup, and restore directories needs to be exempted. if (!if_preserve_all_manifests || f.find("MANIFEST-") == std::string::npos || - f.find("checkpoint") != std::string::npos) { + f.find("checkpoint") != std::string::npos || + f.find(".backup") != std::string::npos || + f.find(".restore") != std::string::npos) { return target()->DeleteFile(f); } return Status::OK(); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 96e05a155..919a256c2 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -479,6 +479,10 @@ DEFINE_int32(backup_one_in, 0, "every N operations on average. 0 indicates CreateNewBackup() " "is disabled."); +DEFINE_uint64(backup_max_size, 100 * 1024 * 1024, + "If non-zero, skip checking backup/restore when DB size in " + "bytes exceeds this setting."); + DEFINE_int32(checkpoint_one_in, 0, "If non-zero, then CreateCheckpoint() will be called once for " "every N operations on average. 0 indicates CreateCheckpoint() " diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index b0ab78adb..7dcce08b2 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -667,10 +667,23 @@ void StressTest::OperateDb(ThreadState* thread) { } if (thread->rand.OneInOpt(FLAGS_backup_one_in)) { - Status s = TestBackupRestore(thread, rand_column_families, rand_keys); - if (!s.ok()) { - VerificationAbort(shared, "Backup/restore gave inconsistent state", - s); + // Beyond a certain DB size threshold, this test becomes heavier than + // it's worth. + uint64_t total_size = 0; + if (FLAGS_backup_max_size > 0) { + std::vector files; + db_stress_env->GetChildrenFileAttributes(FLAGS_db, &files); + for (auto& file : files) { + total_size += file.size_bytes; + } + } + + if (total_size <= FLAGS_backup_max_size) { + Status s = TestBackupRestore(thread, rand_column_families, rand_keys); + if (!s.ok()) { + VerificationAbort(shared, "Backup/restore gave inconsistent state", + s); + } } } @@ -1211,6 +1224,30 @@ Status StressTest::TestBackupRestore( std::string backup_dir = FLAGS_db + "/.backup" + ToString(thread->tid); std::string restore_dir = FLAGS_db + "/.restore" + ToString(thread->tid); BackupableDBOptions backup_opts(backup_dir); + // For debugging, get info_log from live options + backup_opts.info_log = db_->GetDBOptions().info_log.get(); + assert(backup_opts.info_log); + if (thread->rand.OneIn(2)) { + backup_opts.file_checksum_gen_factory = options_.file_checksum_gen_factory; + } + if (thread->rand.OneIn(10)) { + backup_opts.share_table_files = false; + } else { + backup_opts.share_table_files = true; + if (thread->rand.OneIn(5)) { + backup_opts.share_files_with_checksum = false; + } else { + backup_opts.share_files_with_checksum = true; + if (thread->rand.OneIn(2)) { + // old + backup_opts.share_files_with_checksum_naming = kChecksumAndFileSize; + } else { + // new + backup_opts.share_files_with_checksum_naming = + kOptionalChecksumAndDbSessionId; + } + } + } BackupEngine* backup_engine = nullptr; Status s = BackupEngine::Open(db_stress_env, backup_opts, &backup_engine); if (s.ok()) { @@ -1221,12 +1258,31 @@ Status StressTest::TestBackupRestore( backup_engine = nullptr; s = BackupEngine::Open(db_stress_env, backup_opts, &backup_engine); } + std::vector backup_info; if (s.ok()) { - s = backup_engine->RestoreDBFromLatestBackup(restore_dir /* db_dir */, - restore_dir /* wal_dir */); + backup_engine->GetBackupInfo(&backup_info); + if (backup_info.empty()) { + s = Status::NotFound("no backups found"); + } + } + if (s.ok() && thread->rand.OneIn(2)) { + s = backup_engine->VerifyBackup( + backup_info.front().backup_id, + thread->rand.OneIn(2) /* verify_with_checksum */); + } + if (s.ok()) { + int count = static_cast(backup_info.size()); + s = backup_engine->RestoreDBFromBackup( + RestoreOptions(), backup_info[thread->rand.Uniform(count)].backup_id, + restore_dir /* db_dir */, restore_dir /* wal_dir */); } if (s.ok()) { - s = backup_engine->PurgeOldBackups(0 /* num_backups_to_keep */); + uint32_t to_keep = 0; + if (thread->tid == 0) { + // allow one thread to keep up to 2 backups + to_keep = thread->rand.Uniform(3); + } + s = backup_engine->PurgeOldBackups(to_keep); } DB* restored_db = nullptr; std::vector restored_cf_handles; diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 1e4b57480..0f7239b67 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -29,6 +29,9 @@ expected_values_file = tempfile.NamedTemporaryFile() default_params = { "acquire_snapshot_one_in": 10000, + "backup_max_size": 100 * 1024 * 1024, + # Consider larger number when backups considered more stable + "backup_one_in": 100000, "block_size": 16384, "bloom_bits": lambda: random.choice([random.randint(0,19), random.lognormvariate(2.3, 1.3)]),