From f6d9730ea1be3fc05080c7147a26be34254fb44c Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Fri, 13 May 2022 12:29:20 -0700 Subject: [PATCH] Fix stress test with best-efforts-recovery (#9986) Summary: This PR - since we are testing with disable_wal = true and best_efforts_recovery, we should set column family count to 1, due to the requirement of `ExpectedState` tracking and replaying logic. - during backup and checkpoint restore, disable best-efforts-recovery. This does not matter now because db_crashtest.py always disables wal when testing best-efforts-recovery. In the future, if we enable wal, then not setting `restore_options.best_efforts_recovery` will cause backup db not to recover the WALs, and differ from db (that enables WAL). - during verification of backup and checkpoint restore, print the key where inconsistency exists between expected state and db. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9986 Test Plan: TEST_TMPDIR=/dev/shm/rocksdb make crash_test_with_best_efforts_recovery Reviewed By: siying Differential Revision: D36353105 Pulled By: riversand963 fbshipit-source-id: a484da161273e6216a1f7e245bac15a349693917 --- db_stress_tool/db_stress_test_base.cc | 30 +++++++++++---- tools/db_crashtest.py | 54 ++++++--------------------- 2 files changed, 33 insertions(+), 51 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 3fd497901..ab8ae2dfe 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1424,8 +1424,9 @@ void StressTest::TestCompactFiles(ThreadState* /* thread */, Status StressTest::TestBackupRestore( ThreadState* thread, const std::vector& rand_column_families, const std::vector& rand_keys) { - std::string backup_dir = FLAGS_db + "/.backup" + std::to_string(thread->tid); - std::string restore_dir = 
FLAGS_db + "/.restore" + std::to_string(thread->tid); BackupEngineOptions backup_opts(backup_dir); // For debugging, get info_log from live options @@ -1558,6 +1559,7 @@ Status StressTest::TestBackupRestore( // Not yet implemented: opening restored BlobDB or TransactionDB if (s.ok() && !FLAGS_use_txn && !FLAGS_use_blob_db) { Options restore_options(options_); + restore_options.best_efforts_recovery = false; restore_options.listeners.clear(); // Avoid dangling/shared file descriptors, for reliable destroy restore_options.sst_file_manager = nullptr; @@ -1614,11 +1616,17 @@ Status StressTest::TestBackupRestore( bool exists = thread->shared->Exists(rand_column_families[i], rand_keys[0]); if (get_status.ok()) { if (!exists && from_latest && ShouldAcquireMutexOnKey()) { - s = Status::Corruption("key exists in restore but not in original db"); + std::ostringstream oss; + oss << "0x" << key.ToString(true) + << " exists in restore but not in original db"; + s = Status::Corruption(oss.str()); } } else if (get_status.IsNotFound()) { if (exists && from_latest && ShouldAcquireMutexOnKey()) { - s = Status::Corruption("key exists in original db but not in restore"); + std::ostringstream oss; + oss << "0x" << key.ToString(true) + << " exists in original db but not in restore"; + s = Status::Corruption(oss.str()); } } else { s = get_status; @@ -1760,6 +1768,7 @@ Status StressTest::TestCheckpoint(ThreadState* thread, DB* checkpoint_db = nullptr; if (s.ok()) { Options options(options_); + options.best_efforts_recovery = false; options.listeners.clear(); // Avoid race condition in trash handling after delete checkpoint_db options.sst_file_manager.reset(); @@ -1791,13 +1800,18 @@ Status StressTest::TestCheckpoint(ThreadState* thread, thread->shared->Exists(rand_column_families[i], rand_keys[0]); if (get_status.ok()) { if (!exists && ShouldAcquireMutexOnKey()) { - s = Status::Corruption( - "key exists in checkpoint but not in original db"); + std::ostringstream oss; + oss << "0x" << 
key.ToString(true) << " exists in checkpoint " + << checkpoint_dir << " but not in original db"; + s = Status::Corruption(oss.str()); } } else if (get_status.IsNotFound()) { if (exists && ShouldAcquireMutexOnKey()) { - s = Status::Corruption( - "key exists in original db but not in checkpoint"); + std::ostringstream oss; + oss << "0x" << key.ToString(true) + << " exists in original db but not in checkpoint " + << checkpoint_dir; + s = Status::Corruption(oss.str()); } } else { s = get_status; diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 1aed479e0..325a46871 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -313,10 +313,10 @@ txn_params = { } best_efforts_recovery_params = { - "best_efforts_recovery": True, - "skip_verifydb": True, - "verify_db_one_in": 0, - "continuous_verification_interval": 0, + "best_efforts_recovery": 1, + "atomic_flush": 0, + "disable_wal": 1, + "column_families": 1, } blob_params = { @@ -502,6 +502,13 @@ def finalize_and_sanitize(src_params): dest_params["memtable_prefix_bloom_size_ratio"] = 0 if dest_params.get("two_write_queues") == 1: dest_params["enable_pipelined_write"] = 0 + if dest_params.get("best_efforts_recovery") == 1: + dest_params["disable_wal"] = 1 + dest_params["atomic_flush"] = 0 + dest_params["enable_compaction_filter"] = 0 + dest_params["sync"] = 0 + dest_params["write_fault_one_in"] = 0 + return dest_params def gen_cmd_params(args): @@ -560,42 +567,6 @@ def gen_cmd(params, unknown_params): return cmd -# Inject inconsistency to db directory. 
-def inject_inconsistencies_to_db_dir(dir_path): - files = os.listdir(dir_path) - file_num_rgx = re.compile(r'(?P[0-9]{6})') - largest_fnum = 0 - for f in files: - m = file_num_rgx.search(f) - if m and not f.startswith('LOG'): - largest_fnum = max(largest_fnum, int(m.group('number'))) - - candidates = [ - f for f in files if re.search(r'[0-9]+\.sst', f) - ] - deleted = 0 - corrupted = 0 - for f in candidates: - rnd = random.randint(0, 99) - f_path = os.path.join(dir_path, f) - if rnd < 10: - os.unlink(f_path) - deleted = deleted + 1 - elif 10 <= rnd and rnd < 30: - with open(f_path, "a") as fd: - fd.write('12345678') - corrupted = corrupted + 1 - print('Removed %d table files' % deleted) - print('Corrupted %d table files' % corrupted) - - # Add corrupted MANIFEST and SST - for num in range(largest_fnum + 1, largest_fnum + 10): - rnd = random.randint(0, 1) - fname = ("MANIFEST-%06d" % num) if rnd == 0 else ("%06d.sst" % num) - print('Write %s' % fname) - with open(os.path.join(dir_path, fname), "w") as fd: - fd.write("garbage") - def execute_cmd(cmd, timeout): child = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE) @@ -649,9 +620,6 @@ def blackbox_crash_main(args, unknown_args): time.sleep(1) # time to stabilize before the next run - if args.test_best_efforts_recovery: - inject_inconsistencies_to_db_dir(dbname) - time.sleep(1) # time to stabilize before the next run # we need to clean up after ourselves -- only do this on test success