From ac8e89a443373de0a77648470d9dd6edba181e37 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 13 Feb 2020 18:39:38 -0800 Subject: [PATCH] Should flush and sync WAL when writing it in DB::Open() (#6417) Summary: A recent fix related to 2pc https://github.com/facebook/rocksdb/pull/6313/ writes something to WAL, but does not flush or sync. This causes assertion failure "impl->TEST_WALBufferIsEmpty()" if manual_wal_flush = true. We should fsync the entry to make sure a second power reset can recover. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6417 Test Plan: Add manual_wal_flush=true case in TransactionTest.DoubleCrashInRecovery and fix a bug in the test so that the bug can be reproduced. It passes with the fix. Differential Revision: D19894537 fbshipit-source-id: f1e84e49e2269f583c6019743118292cd8b6598e --- db/db_impl/db_impl_open.cc | 7 + utilities/transactions/transaction_test.cc | 155 +++++++++++---------- 2 files changed, 86 insertions(+), 76 deletions(-) diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index d238dfc77..81d7f3f16 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1494,6 +1494,13 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, uint64_t log_used, log_size; log::Writer* log_writer = impl->logs_.back().writer; s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size); + if (s.ok()) { + // Need to fsync, otherwise it might get lost after a power reset. + s = impl->FlushWAL(false); + if (s.ok()) { + s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync); + } + } } } } diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 142799f5d..30491c698 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -6120,84 +6120,87 @@ TEST_P(TransactionTest, ReseekOptimization) { // there. The new log files should be still read succesfully during recovery of // the 2nd crash. TEST_P(TransactionTest, DoubleCrashInRecovery) { - for (const bool write_after_recovery : {false, true}) { - options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - ReOpen(); - std::string cf_name = "two"; - ColumnFamilyOptions cf_options; - ColumnFamilyHandle* cf_handle = nullptr; - ASSERT_OK(db->CreateColumnFamily(cf_options, cf_name, &cf_handle)); - - // Add a prepare entry to prevent the older logs from being deleted. - WriteOptions write_options; - TransactionOptions txn_options; - Transaction* txn = db->BeginTransaction(write_options, txn_options); - ASSERT_OK(txn->SetName("xid")); - ASSERT_OK(txn->Put(Slice("foo-prepare"), Slice("bar-prepare"))); - ASSERT_OK(txn->Prepare()); - - FlushOptions flush_ops; - db->Flush(flush_ops); - // Now we have a log that cannot be deleted - - ASSERT_OK(db->Put(write_options, cf_handle, "foo1", "bar1")); - // Flush only the 2nd cf - db->Flush(flush_ops, cf_handle); - - // The value is large enough to be touched by the corruption we ingest - // below. - std::string large_value(400, ' '); - // key/value not touched by corruption - ASSERT_OK(db->Put(write_options, "foo2", "bar2")); - // key/value touched by corruption - ASSERT_OK(db->Put(write_options, "foo3", large_value)); - // key/value not touched by corruption - ASSERT_OK(db->Put(write_options, "foo4", "bar4")); - - db->FlushWAL(true); - DBImpl* db_impl = reinterpret_cast(db->GetRootDB()); - uint64_t wal_file_id = db_impl->TEST_LogfileNumber(); - std::string fname = LogFileName(dbname, wal_file_id); - reinterpret_cast(db)->TEST_Crash(); - delete txn; - delete cf_handle; - delete db; - db = nullptr; - - // Corrupt the last log file in the middle, so that it is not corrupted - // in the tail. - std::string file_content; - ASSERT_OK(ReadFileToString(env, fname, &file_content)); - file_content[400] = 'h'; - file_content[401] = 'a'; - ASSERT_OK(env->DeleteFile(fname)); - ASSERT_OK(WriteStringToFile(env, file_content, fname)); + for (const bool manual_wal_flush : {false, true}) { + for (const bool write_after_recovery : {false, true}) { + options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; + options.manual_wal_flush = manual_wal_flush; + ReOpen(); + std::string cf_name = "two"; + ColumnFamilyOptions cf_options; + ColumnFamilyHandle* cf_handle = nullptr; + ASSERT_OK(db->CreateColumnFamily(cf_options, cf_name, &cf_handle)); + + // Add a prepare entry to prevent the older logs from being deleted. + WriteOptions write_options; + TransactionOptions txn_options; + Transaction* txn = db->BeginTransaction(write_options, txn_options); + ASSERT_OK(txn->SetName("xid")); + ASSERT_OK(txn->Put(Slice("foo-prepare"), Slice("bar-prepare"))); + ASSERT_OK(txn->Prepare()); - // Recover from corruption - std::vector handles; - std::vector column_families; - column_families.push_back(ColumnFamilyDescriptor(kDefaultColumnFamilyName, - ColumnFamilyOptions())); - column_families.push_back( - ColumnFamilyDescriptor("two", ColumnFamilyOptions())); - ASSERT_OK(ReOpenNoDelete(column_families, &handles)); - - if (write_after_recovery) { - // Write data to the log right after the corrupted log - ASSERT_OK(db->Put(write_options, "foo5", large_value)); - } + FlushOptions flush_ops; + db->Flush(flush_ops); + // Now we have a log that cannot be deleted + + ASSERT_OK(db->Put(write_options, cf_handle, "foo1", "bar1")); + // Flush only the 2nd cf + db->Flush(flush_ops, cf_handle); + + // The value is large enough to be touched by the corruption we ingest + // below. + std::string large_value(400, ' '); + // key/value not touched by corruption + ASSERT_OK(db->Put(write_options, "foo2", "bar2")); + // key/value touched by corruption + ASSERT_OK(db->Put(write_options, "foo3", large_value)); + // key/value not touched by corruption + ASSERT_OK(db->Put(write_options, "foo4", "bar4")); + + db->FlushWAL(true); + DBImpl* db_impl = reinterpret_cast(db->GetRootDB()); + uint64_t wal_file_id = db_impl->TEST_LogfileNumber(); + std::string fname = LogFileName(dbname, wal_file_id); + reinterpret_cast(db)->TEST_Crash(); + delete txn; + delete cf_handle; + delete db; + db = nullptr; + + // Corrupt the last log file in the middle, so that it is not corrupted + // in the tail. + std::string file_content; + ASSERT_OK(ReadFileToString(env, fname, &file_content)); + file_content[400] = 'h'; + file_content[401] = 'a'; + ASSERT_OK(env->DeleteFile(fname)); + ASSERT_OK(WriteStringToFile(env, file_content, fname, true)); + + // Recover from corruption + std::vector handles; + std::vector column_families; + column_families.push_back(ColumnFamilyDescriptor(kDefaultColumnFamilyName, + ColumnFamilyOptions())); + column_families.push_back( + ColumnFamilyDescriptor("two", ColumnFamilyOptions())); + ASSERT_OK(ReOpenNoDelete(column_families, &handles)); + + if (write_after_recovery) { + // Write data to the log right after the corrupted log + ASSERT_OK(db->Put(write_options, "foo5", large_value)); + } - // Persist data written to WAL during recovery or by the last Put - db->FlushWAL(true); - // 2nd crash to recover while having a valid log after the corrupted one. - ASSERT_OK(ReOpenNoDelete(column_families, &handles)); - assert(db != nullptr); - txn = db->GetTransactionByName("xid"); - ASSERT_TRUE(txn != nullptr); - ASSERT_OK(txn->Commit()); - delete txn; - for (auto handle : handles) { - delete handle; + // Persist data written to WAL during recovery or by the last Put + db->FlushWAL(true); + // 2nd crash to recover while having a valid log after the corrupted one. + ASSERT_OK(ReOpenNoDelete(column_families, &handles)); + assert(db != nullptr); + txn = db->GetTransactionByName("xid"); + ASSERT_TRUE(txn != nullptr); + ASSERT_OK(txn->Commit()); + delete txn; + for (auto handle : handles) { + delete handle; + } } } }