From 0547cecb8190b09089cfa28ce8facdb4be41b115 Mon Sep 17 00:00:00 2001
From: Yanqin Jin
Date: Wed, 2 Nov 2022 14:54:58 -0700
Subject: [PATCH] Reduce access to atomic variables in a test (#10909)

Summary:
With the TSAN build on CircleCI (see mini-tsan in .circleci/config), `SeqAdvanceConcurrentTest.SeqAdvanceConcurrent` sometimes gets stuck when the experimental "unordered write" feature is enabled. The stack trace is the following:

```
Thread 7 (Thread 0x7f2284a1c700 (LWP 481523) "write_prepared_"):
#0  0x00000000004fa3f5 in __tsan_atomic64_load () at ./db/merge_context.h:15
#1  0x00000000005e5942 in std::__atomic_base<unsigned long>::load (this=0x7b74000012f8, __m=std::memory_order_seq_cst) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/atomic_base.h:481
#2  std::__atomic_base<unsigned long>::operator unsigned long (this=0x7b74000012f8) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/atomic_base.h:341
#3  0x00000000005bf001 in rocksdb::SeqAdvanceConcurrentTest_SeqAdvanceConcurrent_Test::TestBody()::$_9::operator()(void*) const (this=0x7b14000085e8) at utilities/transactions/write_prepared_transaction_test.cc:1702

Thread 6 (Thread 0x7f228421b700 (LWP 481521) "write_prepared_"):
#0  0x000000000052178c in __tsan::MetaMap::GetAndLock(__tsan::ThreadState*, unsigned long, unsigned long, bool, bool) () at ./db/merge_context.h:15
#1  0x00000000004fa48e in __tsan_atomic64_load () at ./db/merge_context.h:15
#2  0x00000000005e5942 in std::__atomic_base<unsigned long>::load (this=0x7b74000012f8, __m=std::memory_order_seq_cst) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/atomic_base.h:481
#3  std::__atomic_base<unsigned long>::operator unsigned long (this=0x7b74000012f8) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/atomic_base.h:341
#4  0x00000000005bf001 in rocksdb::SeqAdvanceConcurrentTest_SeqAdvanceConcurrent_Test::TestBody()::$_9::operator()(void*) const (this=0x7b14000085e8) at utilities/transactions/write_prepared_transaction_test.cc:1702
```

This is problematic and suspicious: two threads are stuck at the same place trying to load from an atomic variable (https://github.com/facebook/rocksdb/blob/7.8.fb/utilities/transactions/write_prepared_transaction_test.cc#L1694:L1707), and it is not clear why two threads can reach the same point. The stack trace shows that there may be a deadlock, since the two threads are on the same write thread (one is doing Prepare, while the other is trying to commit).
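For intuition, here is a distilled, hypothetical sketch of the pattern the fix below addresses (the names `on_join_racy` and `on_join_fixed` are illustrative, not from the test). One plausible reading of the deadlock: incrementing the counter and then re-reading it lets another thread's increment slip in between, so a branch meant for exactly one thread can fire for no thread or for two threads at once, which is consistent with the two identical stack traces above. Capturing the value returned by `fetch_add` gives each thread a unique pre-increment value instead:

```
#include <atomic>
#include <cstddef>

// Hypothetical distillation; `linked` and `first_group_size` mirror the
// test's variables, but this is not the actual test code.
std::atomic<size_t> linked{0};

// Racy pattern (before the fix): the branch re-reads `linked`, and another
// thread may increment it between our increment and our read, so the
// "first thread" branch can be taken zero or multiple times.
void on_join_racy(size_t first_group_size) {
  linked++;
  if (linked == 1) {
    while (linked < first_group_size) {
      // busy-wait for the rest of the first group
    }
  }
}

// Fixed pattern: fetch_add returns the pre-increment value, which is unique
// to this thread, so exactly one thread observes 0 and takes the branch.
void on_join_fixed(size_t first_group_size) {
  const size_t orig_linked = linked.fetch_add(1, std::memory_order_acq_rel);
  if (orig_linked == 0) {
    while (linked.load(std::memory_order_acquire) < first_group_size) {
      // busy-wait for the rest of the first group
    }
  }
}
```

The fixed version also reads the shared atomic fewer times per callback, which is what the PR title refers to.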
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10909

Test Plan:
On CircleCI mini-tsan, first apply a patch so that we have a higher chance of hitting the same problematic situation:

```
diff --git a/utilities/transactions/write_prepared_transaction_test.cc b/utilities/transactions/write_prepared_transaction_test.cc
index 4bc1f3744..bd5dc4924 100644
--- a/utilities/transactions/write_prepared_transaction_test.cc
+++ b/utilities/transactions/write_prepared_transaction_test.cc
@@ -1714,13 +1714,13 @@ TEST_P(SeqAdvanceConcurrentTest, SeqAdvanceConcurrent) {
       size_t d = (n % base[bi + 1]) / base[bi];
       switch (d) {
         case 0:
-          threads.emplace_back(txn_t0, bi);
+          threads.emplace_back(txn_t3, bi);
           break;
         case 1:
-          threads.emplace_back(txn_t1, bi);
+          threads.emplace_back(txn_t3, bi);
           break;
         case 2:
-          threads.emplace_back(txn_t2, bi);
+          threads.emplace_back(txn_t3, bi);
           break;
         case 3:
           threads.emplace_back(txn_t3, bi);
```

then build and run the tests:

```
COMPILE_WITH_TSAN=1 CC=clang-13 CXX=clang++-13 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check
gtest-parallel -r 100 ./write_prepared_transaction_test --gtest_filter=TwoWriteQueues/SeqAdvanceConcurrentTest.SeqAdvanceConcurrent/19
```

In the above, `SeqAdvanceConcurrent/19` is used as an example; tests 10 to 19 correspond to unordered write, in which Prepare() and Commit() can both enter the same write thread. Before this PR, there is a high chance of hitting the deadlock; with this PR, no deadlock has been encountered so far.

Reviewed By: ltamasi

Differential Revision: D40869387

Pulled By: riversand963

fbshipit-source-id: 81e82a70c263e4f3417597a201b081ee54f1deab
---
 .../write_prepared_transaction_test.cc | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/utilities/transactions/write_prepared_transaction_test.cc b/utilities/transactions/write_prepared_transaction_test.cc
index ce1d7803f..4bc1f3744 100644
--- a/utilities/transactions/write_prepared_transaction_test.cc
+++ b/utilities/transactions/write_prepared_transaction_test.cc
@@ -1684,22 +1684,23 @@ TEST_P(SeqAdvanceConcurrentTest, SeqAdvanceConcurrent) {
     expected_commits = 0;
     std::vector<port::Thread> threads;
 
-    linked = 0;
+    linked.store(0, std::memory_order_release);
     std::atomic<bool> batch_formed(false);
     ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
         "WriteThread::EnterAsBatchGroupLeader:End",
         [&](void* /*arg*/) { batch_formed = true; });
     ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
         "WriteThread::JoinBatchGroup:Wait", [&](void* /*arg*/) {
-          linked++;
-          if (linked == 1) {
+          size_t orig_linked = linked.fetch_add(1, std::memory_order_acq_rel);
+          if (orig_linked == 0) {
             // Wait until the others are linked too.
-            while (linked < first_group_size) {
+            while (linked.load(std::memory_order_acquire) < first_group_size) {
             }
-          } else if (linked == 1 + first_group_size) {
+          } else if (orig_linked == first_group_size) {
             // Make the 2nd batch of the rest of writes plus any followup
             // commits from the first batch
-            while (linked < txn_cnt + commit_writes) {
+            while (linked.load(std::memory_order_acquire) <
+                   txn_cnt + commit_writes) {
             }
           }
           // Then we will have one or more batches consisting of follow-up
@@ -1731,14 +1732,15 @@ TEST_P(SeqAdvanceConcurrentTest, SeqAdvanceConcurrent) {
         FAIL();
       }
       // wait to be linked
-      while (linked.load() <= bi) {
+      while (linked.load(std::memory_order_acquire) <= bi) {
       }
       // after a queue of size first_group_size
       if (bi + 1 == first_group_size) {
         while (!batch_formed) {
         }
         // to make it more deterministic, wait until the commits are linked
-        while (linked.load() <= bi + expected_commits) {
+        while (linked.load(std::memory_order_acquire) <=
+               bi + expected_commits) {
         }
       }
     }
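Note on the explicit memory orders in the diff: they replace the default `memory_order_seq_cst` accesses, and the acquire loads in the spin loops pair with the release store and the acq_rel `fetch_add`. A minimal standalone sketch of that release/acquire pairing (names here are illustrative, not from the test):

```
#include <atomic>
#include <cassert>
#include <thread>

int payload = 0;
std::atomic<size_t> ready{0};

int main() {
  std::thread writer([] {
    payload = 42;                               // plain (non-atomic) write
    ready.store(1, std::memory_order_release);  // publish
  });
  std::thread reader([] {
    // The acquire load in the spin loop synchronizes with the release store.
    while (ready.load(std::memory_order_acquire) < 1) {
    }
    assert(payload == 42);  // visible thanks to the acquire/release pairing
  });
  writer.join();
  reader.join();
  return 0;
}
```

Once the spinning thread observes the published value, everything written before the release store (or before the acq_rel increment in the test) is guaranteed to be visible to it.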