This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new c2e5373  [txns] fix race between tasks and destructor
c2e5373 is described below

commit c2e5373493afe43766b0d5b6f04dfbcd340d633d
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Wed Apr 7 16:13:50 2021 -0700

    [txns] fix race between tasks and destructor
    
    It previously possible for a CommitTask to be destructed before
    completing the loop of scheduling all asynchronous tasks. This led to a
    race as seen below:
    
    WARNING: ThreadSanitizer: data race (pid=32435)
      Write of size 8 at 0x7b1c000ce2d8 by thread T105 (mutexes: write 
M424881254664896540):
        #0 std::__1::__vector_base<std::__1::basic_string<char, 
std::__1::char_traits<char>, std::__1::allocator<char> >, 
std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, 
std::__1::allocator<char> > > >::__destruct_at_end(std::__1::basic_string<char, 
std::__1::char_traits<char>, std::__1::allocator<char> >*) 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/vector:427:12
 (txn_commit-itest+0x576cb1)
        #1 std::__1::__vector_base<std::__1::basic_string<char, 
std::__1::char_traits<char>, std::__1::allocator<char> >, 
std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, 
std::__1::allocator<char> > > >::clear() 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/vector:369:29
 (txn_commit-itest+0x5770d1)
        #2 std::__1::__vector_base<std::__1::basic_string<char, 
std::__1::char_traits<char>, std::__1::allocator<char> >, 
std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, 
std::__1::allocator<char> > > >::~__vector_base() 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/vector:463:9
 (txn_commit-itest+0x59caf9)
        #3 std::__1::vector<std::__1::basic_string<char, 
std::__1::char_traits<char>, std::__1::allocator<char> >, 
std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, 
std::__1::allocator<char> > > >::~vector() 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/vector:555:5
 (libtransactions.so+0x8c2a0)
        #4 kudu::transactions::CommitTasks::~CommitTasks() 
../src/kudu/transactions/txn_status_manager.h:177:26 
(libtransactions.so+0xcce8b)
        #5 kudu::RefCountedThreadSafe<kudu::transactions::CommitTasks, 
kudu::DefaultRefCountedThreadSafeTraits<kudu::transactions::CommitTasks> 
>::DeleteInternal(kudu::transactions::CommitTasks const*) 
../src/kudu/gutil/ref_counted.h:153:44 (libtransactions.so+0xcce1a)
        #6 
kudu::DefaultRefCountedThreadSafeTraits<kudu::transactions::CommitTasks>::Destruct(kudu::transactions::CommitTasks
 const*) ../src/kudu/gutil/ref_counted.h:116:5 (libtransactions.so+0xccdc8)
        #7 kudu::RefCountedThreadSafe<kudu::transactions::CommitTasks, 
kudu::DefaultRefCountedThreadSafeTraits<kudu::transactions::CommitTasks> 
>::Release() const ../src/kudu/gutil/ref_counted.h:144:7 
(libtransactions.so+0xccd70)
        #8 scoped_refptr<kudu::transactions::CommitTasks>::~scoped_refptr() 
../src/kudu/gutil/ref_counted.h:266:13 (libtransactions.so+0xbf785)
        #9 std::__1::pair<long const, 
scoped_refptr<kudu::transactions::CommitTasks> >::~pair() 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/utility:315:29
 (libtransactions.so+0xc7652)
        #10 void 
std::__1::allocator_traits<std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
 scoped_refptr<kudu::transactions::CommitTasks> >, void*> > 
>::__destroy<std::__1::pair<long const, 
scoped_refptr<kudu::transactions::CommitTasks> > 
>(std::__1::integral_constant<bool, false>, 
std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long, 
scoped_refptr<kudu::transactions::CommitTasks> >, void*> >&, 
std::__1::pair<long const, scoped_refptr<kud [...]
        #11 void 
std::__1::allocator_traits<std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
 scoped_refptr<kudu::transactions::CommitTasks> >, void*> > 
>::destroy<std::__1::pair<long const, 
scoped_refptr<kudu::transactions::CommitTasks> > 
>(std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long, 
scoped_refptr<kudu::transactions::CommitTasks> >, void*> >&, 
std::__1::pair<long const, scoped_refptr<kudu::transactions::CommitTasks> >*) 
/data/3/aw [...]
        #12 
std::__1::__hash_node_destructor<std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
 scoped_refptr<kudu::transactions::CommitTasks> >, void*> > 
>::operator()(std::__1::__hash_node<std::__1::__hash_value_type<long, 
scoped_refptr<kudu::transactions::CommitTasks> >, void*>*) 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/__hash_table:844:13
 (libtransactions.so+0xc740d)
        #13 
std::__1::unique_ptr<std::__1::__hash_node<std::__1::__hash_value_type<long, 
scoped_refptr<kudu::transactions::CommitTasks> >, void*>, 
std::__1::__hash_node_destructor<std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
 scoped_refptr<kudu::transactions::CommitTasks> >, void*> > > 
>::reset(std::__1::__hash_node<std::__1::__hash_value_type<long, 
scoped_refptr<kudu::transactions::CommitTasks> >, void*>*) 
/data/3/awong/Repositories/kudu/thirdparty/installed [...]
        #14 
std::__1::unique_ptr<std::__1::__hash_node<std::__1::__hash_value_type<long, 
scoped_refptr<kudu::transactions::CommitTasks> >, void*>, 
std::__1::__hash_node_destructor<std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
 scoped_refptr<kudu::transactions::CommitTasks> >, void*> > > >::~unique_ptr() 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/memory:2547:19
 (libtransactions.so+0xc6cbc)
        #15 std::__1::__hash_table<std::__1::__hash_value_type<long, 
scoped_refptr<kudu::transactions::CommitTasks> >, 
std::__1::__unordered_map_hasher<long, std::__1::__hash_value_type<long, 
scoped_refptr<kudu::transactions::CommitTasks> >, std::__1::hash<long>, true>, 
std::__1::__unordered_map_equal<long, std::__1::__hash_value_type<long, 
scoped_refptr<kudu::transactions::CommitTasks> >, std::__1::equal_to<long>, 
true>, std::__1::allocator<std::__1::__hash_value_type<long, scoped_refptr [...]
        #16 std::__1::unordered_map<long, 
scoped_refptr<kudu::transactions::CommitTasks>, std::__1::hash<long>, 
std::__1::equal_to<long>, std::__1::allocator<std::__1::pair<long const, 
scoped_refptr<kudu::transactions::CommitTasks> > > 
>::erase(std::__1::__hash_map_iterator<std::__1::__hash_iterator<std::__1::__hash_node<std::__1::__hash_value_type<long,
 scoped_refptr<kudu::transactions::CommitTasks> >, void*>*> >) 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/u [...]
        #17 kudu::transactions::TxnStatusManager::RemoveCommitTask(long, 
kudu::transactions::CommitTasks const*) 
../src/kudu/transactions/txn_status_manager.h:433:26 
(libtransactions.so+0xbefc6)
        #18 kudu::transactions::CommitTasks::IsShuttingDownCleanupIfLastOp() 
../src/kudu/transactions/txn_status_manager.cc:181:28 
(libtransactions.so+0x97dea)
        #19 
kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2::operator()(kudu::Status
 const&) const ../src/kudu/transactions/txn_status_manager.cc:319:9 
(libtransactions.so+0xaefd6)
        #20 
decltype(std::__1::forward<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2&>(fp)(std::__1::forward<kudu::Status
 const&>(fp0))) 
std::__1::__invoke<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2&,
 kudu::Status 
const&>(kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2&, 
kudu::Status const&) 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/type_traits:3530:1
 (libtransactions.so+0xaeefd)
        #21 void 
std::__1::__invoke_void_return_wrapper<void>::__call<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2&,
 kudu::Status 
const&>(kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2&, 
kudu::Status const&) 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/__functional_base:348:9
 (libtransactions.so+0xaee3d)
        #22 
std::__1::__function::__alloc_func<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2,
 
std::__1::allocator<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2>,
 void (kudu::Status const&)>::operator()(kudu::Status const&) 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1533:16
 (libtransactions.so+0xaedbd)
        #23 
std::__1::__function::__func<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2,
 
std::__1::allocator<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2>,
 void (kudu::Status const&)>::operator()(kudu::Status const&) 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1707:12
 (libtransactions.so+0xad06c)
        #24 std::__1::__function::__value_func<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1860:16
 (libmaster.so+0x32ca24)
        #25 std::__1::function<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:2419:12
 (libmaster.so+0x31d80b)
        #26 kudu::transactions::ParticipantRpc::Finish(kudu::Status const&) 
../src/kudu/transactions/participant_rpc.cc:227:3 (libtransactions.so+0x7f3e7)
        ...
    
      Previous read of size 8 at 0x7b1c000ce2d8 by thread T186 (mutexes: read 
M322424363142217872):
        #0 std::__1::vector<std::__1::basic_string<char, 
std::__1::char_traits<char>, std::__1::allocator<char> >, 
std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, 
std::__1::allocator<char> > > >::size() const 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/vector:656:46
 (libtransactions.so+0x8d2f9)
        #1 kudu::transactions::CommitTasks::AbortTxnAsync() 
../src/kudu/transactions/txn_status_manager.cc:365:42 
(libtransactions.so+0x989d2)
        #2 kudu::transactions::TxnStatusManager::BeginAbortTransaction(long, 
boost::optional<std::__1::basic_string<char, std::__1::char_traits<char>, 
std::__1::allocator<char> > > const&, kudu::tserver::TabletServerErrorPB*) 
../src/kudu/transactions/txn_status_manager.cc:1219:25 
(libtransactions.so+0xa3cc6)
        #3 
kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3::operator()()
 const ../src/kudu/transactions/txn_status_manager.cc:378:3 
(libtransactions.so+0xb245d)
        #4 
decltype(std::__1::forward<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3&>(fp)())
 
std::__1::__invoke<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3&>(kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3&)
 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/type_traits:3530:1
 (libtransactions.so+0xb2180)
        #5 void 
std::__1::__invoke_void_return_wrapper<void>::__call<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3&>(kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3&)
 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/__functional_base:348:9
 (libtransactions.so+0xb20e0)
        #6 
std::__1::__function::__alloc_func<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3,
 
std::__1::allocator<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3>,
 void ()>::operator()() 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1533:16
 (libtransactions.so+0xb2080)
        #7 
std::__1::__function::__func<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3,
 
std::__1::allocator<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3>,
 void ()>::operator()() 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1707:12
 (libtransactions.so+0xb042f)
        #8 std::__1::__function::__value_func<void ()>::operator()() const 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1860:16
 (libtserver_test_util.so+0x58396)
        #9 std::__1::function<void ()>::operator()() const 
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:2419:12
 (libtserver_test_util.so+0x58098)
        ...
    
    This patch fixes this by caching the size before iterating. Prior to
    this patch, the test failed in TSAN mode 3/100 times. With this patch,
    it passed 1000/1000 times.
    
    Change-Id: Ic974354b300f2a6c1b04505e740249273f33b80c
    Reviewed-on: http://gerrit.cloudera.org:8080/17283
    Reviewed-by: Alexey Serbin <aser...@cloudera.com>
    Tested-by: Kudu Jenkins
---
 src/kudu/transactions/txn_status_manager.cc | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/kudu/transactions/txn_status_manager.cc 
b/src/kudu/transactions/txn_status_manager.cc
index 837272c..1a2e63e 100644
--- a/src/kudu/transactions/txn_status_manager.cc
+++ b/src/kudu/transactions/txn_status_manager.cc
@@ -361,8 +361,11 @@ void CommitTasks::AbortTxnAsync() {
   if (participant_ids_.empty()) {
     ScheduleFinalizeAbortTxnWrite();
   } else {
-    ops_in_flight_ = participant_ids_.size();
-    for (int i = 0; i < participant_ids_.size(); i++) {
+    // NOTE: the final AbortTxnAsyncTask() call may destruct this CommitTask
+    // and its members, so cache the participant ID size.
+    const auto participant_ids_size = participant_ids_.size();
+    ops_in_flight_ = participant_ids_size;
+    for (int i = 0; i < participant_ids_size; i++) {
       AbortTxnAsyncTask(i);
     }
   }
@@ -434,8 +437,11 @@ void CommitTasks::FinalizeCommitAsync() {
   // tasks to complete.
   auto old_val = ops_in_flight_.exchange(participant_ids_.size());
   DCHECK_EQ(0, old_val);
-  ops_in_flight_ = participant_ids_.size();
-  for (int i = 0; i < participant_ids_.size(); i++) {
+  // NOTE: the final FinalizeCommitAsyncTask() call may destruct this
+  // CommitTask and its members, so cache the participant ID size.
+  const auto participant_ids_size = participant_ids_.size();
+  ops_in_flight_ = participant_ids_size;
+  for (int i = 0; i < participant_ids_size; i++) {
     FinalizeCommitAsyncTask(i);
   }
 }
@@ -1010,10 +1016,14 @@ void CommitTasks::BeginCommitAsync() {
     // If there are some participants, schedule beginning commit tasks so
     // we can determine a finalized commit timestamp.
     //
+    // NOTE: the final BeginCommitAsyncTask() call may destruct this CommitTask
+    // and its members, so cache the participant ID size.
+    //
     // TODO(awong): consider an approach in which clients propagate
     // timestamps in such a way that the client's call to begin commit
     // includes the expected finalized commit timestamp.
-    for (int i = 0; i < participant_ids_.size(); i++) {
+    const auto participant_ids_size = participant_ids_.size();
+    for (int i = 0; i < participant_ids_size; i++) {
       BeginCommitAsyncTask(i);
     }
   }

Reply via email to