This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 83c97c0e35b8bd7a2f871e18fec685355dbbbd9b Author: Andrew Wong <aw...@cloudera.com> AuthorDate: Tue Apr 6 21:34:54 2021 -0700 [txn_participant-test] deflake TxnParticipantTest.TestConcurrentOps The test started failing after 6be9794f91a2eebb291e4db2daf63a0f12c3ae2a since one of the tested invariants is no longer true: in a randomly-ordered sequence of participant ops, the BEGIN_TXN op is no longer guaranteed to succeed. Instead, either it or the ABORT_TXN op must have succeeded. This test failed pretty quickly when looping it. With this fix, I looped it 1000 times and it passed. Change-Id: I58776295a359922a1b72c4286bd8b78f36ea50bd Reviewed-on: http://gerrit.cloudera.org:8080/17278 Reviewed-by: Alexey Serbin <aser...@cloudera.com> Tested-by: Andrew Wong <aw...@cloudera.com> --- src/kudu/tablet/txn_participant-test.cc | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/kudu/tablet/txn_participant-test.cc b/src/kudu/tablet/txn_participant-test.cc index 4e54f31..f21b698 100644 --- a/src/kudu/tablet/txn_participant-test.cc +++ b/src/kudu/tablet/txn_participant-test.cc @@ -50,6 +50,7 @@ #include "kudu/consensus/raft_consensus.h" #include "kudu/gutil/map-util.h" #include "kudu/gutil/ref_counted.h" +#include "kudu/gutil/strings/substitute.h" #include "kudu/tablet/ops/op.h" #include "kudu/tablet/ops/op_driver.h" #include "kudu/tablet/ops/op_tracker.h" @@ -83,6 +84,7 @@ using std::string; using std::thread; using std::unique_ptr; using std::vector; +using strings::Substitute; DECLARE_bool(enable_maintenance_manager); DECLARE_bool(log_preallocate_segments); @@ -410,22 +412,27 @@ TEST_F(TxnParticipantTest, TestConcurrentOps) { const auto status_for_op = [&] (ParticipantOpPB::ParticipantOpType type) { return statuses[FindOrDie(kIndexByOps, type)]; }; - // Regardless of order, we should have been able to begin the transaction. - ASSERT_OK(status_for_op(ParticipantOpPB::BEGIN_TXN)); + // The only way we could have failed to begin a transaction is if we + // replicated an ABORT_TXN first. + ASSERT_TRUE(status_for_op(ParticipantOpPB::BEGIN_TXN).ok() || + status_for_op(ParticipantOpPB::ABORT_TXN).ok()) << + Substitute("BEGIN_TXN error: $0, ABORT_TXN error: $1", + status_for_op(ParticipantOpPB::BEGIN_TXN).ToString(), + status_for_op(ParticipantOpPB::ABORT_TXN).ToString()); // If we finalized the commit, we must not have been able to abort. if (status_for_op(ParticipantOpPB::FINALIZE_COMMIT).ok()) { ASSERT_EQ(vector<TxnParticipant::TxnEntry>({ { kTxnId, kCommitted, kDummyCommitTimestamp }, }), txn_participant()->GetTxnsForTests()); - ASSERT_FALSE(statuses[FindOrDie(kIndexByOps, ParticipantOpPB::ABORT_TXN)].ok()); + ASSERT_FALSE(status_for_op(ParticipantOpPB::ABORT_TXN).ok()); // If we aborted the commit, we could not have finalized the commit. } else if (status_for_op(ParticipantOpPB::ABORT_TXN).ok()) { ASSERT_EQ(vector<TxnParticipant::TxnEntry>({ { kTxnId, kAborted, -1 }, }), txn_participant()->GetTxnsForTests()); - ASSERT_FALSE(statuses[FindOrDie(kIndexByOps, ParticipantOpPB::FINALIZE_COMMIT)].ok()); + ASSERT_FALSE(status_for_op(ParticipantOpPB::FINALIZE_COMMIT).ok()); // If we neither aborted nor finalized, but we began to commit, we should be // left with the commit in progress.