Roman Kondakov created IGNITE-10785: ---------------------------------------
Summary: MVCC: Grid can hang if transactions is failed to rollback Key: IGNITE-10785 URL: https://issues.apache.org/jira/browse/IGNITE-10785 Project: Ignite Issue Type: Bug Components: mvcc Reporter: Roman Kondakov Sometimes grid can hang if transactions is failed to rollback. Reproducer: {noformat} [2018-12-14 08:48:13,890][WARN ][sys-stripe-9-#12552%transactions.TxRollbackAsyncWithPersistenceTest2%][GridDhtColocatedCache] <test> Failed to acquire lock (transaction has been completed): GridCacheVersion [topVer=156257270, order=1544777270268, nodeOrder=4] [2018-12-14 08:48:13,893][ERROR][sys-stripe-9-#12552%transactions.TxRollbackAsyncWithPersistenceTest2%][GridDhtColocatedCache] <test> Failed to rollback the transaction: GridDhtTxLocal [nearNodeId=b4ff2bcc-dc6a-49c2-a2ab-f26bae6b68c1, nearFutId=282d09ca761-8ff10d65-a1d6-4f40-9fd7-afb7afa0b25c, nearMiniId=1, nearFinFutId=382d09ca761-8ff10d65-a1d6-4f40-9fd7-afb7afa0b25c, nearFinMiniId=1, nearXidVer=GridCacheVersion [topVer=156257270, order=1544777270268, nodeOrder=4], lb=null, super=GridDhtTxLocalAdapter [nearOnOriginatingNode=false, nearNodes=KeySetView [], dhtNodes=KeySetView [], explicitLock=false, super=IgniteTxLocalAdapter [completedBase=null, sndTransformedVals=false, depEnabled=false, txState=IgniteTxStateImpl [activeCacheIds=[], recovery=null, mvccEnabled=null, txMap=HashSet []], mvccWaitTxs=null, qryEnlisted=false, forceSkipCompletedVers=false, super=IgniteTxAdapter [xidVer=GridCacheVersion [topVer=156257270, order=1544777270269, nodeOrder=3], writeVer=null, implicit=false, loc=true, threadId=13949, startTime=1544777293884, nodeId=cb0ed489-aa1c-4e7a-a196-3bea9e500002, startVer=GridCacheVersion [topVer=156257270, order=1544777270269, nodeOrder=3], endVer=null, isolation=REPEATABLE_READ, concurrency=PESSIMISTIC, timeout=0, sysInvalidate=false, sys=false, plc=2, commitVer=GridCacheVersion [topVer=156257270, order=1544777270269, nodeOrder=3], finalizing=NONE, invalidParts=null, state=ROLLED_BACK, timedOut=false, topVer=AffinityTopologyVersion [topVer=4, minorTopVer=0], mvccSnapshot=MvccSnapshotResponse [futId=82, crdVer=1544777266400, cntr=848, opCntr=1, txs=[640, 769, 706, 387, 707, 708, 582, 646, 583, 586, 847, 596, 660, 724, 661, 598, 599, 603, 667, 731, 685, 494, 686, 688, 561, 629, 570, 766, 639], cleanupVer=263, tracking=0], parentTx=null, duration=0ms, onePhaseCommit=false], size=0]]] class org.apache.ignite.IgniteCheckedException: Failed to finish transaction [commit=false, tx=GridDhtTxLocal[xid=df386eba761-00000000-0950-4bf6-0000-000000000003, xidVersion=GridCacheVersion [topVer=156257270, order=1544777270269, nodeOrder=3], concurrency=PESSIMISTIC, isolation=REPEATABLE_READ, state=ROLLED_BACK, invalidate=false, rollbackOnly=true, nodeId=cb0ed489-aa1c-4e7a-a196-3bea9e500002, timeout=0, duration=0]] at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxLocal.finishTx(GridDhtTxLocal.java:482) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxLocal.rollbackDhtLocalAsync(GridDhtTxLocal.java:588) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxLocal.rollbackDhtLocal(GridDhtTxLocal.java:563) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTransactionalCacheAdapter.initTxTopologyVersion(GridDhtTransactionalCacheAdapter.java:2178) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTransactionalCacheAdapter.processNearTxEnlistRequest(GridDhtTransactionalCacheAdapter.java:2016) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTransactionalCacheAdapter.access$900(GridDhtTransactionalCacheAdapter.java:112) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTransactionalCacheAdapter$14.apply(GridDhtTransactionalCacheAdapter.java:229) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTransactionalCacheAdapter$14.apply(GridDhtTransactionalCacheAdapter.java:227) at org.apache.ignite.internal.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:1127) at org.apache.ignite.internal.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:592) at org.apache.ignite.internal.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:391) at org.apache.ignite.internal.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:317) at org.apache.ignite.internal.processors.cache.GridCacheIoManager.access$100(GridCacheIoManager.java:108) at org.apache.ignite.internal.processors.cache.GridCacheIoManager$1.onMessage(GridCacheIoManager.java:307) at org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1568) at org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1196) at org.apache.ignite.internal.managers.communication.GridIoManager.access$4200(GridIoManager.java:127) at org.apache.ignite.internal.managers.communication.GridIoManager$9.run(GridIoManager.java:1092) at org.apache.ignite.internal.util.StripedExecutor$Stripe.body(StripedExecutor.java:505) at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:120) at java.lang.Thread.run(Thread.java:748) [2018-12-14 08:48:19,967][WARN ][sys-stripe-14-#12559%transactions.TxRollbackAsyncWithPersistenceTest2%][finish] Received finish request for completed transaction (the message may be too late) [txId=GridCacheVersion [topVer=156257270, order=1544777267977, nodeOrder=2], dhtTxId=null, node=48407150-0d58-4e52-8be2-d91c26f00001, commit=false] [2018-12-14 08:48:19,969][WARN ][sys-stripe-23-#12573%transactions.TxRollbackAsyncWithPersistenceTest2%][GridDhtColocatedCache] <test> Failed to acquire lock (transaction has been completed): GridCacheVersion [topVer=156257270, order=1544777267977, nodeOrder=2] [2018-12-14 08:48:25,007][WARN ][sys-#12871%client%][GridNearTxLocal] The transaction was forcibly rolled back because a timeout is reached: GridNearTxLocal[xid=68586eba761-00000000-0950-4bf6-0000-000000000004, xidVersion=GridCacheVersion [topVer=156257270, order=1544777270662, nodeOrder=4], concurrency=PESSIMISTIC, isolation=REPEATABLE_READ, state=ROLLED_BACK, invalidate=false, rollbackOnly=true, nodeId=b4ff2bcc-dc6a-49c2-a2ab-f26bae6b68c1, timeout=50, duration=60, label=null] [2018-12-14 08:48:25,058][WARN ][sys-#12872%client%][GridNearTxLocal] The transaction was forcibly rolled back because a timeout is reached: GridNearTxLocal[xid=78586eba761-00000000-0950-4bf6-0000-000000000004, xidVersion=GridCacheVersion [topVer=156257270, order=1544777270663, nodeOrder=4], concurrency=PESSIMISTIC, isolation=REPEATABLE_READ, state=MARKED_ROLLBACK, invalidate=false, rollbackOnly=true, nodeId=b4ff2bcc-dc6a-49c2-a2ab-f26bae6b68c1, timeout=50, duration=51, label=null] [2018-12-14 08:48:25,388][WARN ][sys-#12873%client%][GridNearTxLocal] The transaction was forcibly rolled back because a timeout is reached: GridNearTxLocal[xid=d9586eba761-00000000-0950-4bf6-0000-000000000004, xidVersion=GridCacheVersion [topVer=156257270, order=1544777270685, nodeOrder=4], concurrency=PESSIMISTIC, isolation=REPEATABLE_READ, state=MARKED_ROLLBACK, invalidate=false, rollbackOnly=true, nodeId=b4ff2bcc-dc6a-49c2-a2ab-f26bae6b68c1, timeout=50, duration=50, label=null] [2018-12-14 08:48:25,442][WARN ][sys-#12874%client%][GridNearTxLocal] The transaction was forcibly rolled back because a timeout is reached: GridNearTxLocal[xid=e9586eba761-00000000-0950-4bf6-0000-000000000004, xidVersion=GridCacheVersion [topVer=156257270, order=1544777270686, nodeOrder=4], concurrency=PESSIMISTIC, isolation=REPEATABLE_READ, state=MARKED_ROLLBACK, invalidate=false, rollbackOnly=true, nodeId=b4ff2bcc-dc6a-49c2-a2ab-f26bae6b68c1, timeout=50, duration=51, label=null] [2018-12-14 08:49:46,433][WARN ][grid-timeout-worker-#12482%transactions.TxRollbackAsyncWithPersistenceTest0%][diagnostic] Found long running transaction [startTime=08:48:22.963, curTime=08:49:46.418, tx=GridDhtTxRemote [nearNodeId=51b80ee2-85d7-41fd-8d6b-63dc99a00000, rmtFutId=204f09ca761-8ff10d65-a1d6-4f40-9fd7-afb7afa0b25c, nearXidVer=GridCacheVersion [topVer=156257270, order=1544777270948, nodeOrder=1], storeWriteThrough=false, super=GridDistributedTxRemoteAdapter [explicitVers=null, started=true, commitAllowed=0, txState=IgniteTxRemoteStateImpl [readMap=EmptyMap {}, writeMap=ConcurrentLinkedHashMap {}], skipCompletedVers=false, txLbl=null, super=IgniteTxAdapter [xidVer=GridCacheVersion [topVer=156257270, order=1544777271199, nodeOrder=3], writeVer=null, implicit=false, loc=false, threadId=13444, startTime=1544777302963, nodeId=cb0ed489-aa1c-4e7a-a196-3bea9e500002, startVer=GridCacheVersion [topVer=156257270, order=1544777270948, nodeOrder=1], endVer=null, isolation=REPEATABLE_READ, concurrency=PESSIMISTIC, timeout=0, sysInvalidate=false, sys=false, plc=2, commitVer=GridCacheVersion [topVer=156257270, order=1544777271199, nodeOrder=3], finalizing=NONE, invalidParts=null, state=ACTIVE, timedOut=false, topVer=AffinityTopologyVersion [topVer=4, minorTopVer=0], mvccSnapshot=MvccSnapshotWithoutTxs [crdVer=1544777266400, cntr=1776, cleanupVer=263, opCntr=0], parentTx=null, duration=83455ms, onePhaseCommit=false]]]] [2018-12-14 08:49:46,441][WARN ][grid-timeout-worker-#12482%transactions.TxRollbackAsyncWithPersistenceTest0%][diagnostic] Found long running transaction [startTime=08:48:22.943, curTime=08:49:46.418, tx=GridNearTxLocal [mappings=IgniteTxMappingsImpl [], nearLocallyMapped=false, colocatedLocallyMapped=false, needCheckBackup=null, hasRemoteLocks=true, trackTimeout=false, lb=wLockTx, mvccTracker=null, mvccOp=true, thread=tx-lock-thread-1, mappings=IgniteTxMappingsImpl [], super=GridDhtTxLocalAdapter [nearOnOriginatingNode=false, nearNodes=KeySetView [], dhtNodes=KeySetView [], explicitLock=false, super=IgniteTxLocalAdapter [completedBase=null, sndTransformedVals=false, depEnabled=false, txState=IgniteTxStateImpl [activeCacheIds=[3556498], recovery=false, mvccEnabled=true, txMap=HashSet []], mvccWaitTxs=null, qryEnlisted=true, forceSkipCompletedVers=false, super=IgniteTxAdapter [xidVer=GridCacheVersion [topVer=156257270, order=1544777270948, nodeOrder=1], writeVer=null, implicit=false, loc=true, threadId=13969, startTime=1544777302943, nodeId=51b80ee2-85d7-41fd-8d6b-63dc99a00000, startVer=GridCacheVersion [topVer=156257270, order=1544777270948, nodeOrder=1], endVer=null, isolation=REPEATABLE_READ, concurrency=PESSIMISTIC, timeout=0, sysInvalidate=false, sys=false, plc=2, commitVer=null, finalizing=NONE, invalidParts=null, state=ACTIVE, timedOut=false, topVer=AffinityTopologyVersion [topVer=4, minorTopVer=0], mvccSnapshot=MvccSnapshotResponse [futId=0, crdVer=1544777266400, cntr=1776, opCntr=2, txs=[640, 769, 706, 387, 707, 1347, 708, 1348, 582, 646, 583, 1353, 586, 907, 1547, 596, 660, 724, 661, 598, 599, 603, 667, 731, 1312, 996, 997, 685, 494, 686, 688, 561, 629, 1015, 570, 1598, 639, 1599], cleanupVer=263, tracking=387], parentTx=null, duration=83495ms, onePhaseCommit=false], size=0]]]] [2018-12-14 08:49:49,987][WARN ][grid-timeout-worker-#12614%transactions.TxRollbackAsyncWithPersistenceTest1%][diagnostic] Found long running transaction [startTime=08:48:22.963, curTime=08:49:49.986, tx=GridDhtTxRemote [nearNodeId=51b80ee2-85d7-41fd-8d6b-63dc99a00000, rmtFutId=204f09ca761-8ff10d65-a1d6-4f40-9fd7-afb7afa0b25c, nearXidVer=GridCacheVersion [topVer=156257270, order=1544777270948, nodeOrder=1], storeWriteThrough=false, super=GridDistributedTxRemoteAdapter [explicitVers=null, started=true, commitAllowed=0, txState=IgniteTxRemoteStateImpl [readMap=EmptyMap {}, writeMap=ConcurrentLinkedHashMap {}], skipCompletedVers=false, txLbl=null, super=IgniteTxAdapter [xidVer=GridCacheVersion [topVer=156257270, order=1544777271199, nodeOrder=3], writeVer=null, implicit=false, loc=false, threadId=13554, startTime=1544777302963, nodeId=cb0ed489-aa1c-4e7a-a196-3bea9e500002, startVer=GridCacheVersion [topVer=156257270, order=1544777268079, nodeOrder=2], endVer=null, isolation=REPEATABLE_READ, concurrency=PESSIMISTIC, timeout=0, sysInvalidate=false, sys=false, plc=2, commitVer=GridCacheVersion [topVer=156257270, order=1544777271199, nodeOrder=3], finalizing=NONE, invalidParts=null, state=ACTIVE, timedOut=false, topVer=AffinityTopologyVersion [topVer=4, minorTopVer=0], mvccSnapshot=MvccSnapshotWithoutTxs [crdVer=1544777266400, cntr=1776, cleanupVer=263, opCntr=0], parentTx=null, duration=87023ms, onePhaseCommit=false]]]] Thread [name="test-runner-#12427%transactions.TxRollbackAsyncWithPersistenceTest%", id=13441, state=WAITING, blockCnt=5, waitCnt=54] at sun.misc.Unsafe.park(Native Method) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:304) at o.a.i.i.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:179) at o.a.i.i.util.future.GridFutureAdapter.get(GridFutureAdapter.java:142) at o.a.i.i.processors.cache.transactions.TxRollbackAsyncTest.testSynchronousRollback0(TxRollbackAsyncTest.java:460) at o.a.i.i.processors.cache.transactions.TxRollbackAsyncTest.testSynchronousRollback(TxRollbackAsyncTest.java:339) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55) at o.a.i.testframework.junits.GridAbstractTest$7.run(GridAbstractTest.java:2119) at java.lang.Thread.run(Thread.java:748) {noformat} -- This message was sent by Atlassian JIRA (v7.6.3#76005)