[ https://issues.apache.org/jira/browse/IGNITE-10785?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Vladimir Ozerov updated IGNITE-10785: ------------------------------------- Labels: Hanging transactions (was: Hanging mvcc_stabilization_stage_1 transactions) > MVCC: Grid can hang if transaction is failed to rollback > -------------------------------------------------------- > > Key: IGNITE-10785 > URL: https://issues.apache.org/jira/browse/IGNITE-10785 > Project: Ignite > Issue Type: Bug > Components: mvcc > Reporter: Roman Kondakov > Assignee: Roman Kondakov > Priority: Major > Labels: Hanging, transactions > Fix For: 2.8 > > > Sometimes grid can hang if transactions is failed to rollback. Reproducer: > > {noformat} > [2018-12-14 08:48:13,890][WARN > ][sys-stripe-9-#12552%transactions.TxRollbackAsyncWithPersistenceTest2%][GridDhtColocatedCache] > <test> Failed to acquire lock (transaction has been completed): > GridCacheVersion [topVer=156257270, order=1544777270268, nodeOrder=4] > [2018-12-14 > 08:48:13,893][ERROR][sys-stripe-9-#12552%transactions.TxRollbackAsyncWithPersistenceTest2%][GridDhtColocatedCache] > <test> Failed to rollback the transaction: GridDhtTxLocal > [nearNodeId=b4ff2bcc-dc6a-49c2-a2ab-f26bae6b68c1, > nearFutId=282d09ca761-8ff10d65-a1d6-4f40-9fd7-afb7afa0b25c, nearMiniId=1, > nearFinFutId=382d09ca761-8ff10d65-a1d6-4f40-9fd7-afb7afa0b25c, > nearFinMiniId=1, nearXidVer=GridCacheVersion [topVer=156257270, > order=1544777270268, nodeOrder=4], lb=null, super=GridDhtTxLocalAdapter > [nearOnOriginatingNode=false, nearNodes=KeySetView [], dhtNodes=KeySetView > [], explicitLock=false, super=IgniteTxLocalAdapter [completedBase=null, > sndTransformedVals=false, depEnabled=false, txState=IgniteTxStateImpl > [activeCacheIds=[], recovery=null, mvccEnabled=null, txMap=HashSet []], > mvccWaitTxs=null, qryEnlisted=false, forceSkipCompletedVers=false, > super=IgniteTxAdapter [xidVer=GridCacheVersion [topVer=156257270, > order=1544777270269, nodeOrder=3], writeVer=null, implicit=false, loc=true, > threadId=13949, startTime=1544777293884, > nodeId=cb0ed489-aa1c-4e7a-a196-3bea9e500002, startVer=GridCacheVersion > [topVer=156257270, order=1544777270269, nodeOrder=3], endVer=null, > isolation=REPEATABLE_READ, concurrency=PESSIMISTIC, timeout=0, > sysInvalidate=false, sys=false, plc=2, commitVer=GridCacheVersion > [topVer=156257270, order=1544777270269, nodeOrder=3], finalizing=NONE, > invalidParts=null, state=ROLLED_BACK, timedOut=false, > topVer=AffinityTopologyVersion [topVer=4, minorTopVer=0], > mvccSnapshot=MvccSnapshotResponse [futId=82, crdVer=1544777266400, cntr=848, > opCntr=1, txs=[640, 769, 706, 387, 707, 708, 582, 646, 583, 586, 847, 596, > 660, 724, 661, 598, 599, 603, 667, 731, 685, 494, 686, 688, 561, 629, 570, > 766, 639], cleanupVer=263, tracking=0], parentTx=null, duration=0ms, > onePhaseCommit=false], size=0]]] > class org.apache.ignite.IgniteCheckedException: Failed to finish transaction > [commit=false, > tx=GridDhtTxLocal[xid=df386eba761-00000000-0950-4bf6-0000-000000000003, > xidVersion=GridCacheVersion [topVer=156257270, order=1544777270269, > nodeOrder=3], concurrency=PESSIMISTIC, isolation=REPEATABLE_READ, > state=ROLLED_BACK, invalidate=false, rollbackOnly=true, > nodeId=cb0ed489-aa1c-4e7a-a196-3bea9e500002, timeout=0, duration=0]] > at > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxLocal.finishTx(GridDhtTxLocal.java:482) > at > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxLocal.rollbackDhtLocalAsync(GridDhtTxLocal.java:588) > at > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxLocal.rollbackDhtLocal(GridDhtTxLocal.java:563) > at > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTransactionalCacheAdapter.initTxTopologyVersion(GridDhtTransactionalCacheAdapter.java:2178) > at > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTransactionalCacheAdapter.processNearTxEnlistRequest(GridDhtTransactionalCacheAdapter.java:2016) > at > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTransactionalCacheAdapter.access$900(GridDhtTransactionalCacheAdapter.java:112) > at > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTransactionalCacheAdapter$14.apply(GridDhtTransactionalCacheAdapter.java:229) > at > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTransactionalCacheAdapter$14.apply(GridDhtTransactionalCacheAdapter.java:227) > at > org.apache.ignite.internal.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:1127) > at > org.apache.ignite.internal.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:592) > at > org.apache.ignite.internal.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:391) > at > org.apache.ignite.internal.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:317) > at > org.apache.ignite.internal.processors.cache.GridCacheIoManager.access$100(GridCacheIoManager.java:108) > at > org.apache.ignite.internal.processors.cache.GridCacheIoManager$1.onMessage(GridCacheIoManager.java:307) > at > org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1568) > at > org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1196) > at > org.apache.ignite.internal.managers.communication.GridIoManager.access$4200(GridIoManager.java:127) > at > org.apache.ignite.internal.managers.communication.GridIoManager$9.run(GridIoManager.java:1092) > at > org.apache.ignite.internal.util.StripedExecutor$Stripe.body(StripedExecutor.java:505) > at > org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:120) > at java.lang.Thread.run(Thread.java:748) > [2018-12-14 08:48:19,967][WARN > ][sys-stripe-14-#12559%transactions.TxRollbackAsyncWithPersistenceTest2%][finish] > Received finish request for completed transaction (the message may be too > late) [txId=GridCacheVersion [topVer=156257270, order=1544777267977, > nodeOrder=2], dhtTxId=null, node=48407150-0d58-4e52-8be2-d91c26f00001, > commit=false] > [2018-12-14 08:48:19,969][WARN > ][sys-stripe-23-#12573%transactions.TxRollbackAsyncWithPersistenceTest2%][GridDhtColocatedCache] > <test> Failed to acquire lock (transaction has been completed): > GridCacheVersion [topVer=156257270, order=1544777267977, nodeOrder=2] > [2018-12-14 08:48:25,007][WARN ][sys-#12871%client%][GridNearTxLocal] The > transaction was forcibly rolled back because a timeout is reached: > GridNearTxLocal[xid=68586eba761-00000000-0950-4bf6-0000-000000000004, > xidVersion=GridCacheVersion [topVer=156257270, order=1544777270662, > nodeOrder=4], concurrency=PESSIMISTIC, isolation=REPEATABLE_READ, > state=ROLLED_BACK, invalidate=false, rollbackOnly=true, > nodeId=b4ff2bcc-dc6a-49c2-a2ab-f26bae6b68c1, timeout=50, duration=60, > label=null] > [2018-12-14 08:48:25,058][WARN ][sys-#12872%client%][GridNearTxLocal] The > transaction was forcibly rolled back because a timeout is reached: > GridNearTxLocal[xid=78586eba761-00000000-0950-4bf6-0000-000000000004, > xidVersion=GridCacheVersion [topVer=156257270, order=1544777270663, > nodeOrder=4], concurrency=PESSIMISTIC, isolation=REPEATABLE_READ, > state=MARKED_ROLLBACK, invalidate=false, rollbackOnly=true, > nodeId=b4ff2bcc-dc6a-49c2-a2ab-f26bae6b68c1, timeout=50, duration=51, > label=null] > [2018-12-14 08:48:25,388][WARN ][sys-#12873%client%][GridNearTxLocal] The > transaction was forcibly rolled back because a timeout is reached: > GridNearTxLocal[xid=d9586eba761-00000000-0950-4bf6-0000-000000000004, > xidVersion=GridCacheVersion [topVer=156257270, order=1544777270685, > nodeOrder=4], concurrency=PESSIMISTIC, isolation=REPEATABLE_READ, > state=MARKED_ROLLBACK, invalidate=false, rollbackOnly=true, > nodeId=b4ff2bcc-dc6a-49c2-a2ab-f26bae6b68c1, timeout=50, duration=50, > label=null] > [2018-12-14 08:48:25,442][WARN ][sys-#12874%client%][GridNearTxLocal] The > transaction was forcibly rolled back because a timeout is reached: > GridNearTxLocal[xid=e9586eba761-00000000-0950-4bf6-0000-000000000004, > xidVersion=GridCacheVersion [topVer=156257270, order=1544777270686, > nodeOrder=4], concurrency=PESSIMISTIC, isolation=REPEATABLE_READ, > state=MARKED_ROLLBACK, invalidate=false, rollbackOnly=true, > nodeId=b4ff2bcc-dc6a-49c2-a2ab-f26bae6b68c1, timeout=50, duration=51, > label=null] > [2018-12-14 08:49:46,433][WARN > ][grid-timeout-worker-#12482%transactions.TxRollbackAsyncWithPersistenceTest0%][diagnostic] > Found long running transaction [startTime=08:48:22.963, > curTime=08:49:46.418, tx=GridDhtTxRemote > [nearNodeId=51b80ee2-85d7-41fd-8d6b-63dc99a00000, > rmtFutId=204f09ca761-8ff10d65-a1d6-4f40-9fd7-afb7afa0b25c, > nearXidVer=GridCacheVersion [topVer=156257270, order=1544777270948, > nodeOrder=1], storeWriteThrough=false, super=GridDistributedTxRemoteAdapter > [explicitVers=null, started=true, commitAllowed=0, > txState=IgniteTxRemoteStateImpl [readMap=EmptyMap {}, > writeMap=ConcurrentLinkedHashMap {}], skipCompletedVers=false, txLbl=null, > super=IgniteTxAdapter [xidVer=GridCacheVersion [topVer=156257270, > order=1544777271199, nodeOrder=3], writeVer=null, implicit=false, loc=false, > threadId=13444, startTime=1544777302963, > nodeId=cb0ed489-aa1c-4e7a-a196-3bea9e500002, startVer=GridCacheVersion > [topVer=156257270, order=1544777270948, nodeOrder=1], endVer=null, > isolation=REPEATABLE_READ, concurrency=PESSIMISTIC, timeout=0, > sysInvalidate=false, sys=false, plc=2, commitVer=GridCacheVersion > [topVer=156257270, order=1544777271199, nodeOrder=3], finalizing=NONE, > invalidParts=null, state=ACTIVE, timedOut=false, > topVer=AffinityTopologyVersion [topVer=4, minorTopVer=0], > mvccSnapshot=MvccSnapshotWithoutTxs [crdVer=1544777266400, cntr=1776, > cleanupVer=263, opCntr=0], parentTx=null, duration=83455ms, > onePhaseCommit=false]]]] > [2018-12-14 08:49:46,441][WARN > ][grid-timeout-worker-#12482%transactions.TxRollbackAsyncWithPersistenceTest0%][diagnostic] > Found long running transaction [startTime=08:48:22.943, > curTime=08:49:46.418, tx=GridNearTxLocal [mappings=IgniteTxMappingsImpl [], > nearLocallyMapped=false, colocatedLocallyMapped=false, needCheckBackup=null, > hasRemoteLocks=true, trackTimeout=false, lb=wLockTx, mvccTracker=null, > mvccOp=true, thread=tx-lock-thread-1, mappings=IgniteTxMappingsImpl [], > super=GridDhtTxLocalAdapter [nearOnOriginatingNode=false, > nearNodes=KeySetView [], dhtNodes=KeySetView [], explicitLock=false, > super=IgniteTxLocalAdapter [completedBase=null, sndTransformedVals=false, > depEnabled=false, txState=IgniteTxStateImpl [activeCacheIds=[3556498], > recovery=false, mvccEnabled=true, txMap=HashSet []], mvccWaitTxs=null, > qryEnlisted=true, forceSkipCompletedVers=false, super=IgniteTxAdapter > [xidVer=GridCacheVersion [topVer=156257270, order=1544777270948, > nodeOrder=1], writeVer=null, implicit=false, loc=true, threadId=13969, > startTime=1544777302943, nodeId=51b80ee2-85d7-41fd-8d6b-63dc99a00000, > startVer=GridCacheVersion [topVer=156257270, order=1544777270948, > nodeOrder=1], endVer=null, isolation=REPEATABLE_READ, > concurrency=PESSIMISTIC, timeout=0, sysInvalidate=false, sys=false, plc=2, > commitVer=null, finalizing=NONE, invalidParts=null, state=ACTIVE, > timedOut=false, topVer=AffinityTopologyVersion [topVer=4, minorTopVer=0], > mvccSnapshot=MvccSnapshotResponse [futId=0, crdVer=1544777266400, cntr=1776, > opCntr=2, txs=[640, 769, 706, 387, 707, 1347, 708, 1348, 582, 646, 583, 1353, > 586, 907, 1547, 596, 660, 724, 661, 598, 599, 603, 667, 731, 1312, 996, 997, > 685, 494, 686, 688, 561, 629, 1015, 570, 1598, 639, 1599], cleanupVer=263, > tracking=387], parentTx=null, duration=83495ms, onePhaseCommit=false], > size=0]]]] > [2018-12-14 08:49:49,987][WARN > ][grid-timeout-worker-#12614%transactions.TxRollbackAsyncWithPersistenceTest1%][diagnostic] > Found long running transaction [startTime=08:48:22.963, > curTime=08:49:49.986, tx=GridDhtTxRemote > [nearNodeId=51b80ee2-85d7-41fd-8d6b-63dc99a00000, > rmtFutId=204f09ca761-8ff10d65-a1d6-4f40-9fd7-afb7afa0b25c, > nearXidVer=GridCacheVersion [topVer=156257270, order=1544777270948, > nodeOrder=1], storeWriteThrough=false, super=GridDistributedTxRemoteAdapter > [explicitVers=null, started=true, commitAllowed=0, > txState=IgniteTxRemoteStateImpl [readMap=EmptyMap {}, > writeMap=ConcurrentLinkedHashMap {}], skipCompletedVers=false, txLbl=null, > super=IgniteTxAdapter [xidVer=GridCacheVersion [topVer=156257270, > order=1544777271199, nodeOrder=3], writeVer=null, implicit=false, loc=false, > threadId=13554, startTime=1544777302963, > nodeId=cb0ed489-aa1c-4e7a-a196-3bea9e500002, startVer=GridCacheVersion > [topVer=156257270, order=1544777268079, nodeOrder=2], endVer=null, > isolation=REPEATABLE_READ, concurrency=PESSIMISTIC, timeout=0, > sysInvalidate=false, sys=false, plc=2, commitVer=GridCacheVersion > [topVer=156257270, order=1544777271199, nodeOrder=3], finalizing=NONE, > invalidParts=null, state=ACTIVE, timedOut=false, > topVer=AffinityTopologyVersion [topVer=4, minorTopVer=0], > mvccSnapshot=MvccSnapshotWithoutTxs [crdVer=1544777266400, cntr=1776, > cleanupVer=263, opCntr=0], parentTx=null, duration=87023ms, > onePhaseCommit=false]]]] > Thread > [name="test-runner-#12427%transactions.TxRollbackAsyncWithPersistenceTest%", > id=13441, state=WAITING, blockCnt=5, waitCnt=54] > at sun.misc.Unsafe.park(Native Method) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:304) > at > o.a.i.i.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:179) > at > o.a.i.i.util.future.GridFutureAdapter.get(GridFutureAdapter.java:142) > at > o.a.i.i.processors.cache.transactions.TxRollbackAsyncTest.testSynchronousRollback0(TxRollbackAsyncTest.java:460) > at > o.a.i.i.processors.cache.transactions.TxRollbackAsyncTest.testSynchronousRollback(TxRollbackAsyncTest.java:339) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at > org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) > at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55) > at > o.a.i.testframework.junits.GridAbstractTest$7.run(GridAbstractTest.java:2119) > at java.lang.Thread.run(Thread.java:748) > {noformat} -- This message was sent by Atlassian JIRA (v7.6.3#76005)