[ https://issues.apache.org/jira/browse/GEODE-5376?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16531928#comment-16531928 ]
Eric Shu commented on GEODE-5376: --------------------------------- Currently the client cannot fail over. The JTA transaction fails with a SynchronizationCommitConflictException, even though there is only one JTA transaction. {noformat} org.apache.geode.test.dunit.RMIException: While invoking org.apache.geode.internal.cache.tx.ClientServerTxFailoverDistributedTest$$Lambda$28/1423983012.run in VM 3 running on Host 10.118.20.64 with 4 VMs at org.apache.geode.test.dunit.VM.invoke(VM.java:436) at org.apache.geode.test.dunit.VM.invoke(VM.java:405) at org.apache.geode.test.dunit.VM.invoke(VM.java:348) at org.apache.geode.internal.cache.tx.ClientServerTxFailoverDistributedTest.testJTACanFailoverAfterBeforeCompletionCall(ClientServerTxFailoverDistributedTest.java:113) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50) at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47) at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) at org.apache.geode.test.dunit.rules.AbstractDistributedTestRule$1.evaluate(AbstractDistributedTestRule.java:60) at org.apache.geode.test.dunit.rules.AbstractDistributedTestRule$1.evaluate(AbstractDistributedTestRule.java:60) at org.apache.geode.test.dunit.rules.AbstractDistributedTestRule$1.evaluate(AbstractDistributedTestRule.java:60) at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55) at org.junit.rules.RunRules.evaluate(RunRules.java:20) at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325) at 
org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57) at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290) at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71) at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288) at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58) at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268) at org.junit.runners.ParentRunner.run(ParentRunner.java:363) at org.junit.runner.JUnitCore.run(JUnitCore.java:137) at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68) at com.intellij.rt.execution.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:47) at com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:242) at com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:70) Caused by: org.apache.geode.cache.client.ServerOperationException: remote server on 10.118.20.64(41619:loner):54245:d3dbf461: While performing a remote AFTER_COMPLETION at org.apache.geode.cache.client.internal.AbstractOp.processObjResponse(AbstractOp.java:291) at org.apache.geode.cache.client.internal.TXSynchronizationOp$Impl.processResponse(TXSynchronizationOp.java:113) at org.apache.geode.cache.client.internal.AbstractOp.processResponse(AbstractOp.java:225) at org.apache.geode.cache.client.internal.AbstractOp.attemptReadResponse(AbstractOp.java:210) at org.apache.geode.cache.client.internal.AbstractOp.attempt(AbstractOp.java:386) at org.apache.geode.cache.client.internal.ConnectionImpl.execute(ConnectionImpl.java:276) at org.apache.geode.cache.client.internal.pooling.PooledConnection.execute(PooledConnection.java:327) at org.apache.geode.cache.client.internal.OpExecutorImpl.executeWithPossibleReAuthentication(OpExecutorImpl.java:885) at 
org.apache.geode.cache.client.internal.OpExecutorImpl.executeOnServer(OpExecutorImpl.java:378) at org.apache.geode.cache.client.internal.OpExecutorImpl.executeWithServerAffinity(OpExecutorImpl.java:231) at org.apache.geode.cache.client.internal.OpExecutorImpl.execute(OpExecutorImpl.java:140) at org.apache.geode.cache.client.internal.OpExecutorImpl.execute(OpExecutorImpl.java:127) at org.apache.geode.cache.client.internal.PoolImpl.execute(PoolImpl.java:782) at org.apache.geode.cache.client.internal.OpExecutorImpl.executeWithServerAffinity(OpExecutorImpl.java:275) at org.apache.geode.cache.client.internal.OpExecutorImpl.execute(OpExecutorImpl.java:140) at org.apache.geode.cache.client.internal.OpExecutorImpl.execute(OpExecutorImpl.java:127) at org.apache.geode.cache.client.internal.PoolImpl.execute(PoolImpl.java:782) at org.apache.geode.cache.client.internal.TXSynchronizationOp.execute(TXSynchronizationOp.java:48) at org.apache.geode.cache.client.internal.ServerRegionProxy.afterCompletion(ServerRegionProxy.java:808) at org.apache.geode.internal.cache.tx.ClientTXStateStub.afterCompletion(ClientTXStateStub.java:229) at org.apache.geode.internal.cache.tx.ClientServerTxFailoverDistributedTest.doAfterCompletion(ClientServerTxFailoverDistributedTest.java:183) at org.apache.geode.internal.cache.tx.ClientServerTxFailoverDistributedTest.lambda$testJTACanFailoverAfterBeforeCompletionCall$58e057d1$1(ClientServerTxFailoverDistributedTest.java:113) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at hydra.MethExecutor.executeObject(MethExecutor.java:244) at org.apache.geode.test.dunit.standalone.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:70) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:357) at sun.rmi.transport.Transport$1.run(Transport.java:200) at sun.rmi.transport.Transport$1.run(Transport.java:197) at java.security.AccessController.doPrivileged(Native Method) at sun.rmi.transport.Transport.serviceCall(Transport.java:196) at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:568) at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:826) at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:683) at java.security.AccessController.doPrivileged(Native Method) at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:682) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: org.apache.geode.cache.SynchronizationCommitConflictException: Conflict detected in GemFire transaction TXId: 10.118.20.64(41619:loner):54245:d3dbf461:1, caused by org.apache.geode.cache.CommitConflictException: The key 1 in region /__PR/_B__ClientServerTxFailoverDistributedTest__testJTACanFailoverAfterBeforeCompletionCall__region_1 was being modified by another transaction locally. 
at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXState.beforeCompletion(TXState.java:1048) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXStateProxyImpl.beforeCompletion(TXStateProxyImpl.java:484) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.JtaBeforeCompletionMessage.operateOnTx(JtaBeforeCompletionMessage.java:70) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXMessage.process(TXMessage.java:94) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:378) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.DistributionMessage.schedule(DistributionMessage.java:436) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.ClusterDistributionManager.scheduleIncomingMessage(ClusterDistributionManager.java:3249) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.ClusterDistributionManager.handleIncomingDMsg(ClusterDistributionManager.java:2911) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.ClusterDistributionManager.access$1500(ClusterDistributionManager.java:109) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.messageReceived(ClusterDistributionManager.java:4035) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.dispatchMessage(GMSMembershipManager.java:1120) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.handleOrDeferMessage(GMSMembershipManager.java:1039) at Remote Member '10.118.20.64(41618)<v1>:32770' in 
org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$MyDCReceiver.messageReceived(GMSMembershipManager.java:402) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.direct.DirectChannel.receive(DirectChannel.java:728) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.tcp.TCPConduit.messageReceived(TCPConduit.java:868) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.tcp.Connection.dispatchMessage(Connection.java:3965) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.tcp.Connection.processNIOBuffer(Connection.java:3551) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.tcp.Connection.runNioReader(Connection.java:1827) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.tcp.Connection.run(Connection.java:1688) at Remote Member '10.118.20.64(41618)<v1>:32770' in java.lang.Thread.run(Thread.java:748) at org.apache.geode.distributed.internal.ReplyException.handleCause(ReplyException.java:87) at org.apache.geode.internal.cache.TXStateStub.beforeCompletion(TXStateStub.java:96) at org.apache.geode.internal.cache.TXStateProxyImpl.beforeCompletion(TXStateProxyImpl.java:484) at org.apache.geode.internal.cache.tier.sockets.command.TXSynchronizationCommand.cmdExecute(TXSynchronizationCommand.java:235) at org.apache.geode.internal.cache.tier.sockets.BaseCommand.execute(BaseCommand.java:164) at org.apache.geode.internal.cache.tier.sockets.ServerConnection.doNormalMsg(ServerConnection.java:869) at org.apache.geode.internal.cache.tier.sockets.OriginalServerConnection.doOneMessage(OriginalServerConnection.java:77) at org.apache.geode.internal.cache.tier.sockets.ServerConnection.run(ServerConnection.java:1248) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at 
org.apache.geode.internal.cache.tier.sockets.AcceptorImpl$4$1.run(AcceptorImpl.java:644) ... 1 more Caused by: org.apache.geode.cache.CommitConflictException: The key 1 in region /__PR/_B__ClientServerTxFailoverDistributedTest__testJTACanFailoverAfterBeforeCompletionCall__region_1 was being modified by another transaction locally. at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXReservationMgr.checkSetForConflict(TXReservationMgr.java:107) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXReservationMgr.checkForConflict(TXReservationMgr.java:77) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXReservationMgr.makeReservation(TXReservationMgr.java:56) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXLockRequest.txLocalLock(TXLockRequest.java:151) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXLockRequest.obtain(TXLockRequest.java:84) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXState.reserveAndCheck(TXState.java:335) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXState.beforeCompletion(TXState.java:1009) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXStateProxyImpl.beforeCompletion(TXStateProxyImpl.java:484) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.JtaBeforeCompletionMessage.operateOnTx(JtaBeforeCompletionMessage.java:70) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.cache.TXMessage.process(TXMessage.java:94) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:378) at Remote Member '10.118.20.64(41618)<v1>:32770' in 
org.apache.geode.distributed.internal.DistributionMessage.schedule(DistributionMessage.java:436) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.ClusterDistributionManager.scheduleIncomingMessage(ClusterDistributionManager.java:3249) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.ClusterDistributionManager.handleIncomingDMsg(ClusterDistributionManager.java:2911) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.ClusterDistributionManager.access$1500(ClusterDistributionManager.java:109) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.messageReceived(ClusterDistributionManager.java:4035) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.dispatchMessage(GMSMembershipManager.java:1120) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.handleOrDeferMessage(GMSMembershipManager.java:1039) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$MyDCReceiver.messageReceived(GMSMembershipManager.java:402) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.distributed.internal.direct.DirectChannel.receive(DirectChannel.java:728) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.tcp.TCPConduit.messageReceived(TCPConduit.java:868) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.tcp.Connection.dispatchMessage(Connection.java:3965) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.tcp.Connection.processNIOBuffer(Connection.java:3551) at Remote Member '10.118.20.64(41618)<v1>:32770' in 
org.apache.geode.internal.tcp.Connection.runNioReader(Connection.java:1827) at Remote Member '10.118.20.64(41618)<v1>:32770' in org.apache.geode.internal.tcp.Connection.run(Connection.java:1688) at Remote Member '10.118.20.64(41618)<v1>:32770' in java.lang.Thread.run(Thread.java:748) {noformat} > In JTA, Geode afterCompletion may fail even though the JTA host still > available > ------------------------------------------------------------------------------- > > Key: GEODE-5376 > URL: https://issues.apache.org/jira/browse/GEODE-5376 > Project: Geode > Issue Type: Bug > Components: transactions > Reporter: Eric Shu > Priority: Major > > Consider a JTA transaction that is hosted on server S2 while another server, S1, handles the > client's JTA request. After S1 executes the JTA beforeCompletion initiated from the > client, S2's P2P reader thread for S1 holds the locks for the JTA. If > S1 is lost or shut down at that point, the client will fail over to another server to > try afterCompletion. The afterCompletion attempt will fail because the new thread > does not hold the lock. This causes the client JTA to fail due to a > CommitConflict, even though there are no concurrent commit operations. -- This message was sent by Atlassian JIRA (v7.6.3#76005)