[ https://issues.apache.org/jira/browse/IGNITE-8784?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Dmitriy Pavlov updated IGNITE-8784: ----------------------------------- Fix Version/s: (was: 2.6) 2.7 > Deadlock during simultaneous client reconnect and node stop > ----------------------------------------------------------- > > Key: IGNITE-8784 > URL: https://issues.apache.org/jira/browse/IGNITE-8784 > Project: Ignite > Issue Type: Bug > Components: cache > Affects Versions: 2.5 > Reporter: Pavel Kovalenko > Priority: Critical > Fix For: 2.7 > > > {noformat} > [18:48:22,665][ERROR][tcp-client-disco-msg-worker-#467%client%][IgniteKernal%client] > Failed to reconnect, will stop node > class org.apache.ignite.IgniteException: Failed to wait for local node joined > event (grid is stopping). > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.localJoin(GridDiscoveryManager.java:2193) > at > org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.onKernalStart(GridCachePartitionExchangeManager.java:583) > at > org.apache.ignite.internal.processors.cache.GridCacheSharedContext.onReconnected(GridCacheSharedContext.java:396) > at > org.apache.ignite.internal.processors.cache.GridCacheProcessor.onReconnected(GridCacheProcessor.java:1159) > at > org.apache.ignite.internal.IgniteKernal.onReconnected(IgniteKernal.java:3915) > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$4.onDiscovery0(GridDiscoveryManager.java:830) > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$4.onDiscovery(GridDiscoveryManager.java:589) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.notifyDiscovery(ClientImpl.java:2423) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.notifyDiscovery(ClientImpl.java:2402) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.processNodeAddFinishedMessage(ClientImpl.java:2047) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.processDiscoveryMessage(ClientImpl.java:1896) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.body(ClientImpl.java:1788) > at org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62) > Caused by: class org.apache.ignite.IgniteCheckedException: Failed to wait for > local node joined event (grid is stopping). > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.onKernalStop0(GridDiscoveryManager.java:1657) > at > org.apache.ignite.internal.managers.GridManagerAdapter.onKernalStop(GridManagerAdapter.java:652) > at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2218) > at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:2166) > at > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2588) > at > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2551) > at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:372) > at org.apache.ignite.Ignition.stop(Ignition.java:229) > at > org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid(GridAbstractTest.java:1088) > at > org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1128) > at > org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1109) > at > org.gridgain.grid.internal.processors.cache.database.IgniteDbSnapshotNotStableTopologiesTest.afterTest(IgniteDbSnapshotNotStableTopologiesTest.java:250) > at > org.apache.ignite.testframework.junits.GridAbstractTest.tearDown(GridAbstractTest.java:1694) > at > org.apache.ignite.testframework.junits.common.GridCommonAbstractTest.tearDown(GridCommonAbstractTest.java:492) > at junit.framework.TestCase.runBare(TestCase.java:146) > at junit.framework.TestResult$1.protect(TestResult.java:122) > at junit.framework.TestResult.runProtected(TestResult.java:142) > at junit.framework.TestResult.run(TestResult.java:125) > at junit.framework.TestCase.run(TestCase.java:129) > at junit.framework.TestSuite.runTest(TestSuite.java:255) > at junit.framework.TestSuite.run(TestSuite.java:250) > at junit.framework.TestSuite.runTest(TestSuite.java:255) > at junit.framework.TestSuite.run(TestSuite.java:250) > at > org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:84) > at > org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:369) > at > org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:275) > at > org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:239) > at > org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:160) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.maven.surefire.util.ReflectionUtils.invokeMethodWithArray2(ReflectionUtils.java:206) > at > org.apache.maven.surefire.booter.ProviderFactory$ProviderProxy.invoke(ProviderFactory.java:160) > at > org.apache.maven.surefire.booter.ProviderFactory.invokeProvider(ProviderFactory.java:83) > at > org.apache.maven.plugin.surefire.InPluginVMSurefireStarter.runSuitesInProcess(InPluginVMSurefireStarter.java:84) > at > org.apache.maven.plugin.surefire.AbstractSurefireMojo.executeProvider(AbstractSurefireMojo.java:1107) > at > org.apache.maven.plugin.surefire.AbstractSurefireMojo.executeAfterPreconditionsChecked(AbstractSurefireMojo.java:954) > at > org.apache.maven.plugin.surefire.AbstractSurefireMojo.execute(AbstractSurefireMojo.java:832) > at > org.apache.maven.plugin.DefaultBuildPluginManager.executeMojo(DefaultBuildPluginManager.java:137) > at > org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:208) > at > org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:154) > at > org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:146) > at > org.apache.maven.lifecycle.internal.LifecycleModuleBuilder.buildProject(LifecycleModuleBuilder.java:117) > at > org.apache.maven.lifecycle.internal.LifecycleModuleBuilder.buildProject(LifecycleModuleBuilder.java:81) > at > org.apache.maven.lifecycle.internal.builder.singlethreaded.SingleThreadedBuilder.build(SingleThreadedBuilder.java:56) > at > org.apache.maven.lifecycle.internal.LifecycleStarter.execute(LifecycleStarter.java:128) > at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:305) > at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:192) > at org.apache.maven.DefaultMaven.execute(DefaultMaven.java:105) > at org.apache.maven.cli.MavenCli.execute(MavenCli.java:956) > at org.apache.maven.cli.MavenCli.doMain(MavenCli.java:290) > at org.apache.maven.cli.MavenCli.main(MavenCli.java:194) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.codehaus.plexus.classworlds.launcher.Launcher.launchEnhanced(Launcher.java:289) > at > org.codehaus.plexus.classworlds.launcher.Launcher.launch(Launcher.java:229) > at > org.codehaus.plexus.classworlds.launcher.Launcher.mainWithExitCode(Launcher.java:415) > at > org.codehaus.plexus.classworlds.launcher.Launcher.main(Launcher.java:356) > {noformat} > {noformat} > "main" #1 prio=5 os_prio=0 tid=0x00007fd36000e800 nid=0x53c2 in Object.wait() > [0x00007fd3680df000] > java.lang.Thread.State: WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > at java.lang.Thread.join(Thread.java:1252) > - locked <0x00000007aacf7fd0> (a > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker) > at java.lang.Thread.join(Thread.java:1326) > at > org.apache.ignite.internal.util.IgniteUtils.join(IgniteUtils.java:4608) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl.spiStop(ClientImpl.java:313) > at > org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.spiStop(TcpDiscoverySpi.java:2079) > at > org.apache.ignite.internal.managers.GridManagerAdapter.stopSpi(GridManagerAdapter.java:330) > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.stop(GridDiscoveryManager.java:1675) > at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2288) > at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:2166) > at > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2588) > - locked <0x00000007aacf8170> (a > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance) > at > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2551) > at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:372) > at org.apache.ignite.Ignition.stop(Ignition.java:229) > at > org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid(GridAbstractTest.java:1088) > at > org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1128) > at > org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1109) > at > org.gridgain.grid.internal.processors.cache.database.IgniteDbSnapshotNotStableTopologiesTest.afterTest(IgniteDbSnapshotNotStableTopologiesTest.java:250) > at > org.apache.ignite.testframework.junits.GridAbstractTest.tearDown(GridAbstractTest.java:1694) > at > org.apache.ignite.testframework.junits.common.GridCommonAbstractTest.tearDown(GridCommonAbstractTest.java:492) > at junit.framework.TestCase.runBare(TestCase.java:146) > at junit.framework.TestResult$1.protect(TestResult.java:122) > at junit.framework.TestResult.runProtected(TestResult.java:142) > at junit.framework.TestResult.run(TestResult.java:125) > at junit.framework.TestCase.run(TestCase.java:129) > at junit.framework.TestSuite.runTest(TestSuite.java:255) > at junit.framework.TestSuite.run(TestSuite.java:250) > at junit.framework.TestSuite.runTest(TestSuite.java:255) > at junit.framework.TestSuite.run(TestSuite.java:250) > at > org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:84) > at > org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:369) > at > org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:275) > at > org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:239) > at > org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:160) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.maven.surefire.util.ReflectionUtils.invokeMethodWithArray2(ReflectionUtils.java:206) > at > org.apache.maven.surefire.booter.ProviderFactory$ProviderProxy.invoke(ProviderFactory.java:160) > at > org.apache.maven.surefire.booter.ProviderFactory.invokeProvider(ProviderFactory.java:83) > at > org.apache.maven.plugin.surefire.InPluginVMSurefireStarter.runSuitesInProcess(InPluginVMSurefireStarter.java:84) > at > org.apache.maven.plugin.surefire.AbstractSurefireMojo.executeProvider(AbstractSurefireMojo.java:1107) > at > org.apache.maven.plugin.surefire.AbstractSurefireMojo.executeAfterPreconditionsChecked(AbstractSurefireMojo.java:954) > at > org.apache.maven.plugin.surefire.AbstractSurefireMojo.execute(AbstractSurefireMojo.java:832) > at > org.apache.maven.plugin.DefaultBuildPluginManager.executeMojo(DefaultBuildPluginManager.java:137) > at > org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:208) > at > org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:154) > at > org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:146) > at > org.apache.maven.lifecycle.internal.LifecycleModuleBuilder.buildProject(LifecycleModuleBuilder.java:117) > at > org.apache.maven.lifecycle.internal.LifecycleModuleBuilder.buildProject(LifecycleModuleBuilder.java:81) > at > org.apache.maven.lifecycle.internal.builder.singlethreaded.SingleThreadedBuilder.build(SingleThreadedBuilder.java:56) > at > org.apache.maven.lifecycle.internal.LifecycleStarter.execute(LifecycleStarter.java:128) > at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:305) > at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:192) > at org.apache.maven.DefaultMaven.execute(DefaultMaven.java:105) > at org.apache.maven.cli.MavenCli.execute(MavenCli.java:956) > at org.apache.maven.cli.MavenCli.doMain(MavenCli.java:290) > at org.apache.maven.cli.MavenCli.main(MavenCli.java:194) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.codehaus.plexus.classworlds.launcher.Launcher.launchEnhanced(Launcher.java:289) > at > org.codehaus.plexus.classworlds.launcher.Launcher.launch(Launcher.java:229) > at > org.codehaus.plexus.classworlds.launcher.Launcher.mainWithExitCode(Launcher.java:415) > at > org.codehaus.plexus.classworlds.launcher.Launcher.main(Launcher.java:356) > {noformat} > {noformat} > "tcp-client-disco-msg-worker-#467%client%" #8800 prio=5 os_prio=0 > tid=0x00007fd362555000 nid=0x76df waiting for monitor entry > [0x00007fd1cb6f7000] > java.lang.Thread.State: BLOCKED (on object monitor) > at > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2559) > - waiting to lock <0x00000007aacf8170> (a > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance) > at > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2551) > at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:372) > at org.apache.ignite.Ignition.stop(Ignition.java:229) > at org.apache.ignite.internal.IgniteKernal.close(IgniteKernal.java:3487) > at > org.apache.ignite.internal.IgniteKernal.onReconnected(IgniteKernal.java:3974) > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$4.onDiscovery0(GridDiscoveryManager.java:830) > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$4.onDiscovery(GridDiscoveryManager.java:589) > - locked <0x00000007a9caf4e8> (a java.lang.Object) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.notifyDiscovery(ClientImpl.java:2423) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.notifyDiscovery(ClientImpl.java:2402) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.processNodeAddFinishedMessage(ClientImpl.java:2047) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.processDiscoveryMessage(ClientImpl.java:1896) > at > org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.body(ClientImpl.java:1788) > at org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62) > {noformat} > Possible solution: > Complete local join future at DiscoveryManager with NodeStoppingException and > don't invoke close() if node is already stopping. -- This message was sent by Atlassian JIRA (v7.6.3#76005)