[ https://issues.apache.org/jira/browse/IGNITE-6967?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Alexandr Kuramshin updated IGNITE-6967: --------------------------------------- Description: When a topology change occurs while a service is deployed, the discovery event listener calls {{GridServiceProcessor.reassign()}}, which acquires a lock on the utility cache (where the GridServiceAssignments are stored) and prevents PME from completing. Stack traces: {noformat} Thread [name="test-runner-#186%service.IgniteServiceDynamicCachesSelfTest%", id=232, state=WAITING, blockCnt=0, waitCnt=8] at sun.misc.Unsafe.park(Native Method) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:304) at o.a.i.i.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:177) at o.a.i.i.util.future.GridFutureAdapter.get(GridFutureAdapter.java:140) at o.a.i.i.IgniteKernal.createCache(IgniteKernal.java:2841) at o.a.i.i.processors.service.IgniteServiceDynamicCachesSelfTest.testDeployCalledBeforeCacheStart(IgniteServiceDynamicCachesSelfTest.java:140) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at junit.framework.TestCase.runTest(TestCase.java:176) at o.a.i.testframework.junits.GridAbstractTest.runTestInternal(GridAbstractTest.java:2000) at o.a.i.testframework.junits.GridAbstractTest.access$000(GridAbstractTest.java:132) at o.a.i.testframework.junits.GridAbstractTest$5.run(GridAbstractTest.java:1915) at java.lang.Thread.run(Thread.java:748) Thread [name="srvc-deploy-#38%service.IgniteServiceDynamicCachesSelfTest0%", id=56, state=WAITING, blockCnt=5, waitCnt=9] at sun.misc.Unsafe.park(Native Method) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:304) at o.a.i.i.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:177) at o.a.i.i.util.future.GridFutureAdapter.get(GridFutureAdapter.java:140) at 
o.a.i.i.processors.cache.GridCacheContext.awaitStarted(GridCacheContext.java:443) at o.a.i.i.processors.affinity.GridAffinityProcessor.affinityCache(GridAffinityProcessor.java:373) at o.a.i.i.processors.affinity.GridAffinityProcessor.keysToNodes(GridAffinityProcessor.java:347) at o.a.i.i.processors.affinity.GridAffinityProcessor.mapKeyToNode(GridAffinityProcessor.java:259) at o.a.i.i.processors.service.GridServiceProcessor.reassign(GridServiceProcessor.java:1163) at o.a.i.i.processors.service.GridServiceProcessor.access$2400(GridServiceProcessor.java:123) at o.a.i.i.processors.service.GridServiceProcessor$TopologyListener$1.run0(GridServiceProcessor.java:1763) at o.a.i.i.processors.service.GridServiceProcessor$DepRunnable.run(GridServiceProcessor.java:1976) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:748) Locked synchronizers: java.util.concurrent.ThreadPoolExecutor$Worker@27f723 {noformat} Problematic code: {noformat} org.apache.ignite.internal.processors.service.GridServiceProcessor#reassign try (GridNearTxLocal tx = cache.txStartEx(PESSIMISTIC, REPEATABLE_READ)) { GridServiceAssignmentsKey key = new GridServiceAssignmentsKey(cfg.getName()); GridServiceAssignments oldAssigns = (GridServiceAssignments)cache.get(key); Map<UUID, Integer> cnts = new HashMap<>(); if (affKey != null) { ClusterNode n = ctx.affinity().mapKeyToNode(cacheName, affKey, topVer); // WAIT HERE UNTIL PME FINISHED (INFINITELY) {noformat} was: With a service deployment when topology change occurs the discovery event listener calls {{GridServiceProcessor.reassign()}} causing to acquire a lock on utility cache (where the GridServiceAssignments stored) which prevents PME from completion. 
Stack traces: {{noformat}} Thread [name="test-runner-#186%service.IgniteServiceDynamicCachesSelfTest%", id=232, state=WAITING, blockCnt=0, waitCnt=8] at sun.misc.Unsafe.park(Native Method) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:304) at o.a.i.i.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:177) at o.a.i.i.util.future.GridFutureAdapter.get(GridFutureAdapter.java:140) at o.a.i.i.IgniteKernal.createCache(IgniteKernal.java:2841) at o.a.i.i.processors.service.IgniteServiceDynamicCachesSelfTest.testDeployCalledBeforeCacheStart(IgniteServiceDynamicCachesSelfTest.java:140) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at junit.framework.TestCase.runTest(TestCase.java:176) at o.a.i.testframework.junits.GridAbstractTest.runTestInternal(GridAbstractTest.java:2000) at o.a.i.testframework.junits.GridAbstractTest.access$000(GridAbstractTest.java:132) at o.a.i.testframework.junits.GridAbstractTest$5.run(GridAbstractTest.java:1915) at java.lang.Thread.run(Thread.java:748) Thread [name="srvc-deploy-#38%service.IgniteServiceDynamicCachesSelfTest0%", id=56, state=WAITING, blockCnt=5, waitCnt=9] at sun.misc.Unsafe.park(Native Method) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:304) at o.a.i.i.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:177) at o.a.i.i.util.future.GridFutureAdapter.get(GridFutureAdapter.java:140) at o.a.i.i.processors.cache.GridCacheContext.awaitStarted(GridCacheContext.java:443) at o.a.i.i.processors.affinity.GridAffinityProcessor.affinityCache(GridAffinityProcessor.java:373) at o.a.i.i.processors.affinity.GridAffinityProcessor.keysToNodes(GridAffinityProcessor.java:347) at 
o.a.i.i.processors.affinity.GridAffinityProcessor.mapKeyToNode(GridAffinityProcessor.java:259) at o.a.i.i.processors.service.GridServiceProcessor.reassign(GridServiceProcessor.java:1163) at o.a.i.i.processors.service.GridServiceProcessor.access$2400(GridServiceProcessor.java:123) at o.a.i.i.processors.service.GridServiceProcessor$TopologyListener$1.run0(GridServiceProcessor.java:1763) at o.a.i.i.processors.service.GridServiceProcessor$DepRunnable.run(GridServiceProcessor.java:1976) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:748) Locked synchronizers: java.util.concurrent.ThreadPoolExecutor$Worker@27f723 {{noformat}} Problematic code: {{noformat}} org.apache.ignite.internal.processors.service.GridServiceProcessor#reassign try (GridNearTxLocal tx = cache.txStartEx(PESSIMISTIC, REPEATABLE_READ)) { GridServiceAssignmentsKey key = new GridServiceAssignmentsKey(cfg.getName()); GridServiceAssignments oldAssigns = (GridServiceAssignments)cache.get(key); Map<UUID, Integer> cnts = new HashMap<>(); if (affKey != null) { ClusterNode n = ctx.affinity().mapKeyToNode(cacheName, affKey, topVer); // WAIT HERE UNTIL PME FINISHED (INFINITELY) {{noformat}} > PME deadlock on reassigning service deployment > ---------------------------------------------- > > Key: IGNITE-6967 > URL: https://issues.apache.org/jira/browse/IGNITE-6967 > Project: Ignite > Issue Type: Bug > Components: general > Affects Versions: 2.3 > Reporter: Alexandr Kuramshin > > When a topology change occurs while a service is deployed, the discovery event > listener calls {{GridServiceProcessor.reassign()}}, which acquires a lock > on the utility cache (where the GridServiceAssignments are stored) and prevents PME > from completing. 
> Stack traces: > {noformat} > Thread [name="test-runner-#186%service.IgniteServiceDynamicCachesSelfTest%", > id=232, state=WAITING, blockCnt=0, waitCnt=8] > at sun.misc.Unsafe.park(Native Method) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:304) > at > o.a.i.i.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:177) > at > o.a.i.i.util.future.GridFutureAdapter.get(GridFutureAdapter.java:140) > at o.a.i.i.IgniteKernal.createCache(IgniteKernal.java:2841) > at > o.a.i.i.processors.service.IgniteServiceDynamicCachesSelfTest.testDeployCalledBeforeCacheStart(IgniteServiceDynamicCachesSelfTest.java:140) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at junit.framework.TestCase.runTest(TestCase.java:176) > at > o.a.i.testframework.junits.GridAbstractTest.runTestInternal(GridAbstractTest.java:2000) > at > o.a.i.testframework.junits.GridAbstractTest.access$000(GridAbstractTest.java:132) > at > o.a.i.testframework.junits.GridAbstractTest$5.run(GridAbstractTest.java:1915) > at java.lang.Thread.run(Thread.java:748) > Thread [name="srvc-deploy-#38%service.IgniteServiceDynamicCachesSelfTest0%", > id=56, state=WAITING, blockCnt=5, waitCnt=9] > at sun.misc.Unsafe.park(Native Method) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:304) > at > o.a.i.i.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:177) > at > o.a.i.i.util.future.GridFutureAdapter.get(GridFutureAdapter.java:140) > at > o.a.i.i.processors.cache.GridCacheContext.awaitStarted(GridCacheContext.java:443) > at > o.a.i.i.processors.affinity.GridAffinityProcessor.affinityCache(GridAffinityProcessor.java:373) > at > o.a.i.i.processors.affinity.GridAffinityProcessor.keysToNodes(GridAffinityProcessor.java:347) > at > 
o.a.i.i.processors.affinity.GridAffinityProcessor.mapKeyToNode(GridAffinityProcessor.java:259) > at > o.a.i.i.processors.service.GridServiceProcessor.reassign(GridServiceProcessor.java:1163) > at > o.a.i.i.processors.service.GridServiceProcessor.access$2400(GridServiceProcessor.java:123) > at > o.a.i.i.processors.service.GridServiceProcessor$TopologyListener$1.run0(GridServiceProcessor.java:1763) > at > o.a.i.i.processors.service.GridServiceProcessor$DepRunnable.run(GridServiceProcessor.java:1976) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:748) > Locked synchronizers: > java.util.concurrent.ThreadPoolExecutor$Worker@27f723 > {noformat} > Problematic code: > {noformat} > org.apache.ignite.internal.processors.service.GridServiceProcessor#reassign > try (GridNearTxLocal tx = cache.txStartEx(PESSIMISTIC, > REPEATABLE_READ)) { > GridServiceAssignmentsKey key = new > GridServiceAssignmentsKey(cfg.getName()); > GridServiceAssignments oldAssigns = > (GridServiceAssignments)cache.get(key); > Map<UUID, Integer> cnts = new HashMap<>(); > if (affKey != null) { > ClusterNode n = ctx.affinity().mapKeyToNode(cacheName, > affKey, topVer); > // WAIT HERE UNTIL PME FINISHED (INFINITELY) > {noformat} -- This message was sent by Atlassian JIRA (v6.4.14#64029)