[ https://issues.apache.org/jira/browse/IGNITE-25819?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17987888#comment-17987888 ]
Roman Puchkovskiy edited comment on IGNITE-25819 at 7/2/25 12:39 PM: --------------------------------------------------------------------- There seems to be a deadlock while stopping MetastorageManager. It goes away if we avoid blocking a networking thread for start activities. {noformat} "%icdrt_ripwanwfcmoj_3346%MessagingService-inbound-Default-0-0" #27758 prio=10 os_prio=0 cpu=17,48ms elapsed=270,83s tid=0x000074e6fc0fd800 nid=0x86484 waiting for monitor entry [0x000074e4792fe000] java.lang.Thread.State: BLOCKED (on object monitor) at org.apache.ignite.internal.app.LifecycleManager.lambda$allComponentsStartFuture$1(LifecycleManager.java:141) - waiting to lock <0x00000000ca000090> (a org.apache.ignite.internal.app.LifecycleManager) at org.apache.ignite.internal.app.LifecycleManager$$Lambda$4398/0x00000001011b7c40.accept(Unknown Source) at java.util.concurrent.CompletableFuture.uniWhenComplete(java.base@11.0.25/CompletableFuture.java:859) at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(java.base@11.0.25/CompletableFuture.java:837) at java.util.concurrent.CompletableFuture.postComplete(java.base@11.0.25/CompletableFuture.java:506) at java.util.concurrent.CompletableFuture.complete(java.base@11.0.25/CompletableFuture.java:2075) at org.apache.ignite.internal.raft.RaftGroupServiceImpl.lambda$sendWithRetry$50(RaftGroupServiceImpl.java:719) at org.apache.ignite.internal.raft.RaftGroupServiceImpl$$Lambda$2235/0x0000000100d56840.accept(Unknown Source) at java.util.concurrent.CompletableFuture.uniWhenComplete(java.base@11.0.25/CompletableFuture.java:859) at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(java.base@11.0.25/CompletableFuture.java:837) at java.util.concurrent.CompletableFuture.postComplete(java.base@11.0.25/CompletableFuture.java:506) at java.util.concurrent.CompletableFuture.complete(java.base@11.0.25/CompletableFuture.java:2075) at 
org.apache.ignite.internal.network.DefaultMessagingService.onInvokeResponse(DefaultMessagingService.java:606) at org.apache.ignite.internal.network.DefaultMessagingService.handleInvokeResponse(DefaultMessagingService.java:499) at org.apache.ignite.internal.network.DefaultMessagingService.lambda$handleMessageFromNetwork$5(DefaultMessagingService.java:433) at org.apache.ignite.internal.network.DefaultMessagingService$$Lambda$2256/0x0000000100d5f440.run(Unknown Source) at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.25/ThreadPoolExecutor.java:1128) at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.25/ThreadPoolExecutor.java:628) at java.lang.Thread.run(java.base@11.0.25/Thread.java:834) {noformat} and {noformat} "ForkJoinPool.commonPool-worker-11" #144 daemon prio=5 os_prio=0 cpu=1404,79ms elapsed=374,77s tid=0x000074e508001000 nid=0x7c748 waiting on condition [0x000074e6b81aa000] java.lang.Thread.State: TIMED_WAITING (sleeping) at java.lang.Thread.sleep(java.base@11.0.25/Native Method) at org.apache.ignite.internal.util.IgniteSpinReadWriteLock.writeLock(IgniteSpinReadWriteLock.java:257) at org.apache.ignite.internal.util.IgniteSpinBusyLock.block(IgniteSpinBusyLock.java:68) at org.apache.ignite.internal.raft.RaftGroupServiceImpl.shutdown(RaftGroupServiceImpl.java:561) at org.apache.ignite.internal.raft.client.TopologyAwareRaftGroupService.shutdown(TopologyAwareRaftGroupService.java:522) at org.apache.ignite.internal.metastorage.impl.MetaStorageServiceContext.close(MetaStorageServiceContext.java:75) at org.apache.ignite.internal.metastorage.impl.MetaStorageServiceImpl.close(MetaStorageServiceImpl.java:308) at org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl$$Lambda$5059/0x00000001012a2440.accept(Unknown Source) at org.apache.ignite.internal.util.IgniteUtils.consumeIfFinishedSuccessfully(IgniteUtils.java:1034) at org.apache.ignite.internal.util.IgniteUtils.failOrConsume(IgniteUtils.java:1020) at 
org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl.lambda$stopAsync$29(MetaStorageManagerImpl.java:800) at org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl$$Lambda$5057/0x00000001012a1c40.close(Unknown Source) at org.apache.ignite.internal.util.IgniteUtils.lambda$closeAllManually$1(IgniteUtils.java:631) at org.apache.ignite.internal.util.IgniteUtils$$Lambda$4862/0x0000000101264840.accept(Unknown Source) at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(java.base@11.0.25/ForEachOps.java:183) at java.util.stream.ReferencePipeline$2$1.accept(java.base@11.0.25/ReferencePipeline.java:177) at java.util.Spliterators$ArraySpliterator.forEachRemaining(java.base@11.0.25/Spliterators.java:948) at java.util.stream.AbstractPipeline.copyInto(java.base@11.0.25/AbstractPipeline.java:484) at java.util.stream.AbstractPipeline.wrapAndCopyInto(java.base@11.0.25/AbstractPipeline.java:474) at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(java.base@11.0.25/ForEachOps.java:150) at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(java.base@11.0.25/ForEachOps.java:173) at java.util.stream.AbstractPipeline.evaluate(java.base@11.0.25/AbstractPipeline.java:234) at java.util.stream.ReferencePipeline.forEach(java.base@11.0.25/ReferencePipeline.java:497) at org.apache.ignite.internal.util.IgniteUtils.closeAllManually(IgniteUtils.java:629) at org.apache.ignite.internal.util.IgniteUtils.closeAllManually(IgniteUtils.java:663) at org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl.stopAsync(MetaStorageManagerImpl.java:797) at org.apache.ignite.internal.util.IgniteUtils.lambda$stopAsync$6(IgniteUtils.java:1295) at org.apache.ignite.internal.util.IgniteUtils$$Lambda$5003/0x0000000101294040.apply(Unknown Source) at java.util.stream.ReferencePipeline$3$1.accept(java.base@11.0.25/ReferencePipeline.java:195) at java.util.stream.ReferencePipeline$2$1.accept(java.base@11.0.25/ReferencePipeline.java:177) at 
java.util.ArrayList$ArrayListSpliterator.forEachRemaining(java.base@11.0.25/ArrayList.java:1655) at java.util.stream.AbstractPipeline.copyInto(java.base@11.0.25/AbstractPipeline.java:484) at java.util.stream.AbstractPipeline.wrapAndCopyInto(java.base@11.0.25/AbstractPipeline.java:474) at java.util.stream.AbstractPipeline.evaluate(java.base@11.0.25/AbstractPipeline.java:550) at java.util.stream.AbstractPipeline.evaluateToArrayNode(java.base@11.0.25/AbstractPipeline.java:260) at java.util.stream.ReferencePipeline.toArray(java.base@11.0.25/ReferencePipeline.java:517) at org.apache.ignite.internal.util.IgniteUtils.stopAsync(IgniteUtils.java:1301) at org.apache.ignite.internal.util.IgniteUtils.stopAsync(IgniteUtils.java:1343) at org.apache.ignite.internal.app.LifecycleManager.initiateAllComponentsStop(LifecycleManager.java:181) - locked <0x00000000ca000090> (a org.apache.ignite.internal.app.LifecycleManager) at org.apache.ignite.internal.app.LifecycleManager.stopNode(LifecycleManager.java:155) at org.apache.ignite.internal.app.IgniteImpl.stopAsync(IgniteImpl.java:1692) at org.apache.ignite.internal.app.IgniteServerImpl.triggerStopOnCurrentIgnite(IgniteServerImpl.java:345) at org.apache.ignite.internal.app.IgniteServerImpl.shutdownAsync(IgniteServerImpl.java:334) at org.apache.ignite.internal.app.IgniteServerImpl.shutdown(IgniteServerImpl.java:369) at org.apache.ignite.internal.Cluster$$Lambda$5808/0x0000000101236840.accept(Unknown Source) at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(java.base@11.0.25/ForEachOps.java:183) at java.util.stream.ReferencePipeline$2$1.accept(java.base@11.0.25/ReferencePipeline.java:177) at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(java.base@11.0.25/ArrayList.java:1655) at java.util.stream.AbstractPipeline.copyInto(java.base@11.0.25/AbstractPipeline.java:484) at java.util.stream.ForEachOps$ForEachTask.compute(java.base@11.0.25/ForEachOps.java:290) at 
java.util.concurrent.CountedCompleter.exec(java.base@11.0.25/CountedCompleter.java:746) at java.util.concurrent.ForkJoinTask.doExec$$$capture(java.base@11.0.25/ForkJoinTask.java:290) at java.util.concurrent.ForkJoinTask.doExec(java.base@11.0.25/ForkJoinTask.java) at java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(java.base@11.0.25/ForkJoinPool.java:1020) at java.util.concurrent.ForkJoinPool.scan(java.base@11.0.25/ForkJoinPool.java:1656) at java.util.concurrent.ForkJoinPool.runWorker(java.base@11.0.25/ForkJoinPool.java:1594) at java.util.concurrent.ForkJoinWorkerThread.run(java.base@11.0.25/ForkJoinWorkerThread.java:183) {noformat} was (Author: rpuch): There seems to be a deadlock while stopping MetastorageManager. It goes away when avoiding to block a networking thread for start activities. "%icdrt_ripwanwfcmoj_3346%MessagingService-inbound-Default-0-0" #27758 prio=10 os_prio=0 cpu=17,48ms elapsed=270,83s tid=0x000074e6fc0fd800 nid=0x86484 waiting for monitor entry [0x000074e4792fe000] java.lang.Thread.State: BLOCKED (on object monitor) at org.apache.ignite.internal.app.LifecycleManager.lambda$allComponentsStartFuture$1(LifecycleManager.java:141) - waiting to lock <0x00000000ca000090> (a org.apache.ignite.internal.app.LifecycleManager) at org.apache.ignite.internal.app.LifecycleManager$$Lambda$4398/0x00000001011b7c40.accept(Unknown Source) at java.util.concurrent.CompletableFuture.uniWhenComplete(java.base@11.0.25/CompletableFuture.java:859) at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(java.base@11.0.25/CompletableFuture.java:837) at java.util.concurrent.CompletableFuture.postComplete(java.base@11.0.25/CompletableFuture.java:506) at java.util.concurrent.CompletableFuture.complete(java.base@11.0.25/CompletableFuture.java:2075) at org.apache.ignite.internal.raft.RaftGroupServiceImpl.lambda$sendWithRetry$50(RaftGroupServiceImpl.java:719) at org.apache.ignite.internal.raft.RaftGroupServiceImpl$$Lambda$2235/0x0000000100d56840.accept(Unknown 
Source) at java.util.concurrent.CompletableFuture.uniWhenComplete(java.base@11.0.25/CompletableFuture.java:859) at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(java.base@11.0.25/CompletableFuture.java:837) at java.util.concurrent.CompletableFuture.postComplete(java.base@11.0.25/CompletableFuture.java:506) at java.util.concurrent.CompletableFuture.complete(java.base@11.0.25/CompletableFuture.java:2075) at org.apache.ignite.internal.network.DefaultMessagingService.onInvokeResponse(DefaultMessagingService.java:606) at org.apache.ignite.internal.network.DefaultMessagingService.handleInvokeResponse(DefaultMessagingService.java:499) at org.apache.ignite.internal.network.DefaultMessagingService.lambda$handleMessageFromNetwork$5(DefaultMessagingService.java:433) at org.apache.ignite.internal.network.DefaultMessagingService$$Lambda$2256/0x0000000100d5f440.run(Unknown Source) at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.25/ThreadPoolExecutor.java:1128) at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.25/ThreadPoolExecutor.java:628) at java.lang.Thread.run(java.base@11.0.25/Thread.java:834) "ForkJoinPool.commonPool-worker-11" #144 daemon prio=5 os_prio=0 cpu=1404,79ms elapsed=374,77s tid=0x000074e508001000 nid=0x7c748 waiting on condition [0x000074e6b81aa000] java.lang.Thread.State: TIMED_WAITING (sleeping) at java.lang.Thread.sleep(java.base@11.0.25/Native Method) at org.apache.ignite.internal.util.IgniteSpinReadWriteLock.writeLock(IgniteSpinReadWriteLock.java:257) at org.apache.ignite.internal.util.IgniteSpinBusyLock.block(IgniteSpinBusyLock.java:68) at org.apache.ignite.internal.raft.RaftGroupServiceImpl.shutdown(RaftGroupServiceImpl.java:561) at org.apache.ignite.internal.raft.client.TopologyAwareRaftGroupService.shutdown(TopologyAwareRaftGroupService.java:522) at org.apache.ignite.internal.metastorage.impl.MetaStorageServiceContext.close(MetaStorageServiceContext.java:75) at 
org.apache.ignite.internal.metastorage.impl.MetaStorageServiceImpl.close(MetaStorageServiceImpl.java:308) at org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl$$Lambda$5059/0x00000001012a2440.accept(Unknown Source) at org.apache.ignite.internal.util.IgniteUtils.consumeIfFinishedSuccessfully(IgniteUtils.java:1034) at org.apache.ignite.internal.util.IgniteUtils.failOrConsume(IgniteUtils.java:1020) at org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl.lambda$stopAsync$29(MetaStorageManagerImpl.java:800) at org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl$$Lambda$5057/0x00000001012a1c40.close(Unknown Source) at org.apache.ignite.internal.util.IgniteUtils.lambda$closeAllManually$1(IgniteUtils.java:631) at org.apache.ignite.internal.util.IgniteUtils$$Lambda$4862/0x0000000101264840.accept(Unknown Source) at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(java.base@11.0.25/ForEachOps.java:183) at java.util.stream.ReferencePipeline$2$1.accept(java.base@11.0.25/ReferencePipeline.java:177) at java.util.Spliterators$ArraySpliterator.forEachRemaining(java.base@11.0.25/Spliterators.java:948) at java.util.stream.AbstractPipeline.copyInto(java.base@11.0.25/AbstractPipeline.java:484) at java.util.stream.AbstractPipeline.wrapAndCopyInto(java.base@11.0.25/AbstractPipeline.java:474) at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(java.base@11.0.25/ForEachOps.java:150) at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(java.base@11.0.25/ForEachOps.java:173) at java.util.stream.AbstractPipeline.evaluate(java.base@11.0.25/AbstractPipeline.java:234) at java.util.stream.ReferencePipeline.forEach(java.base@11.0.25/ReferencePipeline.java:497) at org.apache.ignite.internal.util.IgniteUtils.closeAllManually(IgniteUtils.java:629) at org.apache.ignite.internal.util.IgniteUtils.closeAllManually(IgniteUtils.java:663) at 
org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl.stopAsync(MetaStorageManagerImpl.java:797) at org.apache.ignite.internal.util.IgniteUtils.lambda$stopAsync$6(IgniteUtils.java:1295) at org.apache.ignite.internal.util.IgniteUtils$$Lambda$5003/0x0000000101294040.apply(Unknown Source) at java.util.stream.ReferencePipeline$3$1.accept(java.base@11.0.25/ReferencePipeline.java:195) at java.util.stream.ReferencePipeline$2$1.accept(java.base@11.0.25/ReferencePipeline.java:177) at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(java.base@11.0.25/ArrayList.java:1655) at java.util.stream.AbstractPipeline.copyInto(java.base@11.0.25/AbstractPipeline.java:484) at java.util.stream.AbstractPipeline.wrapAndCopyInto(java.base@11.0.25/AbstractPipeline.java:474) at java.util.stream.AbstractPipeline.evaluate(java.base@11.0.25/AbstractPipeline.java:550) at java.util.stream.AbstractPipeline.evaluateToArrayNode(java.base@11.0.25/AbstractPipeline.java:260) at java.util.stream.ReferencePipeline.toArray(java.base@11.0.25/ReferencePipeline.java:517) at org.apache.ignite.internal.util.IgniteUtils.stopAsync(IgniteUtils.java:1301) at org.apache.ignite.internal.util.IgniteUtils.stopAsync(IgniteUtils.java:1343) at org.apache.ignite.internal.app.LifecycleManager.initiateAllComponentsStop(LifecycleManager.java:181) - locked <0x00000000ca000090> (a org.apache.ignite.internal.app.LifecycleManager) at org.apache.ignite.internal.app.LifecycleManager.stopNode(LifecycleManager.java:155) at org.apache.ignite.internal.app.IgniteImpl.stopAsync(IgniteImpl.java:1692) at org.apache.ignite.internal.app.IgniteServerImpl.triggerStopOnCurrentIgnite(IgniteServerImpl.java:345) at org.apache.ignite.internal.app.IgniteServerImpl.shutdownAsync(IgniteServerImpl.java:334) at org.apache.ignite.internal.app.IgniteServerImpl.shutdown(IgniteServerImpl.java:369) at org.apache.ignite.internal.Cluster$$Lambda$5808/0x0000000101236840.accept(Unknown Source) at 
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(java.base@11.0.25/ForEachOps.java:183) at java.util.stream.ReferencePipeline$2$1.accept(java.base@11.0.25/ReferencePipeline.java:177) at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(java.base@11.0.25/ArrayList.java:1655) at java.util.stream.AbstractPipeline.copyInto(java.base@11.0.25/AbstractPipeline.java:484) at java.util.stream.ForEachOps$ForEachTask.compute(java.base@11.0.25/ForEachOps.java:290) at java.util.concurrent.CountedCompleter.exec(java.base@11.0.25/CountedCompleter.java:746) at java.util.concurrent.ForkJoinTask.doExec$$$capture(java.base@11.0.25/ForkJoinTask.java:290) at java.util.concurrent.ForkJoinTask.doExec(java.base@11.0.25/ForkJoinTask.java) at java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(java.base@11.0.25/ForkJoinPool.java:1020) at java.util.concurrent.ForkJoinPool.scan(java.base@11.0.25/ForkJoinPool.java:1656) at java.util.concurrent.ForkJoinPool.runWorker(java.base@11.0.25/ForkJoinPool.java:1594) at java.util.concurrent.ForkJoinWorkerThread.run(java.base@11.0.25/ForkJoinWorkerThread.java:183) > ItCmgDisasterRecoveryTest#repairIsPossibleWhenAllNodesWaitForCmgMajorityOnJoin > may hang > --------------------------------------------------------------------------------------- > > Key: IGNITE-25819 > URL: https://issues.apache.org/jira/browse/IGNITE-25819 > Project: Ignite > Issue Type: Bug > Reporter: Alexander Lapin > Assignee: Roman Puchkovskiy > Priority: Major > Labels: Ignite > Time Spent: 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.20.10#820010)