[
https://issues.apache.org/jira/browse/IGNITE-25819?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17987888#comment-17987888
]
Roman Puchkovskiy edited comment on IGNITE-25819 at 7/2/25 12:39 PM:
---------------------------------------------------------------------
There seems to be a deadlock while stopping MetastorageManager. It goes away
when we avoid blocking a networking thread on start activities.
{noformat}
"%icdrt_ripwanwfcmoj_3346%MessagingService-inbound-Default-0-0" #27758 prio=10
os_prio=0 cpu=17,48ms elapsed=270,83s tid=0x000074e6fc0fd800 nid=0x86484
waiting for monitor entry [0x000074e4792fe000] java.lang.Thread.State:
BLOCKED (on object monitor) at
org.apache.ignite.internal.app.LifecycleManager.lambda$allComponentsStartFuture$1(LifecycleManager.java:141)
- waiting to lock <0x00000000ca000090> (a
org.apache.ignite.internal.app.LifecycleManager) at
org.apache.ignite.internal.app.LifecycleManager$$Lambda$4398/0x00000001011b7c40.accept(Unknown
Source) at
java.util.concurrent.CompletableFuture.uniWhenComplete([email protected]/CompletableFuture.java:859)
at
java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire([email protected]/CompletableFuture.java:837)
at
java.util.concurrent.CompletableFuture.postComplete([email protected]/CompletableFuture.java:506)
at
java.util.concurrent.CompletableFuture.complete([email protected]/CompletableFuture.java:2075)
at
org.apache.ignite.internal.raft.RaftGroupServiceImpl.lambda$sendWithRetry$50(RaftGroupServiceImpl.java:719)
at
org.apache.ignite.internal.raft.RaftGroupServiceImpl$$Lambda$2235/0x0000000100d56840.accept(Unknown
Source) at
java.util.concurrent.CompletableFuture.uniWhenComplete([email protected]/CompletableFuture.java:859)
at
java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire([email protected]/CompletableFuture.java:837)
at
java.util.concurrent.CompletableFuture.postComplete([email protected]/CompletableFuture.java:506)
at
java.util.concurrent.CompletableFuture.complete([email protected]/CompletableFuture.java:2075)
at
org.apache.ignite.internal.network.DefaultMessagingService.onInvokeResponse(DefaultMessagingService.java:606)
at
org.apache.ignite.internal.network.DefaultMessagingService.handleInvokeResponse(DefaultMessagingService.java:499)
at
org.apache.ignite.internal.network.DefaultMessagingService.lambda$handleMessageFromNetwork$5(DefaultMessagingService.java:433)
at
org.apache.ignite.internal.network.DefaultMessagingService$$Lambda$2256/0x0000000100d5f440.run(Unknown
Source) at
java.util.concurrent.ThreadPoolExecutor.runWorker([email protected]/ThreadPoolExecutor.java:1128)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run([email protected]/ThreadPoolExecutor.java:628)
at java.lang.Thread.run([email protected]/Thread.java:834) {noformat}
and
{noformat}
"ForkJoinPool.commonPool-worker-11" #144 daemon prio=5 os_prio=0 cpu=1404,79ms
elapsed=374,77s tid=0x000074e508001000 nid=0x7c748 waiting on condition
[0x000074e6b81aa000] java.lang.Thread.State: TIMED_WAITING (sleeping) at
java.lang.Thread.sleep([email protected]/Native Method) at
org.apache.ignite.internal.util.IgniteSpinReadWriteLock.writeLock(IgniteSpinReadWriteLock.java:257)
at
org.apache.ignite.internal.util.IgniteSpinBusyLock.block(IgniteSpinBusyLock.java:68)
at
org.apache.ignite.internal.raft.RaftGroupServiceImpl.shutdown(RaftGroupServiceImpl.java:561)
at
org.apache.ignite.internal.raft.client.TopologyAwareRaftGroupService.shutdown(TopologyAwareRaftGroupService.java:522)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageServiceContext.close(MetaStorageServiceContext.java:75)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageServiceImpl.close(MetaStorageServiceImpl.java:308)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl$$Lambda$5059/0x00000001012a2440.accept(Unknown
Source) at
org.apache.ignite.internal.util.IgniteUtils.consumeIfFinishedSuccessfully(IgniteUtils.java:1034)
at
org.apache.ignite.internal.util.IgniteUtils.failOrConsume(IgniteUtils.java:1020)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl.lambda$stopAsync$29(MetaStorageManagerImpl.java:800)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl$$Lambda$5057/0x00000001012a1c40.close(Unknown
Source) at
org.apache.ignite.internal.util.IgniteUtils.lambda$closeAllManually$1(IgniteUtils.java:631)
at
org.apache.ignite.internal.util.IgniteUtils$$Lambda$4862/0x0000000101264840.accept(Unknown
Source) at
java.util.stream.ForEachOps$ForEachOp$OfRef.accept([email protected]/ForEachOps.java:183)
at
java.util.stream.ReferencePipeline$2$1.accept([email protected]/ReferencePipeline.java:177)
at
java.util.Spliterators$ArraySpliterator.forEachRemaining([email protected]/Spliterators.java:948)
at
java.util.stream.AbstractPipeline.copyInto([email protected]/AbstractPipeline.java:484)
at
java.util.stream.AbstractPipeline.wrapAndCopyInto([email protected]/AbstractPipeline.java:474)
at
java.util.stream.ForEachOps$ForEachOp.evaluateSequential([email protected]/ForEachOps.java:150)
at
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential([email protected]/ForEachOps.java:173)
at
java.util.stream.AbstractPipeline.evaluate([email protected]/AbstractPipeline.java:234)
at
java.util.stream.ReferencePipeline.forEach([email protected]/ReferencePipeline.java:497)
at
org.apache.ignite.internal.util.IgniteUtils.closeAllManually(IgniteUtils.java:629)
at
org.apache.ignite.internal.util.IgniteUtils.closeAllManually(IgniteUtils.java:663)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl.stopAsync(MetaStorageManagerImpl.java:797)
at
org.apache.ignite.internal.util.IgniteUtils.lambda$stopAsync$6(IgniteUtils.java:1295)
at
org.apache.ignite.internal.util.IgniteUtils$$Lambda$5003/0x0000000101294040.apply(Unknown
Source) at
java.util.stream.ReferencePipeline$3$1.accept([email protected]/ReferencePipeline.java:195)
at
java.util.stream.ReferencePipeline$2$1.accept([email protected]/ReferencePipeline.java:177)
at
java.util.ArrayList$ArrayListSpliterator.forEachRemaining([email protected]/ArrayList.java:1655)
at
java.util.stream.AbstractPipeline.copyInto([email protected]/AbstractPipeline.java:484)
at
java.util.stream.AbstractPipeline.wrapAndCopyInto([email protected]/AbstractPipeline.java:474)
at
java.util.stream.AbstractPipeline.evaluate([email protected]/AbstractPipeline.java:550)
at
java.util.stream.AbstractPipeline.evaluateToArrayNode([email protected]/AbstractPipeline.java:260)
at
java.util.stream.ReferencePipeline.toArray([email protected]/ReferencePipeline.java:517)
at
org.apache.ignite.internal.util.IgniteUtils.stopAsync(IgniteUtils.java:1301) at
org.apache.ignite.internal.util.IgniteUtils.stopAsync(IgniteUtils.java:1343) at
org.apache.ignite.internal.app.LifecycleManager.initiateAllComponentsStop(LifecycleManager.java:181)
- locked <0x00000000ca000090> (a
org.apache.ignite.internal.app.LifecycleManager) at
org.apache.ignite.internal.app.LifecycleManager.stopNode(LifecycleManager.java:155)
at org.apache.ignite.internal.app.IgniteImpl.stopAsync(IgniteImpl.java:1692)
at
org.apache.ignite.internal.app.IgniteServerImpl.triggerStopOnCurrentIgnite(IgniteServerImpl.java:345)
at
org.apache.ignite.internal.app.IgniteServerImpl.shutdownAsync(IgniteServerImpl.java:334)
at
org.apache.ignite.internal.app.IgniteServerImpl.shutdown(IgniteServerImpl.java:369)
at
org.apache.ignite.internal.Cluster$$Lambda$5808/0x0000000101236840.accept(Unknown
Source) at
java.util.stream.ForEachOps$ForEachOp$OfRef.accept([email protected]/ForEachOps.java:183)
at
java.util.stream.ReferencePipeline$2$1.accept([email protected]/ReferencePipeline.java:177)
at
java.util.ArrayList$ArrayListSpliterator.forEachRemaining([email protected]/ArrayList.java:1655)
at
java.util.stream.AbstractPipeline.copyInto([email protected]/AbstractPipeline.java:484)
at
java.util.stream.ForEachOps$ForEachTask.compute([email protected]/ForEachOps.java:290)
at
java.util.concurrent.CountedCompleter.exec([email protected]/CountedCompleter.java:746)
at
java.util.concurrent.ForkJoinTask.doExec$$$capture([email protected]/ForkJoinTask.java:290)
at
java.util.concurrent.ForkJoinTask.doExec([email protected]/ForkJoinTask.java)
at
java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec([email protected]/ForkJoinPool.java:1020)
at
java.util.concurrent.ForkJoinPool.scan([email protected]/ForkJoinPool.java:1656)
at
java.util.concurrent.ForkJoinPool.runWorker([email protected]/ForkJoinPool.java:1594)
at
java.util.concurrent.ForkJoinWorkerThread.run([email protected]/ForkJoinWorkerThread.java:183)
{noformat}
was (Author: rpuch):
There seems to be a deadlock while stopping MetastorageManager. It goes away
when we avoid blocking a networking thread on start activities.
"%icdrt_ripwanwfcmoj_3346%MessagingService-inbound-Default-0-0" #27758 prio=10
os_prio=0 cpu=17,48ms elapsed=270,83s tid=0x000074e6fc0fd800 nid=0x86484
waiting for monitor entry [0x000074e4792fe000]
java.lang.Thread.State: BLOCKED (on object monitor)
at
org.apache.ignite.internal.app.LifecycleManager.lambda$allComponentsStartFuture$1(LifecycleManager.java:141)
- waiting to lock <0x00000000ca000090> (a
org.apache.ignite.internal.app.LifecycleManager)
at
org.apache.ignite.internal.app.LifecycleManager$$Lambda$4398/0x00000001011b7c40.accept(Unknown
Source)
at
java.util.concurrent.CompletableFuture.uniWhenComplete([email protected]/CompletableFuture.java:859)
at
java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire([email protected]/CompletableFuture.java:837)
at
java.util.concurrent.CompletableFuture.postComplete([email protected]/CompletableFuture.java:506)
at
java.util.concurrent.CompletableFuture.complete([email protected]/CompletableFuture.java:2075)
at
org.apache.ignite.internal.raft.RaftGroupServiceImpl.lambda$sendWithRetry$50(RaftGroupServiceImpl.java:719)
at
org.apache.ignite.internal.raft.RaftGroupServiceImpl$$Lambda$2235/0x0000000100d56840.accept(Unknown
Source)
at
java.util.concurrent.CompletableFuture.uniWhenComplete([email protected]/CompletableFuture.java:859)
at
java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire([email protected]/CompletableFuture.java:837)
at
java.util.concurrent.CompletableFuture.postComplete([email protected]/CompletableFuture.java:506)
at
java.util.concurrent.CompletableFuture.complete([email protected]/CompletableFuture.java:2075)
at
org.apache.ignite.internal.network.DefaultMessagingService.onInvokeResponse(DefaultMessagingService.java:606)
at
org.apache.ignite.internal.network.DefaultMessagingService.handleInvokeResponse(DefaultMessagingService.java:499)
at
org.apache.ignite.internal.network.DefaultMessagingService.lambda$handleMessageFromNetwork$5(DefaultMessagingService.java:433)
at
org.apache.ignite.internal.network.DefaultMessagingService$$Lambda$2256/0x0000000100d5f440.run(Unknown
Source)
at
java.util.concurrent.ThreadPoolExecutor.runWorker([email protected]/ThreadPoolExecutor.java:1128)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run([email protected]/ThreadPoolExecutor.java:628)
at java.lang.Thread.run([email protected]/Thread.java:834)
"ForkJoinPool.commonPool-worker-11" #144 daemon prio=5 os_prio=0 cpu=1404,79ms
elapsed=374,77s tid=0x000074e508001000 nid=0x7c748 waiting on condition
[0x000074e6b81aa000]
java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep([email protected]/Native Method)
at
org.apache.ignite.internal.util.IgniteSpinReadWriteLock.writeLock(IgniteSpinReadWriteLock.java:257)
at
org.apache.ignite.internal.util.IgniteSpinBusyLock.block(IgniteSpinBusyLock.java:68)
at
org.apache.ignite.internal.raft.RaftGroupServiceImpl.shutdown(RaftGroupServiceImpl.java:561)
at
org.apache.ignite.internal.raft.client.TopologyAwareRaftGroupService.shutdown(TopologyAwareRaftGroupService.java:522)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageServiceContext.close(MetaStorageServiceContext.java:75)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageServiceImpl.close(MetaStorageServiceImpl.java:308)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl$$Lambda$5059/0x00000001012a2440.accept(Unknown
Source)
at
org.apache.ignite.internal.util.IgniteUtils.consumeIfFinishedSuccessfully(IgniteUtils.java:1034)
at
org.apache.ignite.internal.util.IgniteUtils.failOrConsume(IgniteUtils.java:1020)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl.lambda$stopAsync$29(MetaStorageManagerImpl.java:800)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl$$Lambda$5057/0x00000001012a1c40.close(Unknown
Source)
at
org.apache.ignite.internal.util.IgniteUtils.lambda$closeAllManually$1(IgniteUtils.java:631)
at
org.apache.ignite.internal.util.IgniteUtils$$Lambda$4862/0x0000000101264840.accept(Unknown
Source)
at
java.util.stream.ForEachOps$ForEachOp$OfRef.accept([email protected]/ForEachOps.java:183)
at
java.util.stream.ReferencePipeline$2$1.accept([email protected]/ReferencePipeline.java:177)
at
java.util.Spliterators$ArraySpliterator.forEachRemaining([email protected]/Spliterators.java:948)
at
java.util.stream.AbstractPipeline.copyInto([email protected]/AbstractPipeline.java:484)
at
java.util.stream.AbstractPipeline.wrapAndCopyInto([email protected]/AbstractPipeline.java:474)
at
java.util.stream.ForEachOps$ForEachOp.evaluateSequential([email protected]/ForEachOps.java:150)
at
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential([email protected]/ForEachOps.java:173)
at
java.util.stream.AbstractPipeline.evaluate([email protected]/AbstractPipeline.java:234)
at
java.util.stream.ReferencePipeline.forEach([email protected]/ReferencePipeline.java:497)
at
org.apache.ignite.internal.util.IgniteUtils.closeAllManually(IgniteUtils.java:629)
at
org.apache.ignite.internal.util.IgniteUtils.closeAllManually(IgniteUtils.java:663)
at
org.apache.ignite.internal.metastorage.impl.MetaStorageManagerImpl.stopAsync(MetaStorageManagerImpl.java:797)
at
org.apache.ignite.internal.util.IgniteUtils.lambda$stopAsync$6(IgniteUtils.java:1295)
at
org.apache.ignite.internal.util.IgniteUtils$$Lambda$5003/0x0000000101294040.apply(Unknown
Source)
at
java.util.stream.ReferencePipeline$3$1.accept([email protected]/ReferencePipeline.java:195)
at
java.util.stream.ReferencePipeline$2$1.accept([email protected]/ReferencePipeline.java:177)
at
java.util.ArrayList$ArrayListSpliterator.forEachRemaining([email protected]/ArrayList.java:1655)
at
java.util.stream.AbstractPipeline.copyInto([email protected]/AbstractPipeline.java:484)
at
java.util.stream.AbstractPipeline.wrapAndCopyInto([email protected]/AbstractPipeline.java:474)
at
java.util.stream.AbstractPipeline.evaluate([email protected]/AbstractPipeline.java:550)
at
java.util.stream.AbstractPipeline.evaluateToArrayNode([email protected]/AbstractPipeline.java:260)
at
java.util.stream.ReferencePipeline.toArray([email protected]/ReferencePipeline.java:517)
at org.apache.ignite.internal.util.IgniteUtils.stopAsync(IgniteUtils.java:1301)
at org.apache.ignite.internal.util.IgniteUtils.stopAsync(IgniteUtils.java:1343)
at
org.apache.ignite.internal.app.LifecycleManager.initiateAllComponentsStop(LifecycleManager.java:181)
- locked <0x00000000ca000090> (a
org.apache.ignite.internal.app.LifecycleManager)
at
org.apache.ignite.internal.app.LifecycleManager.stopNode(LifecycleManager.java:155)
at org.apache.ignite.internal.app.IgniteImpl.stopAsync(IgniteImpl.java:1692)
at
org.apache.ignite.internal.app.IgniteServerImpl.triggerStopOnCurrentIgnite(IgniteServerImpl.java:345)
at
org.apache.ignite.internal.app.IgniteServerImpl.shutdownAsync(IgniteServerImpl.java:334)
at
org.apache.ignite.internal.app.IgniteServerImpl.shutdown(IgniteServerImpl.java:369)
at
org.apache.ignite.internal.Cluster$$Lambda$5808/0x0000000101236840.accept(Unknown
Source)
at
java.util.stream.ForEachOps$ForEachOp$OfRef.accept([email protected]/ForEachOps.java:183)
at
java.util.stream.ReferencePipeline$2$1.accept([email protected]/ReferencePipeline.java:177)
at
java.util.ArrayList$ArrayListSpliterator.forEachRemaining([email protected]/ArrayList.java:1655)
at
java.util.stream.AbstractPipeline.copyInto([email protected]/AbstractPipeline.java:484)
at
java.util.stream.ForEachOps$ForEachTask.compute([email protected]/ForEachOps.java:290)
at
java.util.concurrent.CountedCompleter.exec([email protected]/CountedCompleter.java:746)
at
java.util.concurrent.ForkJoinTask.doExec$$$capture([email protected]/ForkJoinTask.java:290)
at java.util.concurrent.ForkJoinTask.doExec([email protected]/ForkJoinTask.java)
at
java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec([email protected]/ForkJoinPool.java:1020)
at
java.util.concurrent.ForkJoinPool.scan([email protected]/ForkJoinPool.java:1656)
at
java.util.concurrent.ForkJoinPool.runWorker([email protected]/ForkJoinPool.java:1594)
at
java.util.concurrent.ForkJoinWorkerThread.run([email protected]/ForkJoinWorkerThread.java:183)
> ItCmgDisasterRecoveryTest#repairIsPossibleWhenAllNodesWaitForCmgMajorityOnJoin
> may hang
> ---------------------------------------------------------------------------------------
>
> Key: IGNITE-25819
> URL: https://issues.apache.org/jira/browse/IGNITE-25819
> Project: Ignite
> Issue Type: Bug
> Reporter: Alexander Lapin
> Assignee: Roman Puchkovskiy
> Priority: Major
> Labels: Ignite
> Time Spent: 10m
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)