[ https://issues.apache.org/jira/browse/IGNITE-8136?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Vladislav Pyatkov updated IGNITE-8136: -------------------------------------- Description: A node freezes in a long GC pause and is segmented afterwards, but if it then fails to stop, like this: {noformat} "Thread-76137" #4835330 daemon prio=5 os_prio=0 tid=0x00007ef23c042800 nid=0x27992c in Object.wait() [0x00007e57bbbba000] java.lang.Thread.State: WAITING (on object monitor) at java.lang.Object.wait(Native Method) at java.lang.Object.wait(Object.java:502) at org.apache.ignite.internal.util.worker.GridWorker.join(GridWorker.java:233) - locked <0x00007ef8babdb0f8> (a java.lang.Object) at org.apache.ignite.internal.util.IgniteUtils.join(IgniteUtils.java:4655) at org.apache.ignite.internal.util.IgniteUtils.join(IgniteUtils.java:4681) at org.apache.ignite.internal.processors.job.GridJobProcessor.onKernalStop(GridJobProcessor.java:311) at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2039) at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:1987) at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2512) - locked <0x00007ef7a166eb70> (a org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance) at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2475) at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:362) at org.apache.ignite.Ignition.stop(Ignition.java:224) at org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$10.run(GridDiscoveryManager.java:2373) at java.lang.Thread.run(Thread.java:745) "pub-#1032155%DPL_GRID%DplGridNodeName%" #4832845 prio=5 os_prio=0 tid=0x00007ef2ec10c000 nid=0x277864 waiting on condition [0x00007e57b652e000] java.lang.Thread.State: RUNNABLE at org.apache.ignite.internal.binary.streams.BinaryMemoryAllocatorChunk.reallocate(BinaryMemoryAllocatorChunk.java:69) at org.apache.ignite.internal.binary.streams.BinaryHeapOutputStream.ensureCapacity(BinaryHeapOutputStream.java:65) at 
org.apache.ignite.internal.binary.streams.BinaryAbstractOutputStream.writeByte(BinaryAbstractOutputStream.java:34) at org.apache.ignite.internal.binary.BinaryWriterExImpl.doWriteString(BinaryWriterExImpl.java:413) at org.apache.ignite.internal.binary.BinaryWriterExImpl.writeStringField(BinaryWriterExImpl.java:1124) at org.apache.ignite.internal.binary.BinaryFieldAccessor$DefaultFinalClassAccessor.write(BinaryFieldAccessor.java:531) at org.apache.ignite.internal.binary.BinaryClassDescriptor.write(BinaryClassDescriptor.java:794) at org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal0(BinaryWriterExImpl.java:206) at org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal(BinaryWriterExImpl.java:147) at org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal(BinaryWriterExImpl.java:134) at org.apache.ignite.internal.binary.BinaryWriterExImpl.doWriteObject(BinaryWriterExImpl.java:496) at org.apache.ignite.internal.binary.BinaryWriterExImpl.doWriteCollection(BinaryWriterExImpl.java:764) at org.apache.ignite.internal.binary.BinaryClassDescriptor.write(BinaryClassDescriptor.java:694) at org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal0(BinaryWriterExImpl.java:206) at org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal(BinaryWriterExImpl.java:147) at org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal(BinaryWriterExImpl.java:134) at org.apache.ignite.internal.binary.GridBinaryMarshaller.marshal(GridBinaryMarshaller.java:251) at org.apache.ignite.internal.binary.BinaryMarshaller.marshal0(BinaryMarshaller.java:82) at org.apache.ignite.marshaller.AbstractNodeNameAwareMarshaller.marshal(AbstractNodeNameAwareMarshaller.java:58) at org.apache.ignite.internal.util.IgniteUtils.marshal(IgniteUtils.java:9971) at org.apache.ignite.internal.processors.job.GridJobWorker.finishJob(GridJobWorker.java:832) at org.apache.ignite.internal.processors.job.GridJobWorker.finishJob(GridJobWorker.java:773) at 
org.apache.ignite.internal.processors.job.GridJobWorker.execute0(GridJobWorker.java:625) at org.apache.ignite.internal.processors.job.GridJobWorker.body(GridJobWorker.java:489) at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110) at org.apache.ignite.internal.processors.job.GridJobProcessor.processJobExecuteRequest(GridJobProcessor.java:1189) at org.apache.ignite.internal.processors.job.GridJobProcessor$JobExecutionListener.onMessage(GridJobProcessor.java:1921) at org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1555) at org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1183) at org.apache.ignite.internal.managers.communication.GridIoManager.access$4200(GridIoManager.java:126) at org.apache.ignite.internal.managers.communication.GridIoManager$9.run(GridIoManager.java:1090) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) {noformat} Half of the cluster nodes will detect that the node has failed (with lower order). As a result, we get a different topology on different nodes. was: A node freezes in a long GC pause and is segmented afterwards, but if it then fails to stop, like this: {noformat} Ignition.stop() {noformat} Half of the cluster nodes will detect that the node has failed (with lower order). As a result, we get a different topology on different nodes. 
> Discovery service wrong works if node stopping by segmentation and hangs > ------------------------------------------------------------------------ > > Key: IGNITE-8136 > URL: https://issues.apache.org/jira/browse/IGNITE-8136 > Project: Ignite > Issue Type: Bug > Reporter: Vladislav Pyatkov > Priority: Major > > A node freezes in a long GC pause and is segmented afterwards, but if it > then fails to stop, like this: > {noformat} > "Thread-76137" #4835330 daemon prio=5 os_prio=0 tid=0x00007ef23c042800 > nid=0x27992c in Object.wait() [0x00007e57bbbba000] > java.lang.Thread.State: WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > at java.lang.Object.wait(Object.java:502) > at > org.apache.ignite.internal.util.worker.GridWorker.join(GridWorker.java:233) > - locked <0x00007ef8babdb0f8> (a java.lang.Object) > at > org.apache.ignite.internal.util.IgniteUtils.join(IgniteUtils.java:4655) > at > org.apache.ignite.internal.util.IgniteUtils.join(IgniteUtils.java:4681) > at > org.apache.ignite.internal.processors.job.GridJobProcessor.onKernalStop(GridJobProcessor.java:311) > at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2039) > at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:1987) > at > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2512) > - locked <0x00007ef7a166eb70> (a > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance) > at > org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2475) > at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:362) > at org.apache.ignite.Ignition.stop(Ignition.java:224) > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$10.run(GridDiscoveryManager.java:2373) > at java.lang.Thread.run(Thread.java:745) > "pub-#1032155%DPL_GRID%DplGridNodeName%" #4832845 prio=5 os_prio=0 > tid=0x00007ef2ec10c000 nid=0x277864 waiting on condition [0x00007e57b652e000] > java.lang.Thread.State: RUNNABLE > at > 
org.apache.ignite.internal.binary.streams.BinaryMemoryAllocatorChunk.reallocate(BinaryMemoryAllocatorChunk.java:69) > at > org.apache.ignite.internal.binary.streams.BinaryHeapOutputStream.ensureCapacity(BinaryHeapOutputStream.java:65) > at > org.apache.ignite.internal.binary.streams.BinaryAbstractOutputStream.writeByte(BinaryAbstractOutputStream.java:34) > at > org.apache.ignite.internal.binary.BinaryWriterExImpl.doWriteString(BinaryWriterExImpl.java:413) > at > org.apache.ignite.internal.binary.BinaryWriterExImpl.writeStringField(BinaryWriterExImpl.java:1124) > at > org.apache.ignite.internal.binary.BinaryFieldAccessor$DefaultFinalClassAccessor.write(BinaryFieldAccessor.java:531) > at > org.apache.ignite.internal.binary.BinaryClassDescriptor.write(BinaryClassDescriptor.java:794) > at > org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal0(BinaryWriterExImpl.java:206) > at > org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal(BinaryWriterExImpl.java:147) > at > org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal(BinaryWriterExImpl.java:134) > at > org.apache.ignite.internal.binary.BinaryWriterExImpl.doWriteObject(BinaryWriterExImpl.java:496) > at > org.apache.ignite.internal.binary.BinaryWriterExImpl.doWriteCollection(BinaryWriterExImpl.java:764) > at > org.apache.ignite.internal.binary.BinaryClassDescriptor.write(BinaryClassDescriptor.java:694) > at > org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal0(BinaryWriterExImpl.java:206) > at > org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal(BinaryWriterExImpl.java:147) > at > org.apache.ignite.internal.binary.BinaryWriterExImpl.marshal(BinaryWriterExImpl.java:134) > at > org.apache.ignite.internal.binary.GridBinaryMarshaller.marshal(GridBinaryMarshaller.java:251) > at > org.apache.ignite.internal.binary.BinaryMarshaller.marshal0(BinaryMarshaller.java:82) > at > org.apache.ignite.marshaller.AbstractNodeNameAwareMarshaller.marshal(AbstractNodeNameAwareMarshaller.java:58) > 
at > org.apache.ignite.internal.util.IgniteUtils.marshal(IgniteUtils.java:9971) > at > org.apache.ignite.internal.processors.job.GridJobWorker.finishJob(GridJobWorker.java:832) > at > org.apache.ignite.internal.processors.job.GridJobWorker.finishJob(GridJobWorker.java:773) > at > org.apache.ignite.internal.processors.job.GridJobWorker.execute0(GridJobWorker.java:625) > at > org.apache.ignite.internal.processors.job.GridJobWorker.body(GridJobWorker.java:489) > at > org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110) > at > org.apache.ignite.internal.processors.job.GridJobProcessor.processJobExecuteRequest(GridJobProcessor.java:1189) > at > org.apache.ignite.internal.processors.job.GridJobProcessor$JobExecutionListener.onMessage(GridJobProcessor.java:1921) > at > org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1555) > at > org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1183) > at > org.apache.ignite.internal.managers.communication.GridIoManager.access$4200(GridIoManager.java:126) > at > org.apache.ignite.internal.managers.communication.GridIoManager$9.run(GridIoManager.java:1090) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > {noformat} > Half of the cluster nodes will detect that the node has failed (with lower > order). > As a result, we get a different topology on different nodes. -- This message was sent by Atlassian JIRA (v7.6.3#76005)