Hi guys, we are using Ignite 2.3.0. We have an Ignite cluster in production with 4 server nodes. Recently we found that Ignite occasionally throws "Connection reset by peer" after some complex queries. We know this is caused by the connection being closed while the socket is being read or written, but why was it closed?
Can you see anything abnormal in our log below? The Ignite server and client threw the exception at almost the same time, and the exception occurred only 3 seconds after we issued the query. I see that bytesSent and bytesRcvd are very large, but I don't know whether that is related. Thanks. Server log 2018-09-06_00:28:13.996 [ERROR] [grid-nio-worker-tcp-comm-23-#192%PROD_IDEA_default_SZ_NewCluster%] [o.a.i.s.c.tcp.TcpCommunicationSpi] Failed to process selector key [ses=GridSelectorNioSessionImpl [worker=DirectNioClientWorker [super=AbstractNioClientWorker [idx=23, bytesRcvd=10166800, bytesSent=27235902413, bytesRcvd0=730, bytesSent0=119379534, select=true, super=GridWorker [name=grid-nio-worker-tcp-comm-23, igniteInstanceName=PROD_IDEA_default_SZ_NewCluster, finished=false, hashCode=1200501937, interrupted=false, runner=grid-nio-worker-tcp-comm-23-#192%PROD_IDEA_default_SZ_NewCluster%]]], writeBuf=java.nio.DirectByteBuffer[pos=4786 lim=32768 cap=32768], readBuf=java.nio.DirectByteBuffer[pos=0 lim=32768 cap=32768], inRecovery=GridNioRecoveryDescriptor [acked=2784, resendCnt=0, rcvCnt=2261, sentCnt=2805, reserved=true, lastAck=2240, nodeLeft=false, node=TcpDiscoveryNode [id=9770d3e3-83e8-498c-a10b-c0bb991cbd60, addrs=[10.42.223.207, 127.0.0.1], sockAddrs=[/10.42.223.207:0, /127.0.0.1:0], discPort=0, order=335, intOrder=179, lastExchangeTime=1536094829004, loc=false, ver=2.3.0#20171028-sha1:8add7fd5, isClient=true], connected=true, connectCnt=0, queueLimit=4096, reserveCnt=1, pairedConnections=false], outRecovery=GridNioRecoveryDescriptor [acked=2784, resendCnt=0, rcvCnt=2261, sentCnt=2805, reserved=true, lastAck=2240, nodeLeft=false, node=TcpDiscoveryNode [id=9770d3e3-83e8-498c-a10b-c0bb991cbd60, addrs=[10.42.223.207, 127.0.0.1], sockAddrs=[/10.42.223.207:0, /127.0.0.1:0], discPort=0, order=335, intOrder=179, lastExchangeTime=1536094829004, loc=false, ver=2.3.0#20171028-sha1:8add7fd5, isClient=true], connected=true, connectCnt=0, queueLimit=4096, reserveCnt=1, 
pairedConnections=false], super=GridNioSessionImpl [locAddr=/26.2.17.163:47100, rmtAddr=/26.2.17.15:37836, createTime=1536094833590, closeTime=0, bytesSent=5894311985, bytesRcvd=2541962, bytesSent0=119379534, bytesRcvd0=730, sndSchedTime=1536163292987, lastSndTime=1536164892990, lastRcvTime=1536164892980, readsPaused=false, filterChain=FilterChain[filters=[GridNioCodecFilter [parser=o.a.i.i.util.nio.GridDirectParser@40ad221b, directMode=true], GridConnectionBytesVerifyFilter], accepted=true]]] java.io.IOException: Connection reset by peer at sun.nio.ch.FileDispatcherImpl.read0(Native Method) at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:39) at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223) at sun.nio.ch.IOUtil.read(IOUtil.java:192) at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:380) at org.apache.ignite.internal.util.nio.GridNioServer$DirectNioClientWorker.processRead(GridNioServer.java:1233) at org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.processSelectedKeysOptimized(GridNioServer.java:2272) at org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.bodyInternal(GridNioServer.java:2048) at org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.body(GridNioServer.java:1717) at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110) at java.lang.Thread.run(Thread.java:748) 2018-09-06_00:28:13.997 [WARN ] [grid-nio-worker-tcp-comm-23-#192%PROD_IDEA_default_SZ_NewCluster%] [o.a.i.s.c.tcp.TcpCommunicationSpi] Closing NIO session because of unhandled exception [cls=class o.a.i.i.util.nio.GridNioException, msg=Connection reset by peer] Client log 2018-09-06_00:28:13.995 [ERROR] [grid-nio-worker-tcp-comm-5-#181%PROD_IDEA_default_SZ_NewCluster%] [o.a.i.s.c.tcp.TcpCommunicationSpi] Failed to process selector key [ses=GridSelectorNioSessionImpl [worker=DirectNioClientWorker [super=AbstractNioClientWorker [idx=5, bytesRcvd=5830373027, bytesSent=2485475, 
bytesRcvd0=233844279, bytesSent0=9333, select=true, super=GridWorker [name=grid-nio-worker-tcp-comm-5, igniteInstanceName=PROD_IDEA_default_SZ_NewCluster, finished=false, hashCode=918657525, interrupted=false, runner=grid-nio-worker-tcp-comm-5-#181%PROD_IDEA_default_SZ_NewCluster%]]], writeBuf=java.nio.DirectByteBuffer[pos=0 lim=32768 cap=32768], readBuf=java.nio.DirectByteBuffer[pos=0 lim=32768 cap=32768], inRecovery=GridNioRecoveryDescriptor [acked=2240, resendCnt=0, rcvCnt=2804, sentCnt=2261, reserved=true, lastAck=2784, nodeLeft=false, node=TcpDiscoveryNode [id=ad141a60-e830-4e48-850a-ece6a8a45eb1, addrs=[26.2.17.163], sockAddrs=[stsz030013/26.2.17.163:47500], discPort=47500, order=4, intOrder=4, lastExchangeTime=1536094829041, loc=false, ver=2.3.0#20171028-sha1:8add7fd5, isClient=false], connected=false, connectCnt=1, queueLimit=4096, reserveCnt=1, pairedConnections=false], outRecovery=GridNioRecoveryDescriptor [acked=2240, resendCnt=0, rcvCnt=2804, sentCnt=2261, reserved=true, lastAck=2784, nodeLeft=false, node=TcpDiscoveryNode [id=ad141a60-e830-4e48-850a-ece6a8a45eb1, addrs=[26.2.17.163], sockAddrs=[stsz030013/26.2.17.163:47500], discPort=47500, order=4, intOrder=4, lastExchangeTime=1536094829041, loc=false, ver=2.3.0#20171028-sha1:8add7fd5, isClient=false], connected=false, connectCnt=1, queueLimit=4096, reserveCnt=1, pairedConnections=false], super=GridNioSessionImpl [locAddr=/10.42.223.207:37836, rmtAddr=stsz030013/26.2.17.163:47100, createTime=1536094833584, closeTime=0, bytesSent=2541920, bytesRcvd=5894286307, bytesSent0=9333, bytesRcvd0=233844279, sndSchedTime=1536163834744, lastSndTime=1536164892989, lastRcvTime=1536164892989, readsPaused=false, filterChain=FilterChain[filters=[GridNioCodecFilter [parser=o.a.i.i.util.nio.GridDirectParser@5df3dd31, directMode=true], GridConnectionBytesVerifyFilter], accepted=false]]] java.io.IOException: Connection reset by peer at sun.nio.ch.FileDispatcherImpl.read0(Native Method) at 
sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:39) at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223) at sun.nio.ch.IOUtil.read(IOUtil.java:192) at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:380) at org.apache.ignite.internal.util.nio.GridNioServer$DirectNioClientWorker.processRead(GridNioServer.java:1233) at org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.processSelectedKeysOptimized(GridNioServer.java:2272) at org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.bodyInternal(GridNioServer.java:2048) at org.apache.ignite.internal.util.nio.GridNioServer$AbstractNioClientWorker.body(GridNioServer.java:1717) at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110) at java.lang.Thread.run(Thread.java:748) 2018-09-06_00:28:13.996 [WARN ] [grid-nio-worker-tcp-comm-5-#181%PROD_IDEA_default_SZ_NewCluster%] [o.a.i.s.c.tcp.TcpCommunicationSpi] Closing NIO session because of unhandled exception [cls=class o.a.i.i.util.nio.GridNioException, msg=Connection reset by peer] -- Sent from: http://apache-ignite-users.70518.x6.nabble.com/