I see that you use both IPv4 and IPv6 for some nodes; there is a known
issue with this. I would recommend restricting Ignite to IPv4 via the
-Djava.net.preferIPv4Stack=true JVM parameter for all nodes in the cluster,
including clients. I've seen communication issues with this before.

Evgenii

ср, 15 апр. 2020 г. в 11:31, Rajan Ahlawat <rajan.ahla...@gmail.com>:

> The client logs and stack trace have been shared.
> The client just keeps trying to connect and the server keeps throwing socket timeouts.
> The stack trace I gave is the one I caught when I tried to connect to this
> problematic Ignite server.
>
> About the default settings: in our environment we have only
> default timeouts. We tried increasing all of these timeouts on
> the client side, but with no success.
> On the server side, we can't tweak these timeout values right now unless
> we are sure of the fix.
>
>
> On Wed, Apr 15, 2020 at 8:06 PM Evgenii Zhuravlev
> <e.zhuravlev...@gmail.com> wrote:
> >
> > Hi,
> >
> > Please provide logs not only from the server node, but from the client
> node too. You mentioned that only one client has this problem, so please
> provide the full log from this node.
> >
> > Also, you said that you set non-default timeouts for clients, while
> there are still default values for the server node - I wouldn't recommend doing
> this; timeouts should be the same for all nodes in the cluster.
> >
> > Evgenii
> >
> > ср, 15 апр. 2020 г. в 03:04, Rajan Ahlawat <rajan.ahla...@gmail.com>:
> >>
> >> Shared file with email-id:
> >> e.zhuravlev...@gmail.com
> >>
> >> We have a single instance of Ignite. The file contains all logs from Mar
> >> 30, 2019. Line 6429 is the first occurrence of the incident.
> >>
> >> On Tue, Apr 14, 2020 at 8:27 PM Evgenii Zhuravlev
> >> <e.zhuravlev...@gmail.com> wrote:
> >> >
> >> > Can you provide full log files from all nodes? It's impossible to
> find the root cause from this.
> >> >
> >> > Evgenii
> >> >
> >> > вт, 14 апр. 2020 г. в 07:49, Rajan Ahlawat <rajan.ahla...@gmail.com>:
> >> >>
> >> >> server starts with following configuration:
> >> >>
> >> >> ignite_application-1-2020-03-17.log:14:[2020-03-17T08:23:33,664][INFO
> >> >> ][main][IgniteKernal%igniteStart] IgniteConfiguration
> >> >> [igniteInstanceName=igniteStart, pubPoolSize=32, svcPoolSize=32,
> >> >> callbackPoolSize=32, stripedPoolSize=32, sysPoolSize=30,
> >> >> mgmtPoolSize=4, igfsPoolSize=32, dataStreamerPoolSize=32,
> >> >> utilityCachePoolSize=32, utilityCacheKeepAliveTime=60000,
> >> >> p2pPoolSize=2, qryPoolSize=32,
> >> >>
> igniteHome=/home/patrochandan01/ignite/apache-ignite-fabric-2.6.0-bin,
> >> >>
> igniteWorkDir=/home/patrochandan01/ignite/apache-ignite-fabric-2.6.0-bin/work,
> >> >> mbeanSrv=com.sun.jmx.mbeanserver.JmxMBeanServer@6f94fa3e,
> >> >> nodeId=53396cb7-1b66-43da-bf10-ebb5f7cc9693,
> >> >> marsh=org.apache.ignite.internal.binary.BinaryMarshaller@42b3b079,
> >> >> marshLocJobs=false, daemon=false, p2pEnabled=false, netTimeout=5000,
> >> >> sndRetryDelay=1000, sndRetryCnt=3, metricsHistSize=10000,
> >> >> metricsUpdateFreq=2000, metricsExpTime=9223372036854775807,
> >> >> discoSpi=TcpDiscoverySpi [addrRslvr=null, sockTimeout=0,
> ackTimeout=0,
> >> >> marsh=null, reconCnt=100, reconDelay=10000, maxAckTimeout=600000,
> >> >> forceSrvMode=false, clientReconnectDisabled=false,
> internalLsnr=null],
> >> >> segPlc=STOP, segResolveAttempts=2, waitForSegOnStart=true,
> >> >> allResolversPassReq=true, segChkFreq=10000,
> >> >> commSpi=TcpCommunicationSpi [connectGate=null, connPlc=null,
> >> >> enableForcibleNodeKill=false, enableTroubleshootingLog=false,
> >> >>
> srvLsnr=org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi$2@6692b6c6
> ,
> >> >> locAddr=null, locHost=null, locPort=47100, locPortRange=100,
> >> >> shmemPort=-1, directBuf=true, directSndBuf=false,
> >> >> idleConnTimeout=600000, connTimeout=5000, maxConnTimeout=600000,
> >> >> reconCnt=10, sockSndBuf=32768, sockRcvBuf=32768, msgQueueLimit=1024,
> >> >> slowClientQueueLimit=1000, nioSrvr=null, shmemSrv=null,
> >> >> usePairedConnections=false, connectionsPerNode=1, tcpNoDelay=true,
> >> >> filterReachableAddresses=false, ackSndThreshold=32,
> >> >> unackedMsgsBufSize=0, sockWriteTimeout=2000, lsnr=null,
> >> >> boundTcpPort=-1, boundTcpShmemPort=-1, selectorsCnt=16,
> >> >> selectorSpins=0, addrRslvr=null,
> >> >> ctxInitLatch=java.util.concurrent.CountDownLatch@1cd629b3[Count =
> 1],
> >> >> stopping=false,
> >> >>
> metricsLsnr=org.apache.ignite.spi.communication.tcp.TcpCommunicationMetricsListener@589da3f3
> ],
> >> >>
> evtSpi=org.apache.ignite.spi.eventstorage.NoopEventStorageSpi@39d76cb5,
> >> >> colSpi=NoopCollisionSpi [], deploySpi=LocalDeploymentSpi [lsnr=null],
> >> >>
> indexingSpi=org.apache.ignite.spi.indexing.noop.NoopIndexingSpi@1cb346ea,
> >> >> addrRslvr=null, clientMode=false, rebalanceThreadPoolSize=1,
> >> >>
> txCfg=org.apache.ignite.configuration.TransactionConfiguration@4c012563,
> >> >> cacheSanityCheckEnabled=true, discoStartupDelay=60000,
> >> >> deployMode=SHARED, p2pMissedCacheSize=100, locHost=null,
> >> >> timeSrvPortBase=31100, timeSrvPortRange=100,
> >> >> failureDetectionTimeout=10000, clientFailureDetectionTimeout=30000,
> >> >> metricsLogFreq=60000, hadoopCfg=null,
> >> >>
> connectorCfg=org.apache.ignite.configuration.ConnectorConfiguration@14a50707
> ,
> >> >> odbcCfg=null, warmupClos=null, atomicCfg=AtomicConfiguration
> >> >> [seqReserveSize=1000, cacheMode=PARTITIONED, backups=1, aff=null,
> >> >> grpName=null], classLdr=null, sslCtxFactory=null, platformCfg=null,
> >> >> binaryCfg=null, memCfg=null, pstCfg=null,
> >> >> dsCfg=DataStorageConfiguration [sysRegionInitSize=41943040,
> >> >> sysCacheMaxSize=104857600, pageSize=0, concLvl=25,
> >> >> dfltDataRegConf=DataRegionConfiguration [name=Default_Region,
> >> >> maxSize=20971520, initSize=15728640, swapPath=null,
> >> >> pageEvictionMode=RANDOM_2_LRU, evictionThreshold=0.9,
> >> >> emptyPagesPoolSize=100, metricsEnabled=false,
> >> >> metricsSubIntervalCount=5, metricsRateTimeInterval=60000,
> >> >> persistenceEnabled=false, checkpointPageBufSize=0], storagePath=null,
> >> >> checkpointFreq=180000, lockWaitTime=10000, checkpointThreads=4,
> >> >> checkpointWriteOrder=SEQUENTIAL, walHistSize=20, walSegments=10,
> >> >> walSegmentSize=67108864, walPath=db/wal,
> >> >> walArchivePath=db/wal/archive, metricsEnabled=false,
> walMode=LOG_ONLY,
> >> >> walTlbSize=131072, walBuffSize=0, walFlushFreq=2000,
> >> >> walFsyncDelay=1000, walRecordIterBuffSize=67108864,
> >> >> alwaysWriteFullPages=false,
> >> >>
> fileIOFactory=org.apache.ignite.internal.processors.cache.persistence.file.AsyncFileIOFactory@4bd31064
> ,
> >> >> metricsSubIntervalCnt=5, metricsRateTimeInterval=60000,
> >> >> walAutoArchiveAfterInactivity=-1, writeThrottlingEnabled=false,
> >> >> walCompactionEnabled=false], activeOnStart=true, autoActivation=true,
> >> >> longQryWarnTimeout=3000, sqlConnCfg=null,
> >> >> cliConnCfg=ClientConnectorConfiguration [host=null, port=10800,
> >> >> portRange=100, sockSndBufSize=0, sockRcvBufSize=0, tcpNoDelay=true,
> >> >> maxOpenCursorsPerConn=128, threadPoolSize=32, idleTimeout=0,
> >> >> jdbcEnabled=true, odbcEnabled=true, thinCliEnabled=true,
> >> >> sslEnabled=false, useIgniteSslCtxFactory=true, sslClientAuth=false,
> >> >> sslCtxFactory=null], authEnabled=false, failureHnd=null,
> >> >> commFailureRslvr=null]
> >> >>
> >> >>
> >> >>
> >> >> and error while connecting client:
> >> >>
> >> >> [2020-04-14T09:41:33,547][WARN
> >> >> ][grid-timeout-worker-#71%igniteStart%][TcpDiscoverySpi] Socket write
> >> >> has timed out (consider increasing 'sockTimeout' configuration
> >> >> property) [sockTimeout=5000, rmtAddr=/10.80.104.224:51856,
> >> >> rmtPort=51856, sockTimeout=5000]
> >> >>
> >> >> In the server configuration we didn't define any socketTimeout; the server
> >> >> might be throwing the socket timeout, not the client. But it occurs for only
> >> >> one particular client and this server. Other web applications are
> able
> >> >> to connect to the same server in our production environment.
> >> >>
> >> >> Thanks
> >> >>
> >> >> On Mon, Apr 13, 2020 at 8:09 PM Evgenii Zhuravlev
> >> >> <e.zhuravlev...@gmail.com> wrote:
> >> >> >
> >> >> > Hi,
> >> >> >
> >> >> > Can you share full logs from all nodes? I mean log files, not the
> console output.
> >> >> >
> >> >> > Evgenii
> >> >> >
> >> >> > вс, 12 апр. 2020 г. в 20:30, Rajan Ahlawat <
> rajan.ahla...@gmail.com>:
> >> >> >>
> >> >> >> ?
> >> >> >>
> >> >> >> On Thu, Apr 9, 2020 at 3:11 AM Rajan Ahlawat <
> rajan.ahla...@gmail.com> wrote:
> >> >> >> >
> >> >> >> > ---------- Forwarded message ---------
> >> >> >> > From: Rajan Ahlawat <rajan.ahla...@gmail.com>
> >> >> >> > Date: Thu, Apr 9, 2020 at 3:09 AM
> >> >> >> > Subject: org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi -
> Failed
> >> >> >> > to reconnect to cluster (will retry): class
> >> >> >> > o.a.i.IgniteCheckedException: Failed to deserialize object with
> given
> >> >> >> > class loader:
> org.springframework.boot.loader.LaunchedURLClassLoader
> >> >> >> > To: <user@ignite.apache.org>
> >> >> >> >
> >> >> >> >
> >> >> >> > Hi
> >> >> >> >
> >> >> >> > We suddenly started getting the following exception on the client side
> after
> >> >> >> > the node running the application got restarted:
> >> >> >> >
> >> >> >> > org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi - Failed to
> >> >> >> > reconnect to cluster (will retry): class
> o.a.i.IgniteCheckedException:
> >> >> >> > Failed to deserialize object with given class loader:
> >> >> >> > org.springframework.boot.loader.LaunchedURLClassLoader
> >> >> >> >
> >> >> >> > I see a similar bug was raised here for version 2.7.0:
> >> >> >> > https://issues.apache.org/jira/browse/IGNITE-11730
> >> >> >> >
> >> >> >> > We are currently using version 2.6.0.
> >> >> >> > The following is our TcpDiscoverySpi configuration:
> >> >> >> >
> >> >> >> > private void setDiscoverySpiConfig(IgniteConfiguration cfg) {
> >> >> >> >     TcpDiscoverySpi discoverySpi = new TcpDiscoverySpi();
> >> >> >> >
> >> >> >> >     setIpFinder(discoverySpi);
> >> >> >> >
>  
> discoverySpi.setNetworkTimeout(platformCachingConfiguration.getIgnite().getSocketTimeout());
> >> >> >> >
>  
> discoverySpi.setSocketTimeout(platformCachingConfiguration.getIgnite().getSocketTimeout());
> >> >> >> >
>  
> discoverySpi.setJoinTimeout(platformCachingConfiguration.getIgnite().getJoinTimeout());
> >> >> >> >
>  
> discoverySpi.setClientReconnectDisabled(platformCachingConfiguration.getIgnite().isClientReconnectDisabled());
> >> >> >> >
>  
> discoverySpi.setReconnectCount(platformCachingConfiguration.getIgnite().getReconnectCount());
> >> >> >> >
>  
> discoverySpi.setReconnectDelay(platformCachingConfiguration.getIgnite().getReconnectDelay());
> >> >> >> >
> >> >> >> >     cfg.setDiscoverySpi(discoverySpi);
> >> >> >> > }
> >> >> >> >
> >> >> >> > Its IPfinder config is
> >> >> >> >
> >> >> >> > private void setTcpIpFinder(TcpDiscoverySpi discoverySpi) {
> >> >> >> >     TcpDiscoveryVmIpFinder ipFinder = new
> TcpDiscoveryVmIpFinder();
> >> >> >> >
> >> >> >> >
>  ipFinder.setAddresses(platformCachingConfiguration.getIgnite().getNodes());
> >> >> >> >     discoverySpi.setIpFinder(ipFinder);
> >> >> >> > }
> >> >> >> >
> >> >> >> > We have tried every combination of timeouts; right now the timeouts
> are
> >> >> >> > set to very high values.
> >> >> >> >
> >> >> >> > (1) Are we hitting the same bug reported for version 2.7.0? The
> bug
> >> >> >> > description says it occurs on the server side, but we are getting the exact
> same
> >> >> >> > stack trace in ClientImpl.java on the client side.
> >> >> >> > (2) Assuming it is the same issue, is there a way to disable the data
> bag
> >> >> >> > compression check? Upgrading both the client and server
> versions
> >> >> >> > will not be possible immediately.
> >> >> >> >
> >> >> >> > Thanks in advance.
>

Reply via email to