Rebalancing Errors
Hello folks, my attempts at partition rebalancing in my Ignite clusters fail with the following errors. Any help would be greatly appreciated. I am using Ignite 2.8.1. [2021-02-02 19:57:03,428][WARN ][tcp-disco-msg-worker-[ef238b3f 10.244.69.14:47500]-#2][root] Possible failure suppressed accordingly to a configured handler [hnd=StopNodeOrHaltFailureHandler [tryStop=false, timeout=0, super=AbstractFailureHandler [ignoredFailureTypes=UnmodifiableSet [SYSTEM_WORKER_BLOCKED, SYSTEM_CRITICAL_OPERATION_TIMEOUT]]], failureCtx=FailureContext [type=SYSTEM_WORKER_BLOCKED, err=class o.a.i.IgniteException: GridWorker [name=partition-exchanger, igniteInstanceName=null, finished=false, heartbeatTs=1612295797718]]] class org.apache.ignite.IgniteException: GridWorker [name=partition-exchanger, igniteInstanceName=null, finished=false, heartbeatTs=1612295797718] at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance$3.apply(IgnitionEx.java:1810) at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance$3.apply(IgnitionEx.java:1805) at org.apache.ignite.internal.worker.WorkersRegistry.onIdle(WorkersRegistry.java:234) at org.apache.ignite.internal.util.worker.GridWorker.onIdle(GridWorker.java:297) at org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.lambda$new$0(ServerImpl.java:2858) at org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorker.body(ServerImpl.java:7759) at org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2946) at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:120) at org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerThread.body(ServerImpl.java:7697) at org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:61) [2021-02-02 19:57:03,437][WARN ][tcp-disco-msg-worker-[ef238b3f 10.244.69.14:47500]-#2][CacheDiagnosticManager] Page locks dump: Thread=[name=checkpoint-runner-#57, id=107], state=WAITING Locked pages = [] Locked pages log: name=checkpoint-runner-#57 
time=(1612295823428, 2021-02-02 19:57:03.428) .. . [2021-02-02 19:58:43,446][WARN ][grid-timeout-worker-#23][G] >>> Possible starvation in striped pool. Thread name: sys-stripe-0-#1 Queue: [Message closure [msg=GridIoMessage [plc=2, topic=TOPIC_CACHE, topicOrd=8, ordered=false, timeout=0, skipOnTimeout=false, msg=CacheContinuousQueryBatchAck [routineId=d5b79564-d336-4b10-895f-f3f59651754c, updateCntrs=HashMap {713=691} Deadlock: false Completed: 5 Thread [name="sys-stripe-0-#1", id=13, state=WAITING, blockCnt=0, waitCnt=8] Lock [object=java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@1c908a6f, ownerName=null, ownerId=-1] at java.base@11.0.9.1/jdk.internal.misc.Unsafe.park(Native Method) at java.base@11.0.9.1/java.util.concurrent.locks.LockSupport.park(LockSupport.java:194) at java.base@11.0.9.1/java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:885) at java.base@11.0.9.1/java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:1009) at java.base@11.0.9.1/java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1324) at java.base@11.0.9.1/java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:738) at app//o.a.i.i.processors.cache.persistence.GridCacheDatabaseSharedManager.checkpointReadLock(GridCacheDatabaseSharedManager.java:1631) at app//o.a.i.i.processors.cache.transactions.IgniteTxManager.lockMultiple(IgniteTxManager.java:1906) at app//o.a.i.i.processors.cache.transactions.IgniteTxManager.prepareTx(IgniteTxManager.java:1136) at app//o.a.i.i.processors.cache.distributed.GridDistributedTxRemoteAdapter.prepareRemoteTx(GridDistributedTxRemoteAdapter.java:432) at app//o.a.i.i.processors.cache.transactions.IgniteTxHandler.startRemoteTx(IgniteTxHandler.java:1858) at app//o.a.i.i.processors.cache.transactions.IgniteTxHandler.processDhtTxPrepareRequest(IgniteTxHandler.java:1201) 
at app//o.a.i.i.processors.cache.transactions.IgniteTxHandler.access$400(IgniteTxHandler.java:123) at app//o.a.i.i.processors.cache.transactions.IgniteTxHandler$5.apply(IgniteTxHandler.java:229) at app//o.a.i.i.processors.cache.transactions.IgniteTxHandler$5.apply(IgniteTxHandler.java:227) at app//o.a.i.i.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:1142) at app//o.a.i.i.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:591) at app//o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:392) at app//o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:318) at app//o.a.i.i.processors.cache.GridCacheIoManager.access$100(GridCacheIoManager.java:109) at
Partition Rebalancing Error
Hello folks, my attempts at partition rebalancing in my Ignite clusters fail with the following errors. Any help would be greatly appreciated. I am using Ignite 2.8.1. [2021-02-02 19:57:03,428][WARN ][tcp-disco-msg-worker-[ef238b3f 10.244.69.14:47500]-#2][root] Possible failure suppressed accordingly to a configured handler [hnd=StopNodeOrHaltFailureHandler [tryStop=false, timeout=0, super=AbstractFailureHandler [ignoredFailureTypes=UnmodifiableSet [SYSTEM_WORKER_BLOCKED, SYSTEM_CRITICAL_OPERATION_TIMEOUT]]], failureCtx=FailureContext [type=SYSTEM_WORKER_BLOCKED, err=class o.a.i.IgniteException: GridWorker [name=partition-exchanger, igniteInstanceName=null, finished=false, heartbeatTs=1612295797718]]] class org.apache.ignite.IgniteException: GridWorker [name=partition-exchanger, igniteInstanceName=null, finished=false, heartbeatTs=1612295797718] at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance$3.apply(IgnitionEx.java:1810) at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance$3.apply(IgnitionEx.java:1805) at org.apache.ignite.internal.worker.WorkersRegistry.onIdle(WorkersRegistry.java:234) at org.apache.ignite.internal.util.worker.GridWorker.onIdle(GridWorker.java:297) at org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.lambda$new$0(ServerImpl.java:2858) at org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorker.body(ServerImpl.java:7759) at org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2946) at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:120) at org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerThread.body(ServerImpl.java:7697) at org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:61) [2021-02-02 19:57:03,437][WARN ][tcp-disco-msg-worker-[ef238b3f 10.244.69.14:47500]-#2][CacheDiagnosticManager] Page locks dump: Thread=[name=checkpoint-runner-#57, id=107], state=WAITING Locked pages = [] Locked pages log: name=checkpoint-runner-#57 
time=(1612295823428, 2021-02-02 19:57:03.428) .. . [2021-02-02 19:58:43,446][WARN ][grid-timeout-worker-#23][G] >>> Possible starvation in striped pool. Thread name: sys-stripe-0-#1 Queue: [Message closure [msg=GridIoMessage [plc=2, topic=TOPIC_CACHE, topicOrd=8, ordered=false, timeout=0, skipOnTimeout=false, msg=CacheContinuousQueryBatchAck [routineId=d5b79564-d336-4b10-895f-f3f59651754c, updateCntrs=HashMap {713=691} Deadlock: false Completed: 5 Thread [name="sys-stripe-0-#1", id=13, state=WAITING, blockCnt=0, waitCnt=8] Lock [object=java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@1c908a6f, ownerName=null, ownerId=-1] at java.base@11.0.9.1/jdk.internal.misc.Unsafe.park(Native Method) at java.base@11.0.9.1/java.util.concurrent.locks.LockSupport.park(LockSupport.java:194) at java.base@11.0.9.1/java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:885) at java.base@11.0.9.1/java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:1009) at java.base@11.0.9.1/java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1324) at java.base@11.0.9.1/java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:738) at app//o.a.i.i.processors.cache.persistence.GridCacheDatabaseSharedManager.checkpointReadLock(GridCacheDatabaseSharedManager.java:1631) at app//o.a.i.i.processors.cache.transactions.IgniteTxManager.lockMultiple(IgniteTxManager.java:1906) at app//o.a.i.i.processors.cache.transactions.IgniteTxManager.prepareTx(IgniteTxManager.java:1136) at app//o.a.i.i.processors.cache.distributed.GridDistributedTxRemoteAdapter.prepareRemoteTx(GridDistributedTxRemoteAdapter.java:432) at app//o.a.i.i.processors.cache.transactions.IgniteTxHandler.startRemoteTx(IgniteTxHandler.java:1858) at app//o.a.i.i.processors.cache.transactions.IgniteTxHandler.processDhtTxPrepareRequest(IgniteTxHandler.java:1201) 
at app//o.a.i.i.processors.cache.transactions.IgniteTxHandler.access$400(IgniteTxHandler.java:123) at app//o.a.i.i.processors.cache.transactions.IgniteTxHandler$5.apply(IgniteTxHandler.java:229) at app//o.a.i.i.processors.cache.transactions.IgniteTxHandler$5.apply(IgniteTxHandler.java:227) at app//o.a.i.i.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:1142) at app//o.a.i.i.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:591) at app//o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:392) at app//o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:318) at app//o.a.i.i.processors.cache.GridCacheIoManager.access$100(GridCacheIoManager.java:109) at