Nikita Amelchev created IGNITE-26152:
----------------------------------------
Summary: Fix node stop hang during snapshot creation.
Key: IGNITE-26152
URL: https://issues.apache.org/jira/browse/IGNITE-26152
Project: Ignite
Issue Type: Bug
Reporter: Nikita Amelchev
Assignee: Nikita Amelchev
The "Snapshots 1" suite hangs in roughly one out of every 10 runs. The issue can
be reproduced locally by running the
`testCrdChangeDuringHandlerCompleteOnSnapshotCreate` test (failure rate of about
1 in 5-15 runs). The cause is a deadlock between the starting snapshot task and
the node shutdown process. Stack traces of the threads involved are provided
below:
{noformat}
"test-runner-#4327%snapshot.IgniteClusterSnapshotHandlerTest%" #4502 prio=5
os_prio=31 cpu=264.91ms elapsed=25.24s tid=0x00000003e5c67000 nid=0x3dc6b
waiting on condition [0x0000000171f65000]
java.lang.Thread.State: WAITING (parking)
at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
- parking to wait for <0x0000000597e84688> (a
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
at
java.util.concurrent.locks.LockSupport.park([email protected]/LockSupport.java:194)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt([email protected]/AbstractQueuedSynchronizer.java:885)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued([email protected]/AbstractQueuedSynchronizer.java:917)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire([email protected]/AbstractQueuedSynchronizer.java:1240)
at
java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock([email protected]/ReentrantReadWriteLock.java:959)
at
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.onKernalStop0(GridEventStorageManager.java:250)
at
org.apache.ignite.internal.managers.GridManagerAdapter.onKernalStop(GridManagerAdapter.java:636)
at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:1766)
at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:1715)
at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2300)
- locked <0x00000005986015a8> (a
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance)
at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2120)
at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:315)
at
org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid0(GridAbstractTest.java:1587)
at
org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid(GridAbstractTest.java:1555)
at
org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1649)
at
org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1621)
at
org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1613)
at
org.apache.ignite.internal.processors.cache.persistence.snapshot.AbstractSnapshotSelfTest.afterTestSnapshot(AbstractSnapshotSelfTest.java:275)
at
jdk.internal.reflect.NativeMethodAccessorImpl.invoke0([email protected]/Native
Method)
at
jdk.internal.reflect.NativeMethodAccessorImpl.invoke([email protected]/NativeMethodAccessorImpl.java:62)
at
jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke([email protected]/DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke([email protected]/Method.java:566)
at
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at
org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:33)
at
org.apache.ignite.testframework.junits.GridAbstractTest$6.run(GridAbstractTest.java:2486)
at java.lang.Thread.run([email protected]/Thread.java:829)
"disco-event-worker-#4473%snapshot.IgniteClusterSnapshotHandlerTest1%" #4653
prio=5 os_prio=31 cpu=2.82ms elapsed=25.03s tid=0x00000003e5cb8000 nid=0x3d15f
waiting on condition [0x000000039de7d000]
java.lang.Thread.State: WAITING (parking)
at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
at
java.util.concurrent.locks.LockSupport.park([email protected]/LockSupport.java:323)
at
org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:181)
at
org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:144)
at
org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.onDoneBeforeTopologyUnlock(IgniteSnapshotManager.java:2297)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onDone(GridDhtPartitionsExchangeFuture.java:2467)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onDone(GridDhtPartitionsExchangeFuture.java:163)
at
org.apache.ignite.internal.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:553)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.exchangeFuture(GridCachePartitionExchangeManager.java:1681)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.onDiscoveryEvent(GridCachePartitionExchangeManager.java:626)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$1.onEvent(GridCachePartitionExchangeManager.java:344)
at
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager$DiscoveryListenerWrapper.onEvent(GridEventStorageManager.java:1455)
at
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:900)
at
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:885)
at
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.record0(GridEventStorageManager.java:356)
at
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.record(GridEventStorageManager.java:319)
at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body0(GridDiscoveryManager.java:3205)
at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body(GridDiscoveryManager.java:3052)
at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125)
at java.lang.Thread.run([email protected]/Thread.java:829)
"exchange-worker-#4606%snapshot.IgniteClusterSnapshotHandlerTest0%" #4789
prio=5 os_prio=31 cpu=16.60ms elapsed=18.41s tid=0x000000012d250000 nid=0x20f5f
waiting on condition [0x000000039e8ba000]
java.lang.Thread.State: TIMED_WAITING (parking)
at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
at
java.util.concurrent.locks.LockSupport.parkNanos([email protected]/LockSupport.java:357)
at
org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:222)
at
org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:163)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.latch.ExchangeLatchManager$CompletableLatch.await(ExchangeLatchManager.java:774)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.waitPartitionRelease(GridDhtPartitionsExchangeFuture.java:2011)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.distributedExchange(GridDhtPartitionsExchangeFuture.java:1646)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:1052)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body0(GridCachePartitionExchangeManager.java:3151)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2985)
at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125)
at java.lang.Thread.run([email protected]/Thread.java:829)
{noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)