Nikita Amelchev created IGNITE-26152:
----------------------------------------

             Summary: Fix node stop hang during snapshot creation.
                 Key: IGNITE-26152
                 URL: https://issues.apache.org/jira/browse/IGNITE-26152
             Project: Ignite
          Issue Type: Bug
            Reporter: Nikita Amelchev
            Assignee: Nikita Amelchev


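The "Snapshots 1" suite hangs in roughly one out of every 10 runs. The issue can 
be reproduced locally by running the 
`testCrdChangeDuringHandlerCompleteOnSnapshotCreate` test (it hangs about once 
in every 5-15 runs). The cause is a deadlock between the snapshot task that is 
being started and the node shutdown process: the stopping thread is parked 
acquiring the write lock in `GridEventStorageManager.onKernalStop0`, while the 
discovery event worker is parked inside `GridEventStorageManager.record`, 
waiting on a future in `IgniteSnapshotManager.onDoneBeforeTopologyUnlock`. 
Stack traces are provided below: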


{noformat}
"test-runner-#4327%snapshot.IgniteClusterSnapshotHandlerTest%" #4502 prio=5 
os_prio=31 cpu=264.91ms elapsed=25.24s tid=0x00000003e5c67000 nid=0x3dc6b 
waiting on condition  [0x0000000171f65000]
   java.lang.Thread.State: WAITING (parking)
        at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
        - parking to wait for  <0x0000000597e84688> (a 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
        at 
java.util.concurrent.locks.LockSupport.park([email protected]/LockSupport.java:194)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt([email protected]/AbstractQueuedSynchronizer.java:885)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued([email protected]/AbstractQueuedSynchronizer.java:917)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire([email protected]/AbstractQueuedSynchronizer.java:1240)
        at 
java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock([email protected]/ReentrantReadWriteLock.java:959)
        at 
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.onKernalStop0(GridEventStorageManager.java:250)
        at 
org.apache.ignite.internal.managers.GridManagerAdapter.onKernalStop(GridManagerAdapter.java:636)
        at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:1766)
        at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:1715)
        at 
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2300)
        - locked <0x00000005986015a8> (a 
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance)
        at 
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2120)
        at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:315)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid0(GridAbstractTest.java:1587)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid(GridAbstractTest.java:1555)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1649)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1621)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1613)
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.AbstractSnapshotSelfTest.afterTestSnapshot(AbstractSnapshotSelfTest.java:275)
        at 
jdk.internal.reflect.NativeMethodAccessorImpl.invoke0([email protected]/Native 
Method)
        at 
jdk.internal.reflect.NativeMethodAccessorImpl.invoke([email protected]/NativeMethodAccessorImpl.java:62)
        at 
jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke([email protected]/DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke([email protected]/Method.java:566)
        at 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
        at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
        at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
        at 
org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:33)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest$6.run(GridAbstractTest.java:2486)
        at java.lang.Thread.run([email protected]/Thread.java:829)

"disco-event-worker-#4473%snapshot.IgniteClusterSnapshotHandlerTest1%" #4653 
prio=5 os_prio=31 cpu=2.82ms elapsed=25.03s tid=0x00000003e5cb8000 nid=0x3d15f 
waiting on condition  [0x000000039de7d000]
   java.lang.Thread.State: WAITING (parking)
        at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
        at 
java.util.concurrent.locks.LockSupport.park([email protected]/LockSupport.java:323)
        at 
org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:181)
        at 
org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:144)
        at 
org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.onDoneBeforeTopologyUnlock(IgniteSnapshotManager.java:2297)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onDone(GridDhtPartitionsExchangeFuture.java:2467)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onDone(GridDhtPartitionsExchangeFuture.java:163)
        at 
org.apache.ignite.internal.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:553)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.exchangeFuture(GridCachePartitionExchangeManager.java:1681)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.onDiscoveryEvent(GridCachePartitionExchangeManager.java:626)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$1.onEvent(GridCachePartitionExchangeManager.java:344)
        at 
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager$DiscoveryListenerWrapper.onEvent(GridEventStorageManager.java:1455)
        at 
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:900)
        at 
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:885)
        at 
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.record0(GridEventStorageManager.java:356)
        at 
org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.record(GridEventStorageManager.java:319)
        at 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body0(GridDiscoveryManager.java:3205)
        at 
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body(GridDiscoveryManager.java:3052)
        at 
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125)
        at java.lang.Thread.run([email protected]/Thread.java:829)

"exchange-worker-#4606%snapshot.IgniteClusterSnapshotHandlerTest0%" #4789 
prio=5 os_prio=31 cpu=16.60ms elapsed=18.41s tid=0x000000012d250000 nid=0x20f5f 
waiting on condition  [0x000000039e8ba000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
        at 
java.util.concurrent.locks.LockSupport.parkNanos([email protected]/LockSupport.java:357)
        at 
org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:222)
        at 
org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:163)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.latch.ExchangeLatchManager$CompletableLatch.await(ExchangeLatchManager.java:774)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.waitPartitionRelease(GridDhtPartitionsExchangeFuture.java:2011)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.distributedExchange(GridDhtPartitionsExchangeFuture.java:1646)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:1052)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body0(GridCachePartitionExchangeManager.java:3151)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2985)
        at 
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125)
        at java.lang.Thread.run([email protected]/Thread.java:829)

{noformat}




--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to