[ 
https://issues.apache.org/jira/browse/IGNITE-7618?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Dmitriy Pavlov updated IGNITE-7618:
-----------------------------------
    Fix Version/s:     (was: 2.6)
                   2.7

> validateCache synchronously waits for state change
> --------------------------------------------------
>
>                 Key: IGNITE-7618
>                 URL: https://issues.apache.org/jira/browse/IGNITE-7618
>             Project: Ignite
>          Issue Type: Bug
>          Components: cache
>    Affects Versions: 2.4
>            Reporter: Alexey Goncharuk
>            Assignee: Ryabov Dmitrii
>            Priority: Major
>             Fix For: 2.7
>
>
> Currently, we call publicApiActiveState(waitForTransition=true) in 
> GridDhtTopologyFutureAdapter#validateCache. This is incorrect, because the 
> cluster state is updated from discovery thread, and mapping may be happening 
> on a previous topology version. We must capture the cluster active state flag 
> to the exchange future (I believe we already have all the mechanics for this 
> in ExchangeActions class) and validate the state in the exchange future 
> itself, without touching ClusterStateProcessor.
> Below is an example of a deadlock happened because of synchronous wait:
> {code}
> "db-checkpoint-thread-#12318%database.IgniteDbBaselineTopologySelfTest0%" 
> #15498 prio=5 os_prio=0 tid=0x00007fa173e80800 nid=0x3d08 waiting on 
> condition [0x00007fa2069a8000]
>    java.lang.Thread.State: WAITING (parking)
>       at sun.misc.Unsafe.park(Native Method)
>       - parking to wait for  <0x00000007abfc16e8> (a 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
>       at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
>       at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
>       at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870)
>       at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199)
>       at 
> java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock(ReentrantReadWriteLock.java:943)
>       at 
> org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$Checkpointer.markCheckpointBegin(GridCacheDatabaseSharedManager.java:2991)
>       at 
> org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$Checkpointer.doCheckpoint(GridCacheDatabaseSharedManager.java:2797)
>       at 
> org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$Checkpointer.body(GridCacheDatabaseSharedManager.java:2722)
>       at 
> org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
>       at java.lang.Thread.run(Thread.java:745)
> "snapshot-scheduler-restats-#12202%database.IgniteDbBaselineTopologySelfTest0%"
>  #15332 prio=5 os_prio=0 tid=0x00007fa228143000 nid=0x3c65 waiting on 
> condition [0x00007fa2be919000]
>    java.lang.Thread.State: WAITING (parking)
>       at sun.misc.Unsafe.park(Native Method)
>       at java.util.concurrent.locks.LockSupport.park(LockSupport.java:304)
>       at 
> org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:177)
>       at 
> org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:140)
>       at 
> org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor.publicApiActiveState(GridClusterStateProcessor.java:194)
>       at 
> org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTopologyFutureAdapter.validateCache(GridDhtTopologyFutureAdapter.java:83)
>       at 
> org.apache.ignite.internal.processors.cache.distributed.dht.colocated.GridDhtColocatedLockFuture.mapOnTopology(GridDhtColocatedLockFuture.java:787)
>       at 
> org.apache.ignite.internal.processors.cache.distributed.dht.colocated.GridDhtColocatedLockFuture.map(GridDhtColocatedLockFuture.java:763)
>       at 
> org.apache.ignite.internal.processors.cache.distributed.dht.colocated.GridDhtColocatedCache.lockAllAsync(GridDhtColocatedCache.java:646)
>       at 
> org.apache.ignite.internal.processors.cache.distributed.GridDistributedCacheAdapter.txLockAsync(GridDistributedCacheAdapter.java:109)
>       at 
> org.apache.ignite.internal.processors.cache.distributed.near.GridNearTxLocal.getAllAsync(GridNearTxLocal.java:1723)
>       at 
> org.apache.ignite.internal.processors.cache.distributed.dht.colocated.GridDhtColocatedCache$4.op(GridDhtColocatedCache.java:197)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheAdapter$AsyncOp.op(GridCacheAdapter.java:5140)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheAdapter.asyncOp(GridCacheAdapter.java:4275)
>       at 
> org.apache.ignite.internal.processors.cache.distributed.dht.colocated.GridDhtColocatedCache.getAsync(GridDhtColocatedCache.java:195)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheAdapter.get0(GridCacheAdapter.java:4565)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheAdapter.get(GridCacheAdapter.java:4546)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheAdapter.get(GridCacheAdapter.java:1347)
>       at 
> org.gridgain.grid.internal.processors.cache.database.snapshot.schedule.SnapshotScheduleProcessor$2.run(SnapshotScheduleProcessor.java:174)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> "sys-#12350%database.IgniteDbBaselineTopologySelfTest0%" #15530 prio=5 
> os_prio=0 tid=0x00007fa28801a800 nid=0x3d28 waiting on condition 
> [0x00007fa1ffefd000]
>    java.lang.Thread.State: WAITING (parking)
>       at sun.misc.Unsafe.park(Native Method)
>       - parking to wait for  <0x00000007aae59b70> (a 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
>       at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
>       at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
>       at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870)
>       at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199)
>       at 
> java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock(ReentrantReadWriteLock.java:943)
>       at 
> org.apache.ignite.internal.util.StripedCompositeReadWriteLock$WriteLock.lock0(StripedCompositeReadWriteLock.java:154)
>       at 
> org.apache.ignite.internal.util.StripedCompositeReadWriteLock$WriteLock.lock(StripedCompositeReadWriteLock.java:123)
>       at 
> org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.update(GridDhtPartitionTopologyImpl.java:1631)
>       at 
> org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.processSinglePartitionUpdate(GridCachePartitionExchangeManager.java:1521)
>       at 
> org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.access$1000(GridCachePartitionExchangeManager.java:133)
>       at 
> org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$2.onMessage(GridCachePartitionExchangeManager.java:332)
>       at 
> org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$2.onMessage(GridCachePartitionExchangeManager.java:312)
>       at 
> org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$MessageHandler.apply(GridCachePartitionExchangeManager.java:2689)
>       at 
> org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$MessageHandler.apply(GridCachePartitionExchangeManager.java:2668)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:1060)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:579)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:378)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:304)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheIoManager.access$100(GridCacheIoManager.java:99)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheIoManager$1.onMessage(GridCacheIoManager.java:293)
>       at 
> org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1555)
>       at 
> org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1183)
>       at 
> org.apache.ignite.internal.managers.communication.GridIoManager.access$4200(GridIoManager.java:126)
>       at 
> org.apache.ignite.internal.managers.communication.GridIoManager$9.run(GridIoManager.java:1090)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> "main" #1 prio=5 os_prio=0 tid=0x00007fa32800c000 nid=0x7a72 waiting on 
> condition [0x00007fa330fa4000]
>    java.lang.Thread.State: TIMED_WAITING (parking)
>       at sun.misc.Unsafe.park(Native Method)
>       - parking to wait for  <0x00000007ac72e078> (a 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
>       at 
> java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
>       at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireNanos(AbstractQueuedSynchronizer.java:934)
>       at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireNanos(AbstractQueuedSynchronizer.java:1247)
>       at 
> java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.tryLock(ReentrantReadWriteLock.java:1115)
>       at 
> org.apache.ignite.internal.util.StripedCompositeReadWriteLock$WriteLock.tryLock(StripedCompositeReadWriteLock.java:201)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheIoManager.writeLock(GridCacheIoManager.java:509)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheIoManager.onKernalStop0(GridCacheIoManager.java:543)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheSharedManagerAdapter.onKernalStop(GridCacheSharedManagerAdapter.java:120)
>       at 
> org.apache.ignite.internal.processors.cache.GridCacheProcessor.onKernalStop(GridCacheProcessor.java:961)
>       at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2163)
>       at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:2111)
>       at 
> org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2545)
>       - locked <0x00000007ac72e160> (a 
> org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance)
>       at 
> org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2508)
>       at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:365)
>       at org.apache.ignite.Ignition.stop(Ignition.java:224)
>       at 
> org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid(GridAbstractTest.java:1026)
>       at 
> org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1069)
>       at 
> org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1047)
>       at 
> org.gridgain.grid.internal.processors.cache.database.IgniteDbBaselineTopologySelfTest.afterTest(IgniteDbBaselineTopologySelfTest.java:90)
>       at 
> org.apache.ignite.testframework.junits.GridAbstractTest.tearDown(GridAbstractTest.java:1623)
>       at 
> org.apache.ignite.testframework.junits.common.GridCommonAbstractTest.tearDown(GridCommonAbstractTest.java:491)
>       at junit.framework.TestCase.runBare(TestCase.java:146)
>       at junit.framework.TestResult$1.protect(TestResult.java:122)
>       at junit.framework.TestResult.runProtected(TestResult.java:142)
>       at junit.framework.TestResult.run(TestResult.java:125)
>       at junit.framework.TestCase.run(TestCase.java:129)
>       at junit.framework.TestSuite.runTest(TestSuite.java:255)
>       at junit.framework.TestSuite.run(TestSuite.java:250)
>       at 
> org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:84)
>       at org.junit.runner.JUnitCore.run(JUnitCore.java:160)
>       at 
> com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:117)
>       at 
> com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:52)
>       at 
> com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:253)
>       at 
> com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:84)
>       at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>       at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>       at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.lang.reflect.Method.invoke(Method.java:498)
>       at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to