[ https://issues.apache.org/jira/browse/IGNITE-12594?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Anton Kalashnikov reassigned IGNITE-12594: ------------------------------------------ Assignee: Sergey Chugunov (was: Anton Kalashnikov) > Deadlock between GridCacheDataStore#purgeExpiredInternal and > GridNearTxLocal#enlistWriteEntry > --------------------------------------------------------------------------------------------- > > Key: IGNITE-12594 > URL: https://issues.apache.org/jira/browse/IGNITE-12594 > Project: Ignite > Issue Type: Bug > Reporter: Anton Kalashnikov > Assignee: Sergey Chugunov > Priority: Major > Time Spent: 10m > Remaining Estimate: 0h > > The deadlock is reproduced occasionally in the PDS3 suite and can be seen in the > thread dump below. > One thread attempts to unwind evicts, acquires the checkpoint read lock and then > locks {{GridCacheMapEntry}}. Another thread calls > {{GridCacheMapEntry#unswap}}, which locks the entry, determines that the entry is expired and, > while still holding the entry lock, acquires the checkpoint read lock to remove the entry from the store. > The two threads therefore take the same pair of locks in opposite order. > We should not acquire the checkpoint read lock inside of a locked > {{GridCacheMapEntry}}. 
> {code:java}Thread [name="updater-1", id=29900, state=WAITING, blockCnt=2, > waitCnt=4450] > Lock > [object=java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@2fc51685, > ownerName=null, ownerId=-1] > at sun.misc.Unsafe.park(Native Method) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283) > at > java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727) > at > o.a.i.i.processors.cache.persistence.GridCacheDatabaseSharedManager.checkpointReadLock(GridCacheDatabaseSharedManager.java:1632) > <- CP read lock > at > o.a.i.i.processors.cache.GridCacheMapEntry.onExpired(GridCacheMapEntry.java:4081) > at > o.a.i.i.processors.cache.GridCacheMapEntry.unswap(GridCacheMapEntry.java:559) > at > o.a.i.i.processors.cache.GridCacheMapEntry.unswap(GridCacheMapEntry.java:519) > <- locked entry > at > o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.enlistWriteEntry(GridNearTxLocal.java:1437) > at > o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.enlistWrite(GridNearTxLocal.java:1303) > at > o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.putAllAsync0(GridNearTxLocal.java:957) > at > o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.putAllAsync(GridNearTxLocal.java:491) > at > o.a.i.i.processors.cache.GridCacheAdapter$29.inOp(GridCacheAdapter.java:2526) > at > o.a.i.i.processors.cache.GridCacheAdapter$SyncInOp.op(GridCacheAdapter.java:4727) > at > o.a.i.i.processors.cache.GridCacheAdapter.syncOp(GridCacheAdapter.java:3740) > at > o.a.i.i.processors.cache.GridCacheAdapter.putAll0(GridCacheAdapter.java:2524) > at > 
o.a.i.i.processors.cache.GridCacheAdapter.putAll(GridCacheAdapter.java:2513) > at > o.a.i.i.processors.cache.IgniteCacheProxyImpl.putAll(IgniteCacheProxyImpl.java:1264) > at > o.a.i.i.processors.cache.GatewayProtectedCacheProxy.putAll(GatewayProtectedCacheProxy.java:863) > at > o.a.i.i.processors.cache.persistence.IgnitePdsContinuousRestartTest$1.call(IgnitePdsContinuousRestartTest.java:291) > at o.a.i.testframework.GridTestThread.run(GridTestThread.java:83) > Locked synchronizers: > java.util.concurrent.locks.ReentrantLock$NonfairSync@762613f7 > Thread > [name="sys-stripe-0-#24086%persistence.IgnitePdsContinuousRestartTestWithExpiryPolicy0%", > id=29617, state=WAITING, blockCnt=2, waitCnt=65381] > Lock > [object=java.util.concurrent.locks.ReentrantLock$NonfairSync@762613f7, > ownerName=updater-1, ownerId=29900] > at sun.misc.Unsafe.park(Native Method) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199) > at > java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209) > at > java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285) > <- lock entry > at > o.a.i.i.processors.cache.GridCacheMapEntry.lockEntry(GridCacheMapEntry.java:5017) > at > o.a.i.i.processors.cache.GridCacheMapEntry.markObsoleteVersion(GridCacheMapEntry.java:2799) > at > o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.removeVersionedEntry(GridDhtLocalPartition.java:392) > at > o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.cleanupRemoveQueue(GridDhtLocalPartition.java:416) > at > 
o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.onDeferredDelete(GridDhtLocalPartition.java:441) > at > o.a.i.i.processors.cache.distributed.dht.GridDhtCacheAdapter.onDeferredDelete(GridDhtCacheAdapter.java:1696) > at > o.a.i.i.processors.cache.GridCacheContext.onDeferredDelete(GridCacheContext.java:1710) > at > o.a.i.i.processors.cache.GridCacheMapEntry.onTtlExpired(GridCacheMapEntry.java:4037) > at > o.a.i.i.processors.cache.GridCacheTtlManager$1.applyx(GridCacheTtlManager.java:75) > at > o.a.i.i.processors.cache.GridCacheTtlManager$1.applyx(GridCacheTtlManager.java:66) > at > o.a.i.i.util.lang.IgniteInClosure2X.apply(IgniteInClosure2X.java:37) > at > o.a.i.i.processors.cache.persistence.GridCacheOffheapManager$GridCacheDataStore.purgeExpiredInternal(GridCacheOffheapManager.java:2725) > <- CP read lock > at > o.a.i.i.processors.cache.persistence.GridCacheOffheapManager$GridCacheDataStore.purgeExpired(GridCacheOffheapManager.java:2651) > at > o.a.i.i.processors.cache.persistence.GridCacheOffheapManager.expire(GridCacheOffheapManager.java:1047) > at > o.a.i.i.processors.cache.GridCacheTtlManager.expire(GridCacheTtlManager.java:242) > at > o.a.i.i.processors.cache.GridCacheUtils.unwindEvicts(GridCacheUtils.java:874) > at > o.a.i.i.processors.cache.transactions.IgniteTxStateImpl.unwindEvicts(IgniteTxStateImpl.java:106) > at > o.a.i.i.processors.cache.GridCacheIoManager.onMessageProcessed(GridCacheIoManager.java:1182) > at > o.a.i.i.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:1161) > at > o.a.i.i.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:591) > at > o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:392) > at > o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:318) > at > o.a.i.i.processors.cache.GridCacheIoManager.access$100(GridCacheIoManager.java:109) > at > 
o.a.i.i.processors.cache.GridCacheIoManager$1.onMessage(GridCacheIoManager.java:308) > at > o.a.i.i.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1607) > at > o.a.i.i.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1231) > at > o.a.i.i.managers.communication.GridIoManager.access$4300(GridIoManager.java:132) > at > o.a.i.i.managers.communication.GridIoManager$8.run(GridIoManager.java:1124) > at o.a.i.i.util.StripedExecutor$Stripe.body(StripedExecutor.java:559) > at o.a.i.i.util.worker.GridWorker.run(GridWorker.java:119) > at java.lang.Thread.run(Thread.java:748){code} > Reproduced by PDS 3 > [https://ggtc.gridgain.com/viewLog.html?buildId=2706284&buildTypeId=Tests_GridGainCeEeUe_Latest_CE_Pds3&tab=buildResultsDiv&branch_Tests_GridGainCeEeUe_Latest_CE=<default>|https://ggtc.gridgain.com/viewLog.html?buildId=2706284&buildTypeId=Tests_GridGainCeEeUe_Latest_CE_Pds3&tab=buildResultsDiv&branch_Tests_GridGainCeEeUe_Latest_CE=%3Cdefault%3E] -- This message was sent by Atlassian Jira (v8.3.4#803005)