[ 
https://issues.apache.org/jira/browse/GEODE-9887?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Jakov Varenina updated GEODE-9887:
----------------------------------
    Description: 
See the deadlock in the logs below:

1. "Distributed system shutdown hook" takes lock 0x00000000c445e988, initiates
the "ConcurrentParallelGatewaySenderEventProcessor Stopper Thread" threads and
waits for them to finish.

2. "ConcurrentParallelGatewaySenderEventProcessor Stopper Thread5" sets the flag
AckReaderThread.shutdown to true and waits for shutdown to finish by joining
the threads for a maximum of 15 seconds.

3. The "AckReaderThread for : Event Processor for GatewaySender_sender1_4"
thread waits for the lock 0x00000000c445e988 owned by the "Distributed system
shutdown hook" thread.

 
{code:java}
"Distributed system shutdown hook" #14 prio=5 os_prio=0 cpu=20.78ms 
elapsed=11.33s tid=0x00007f848c005000 nid=0x1e04 waiting on condition  
[0x00007f83ec415000]
   java.lang.Thread.State: WAITING (parking)
        at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
        - parking to wait for  <0x00000000fcc00e50> (a 
java.util.concurrent.FutureTask)
        at 
java.util.concurrent.locks.LockSupport.park([email protected]/LockSupport.java:194)
        at 
java.util.concurrent.FutureTask.awaitDone([email protected]/FutureTask.java:447)
        at 
java.util.concurrent.FutureTask.get([email protected]/FutureTask.java:190)
        at 
java.util.concurrent.AbstractExecutorService.invokeAll([email protected]/AbstractExecutorService.java:247)
        at 
org.apache.geode.internal.cache.wan.parallel.ConcurrentParallelGatewaySenderEventProcessor.stopProcessing(ConcurrentParallelGatewaySenderEventProcessor.java:258)
        at 
org.apache.geode.internal.cache.wan.AbstractGatewaySender.stopProcessing(AbstractGatewaySender.java:726)
        at 
org.apache.geode.internal.cache.wan.parallel.ParallelGatewaySenderImpl.stop(ParallelGatewaySenderImpl.java:118)
        at 
org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:2165)
        - locked <0x00000000c11a7400> (a java.lang.Class for 
org.apache.geode.internal.cache.GemFireCacheImpl)
        at 
org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1559)
        - locked <0x00000000c11a7400> (a java.lang.Class for 
org.apache.geode.internal.cache.GemFireCacheImpl)
        at 
org.apache.geode.distributed.internal.InternalDistributedSystem.lambda$static$7(InternalDistributedSystem.java:2202)
        at 
org.apache.geode.distributed.internal.InternalDistributedSystem$$Lambda$110/0x0000000100226840.run(Unknown
 Source)
        at java.lang.Thread.run([email protected]/Thread.java:829)
   Locked ownable synchronizers:
        - <0x00000000c445e988> (a 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)



"AckReaderThread for : Event Processor for GatewaySender_sender1_4" #402 daemon 
prio=5 os_prio=0 cpu=3168.26ms elapsed=640.74s tid=0x00007f8434023000 
nid=0x1181 waiting on condition  [0x00007f83eda2b000]
   java.lang.Thread.State: WAITING (parking)
    at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
    - parking to wait for  <0x00000000c445e988> (a 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
    at 
java.util.concurrent.locks.LockSupport.park([email protected]/LockSupport.java:194)
   at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt([email protected]/AbstractQueuedSynchronizer.java:885)
    at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued([email protected]/AbstractQueuedSynchronizer.java:917)
    at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire([email protected]/AbstractQueuedSynchronizer.java:1240)
    at 
java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock([email protected]/ReentrantReadWriteLock.java:959)
    at 
org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher$AckReaderThread.run(GatewaySenderEventRemoteDispatcher.java:665)
  Locked ownable synchronizers:
    - None




"ConcurrentParallelGatewaySenderEventProcessor Stopper Thread5" #872 daemon 
prio=5 os_prio=0 cpu=1.39ms elapsed=14.09s tid=0x00007f849801a000 nid=0x1e13 in 
Object.wait()  [0x00007f849c442000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
        at java.lang.Object.wait([email protected]/Native Method)
        - waiting on <no object reference available>
        at java.lang.Thread.join([email protected]/Thread.java:1308)
        - waiting to re-lock in wait() <0x00000000c542ce20> (a 
org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher$AckReaderThread)
        at 
org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher$AckReaderThread.shutdown(GatewaySenderEventRemoteDispatcher.java:771)
        at 
org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher.stopAckReaderThread(GatewaySenderEventRemoteDispatcher.java:802)
        at 
org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher.stop(GatewaySenderEventRemoteDispatcher.java:826)
        at 
org.apache.geode.internal.cache.wan.AbstractGatewaySenderEventProcessor.stopProcessing(AbstractGatewaySenderEventProcessor.java:1222)
        at 
org.apache.geode.internal.cache.wan.AbstractGatewaySenderEventProcessor$SenderStopperCallable.call(AbstractGatewaySenderEventProcessor.java:1399)
        at 
org.apache.geode.internal.cache.wan.AbstractGatewaySenderEventProcessor$SenderStopperCallable.call(AbstractGatewaySenderEventProcessor.java:1387)
        at 
java.util.concurrent.FutureTask.run([email protected]/FutureTask.java:264)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker([email protected]/ThreadPoolExecutor.java:1128)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run([email protected]/ThreadPoolExecutor.java:628)
        at java.lang.Thread.run([email protected]/Thread.java:829)
   Locked ownable synchronizers:
        - <0x00000000fcf4daa8> (a 
java.util.concurrent.ThreadPoolExecutor$Worker)
{code}
 

  was:
See in below logs:

1. "Distributed system shutdown hook" takes lock 0x00000000c445e988, initiate 
"ConcurrentParallelGatewaySenderEventProcessor Stopper Thread" threads and 
waits for them to finish.

2. "ConcurrentParallelGatewaySenderEventProcessor Stopper Thread5" set flag 
AckReaderThread.shutdown to true and wait for shutdown to finish by joining 
threads for max 15 seconds.

3. "AckReaderThread for : Event Processor for GatewaySender_sender1_4" thread 
waits for the lock 0x00000000c445e988 owned by "Distributed system shutdown 
hook"  thread

 
{code:java}
"Distributed system shutdown hook" #14 prio=5 os_prio=0 cpu=20.78ms 
elapsed=11.33s tid=0x00007f848c005000 nid=0x1e04 waiting on condition  
[0x00007f83ec415000]
   java.lang.Thread.State: WAITING (parking)
        at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
        - parking to wait for  <0x00000000fcc00e50> (a 
java.util.concurrent.FutureTask)
        at 
java.util.concurrent.locks.LockSupport.park([email protected]/LockSupport.java:194)
        at 
java.util.concurrent.FutureTask.awaitDone([email protected]/FutureTask.java:447)
        at 
java.util.concurrent.FutureTask.get([email protected]/FutureTask.java:190)
        at 
java.util.concurrent.AbstractExecutorService.invokeAll([email protected]/AbstractExecutorService.java:247)
        at 
org.apache.geode.internal.cache.wan.parallel.ConcurrentParallelGatewaySenderEventProcessor.stopProcessing(ConcurrentParallelGatewaySenderEventProcessor.java:258)
        at 
org.apache.geode.internal.cache.wan.AbstractGatewaySender.stopProcessing(AbstractGatewaySender.java:726)
        at 
org.apache.geode.internal.cache.wan.parallel.ParallelGatewaySenderImpl.stop(ParallelGatewaySenderImpl.java:118)
        at 
org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:2165)
        - locked <0x00000000c11a7400> (a java.lang.Class for 
org.apache.geode.internal.cache.GemFireCacheImpl)
        at 
org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1559)
        - locked <0x00000000c11a7400> (a java.lang.Class for 
org.apache.geode.internal.cache.GemFireCacheImpl)
        at 
org.apache.geode.distributed.internal.InternalDistributedSystem.lambda$static$7(InternalDistributedSystem.java:2202)
        at 
org.apache.geode.distributed.internal.InternalDistributedSystem$$Lambda$110/0x0000000100226840.run(Unknown
 Source)
        at java.lang.Thread.run([email protected]/Thread.java:829)
   Locked ownable synchronizers:
        - <0x00000000c445e988> (a 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)



"AckReaderThread for : Event Processor for GatewaySender_sender1_4" #402 daemon 
prio=5 os_prio=0 cpu=3168.26ms elapsed=640.74s tid=0x00007f8434023000 
nid=0x1181 waiting on condition  [0x00007f83eda2b000]
   java.lang.Thread.State: WAITING (parking)
    at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
    - parking to wait for  <0x00000000c445e988> (a 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
    at 
java.util.concurrent.locks.LockSupport.park([email protected]/LockSupport.java:194)
   at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt([email protected]/AbstractQueuedSynchronizer.java:885)
    at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued([email protected]/AbstractQueuedSynchronizer.java:917)
    at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire([email protected]/AbstractQueuedSynchronizer.java:1240)
    at 
java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock([email protected]/ReentrantReadWriteLock.java:959)
    at 
org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher$AckReaderThread.run(GatewaySenderEventRemoteDispatcher.java:665)
  Locked ownable synchronizers:
    - None




"ConcurrentParallelGatewaySenderEventProcessor Stopper Thread5" #872 daemon 
prio=5 os_prio=0 cpu=1.39ms elapsed=14.09s tid=0x00007f849801a000 nid=0x1e13 in 
Object.wait()  [0x00007f849c442000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
        at java.lang.Object.wait([email protected]/Native Method)
        - waiting on <no object reference available>
        at java.lang.Thread.join([email protected]/Thread.java:1308)
        - waiting to re-lock in wait() <0x00000000c542ce20> (a 
org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher$AckReaderThread)
        at 
org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher$AckReaderThread.shutdown(GatewaySenderEventRemoteDispatcher.java:771)
        at 
org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher.stopAckReaderThread(GatewaySenderEventRemoteDispatcher.java:802)
        at 
org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher.stop(GatewaySenderEventRemoteDispatcher.java:826)
        at 
org.apache.geode.internal.cache.wan.AbstractGatewaySenderEventProcessor.stopProcessing(AbstractGatewaySenderEventProcessor.java:1222)
        at 
org.apache.geode.internal.cache.wan.AbstractGatewaySenderEventProcessor$SenderStopperCallable.call(AbstractGatewaySenderEventProcessor.java:1399)
        at 
org.apache.geode.internal.cache.wan.AbstractGatewaySenderEventProcessor$SenderStopperCallable.call(AbstractGatewaySenderEventProcessor.java:1387)
        at 
java.util.concurrent.FutureTask.run([email protected]/FutureTask.java:264)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker([email protected]/ThreadPoolExecutor.java:1128)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run([email protected]/ThreadPoolExecutor.java:628)
        at java.lang.Thread.run([email protected]/Thread.java:829)   Locked 
ownable synchronizers:
        - <0x00000000fcf4daa8> (a 
java.util.concurrent.ThreadPoolExecutor$Worker)
{code}
 


> Deadlock when shutting down gws threads unnecessarily delays shutdown of server
> -------------------------------------------------------------------------------
>
>                 Key: GEODE-9887
>                 URL: https://issues.apache.org/jira/browse/GEODE-9887
>             Project: Geode
>          Issue Type: Bug
>          Components: wan
>            Reporter: Jakov Varenina
>            Assignee: Jakov Varenina
>            Priority: Major
>              Labels: pull-request-available
>
> See the deadlock in the logs below:
> 1. "Distributed system shutdown hook" takes lock 0x00000000c445e988, initiates
> the "ConcurrentParallelGatewaySenderEventProcessor Stopper Thread" threads and
> waits for them to finish.
> 2. "ConcurrentParallelGatewaySenderEventProcessor Stopper Thread5" sets the flag
> AckReaderThread.shutdown to true and waits for shutdown to finish by joining
> the threads for a maximum of 15 seconds.
> 3. The "AckReaderThread for : Event Processor for GatewaySender_sender1_4"
> thread waits for the lock 0x00000000c445e988 owned by the "Distributed system
> shutdown hook" thread.
>  
> {code:java}
> "Distributed system shutdown hook" #14 prio=5 os_prio=0 cpu=20.78ms 
> elapsed=11.33s tid=0x00007f848c005000 nid=0x1e04 waiting on condition  
> [0x00007f83ec415000]
>    java.lang.Thread.State: WAITING (parking)
>         at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
>         - parking to wait for  <0x00000000fcc00e50> (a 
> java.util.concurrent.FutureTask)
>         at 
> java.util.concurrent.locks.LockSupport.park([email protected]/LockSupport.java:194)
>         at 
> java.util.concurrent.FutureTask.awaitDone([email protected]/FutureTask.java:447)
>         at 
> java.util.concurrent.FutureTask.get([email protected]/FutureTask.java:190)
>         at 
> java.util.concurrent.AbstractExecutorService.invokeAll([email protected]/AbstractExecutorService.java:247)
>         at 
> org.apache.geode.internal.cache.wan.parallel.ConcurrentParallelGatewaySenderEventProcessor.stopProcessing(ConcurrentParallelGatewaySenderEventProcessor.java:258)
>         at 
> org.apache.geode.internal.cache.wan.AbstractGatewaySender.stopProcessing(AbstractGatewaySender.java:726)
>         at 
> org.apache.geode.internal.cache.wan.parallel.ParallelGatewaySenderImpl.stop(ParallelGatewaySenderImpl.java:118)
>         at 
> org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:2165)
>         - locked <0x00000000c11a7400> (a java.lang.Class for 
> org.apache.geode.internal.cache.GemFireCacheImpl)
>         at 
> org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1559)
>         - locked <0x00000000c11a7400> (a java.lang.Class for 
> org.apache.geode.internal.cache.GemFireCacheImpl)
>         at 
> org.apache.geode.distributed.internal.InternalDistributedSystem.lambda$static$7(InternalDistributedSystem.java:2202)
>         at 
> org.apache.geode.distributed.internal.InternalDistributedSystem$$Lambda$110/0x0000000100226840.run(Unknown
>  Source)
>         at java.lang.Thread.run([email protected]/Thread.java:829)
>    Locked ownable synchronizers:
>         - <0x00000000c445e988> (a 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
> "AckReaderThread for : Event Processor for GatewaySender_sender1_4" #402 
> daemon prio=5 os_prio=0 cpu=3168.26ms elapsed=640.74s tid=0x00007f8434023000 
> nid=0x1181 waiting on condition  [0x00007f83eda2b000]
>    java.lang.Thread.State: WAITING (parking)
>     at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
>     - parking to wait for  <0x00000000c445e988> (a 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
>     at 
> java.util.concurrent.locks.LockSupport.park([email protected]/LockSupport.java:194)
>    at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt([email protected]/AbstractQueuedSynchronizer.java:885)
>     at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued([email protected]/AbstractQueuedSynchronizer.java:917)
>     at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire([email protected]/AbstractQueuedSynchronizer.java:1240)
>     at 
> java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock([email protected]/ReentrantReadWriteLock.java:959)
>     at 
> org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher$AckReaderThread.run(GatewaySenderEventRemoteDispatcher.java:665)
>   Locked ownable synchronizers:
>     - None
> "ConcurrentParallelGatewaySenderEventProcessor Stopper Thread5" #872 daemon 
> prio=5 os_prio=0 cpu=1.39ms elapsed=14.09s tid=0x00007f849801a000 nid=0x1e13 
> in Object.wait()  [0x00007f849c442000]
>    java.lang.Thread.State: TIMED_WAITING (on object monitor)
>         at java.lang.Object.wait([email protected]/Native Method)
>         - waiting on <no object reference available>
>         at java.lang.Thread.join([email protected]/Thread.java:1308)
>         - waiting to re-lock in wait() <0x00000000c542ce20> (a 
> org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher$AckReaderThread)
>         at 
> org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher$AckReaderThread.shutdown(GatewaySenderEventRemoteDispatcher.java:771)
>         at 
> org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher.stopAckReaderThread(GatewaySenderEventRemoteDispatcher.java:802)
>         at 
> org.apache.geode.internal.cache.wan.GatewaySenderEventRemoteDispatcher.stop(GatewaySenderEventRemoteDispatcher.java:826)
>         at 
> org.apache.geode.internal.cache.wan.AbstractGatewaySenderEventProcessor.stopProcessing(AbstractGatewaySenderEventProcessor.java:1222)
>         at 
> org.apache.geode.internal.cache.wan.AbstractGatewaySenderEventProcessor$SenderStopperCallable.call(AbstractGatewaySenderEventProcessor.java:1399)
>         at 
> org.apache.geode.internal.cache.wan.AbstractGatewaySenderEventProcessor$SenderStopperCallable.call(AbstractGatewaySenderEventProcessor.java:1387)
>         at 
> java.util.concurrent.FutureTask.run([email protected]/FutureTask.java:264)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker([email protected]/ThreadPoolExecutor.java:1128)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run([email protected]/ThreadPoolExecutor.java:628)
>         at java.lang.Thread.run([email protected]/Thread.java:829)
>    Locked ownable synchronizers:
>         - <0x00000000fcf4daa8> (a 
> java.util.concurrent.ThreadPoolExecutor$Worker)
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to