[ https://issues.apache.org/jira/browse/GEODE-5676?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16750319#comment-16750319 ]
Dan Smith edited comment on GEODE-5676 at 1/23/19 6:49 PM: ----------------------------------------------------------- Callstacks are working again, and we have a run with stack dumps now: http://files.apachegeode-ci.info/builds/apache-develop-pr/geode-pr-3106/test-artifacts/1548194844/distributedtestfiles-geode-pr-3106.tgz Looking into the stack dumps, it looks like we are stuck waiting for some elder initialization {noformat} at jdk.internal.misc.Unsafe.park(java.base@11.0.1/Native Method) at java.util.concurrent.locks.LockSupport.parkNanos(java.base@11.0.1/LockSupport.java:234) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1079) at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1369) at java.util.concurrent.CountDownLatch.await(java.base@11.0.1/CountDownLatch.java:278) at org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:61) at org.apache.geode.distributed.internal.ReplyProcessor21.basicWait(ReplyProcessor21.java:714) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:785) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:762) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:848) at org.apache.geode.distributed.internal.locks.ElderInitProcessor.init(ElderInitProcessor.java:69) at org.apache.geode.distributed.internal.locks.ElderState.<init>(ElderState.java:53) at org.apache.geode.distributed.internal.ClusterElderManager.lambda$new$0(ClusterElderManager.java:41) at org.apache.geode.distributed.internal.ClusterElderManager$$Lambda$172/0x00000008402ac440.get(Unknown Source) at 
org.apache.geode.distributed.internal.ClusterElderManager.initializeElderState(ClusterElderManager.java:107) at org.apache.geode.distributed.internal.ClusterElderManager.getElderState(ClusterElderManager.java:98) at org.apache.geode.distributed.internal.ClusterDistributionManager.getElderState(ClusterDistributionManager.java:2912) at org.apache.geode.distributed.internal.locks.GrantorRequestProcessor.startElderCall(GrantorRequestProcessor.java:222) at org.apache.geode.distributed.internal.locks.GrantorRequestProcessor.basicOp(GrantorRequestProcessor.java:329) at org.apache.geode.distributed.internal.locks.GrantorRequestProcessor.basicOp(GrantorRequestProcessor.java:314) at org.apache.geode.distributed.internal.locks.GrantorRequestProcessor.peekGrantor(GrantorRequestProcessor.java:276) at org.apache.geode.distributed.internal.locks.DLockService.peekGrantor(DLockService.java:897) at org.apache.geode.distributed.internal.locks.DLockService.notLockGrantorId(DLockService.java:828) at org.apache.geode.distributed.internal.locks.DLockService.lockInterruptibly(DLockService.java:1501) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1239) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1230) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1225) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1221) at org.apache.geode.distributed.internal.InternalConfigurationPersistenceService.lockSharedConfiguration(InternalConfigurationPersistenceService.java:823) at org.apache.geode.distributed.internal.InternalConfigurationPersistenceService.createConfigurationResponse(InternalConfigurationPersistenceService.java:627) at org.apache.geode.management.internal.configuration.functions.GetClusterConfigurationFunction.execute(GetClusterConfigurationFunction.java:53) at 
org.apache.geode.internal.cache.MemberFunctionStreamingMessage.process(MemberFunctionStreamingMessage.java:193) at org.apache.geode.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:367) at org.apache.geode.distributed.internal.DistributionMessage$1.run(DistributionMessage.java:433) at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.1/ThreadPoolExecutor.java:1128) at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.1/ThreadPoolExecutor.java:628) at org.apache.geode.distributed.internal.ClusterDistributionManager.runUntilShutdown(ClusterDistributionManager.java:956) at org.apache.geode.distributed.internal.ClusterDistributionManager.doFunctionExecutionThread(ClusterDistributionManager.java:810) at org.apache.geode.distributed.internal.ClusterDistributionManager$$Lambda$189/0x00000008402e2040.invoke(Unknown Source) at org.apache.geode.internal.logging.LoggingThreadFactory.lambda$newThread$0(LoggingThreadFactory.java:121) at org.apache.geode.internal.logging.LoggingThreadFactory$$Lambda$179/0x00000008402a9840.run(Unknown Source) at java.lang.Thread.run(java.base@11.0.1/Thread.java:834) {noformat} I don't see any response to this message in the callstacks, and it is not changing. There are some other interesting stack traces that might be related. The same member that is in ElderInitProcessor.init is also in the middle of doing a reconnect; maybe that messed up the ElderInit process somehow? 
{noformat} at jdk.internal.misc.Unsafe.park(java.base@11.0.1/Native Method) at java.util.concurrent.locks.LockSupport.parkNanos(java.base@11.0.1/LockSupport.java:234) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1079) at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1369) at java.util.concurrent.CountDownLatch.await(java.base@11.0.1/CountDownLatch.java:278) at org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:61) at org.apache.geode.internal.util.concurrent.FutureResult.get(FutureResult.java:69) at org.apache.geode.distributed.internal.locks.DLockService.waitForLockGrantorFutureResult(DLockService.java:748) at org.apache.geode.distributed.internal.locks.DLockService.getLockGrantorId(DLockService.java:297) at org.apache.geode.distributed.internal.locks.DLockService.lockInterruptibly(DLockService.java:1443) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1239) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1230) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1225) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1221) at org.apache.geode.distributed.internal.InternalConfigurationPersistenceService.lockSharedConfiguration(InternalConfigurationPersistenceService.java:823) at org.apache.geode.distributed.internal.InternalConfigurationPersistenceService.createConfigurationResponse(InternalConfigurationPersistenceService.java:627) at org.apache.geode.management.internal.configuration.functions.GetClusterConfigurationFunction.execute(GetClusterConfigurationFunction.java:53) at org.apache.geode.internal.cache.MemberFunctionStreamingMessage.process(MemberFunctionStreamingMessage.java:193) at 
org.apache.geode.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:367) at org.apache.geode.distributed.internal.DistributionMessage$1.run(DistributionMessage.java:433) at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.1/ThreadPoolExecutor.java:1128) at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.1/ThreadPoolExecutor.java:628) at org.apache.geode.distributed.internal.ClusterDistributionManager.runUntilShutdown(ClusterDistributionManager.java:956) at org.apache.geode.distributed.internal.ClusterDistributionManager.doFunctionExecutionThread(ClusterDistributionManager.java:810) at org.apache.geode.distributed.internal.ClusterDistributionManager$$Lambda$189/0x00000008402e2040.invoke(Unknown Source) at org.apache.geode.internal.logging.LoggingThreadFactory.lambda$newThread$0(LoggingThreadFactory.java:121) at org.apache.geode.internal.logging.LoggingThreadFactory$$Lambda$179/0x00000008402a9840.run(Unknown Source) at java.lang.Thread.run(java.base@11.0.1/Thread.java:834) ----3 instances of this stack at jdk.internal.misc.Unsafe.park(java.base@11.0.1/Native Method) at java.util.concurrent.locks.LockSupport.parkNanos(java.base@11.0.1/LockSupport.java:234) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1079) at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1369) at java.util.concurrent.CountDownLatch.await(java.base@11.0.1/CountDownLatch.java:278) at org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:61) at org.apache.geode.distributed.internal.ReplyProcessor21.basicWait(ReplyProcessor21.java:714) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:785) at 
org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:762) at org.apache.geode.internal.cache.execute.FunctionStreamingResultCollector.getResult(FunctionStreamingResultCollector.java:142) at org.apache.geode.internal.cache.ClusterConfigurationLoader.requestConfigurationFromOneLocator(ClusterConfigurationLoader.java:313) at org.apache.geode.internal.cache.ClusterConfigurationLoader.requestConfigurationFromLocators(ClusterConfigurationLoader.java:282) at org.apache.geode.internal.cache.GemFireCacheImpl.requestSharedConfiguration(GemFireCacheImpl.java:1074) at org.apache.geode.internal.cache.GemFireCacheImpl.<init>(GemFireCacheImpl.java:859) at org.apache.geode.internal.cache.GemFireCacheImpl.basicCreate(GemFireCacheImpl.java:796) at org.apache.geode.internal.cache.GemFireCacheImpl.create(GemFireCacheImpl.java:775) at org.apache.geode.distributed.internal.InternalDistributedSystem.reconnect(InternalDistributedSystem.java:2691) at org.apache.geode.distributed.internal.InternalDistributedSystem.tryReconnect(InternalDistributedSystem.java:2456) at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1214) at org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.membershipFailure(ClusterDistributionManager.java:3420) at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.uncleanShutdown(GMSMembershipManager.java:1552) at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.lambda$forceDisconnect$3(GMSMembershipManager.java:2570) at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$$Lambda$277/0x000000084049f440.run(Unknown Source) at java.lang.Thread.run(java.base@11.0.1/Thread.java:834) ----3 instances of this stack at jdk.internal.misc.Unsafe.park(java.base@11.0.1/Native Method) at 
java.util.concurrent.locks.LockSupport.parkNanos(java.base@11.0.1/LockSupport.java:234) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1079) at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1369) at java.util.concurrent.CountDownLatch.await(java.base@11.0.1/CountDownLatch.java:278) at org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:61) at org.apache.geode.distributed.internal.ReplyProcessor21.basicWait(ReplyProcessor21.java:714) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:785) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:762) at org.apache.geode.internal.cache.execute.FunctionStreamingResultCollector.getResult(FunctionStreamingResultCollector.java:142) at org.apache.geode.internal.cache.ClusterConfigurationLoader.requestConfigurationFromOneLocator(ClusterConfigurationLoader.java:313) at org.apache.geode.internal.cache.ClusterConfigurationLoader.requestConfigurationFromLocators(ClusterConfigurationLoader.java:282) at org.apache.geode.internal.cache.GemFireCacheImpl.requestSharedConfiguration(GemFireCacheImpl.java:1074) at org.apache.geode.internal.cache.GemFireCacheImpl.<init>(GemFireCacheImpl.java:859) at org.apache.geode.internal.cache.GemFireCacheImpl.basicCreate(GemFireCacheImpl.java:796) at org.apache.geode.internal.cache.GemFireCacheImpl.create(GemFireCacheImpl.java:785) at org.apache.geode.cache.CacheFactory.create(CacheFactory.java:176) at org.apache.geode.cache.CacheFactory.create(CacheFactory.java:223) at org.apache.geode.test.junit.rules.ServerStarterRule.startServer(ServerStarterRule.java:174) at org.apache.geode.test.junit.rules.ServerStarterRule.before(ServerStarterRule.java:80) at 
org.apache.geode.test.dunit.rules.ClusterStartupRule.lambda$startServerVM$729766c4$1(ClusterStartupRule.java:248) at org.apache.geode.test.dunit.rules.ClusterStartupRule$$Lambda$131/0x00000008401c0840.call(Unknown Source) at jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(java.base@11.0.1/Native Method) at jdk.internal.reflect.NativeMethodAccessorImpl.invoke(java.base@11.0.1/NativeMethodAccessorImpl.java:62) at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(java.base@11.0.1/DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(java.base@11.0.1/Method.java:566) at org.apache.geode.test.dunit.internal.MethodInvoker.executeObject(MethodInvoker.java:123) at org.apache.geode.test.dunit.internal.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:69) at jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(java.base@11.0.1/Native Method) at jdk.internal.reflect.NativeMethodAccessorImpl.invoke(java.base@11.0.1/NativeMethodAccessorImpl.java:62) at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(java.base@11.0.1/DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(java.base@11.0.1/Method.java:566) at sun.rmi.server.UnicastServerRef.dispatch(java.rmi@11.0.1/UnicastServerRef.java:359) at sun.rmi.transport.Transport$1.run(java.rmi@11.0.1/Transport.java:200) at sun.rmi.transport.Transport$1.run(java.rmi@11.0.1/Transport.java:197) at java.security.AccessController.doPrivileged(java.base@11.0.1/Native Method) at sun.rmi.transport.Transport.serviceCall(java.rmi@11.0.1/Transport.java:196) at sun.rmi.transport.tcp.TCPTransport.handleMessages(java.rmi@11.0.1/TCPTransport.java:562) at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(java.rmi@11.0.1/TCPTransport.java:796) at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(java.rmi@11.0.1/TCPTransport.java:677) at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$$Lambda$126/0x000000084019f440.run(java.rmi@11.0.1/Unknown Source) at 
java.security.AccessController.doPrivileged(java.base@11.0.1/Native Method) at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(java.rmi@11.0.1/TCPTransport.java:676) at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.1/ThreadPoolExecutor.java:1128) at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.1/ThreadPoolExecutor.java:628) at java.lang.Thread.run(java.base@11.0.1/Thread.java:834) {noformat} was (Author: upthewaterspout): Callstacks are working again, and we have a run with stack dumps now: http://files.apachegeode-ci.info/builds/apache-develop-pr/geode-pr-3106/test-artifacts/1548194844/distributedtestfiles-geode-pr-3106.tgz Looking into the stack dumps, it looks like we are stuck waiting for some elder initialization {noformat} at jdk.internal.misc.Unsafe.park(java.base@11.0.1/Native Method) at java.util.concurrent.locks.LockSupport.parkNanos(java.base@11.0.1/LockSupport.java:234) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1079) at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1369) at java.util.concurrent.CountDownLatch.await(java.base@11.0.1/CountDownLatch.java:278) at org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:61) at org.apache.geode.distributed.internal.ReplyProcessor21.basicWait(ReplyProcessor21.java:714) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:785) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:762) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:848) at org.apache.geode.distributed.internal.locks.ElderInitProcessor.init(ElderInitProcessor.java:69) at 
org.apache.geode.distributed.internal.locks.ElderState.<init>(ElderState.java:53) at org.apache.geode.distributed.internal.ClusterElderManager.lambda$new$0(ClusterElderManager.java:41) at org.apache.geode.distributed.internal.ClusterElderManager$$Lambda$172/0x00000008402ac440.get(Unknown Source) at org.apache.geode.distributed.internal.ClusterElderManager.initializeElderState(ClusterElderManager.java:107) at org.apache.geode.distributed.internal.ClusterElderManager.getElderState(ClusterElderManager.java:98) at org.apache.geode.distributed.internal.ClusterDistributionManager.getElderState(ClusterDistributionManager.java:2912) at org.apache.geode.distributed.internal.locks.GrantorRequestProcessor.startElderCall(GrantorRequestProcessor.java:222) at org.apache.geode.distributed.internal.locks.GrantorRequestProcessor.basicOp(GrantorRequestProcessor.java:329) at org.apache.geode.distributed.internal.locks.GrantorRequestProcessor.basicOp(GrantorRequestProcessor.java:314) at org.apache.geode.distributed.internal.locks.GrantorRequestProcessor.peekGrantor(GrantorRequestProcessor.java:276) at org.apache.geode.distributed.internal.locks.DLockService.peekGrantor(DLockService.java:897) at org.apache.geode.distributed.internal.locks.DLockService.notLockGrantorId(DLockService.java:828) at org.apache.geode.distributed.internal.locks.DLockService.lockInterruptibly(DLockService.java:1501) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1239) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1230) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1225) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1221) at org.apache.geode.distributed.internal.InternalConfigurationPersistenceService.lockSharedConfiguration(InternalConfigurationPersistenceService.java:823) at 
org.apache.geode.distributed.internal.InternalConfigurationPersistenceService.createConfigurationResponse(InternalConfigurationPersistenceService.java:627) at org.apache.geode.management.internal.configuration.functions.GetClusterConfigurationFunction.execute(GetClusterConfigurationFunction.java:53) at org.apache.geode.internal.cache.MemberFunctionStreamingMessage.process(MemberFunctionStreamingMessage.java:193) at org.apache.geode.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:367) at org.apache.geode.distributed.internal.DistributionMessage$1.run(DistributionMessage.java:433) at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.1/ThreadPoolExecutor.java:1128) at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.1/ThreadPoolExecutor.java:628) at org.apache.geode.distributed.internal.ClusterDistributionManager.runUntilShutdown(ClusterDistributionManager.java:956) at org.apache.geode.distributed.internal.ClusterDistributionManager.doFunctionExecutionThread(ClusterDistributionManager.java:810) at org.apache.geode.distributed.internal.ClusterDistributionManager$$Lambda$189/0x00000008402e2040.invoke(Unknown Source) at org.apache.geode.internal.logging.LoggingThreadFactory.lambda$newThread$0(LoggingThreadFactory.java:121) at org.apache.geode.internal.logging.LoggingThreadFactory$$Lambda$179/0x00000008402a9840.run(Unknown Source) at java.lang.Thread.run(java.base@11.0.1/Thread.java:834) {noformat} I don't see any response to this message in the callstacks, and it is not changing. 
There are some other interesting stack traces that might be related: {noformat} at jdk.internal.misc.Unsafe.park(java.base@11.0.1/Native Method) at java.util.concurrent.locks.LockSupport.parkNanos(java.base@11.0.1/LockSupport.java:234) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1079) at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1369) at java.util.concurrent.CountDownLatch.await(java.base@11.0.1/CountDownLatch.java:278) at org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:61) at org.apache.geode.internal.util.concurrent.FutureResult.get(FutureResult.java:69) at org.apache.geode.distributed.internal.locks.DLockService.waitForLockGrantorFutureResult(DLockService.java:748) at org.apache.geode.distributed.internal.locks.DLockService.getLockGrantorId(DLockService.java:297) at org.apache.geode.distributed.internal.locks.DLockService.lockInterruptibly(DLockService.java:1443) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1239) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1230) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1225) at org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1221) at org.apache.geode.distributed.internal.InternalConfigurationPersistenceService.lockSharedConfiguration(InternalConfigurationPersistenceService.java:823) at org.apache.geode.distributed.internal.InternalConfigurationPersistenceService.createConfigurationResponse(InternalConfigurationPersistenceService.java:627) at org.apache.geode.management.internal.configuration.functions.GetClusterConfigurationFunction.execute(GetClusterConfigurationFunction.java:53) at 
org.apache.geode.internal.cache.MemberFunctionStreamingMessage.process(MemberFunctionStreamingMessage.java:193) at org.apache.geode.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:367) at org.apache.geode.distributed.internal.DistributionMessage$1.run(DistributionMessage.java:433) at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.1/ThreadPoolExecutor.java:1128) at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.1/ThreadPoolExecutor.java:628) at org.apache.geode.distributed.internal.ClusterDistributionManager.runUntilShutdown(ClusterDistributionManager.java:956) at org.apache.geode.distributed.internal.ClusterDistributionManager.doFunctionExecutionThread(ClusterDistributionManager.java:810) at org.apache.geode.distributed.internal.ClusterDistributionManager$$Lambda$189/0x00000008402e2040.invoke(Unknown Source) at org.apache.geode.internal.logging.LoggingThreadFactory.lambda$newThread$0(LoggingThreadFactory.java:121) at org.apache.geode.internal.logging.LoggingThreadFactory$$Lambda$179/0x00000008402a9840.run(Unknown Source) at java.lang.Thread.run(java.base@11.0.1/Thread.java:834) ----3 instances of this stack at jdk.internal.misc.Unsafe.park(java.base@11.0.1/Native Method) at java.util.concurrent.locks.LockSupport.parkNanos(java.base@11.0.1/LockSupport.java:234) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1079) at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1369) at java.util.concurrent.CountDownLatch.await(java.base@11.0.1/CountDownLatch.java:278) at org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:61) at org.apache.geode.distributed.internal.ReplyProcessor21.basicWait(ReplyProcessor21.java:714) at 
org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:785) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:762) at org.apache.geode.internal.cache.execute.FunctionStreamingResultCollector.getResult(FunctionStreamingResultCollector.java:142) at org.apache.geode.internal.cache.ClusterConfigurationLoader.requestConfigurationFromOneLocator(ClusterConfigurationLoader.java:313) at org.apache.geode.internal.cache.ClusterConfigurationLoader.requestConfigurationFromLocators(ClusterConfigurationLoader.java:282) at org.apache.geode.internal.cache.GemFireCacheImpl.requestSharedConfiguration(GemFireCacheImpl.java:1074) at org.apache.geode.internal.cache.GemFireCacheImpl.<init>(GemFireCacheImpl.java:859) at org.apache.geode.internal.cache.GemFireCacheImpl.basicCreate(GemFireCacheImpl.java:796) at org.apache.geode.internal.cache.GemFireCacheImpl.create(GemFireCacheImpl.java:775) at org.apache.geode.distributed.internal.InternalDistributedSystem.reconnect(InternalDistributedSystem.java:2691) at org.apache.geode.distributed.internal.InternalDistributedSystem.tryReconnect(InternalDistributedSystem.java:2456) at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1214) at org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.membershipFailure(ClusterDistributionManager.java:3420) at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.uncleanShutdown(GMSMembershipManager.java:1552) at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.lambda$forceDisconnect$3(GMSMembershipManager.java:2570) at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$$Lambda$277/0x000000084049f440.run(Unknown Source) at java.lang.Thread.run(java.base@11.0.1/Thread.java:834) ----3 instances of this stack at 
jdk.internal.misc.Unsafe.park(java.base@11.0.1/Native Method) at java.util.concurrent.locks.LockSupport.parkNanos(java.base@11.0.1/LockSupport.java:234) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1079) at java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(java.base@11.0.1/AbstractQueuedSynchronizer.java:1369) at java.util.concurrent.CountDownLatch.await(java.base@11.0.1/CountDownLatch.java:278) at org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:61) at org.apache.geode.distributed.internal.ReplyProcessor21.basicWait(ReplyProcessor21.java:714) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:785) at org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:762) at org.apache.geode.internal.cache.execute.FunctionStreamingResultCollector.getResult(FunctionStreamingResultCollector.java:142) at org.apache.geode.internal.cache.ClusterConfigurationLoader.requestConfigurationFromOneLocator(ClusterConfigurationLoader.java:313) at org.apache.geode.internal.cache.ClusterConfigurationLoader.requestConfigurationFromLocators(ClusterConfigurationLoader.java:282) at org.apache.geode.internal.cache.GemFireCacheImpl.requestSharedConfiguration(GemFireCacheImpl.java:1074) at org.apache.geode.internal.cache.GemFireCacheImpl.<init>(GemFireCacheImpl.java:859) at org.apache.geode.internal.cache.GemFireCacheImpl.basicCreate(GemFireCacheImpl.java:796) at org.apache.geode.internal.cache.GemFireCacheImpl.create(GemFireCacheImpl.java:785) at org.apache.geode.cache.CacheFactory.create(CacheFactory.java:176) at org.apache.geode.cache.CacheFactory.create(CacheFactory.java:223) at org.apache.geode.test.junit.rules.ServerStarterRule.startServer(ServerStarterRule.java:174) at 
org.apache.geode.test.junit.rules.ServerStarterRule.before(ServerStarterRule.java:80) at org.apache.geode.test.dunit.rules.ClusterStartupRule.lambda$startServerVM$729766c4$1(ClusterStartupRule.java:248) at org.apache.geode.test.dunit.rules.ClusterStartupRule$$Lambda$131/0x00000008401c0840.call(Unknown Source) at jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(java.base@11.0.1/Native Method) at jdk.internal.reflect.NativeMethodAccessorImpl.invoke(java.base@11.0.1/NativeMethodAccessorImpl.java:62) at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(java.base@11.0.1/DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(java.base@11.0.1/Method.java:566) at org.apache.geode.test.dunit.internal.MethodInvoker.executeObject(MethodInvoker.java:123) at org.apache.geode.test.dunit.internal.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:69) at jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(java.base@11.0.1/Native Method) at jdk.internal.reflect.NativeMethodAccessorImpl.invoke(java.base@11.0.1/NativeMethodAccessorImpl.java:62) at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(java.base@11.0.1/DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(java.base@11.0.1/Method.java:566) at sun.rmi.server.UnicastServerRef.dispatch(java.rmi@11.0.1/UnicastServerRef.java:359) at sun.rmi.transport.Transport$1.run(java.rmi@11.0.1/Transport.java:200) at sun.rmi.transport.Transport$1.run(java.rmi@11.0.1/Transport.java:197) at java.security.AccessController.doPrivileged(java.base@11.0.1/Native Method) at sun.rmi.transport.Transport.serviceCall(java.rmi@11.0.1/Transport.java:196) at sun.rmi.transport.tcp.TCPTransport.handleMessages(java.rmi@11.0.1/TCPTransport.java:562) at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(java.rmi@11.0.1/TCPTransport.java:796) at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(java.rmi@11.0.1/TCPTransport.java:677) at 
sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$$Lambda$126/0x000000084019f440.run(java.rmi@11.0.1/Unknown Source) at java.security.AccessController.doPrivileged(java.base@11.0.1/Native Method) at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(java.rmi@11.0.1/TCPTransport.java:676) at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.1/ThreadPoolExecutor.java:1128) at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.1/ThreadPoolExecutor.java:628) at java.lang.Thread.run(java.base@11.0.1/Thread.java:834) {noformat} > ClusterConfigLocatorRestartDUnitTest hung in CI > ----------------------------------------------- > > Key: GEODE-5676 > URL: https://issues.apache.org/jira/browse/GEODE-5676 > Project: Geode > Issue Type: Bug > Components: benchmarks > Reporter: Dan Smith > Assignee: Dan Smith > Priority: Major > Labels: pull-request-available, swat > Attachments: callstacks.txt > > Time Spent: 20m > Remaining Estimate: 0h > > This test hung in a couple of runs of DistributedTest > > https://concourse.apachegeode-ci.info/teams/staging/pipelines/concourse-staging/jobs/DistributedTest/builds//430 > > https://concourse.apachegeode-ci.info/teams/staging/pipelines/concourse-staging/jobs/DistributedTest/builds//370 > {noformat} > Started @ 2018-08-30 04:23:46.599 +0000 > 2018-08-30 04:48:33.135 +0000 > org.apache.geode.management.internal.configuration.ClusterConfigLocatorRestartDUnitTest > serverRestartsAfterLocatorReconnects > Ended @ 2018-08-30 05:21:34.897 +0000 > {noformat} > It seems to be stuck in tear down > {noformat} > "ReconnectThread" #416 prio=5 os_prio=0 tid=0x00007fa86cad2000 nid=0xd07 in > Object.wait() [0x00007fa744ecd000] > java.lang.Thread.State: TIMED_WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > at > org.apache.geode.distributed.internal.InternalDistributedSystem.reconnect(InternalDistributedSystem.java:2697) > at > 
org.apache.geode.distributed.internal.InternalDistributedSystem.tryReconnect(InternalDistributedSystem.java:2558) > - locked <0x00000000e00bedc8> (a java.lang.Object) > - locked <0x00000000e07af498> (a java.lang.Class for > org.apache.geode.internal.cache.GemFireCacheImpl) > - locked <0x00000000e00bedd8> (a java.lang.Class for > org.apache.geode.cache.CacheFactory) > at > org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1041) > at > org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.membershipFailure(ClusterDistributionManager.java:3987) > at > org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.uncleanShutdown(GMSMembershipManager.java:1552) > at > org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.lambda$forceDisconnect$1(GMSMembershipManager.java:2564) > at > org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$$Lambda$81/1816825082.run(Unknown > Source) > at java.lang.Thread.run(Thread.java:748) > Locked ownable synchronizers: > - None > "RMI TCP Connection(8)-172.17.0.13" #32 daemon prio=5 os_prio=0 > tid=0x00007fa874001800 nid=0x2ff waiting for monitor entry > [0x00007fa8f0d15000] > java.lang.Thread.State: BLOCKED (on object monitor) > at > org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1367) > - waiting to lock <0x00000000e07af498> (a java.lang.Class for > org.apache.geode.internal.cache.GemFireCacheImpl) > at > org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1022) > at > org.apache.geode.test.junit.rules.MemberStarterRule.disconnectDSIfAny(MemberStarterRule.java:182) > at > org.apache.geode.test.junit.rules.MemberStarterRule.after(MemberStarterRule.java:129) > at > org.apache.geode.test.dunit.rules.ClusterStartupRule.stopElementInsideVM(ClusterStartupRule.java:385) > at > 
org.apache.geode.test.junit.rules.VMProvider.lambda$stop$fe0d42dc$1(VMProvider.java:42) > at > org.apache.geode.test.junit.rules.VMProvider$$Lambda$77/1844235204.run(Unknown > Source) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at hydra.MethExecutor.executeObject(MethExecutor.java:244) > at > org.apache.geode.test.dunit.standalone.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:70) > at sun.reflect.GeneratedMethodAccessor116.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:357) > at sun.rmi.transport.Transport$1.run(Transport.java:200) > at sun.rmi.transport.Transport$1.run(Transport.java:197) > at java.security.AccessController.doPrivileged(Native Method) > at sun.rmi.transport.Transport.serviceCall(Transport.java:196) > at > sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:573) > at > sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:834) > at > sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:688) > at > sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$$Lambda$7/137422085.run(Unknown > Source) > at java.security.AccessController.doPrivileged(Native Method) > at > sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:687) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Locked ownable synchronizers: > - <0x00000000e0639ed0> (a > 
java.util.concurrent.ThreadPoolExecutor$Worker) > {noformat} -- This message was sent by Atlassian JIRA (v7.6.3#76005)