[ https://issues.apache.org/jira/browse/GEODE-6950?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17157007#comment-17157007 ]
Eugene Nedzvetsky edited comment on GEODE-6950 at 7/13/20, 10:06 PM: --------------------------------------------------------------------- org.apache.geode.distributed.internal.PrimaryHandler:85 Current version: {code:java} @Override public Object processRequest(Object request) throws IOException { long giveup = 0; while (giveup == 0 || System.currentTimeMillis() < giveup) { TcpHandler handler; if (request instanceof PeerLocatorRequest) { handler = handlerMapping.get(PeerLocatorRequest.class); } else { handler = handlerMapping.get(request.getClass()); } if (handler != null) { return handler.processRequest(request); } if (locatorListener != null) { return locatorListener.handleRequest(request); } // either there is a configuration problem or the locator is still starting up if (giveup == 0) { int locatorWaitTime = internalLocator.getConfig().getLocatorWaitTime(); if (locatorWaitTime <= 0) { // always retry some number of times locatorWaitTime = 30; } giveup = System.currentTimeMillis() + locatorWaitTime * 1000L; try { Thread.sleep(1000); } catch (InterruptedException ignored) { // running in an executor - no need to set the interrupted flag on the thread return null; } } } logger.info( "Received a location request of class {} but the handler for this is either not enabled or is not ready to process requests", request.getClass().getSimpleName()); return null; } {code} Fix: {code} @Override public Object processRequest(Object request) throws IOException { long giveup = 0; while (giveup == 0 || System.currentTimeMillis() < giveup) { TcpHandler handler; if (request instanceof PeerLocatorRequest) { handler = handlerMapping.get(PeerLocatorRequest.class); } else { handler = handlerMapping.get(request.getClass()); } if (handler != null) { return handler.processRequest(request); } if (locatorListener != null) { return locatorListener.handleRequest(request); } // either there is a configuration problem or the locator is still starting up if (giveup == 0) { int locatorWaitTime 
= internalLocator.getConfig().getLocatorWaitTime(); if (locatorWaitTime <= 0) { // always retry some number of times locatorWaitTime = 30; } giveup = System.currentTimeMillis() + locatorWaitTime * 1000L; } try { Thread.sleep(1000); } catch (InterruptedException ignored) { // running in an executor - no need to set the interrupted flag on the thread return null; } } logger.info( "Received a location request of class {} but the handler for this is either not enabled or is not ready to process requests", request.getClass().getSimpleName()); return null; } {code} was (Author: eugenex9): Current version: {code:java} @Override public Object processRequest(Object request) throws IOException { long giveup = 0; while (giveup == 0 || System.currentTimeMillis() < giveup) { TcpHandler handler; if (request instanceof PeerLocatorRequest) { handler = handlerMapping.get(PeerLocatorRequest.class); } else { handler = handlerMapping.get(request.getClass()); } if (handler != null) { return handler.processRequest(request); } if (locatorListener != null) { return locatorListener.handleRequest(request); } // either there is a configuration problem or the locator is still starting up if (giveup == 0) { int locatorWaitTime = internalLocator.getConfig().getLocatorWaitTime(); if (locatorWaitTime <= 0) { // always retry some number of times locatorWaitTime = 30; } giveup = System.currentTimeMillis() + locatorWaitTime * 1000L; try { Thread.sleep(1000); } catch (InterruptedException ignored) { // running in an executor - no need to set the interrupted flag on the thread return null; } } } logger.info( "Received a location request of class {} but the handler for this is either not enabled or is not ready to process requests", request.getClass().getSimpleName()); return null; } {code} Fix: {code} @Override public Object processRequest(Object request) throws IOException { long giveup = 0; while (giveup == 0 || System.currentTimeMillis() < giveup) { TcpHandler handler; if (request instanceof 
PeerLocatorRequest) { handler = handlerMapping.get(PeerLocatorRequest.class); } else { handler = handlerMapping.get(request.getClass()); } if (handler != null) { return handler.processRequest(request); } if (locatorListener != null) { return locatorListener.handleRequest(request); } // either there is a configuration problem or the locator is still starting up if (giveup == 0) { int locatorWaitTime = internalLocator.getConfig().getLocatorWaitTime(); if (locatorWaitTime <= 0) { // always retry some number of times locatorWaitTime = 30; } giveup = System.currentTimeMillis() + locatorWaitTime * 1000L; } try { Thread.sleep(1000); } catch (InterruptedException ignored) { // running in an executor - no need to set the interrupted flag on the thread return null; } } logger.info( "Received a location request of class {} but the handler for this is either not enabled or is not ready to process requests", request.getClass().getSimpleName()); return null; } {code} > Locator can't start if a lot of clients already started > ------------------------------------------------------- > > Key: GEODE-6950 > URL: https://issues.apache.org/jira/browse/GEODE-6950 > Project: Geode > Issue Type: Bug > Components: core > Affects Versions: 1.7.0, 1.8.0, 1.9.0, 1.10.0, 1.11.0, 1.12.0 > Reporter: Eugene Nedzvetsky > Priority: Major > Attachments: 1.log > > > Locator can't start if a few hundred clients already started. > Steps to reproduce: > 1. Start Locator > 2. Start 300 Geode clients > 3. Stop Locator > 4. Start Locator again > Observe 100% CPU load and after some time Locator app crashes with timeout > exceptions in the log. 
> The problem is in the method > org.apache.geode.distributed.internal.InternalLocator.PrimaryHandler#processRequest. > On Locator startup, handlerMapping does not yet have handlers for LocatorListRequest and > ClientConnectionRequest requests, and in this case execution reaches the > code block guarded by the condition 'if (giveup == 0)' (InternalLocator:1185). > The Thread.sleep(1000) pause runs only on the first iteration; after that > giveup > 0 and the CPU just spends resources on loop iterations without any pauses. > The Thread.sleep(1000) call should be placed after the 'if (giveup == 0)' condition block. It > will then be called on each iteration. > -- This message was sent by Atlassian Jira (v8.3.4#803005)