Author: burn Date: Mon Oct 12 17:57:38 2015 New Revision: 1708181 URL: http://svn.apache.org/viewvc?rev=1708181&view=rev Log: Merge 4578 Agent should kill children
Modified: uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java (contents, props changed) uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java Modified: uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java?rev=1708181&r1=1708180&r2=1708181&view=diff ============================================================================== --- uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java (original) +++ uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java Mon Oct 12 17:57:38 2015 @@ -90,7 +90,9 @@ public class NodeAgent extends AbstractD public static DuccLogger logger = DuccLogger.getLogger(NodeAgent.class, COMPONENT_NAME); public static final String ProcessStateUpdatePort = "ducc.agent.process.state.update.port"; - + public static int SIGKILL=9; + public static int SIGTERM=15; + // Map of known processes this agent is managing. This map is published // at regular intervals as part of agent's inventory update. private Map<DuccId, IDuccProcess> inventory = new HashMap<DuccId, IDuccProcess>(); @@ -272,7 +274,13 @@ public class NodeAgent extends AbstractD if (cgroupsSubsystems == null) { cgroupsSubsystems = "memory,cpu"; } - cgroupsManager = new CGroupsManager(cgUtilsPath, cgroupsBaseDir, cgroupsSubsystems, logger); + long maxTimeToWaitForProcessToStop = 60000; // default 1 minute + if (configurationFactory.processStopTimeout != null) { + maxTimeToWaitForProcessToStop = Long + .valueOf(configurationFactory.processStopTimeout); + } + + cgroupsManager = new CGroupsManager(cgUtilsPath, cgroupsBaseDir, cgroupsSubsystems, logger, maxTimeToWaitForProcessToStop); // check if cgroups base directory exists in the filesystem // which means that cgroups // and cgroups convenience package are installed and the @@ -293,7 +301,7 @@ public class NodeAgent extends AbstractD useCgroups = true; try { // remove dummy container - cgroupsManager.destroyContainer("test"); + cgroupsManager.destroyContainer("test","duck", SIGKILL); } catch( Exception eee ) {} logger.info("nodeAgent", null, "------- Agent Running with CGroups Enabled"); } else { Modified: uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java?rev=1708181&r1=1708180&r2=1708181&view=diff ============================================================================== --- uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java (original) +++ uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java Mon Oct 12 17:57:38 2015 @@ -31,6 +31,7 @@ import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.uima.ducc.agent.NodeAgent; import org.apache.uima.ducc.agent.launcher.ManagedProcess; import org.apache.uima.ducc.common.utils.DuccLogger; import org.apache.uima.ducc.common.utils.Utils; @@ -52,7 +53,7 @@ public class CGroupsManager { private String cgroupUtilsDir=null; private String cgroupSubsystems = ""; // comma separated list of subsystems // eg. memory,cpu - + private long maxTimeToWaitForProcessToStop; /** * @param args */ @@ -60,7 +61,7 @@ public class CGroupsManager { try { CGroupsManager cgMgr = new CGroupsManager("/usr/bin","/cgroup/ducc", "memory", - null); + null, 10000); System.out.println("Cgroups Installed:" + cgMgr.cgroupExists("/cgroup/ducc")); Set<String> containers = cgMgr.collectExistingContainers(); @@ -74,7 +75,7 @@ public class CGroupsManager { synchronized (cgMgr) { cgMgr.wait(60000); } - cgMgr.destroyContainer(args[0]); + cgMgr.destroyContainer(args[0], args[2], NodeAgent.SIGKILL); } catch (Exception e) { e.printStackTrace(); @@ -84,11 +85,12 @@ public class CGroupsManager { return cgroupUtilsDir; } public CGroupsManager(String cgroupUtilsDir, String cgroupBaseDir, String cgroupSubsystems, - DuccLogger agentLogger) { + DuccLogger agentLogger, long maxTimeToWaitForProcessToStop) { this.cgroupUtilsDir = cgroupUtilsDir; this.cgroupBaseDir = cgroupBaseDir; this.cgroupSubsystems = cgroupSubsystems; this.agentLogger = agentLogger; + this.maxTimeToWaitForProcessToStop = maxTimeToWaitForProcessToStop; } public String[] getPidsInCgroup(String cgroupName) throws Exception { File f = new File(cgroupBaseDir + "/" + cgroupName + "/cgroup.procs"); @@ -150,7 +152,7 @@ public class CGroupsManager { zombieCount++; } else if (proc.getPid().equals(pid)) { // kill process hard via -9 - kill( proc.getUserid(), proc.getPid()); + kill( proc.getUserid(), proc.getPid(), NodeAgent.SIGKILL); } } } @@ -185,7 +187,7 @@ public class CGroupsManager { // Don't remove CGroups if there are zombie processes there. Otherwise, attempt // to remove the CGroup may hang a thread. if ( zombieCount == 0 ) { // no zombies in the container - destroyContainer(cgroupFolder); + destroyContainer(cgroupFolder, "ducc", NodeAgent.SIGTERM); agentLogger.info("cleanupOnStartup", null, "--- Agent Removed Empty CGroup:" + cgroupFolder); } else { @@ -250,7 +252,7 @@ public class CGroupsManager { return cgroupPids.toArray(pids); } - public void kill(final String user, final String pid) { + public void kill(final String user, final String pid, final int signal) { final String methodName = "kill"; InputStream is = null; BufferedReader reader = null; @@ -271,7 +273,7 @@ public class CGroupsManager { useDuccling = true; } cmdLine = "/bin/kill"; - arg = "-9"; + arg = "-"+signal; } String[] duccling_nolog; if (useDuccling) { @@ -294,6 +296,7 @@ public class CGroupsManager { // dont care about the output, just drain the buffers } is.close(); + killedProcess.waitFor(); StringBuffer sb = new StringBuffer(); for (String part : duccling_nolog) { sb.append(part).append(" "); @@ -471,6 +474,24 @@ public class CGroupsManager { return false; } } + private int killChildProcesses(String containerId, String userId, int signal) throws Exception { + int childCount=0; + String[] pids = getPidsInCgroup(containerId); + if ( pids != null ) { + if ( pids.length > 0 ) { + childCount = pids.length; + agentLogger.info("killChildProcesses", null,"Found "+pids.length+" child processes still in container:"+containerId+" - killing all"); + } + for( String pid : pids ) { + try { + kill(userId, pid, signal); + } catch(Exception ee) { + agentLogger.warn("killChildProcesses", null, "Unable to kill child process with PID:"+pid+" from cgroup:"+containerId+"\n"+ee); + } + } + } + return childCount; + } /** * Removes cgroup container with a given id. Cgroups are implemented as a * virtual file system. All is needed here is just rmdir. @@ -481,9 +502,30 @@ public class CGroupsManager { * * @throws Exception */ - public boolean destroyContainer(String containerId) throws Exception { + public boolean destroyContainer(String containerId, String userId, int signal) throws Exception { try { if (cgroupExists(cgroupBaseDir + "/" + containerId)) { + if ( signal == NodeAgent.SIGTERM ) { + agentLogger.info("destroyContainer", null, "Destroying Container "+containerId+" Using signal:"+signal +" to kill child processes if any still exist in cgroups container"); + + // before removing cgroup container, make sure to kill + // all processes that still may be there. User process + // may have created child processes that may still be running. + // First use kill -15, than wait and any process still standing + // will be killed hard via kill -9 + int childProcessCount = + killChildProcesses(containerId, userId, NodeAgent.SIGTERM); + if ( childProcessCount > 0 ) { + agentLogger.info("destroyContainer", null, "Killed "+childProcessCount+"Child Processes with kill -15"); + try { + this.wait(maxTimeToWaitForProcessToStop); + } catch( InterruptedException ie) { + } + } + } + // Any process remaining in a cgroup will be killed hard + killChildProcesses(containerId, userId, NodeAgent.SIGKILL); + String[] command = new String[] { "/bin/rmdir", cgroupBaseDir + "/" + containerId }; int retCode = launchCommand(command, false, "ducc", containerId); Propchange: uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java ------------------------------------------------------------------------------ --- svn:mergeinfo (added) +++ svn:mergeinfo Mon Oct 12 17:57:38 2015 @@ -0,0 +1 @@ +/uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java:1698001-1698002,1698354,1698402,1700636,1700804-1700808,1700877,1701002,1702050,1702094,1702097,1702104,1702261-1702262,1704376,1704380,1704423,1706074,1706294,1706485,1706832,1707056,1707156 Modified: uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java?rev=1708181&r1=1708180&r2=1708181&view=diff ============================================================================== --- uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java (original) +++ uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java Mon Oct 12 17:57:38 2015 @@ -317,7 +317,7 @@ public class DuccCommandExecutor extends // if the process is marked for death or still initializing or it is // JD, kill it if (((ManagedProcess) managedProcess).doKill() - || ((ManagedProcess) managedProcess).getDuccProcess() + || ((ManagedProcess) managedProcess).getDuccProcess() .getProcessType().equals(ProcessType.Service) || ((ManagedProcess) managedProcess).getDuccProcess() .getProcessType().equals(ProcessType.Pop) @@ -681,7 +681,14 @@ public class DuccCommandExecutor extends // destroyed as well. if (agent.useCgroups) { String containerId = getContainerId(); - agent.cgroupsManager.destroyContainer(containerId); + String userId = ((ManagedProcess) super.managedProcess) + .getOwner(); + // before destroying the container the code checks if there + // are processes still running in it. This could be true if + // user code launched child processes. If there are child + // processes still running, the code kills each one at a + // time and at the end the container is removed. + agent.cgroupsManager.destroyContainer(containerId, userId, NodeAgent.SIGTERM); logger.info(methodName, null, "Removed CGroup Container with ID:" + containerId); }