Author: burn
Date: Mon Oct 12 17:57:38 2015
New Revision: 1708181

URL: http://svn.apache.org/viewvc?rev=1708181&view=rev
Log:
Merge 4578 Agent should kill children

Modified:
    
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java
    
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java
   (contents, props changed)
    
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java

Modified: 
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java?rev=1708181&r1=1708180&r2=1708181&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java
 (original)
+++ 
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java
 Mon Oct 12 17:57:38 2015
@@ -90,7 +90,9 @@ public class NodeAgent extends AbstractD
   public static DuccLogger logger = DuccLogger.getLogger(NodeAgent.class, 
COMPONENT_NAME);
 
   public static final String ProcessStateUpdatePort = 
"ducc.agent.process.state.update.port";
-
+  public static int SIGKILL=9;
+  public static int SIGTERM=15;
+  
   // Map of known processes this agent is managing. This map is published
   // at regular intervals as part of agent's inventory update.
   private Map<DuccId, IDuccProcess> inventory = new HashMap<DuccId, 
IDuccProcess>();
@@ -272,7 +274,13 @@ public class NodeAgent extends AbstractD
                 if (cgroupsSubsystems == null) {
                   cgroupsSubsystems = "memory,cpu";
                 }
-                cgroupsManager = new CGroupsManager(cgUtilsPath, 
cgroupsBaseDir, cgroupsSubsystems, logger);
+                       long maxTimeToWaitForProcessToStop = 60000; // default 
1 minute
+                       if (configurationFactory.processStopTimeout != null) {
+                               maxTimeToWaitForProcessToStop = Long
+                                               
.valueOf(configurationFactory.processStopTimeout);
+                       }
+
+                cgroupsManager = new CGroupsManager(cgUtilsPath, 
cgroupsBaseDir, cgroupsSubsystems, logger, maxTimeToWaitForProcessToStop);
                 // check if cgroups base directory exists in the filesystem
                 // which means that cgroups
                 // and cgroups convenience package are installed and the
@@ -293,7 +301,7 @@ public class NodeAgent extends AbstractD
                                  useCgroups = true;
                               try {
                                  // remove dummy container
-                                 cgroupsManager.destroyContainer("test");
+                                 
cgroupsManager.destroyContainer("test","duck", SIGKILL);
                               } catch( Exception eee ) {}
                               logger.info("nodeAgent", null, "------- Agent 
Running with CGroups Enabled");
                          } else {

Modified: 
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java?rev=1708181&r1=1708180&r2=1708181&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java
 (original)
+++ 
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java
 Mon Oct 12 17:57:38 2015
@@ -31,6 +31,7 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.uima.ducc.agent.NodeAgent;
 import org.apache.uima.ducc.agent.launcher.ManagedProcess;
 import org.apache.uima.ducc.common.utils.DuccLogger;
 import org.apache.uima.ducc.common.utils.Utils;
@@ -52,7 +53,7 @@ public class CGroupsManager {
        private String cgroupUtilsDir=null;
        private String cgroupSubsystems = ""; // comma separated list of 
subsystems
                                                                                
        // eg. memory,cpu
-
+    private long maxTimeToWaitForProcessToStop;
        /**
         * @param args
         */
@@ -60,7 +61,7 @@ public class CGroupsManager {
                try {
 
                        CGroupsManager cgMgr = new 
CGroupsManager("/usr/bin","/cgroup/ducc", "memory",
-                                       null);
+                                       null, 10000);
                        System.out.println("Cgroups Installed:"
                                        + cgMgr.cgroupExists("/cgroup/ducc"));
                        Set<String> containers = 
cgMgr.collectExistingContainers();
@@ -74,7 +75,7 @@ public class CGroupsManager {
                        synchronized (cgMgr) {
                                cgMgr.wait(60000);
                        }
-                       cgMgr.destroyContainer(args[0]);
+                       cgMgr.destroyContainer(args[0], args[2], 
NodeAgent.SIGKILL);
 
                } catch (Exception e) {
                        e.printStackTrace();
@@ -84,11 +85,12 @@ public class CGroupsManager {
                return cgroupUtilsDir;
        }
        public CGroupsManager(String cgroupUtilsDir, String cgroupBaseDir, 
String cgroupSubsystems,
-                       DuccLogger agentLogger) {
+                       DuccLogger agentLogger, long 
maxTimeToWaitForProcessToStop) {
                this.cgroupUtilsDir = cgroupUtilsDir;
                this.cgroupBaseDir = cgroupBaseDir;
                this.cgroupSubsystems = cgroupSubsystems;
                this.agentLogger = agentLogger;
+               this.maxTimeToWaitForProcessToStop = 
maxTimeToWaitForProcessToStop;
        }
        public String[] getPidsInCgroup(String cgroupName) throws Exception {
                File f = new File(cgroupBaseDir + "/" + cgroupName + 
"/cgroup.procs");
@@ -150,7 +152,7 @@ public class CGroupsManager {
                                                                        
zombieCount++;
                                                                } else  if 
(proc.getPid().equals(pid)) {
                                                                        // kill 
process hard via -9
-                                                                       kill( 
proc.getUserid(), proc.getPid());
+                                                                       kill( 
proc.getUserid(), proc.getPid(), NodeAgent.SIGKILL);
                                                                }
                                                        }
                                                }
@@ -185,7 +187,7 @@ public class CGroupsManager {
                                        // Don't remove CGroups if there are 
zombie processes there. Otherwise, attempt
                                        // to remove the CGroup may hang a 
thread.
                                        if ( zombieCount == 0 )  {  // no 
zombies in the container
-                                               destroyContainer(cgroupFolder);
+                                               destroyContainer(cgroupFolder, 
"ducc", NodeAgent.SIGTERM);
                                                
agentLogger.info("cleanupOnStartup", null,
                                                                "--- Agent 
Removed Empty CGroup:" + cgroupFolder);
                                        } else {
@@ -250,7 +252,7 @@ public class CGroupsManager {
            return cgroupPids.toArray(pids);
          }
 
-       public void kill(final String user, final String pid) {
+       public void kill(final String user, final String pid, final int signal) 
{
                final String methodName = "kill";
                InputStream is = null;
                BufferedReader reader = null;
@@ -271,7 +273,7 @@ public class CGroupsManager {
                                        useDuccling = true;
                                }
                                cmdLine = "/bin/kill";
-                               arg = "-9";
+                               arg = "-"+signal;
                        }
                        String[] duccling_nolog;
                        if (useDuccling) {
@@ -294,6 +296,7 @@ public class CGroupsManager {
                                // dont care about the output, just drain the 
buffers
                        }
                        is.close();
+                       killedProcess.waitFor();
                        StringBuffer sb = new StringBuffer();
                        for (String part : duccling_nolog) {
                                sb.append(part).append(" ");
@@ -471,6 +474,24 @@ public class CGroupsManager {
                        return false;
                }
        }
+       private int killChildProcesses(String containerId, String userId, int 
signal) throws Exception {
+               int childCount=0;
+               String[] pids = getPidsInCgroup(containerId);
+               if ( pids != null ) {
+                       if ( pids.length > 0 ) {
+                               childCount = pids.length;
+                               agentLogger.info("killChildProcesses", 
null,"Found "+pids.length+" child processes still in container:"+containerId+" 
- killing all"); 
+                       }
+                       for( String pid : pids ) {
+                               try {
+                                  kill(userId, pid, signal);
+                               } catch(Exception ee) {
+                                       agentLogger.warn("killChildProcesses", 
null, "Unable to kill child process with PID:"+pid+" from 
cgroup:"+containerId+"\n"+ee);
+                               }
+                       }
+               }
+               return childCount;
+       }
        /**
         * Removes cgroup container with a given id. Cgroups are implemented as 
a
         * virtual file system. All is needed here is just rmdir.
@@ -481,9 +502,30 @@ public class CGroupsManager {
         * 
         * @throws Exception
         */
-       public boolean destroyContainer(String containerId) throws Exception {
+       public boolean destroyContainer(String containerId, String userId, int 
signal) throws Exception {
                try {
                        if (cgroupExists(cgroupBaseDir + "/" + containerId)) {
+                               if ( signal == NodeAgent.SIGTERM ) {
+                                       agentLogger.info("destroyContainer", 
null, "Destroying Container "+containerId+" Using signal:"+signal +" to kill 
child processes if any still exist in cgroups container");
+
+                                       // before removing cgroup container, 
make sure to kill 
+                                       // all processes that still may be 
there. User process
+                                       // may have created child processes 
that may still be running.
+                                       // First use kill -15, then wait and 
any process still standing
+                                       // will be killed hard via kill -9
+                                       int childProcessCount = 
+                                                       
killChildProcesses(containerId, userId, NodeAgent.SIGTERM);
+                                       if ( childProcessCount > 0 ) {
+                                               
agentLogger.info("destroyContainer", null, "Killed "+childProcessCount+"Child 
Processes with kill -15");
+                                               try {
+                                                       
this.wait(maxTimeToWaitForProcessToStop);
+                                               } catch( InterruptedException 
ie) {
+                                               }
+                                       }
+                               }
+                               // Any process remaining in a cgroup will be 
killed hard
+                               killChildProcesses(containerId, userId, 
NodeAgent.SIGKILL);
+                               
                                String[] command = new String[] { "/bin/rmdir",
                                                cgroupBaseDir + "/" + 
containerId };
                                int retCode = launchCommand(command, false, 
"ducc", containerId);

Propchange: 
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Mon Oct 12 17:57:38 2015
@@ -0,0 +1 @@
+/uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java:1698001-1698002,1698354,1698402,1700636,1700804-1700808,1700877,1701002,1702050,1702094,1702097,1702104,1702261-1702262,1704376,1704380,1704423,1706074,1706294,1706485,1706832,1707056,1707156

Modified: 
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java?rev=1708181&r1=1708180&r2=1708181&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java
 (original)
+++ 
uima/sandbox/uima-ducc/branches/uima-ducc-2.0.1/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/DuccCommandExecutor.java
 Mon Oct 12 17:57:38 2015
@@ -317,7 +317,7 @@ public class DuccCommandExecutor extends
                        // if the process is marked for death or still 
initializing or it is
                        // JD, kill it
                        if (((ManagedProcess) managedProcess).doKill()
-                                       || ((ManagedProcess) 
managedProcess).getDuccProcess()
+                                                                               
        || ((ManagedProcess) managedProcess).getDuccProcess()
                                                        
.getProcessType().equals(ProcessType.Service)
                                        || ((ManagedProcess) 
managedProcess).getDuccProcess()
                                                        
.getProcessType().equals(ProcessType.Pop)
@@ -681,7 +681,14 @@ public class DuccCommandExecutor extends
                                // destroyed as well.
                                if (agent.useCgroups) {
                                        String containerId = getContainerId();
-                                       
agent.cgroupsManager.destroyContainer(containerId);
+                                       String userId = ((ManagedProcess) 
super.managedProcess)
+                                       .getOwner();
+                                       // before destroying the container the 
code checks if there
+                                       // are processes still running in it. 
This could be true if
+                                       // user code launched child processes. 
If there are child
+                                       // processes still running, the code 
kills them one at a 
+                                       // time and at the end the container is 
removed.
+                                       
agent.cgroupsManager.destroyContainer(containerId, userId, NodeAgent.SIGTERM);
                                        logger.info(methodName, null,
                                                        "Removed CGroup 
Container with ID:" + containerId);
                                }


Reply via email to