Author: cwiklik Date: Fri Oct 9 17:32:49 2015 New Revision: 1707776 URL: http://svn.apache.org/viewvc?rev=1707776&view=rev Log: UIMA-4637 Added a check on agent startup to validate CGroups CPU control.
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxNodeMetricsProcessor.java Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java?rev=1707776&r1=1707775&r2=1707776&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java Fri Oct 9 17:32:49 2015 @@ -37,6 +37,7 @@ import java.util.Set; import java.util.TreeMap; import java.util.concurrent.Future; import java.util.concurrent.Semaphore; +import java.util.concurrent.atomic.AtomicLong; import org.apache.camel.CamelContext; import org.apache.camel.Processor; @@ -93,6 +94,9 @@ public class NodeAgent extends AbstractD public static int SIGKILL=9; public static int SIGTERM=15; + //for LinuxNodeMetrics logging + public static AtomicLong logCounter = new AtomicLong(); + public static String cgroupFailureReason; // Map of known processes this agent is managing. This map is published // at regular intervals as part of agent's inventory update. private Map<DuccId, IDuccProcess> inventory = new HashMap<DuccId, IDuccProcess>(); @@ -234,6 +238,8 @@ public class NodeAgent extends AbstractD logger.info("nodeAgent", null, "------- Node Explicitly Excluded From Using CGroups. Check File:" + exclusionFile); + cgroupFailureReason = "------- Node Explicitly Excluded From Using CGroups. 
Check File:" + + exclusionFile; } System.out.println("excludeNodeFromCGroups=" + excludeNodeFromCGroups + " excludeAPs=" + excludeAPs); @@ -295,32 +301,38 @@ public class NodeAgent extends AbstractD } try { - // Test cgroups by creating a dummy container - if ( cgroupsManager.createContainer("test", "duck", false) ) { - if (cgroupsManager.cgroupExists(cgroupsBaseDir + "/" + "test")) { - useCgroups = true; - try { - // remove dummy container - cgroupsManager.destroyContainer("test","duck", SIGKILL); - } catch( Exception eee ) {} - logger.info("nodeAgent", null, "------- Agent Running with CGroups Enabled"); - } else { - useCgroups = false; - logger.warn("nodeAgent", null, "------- CGroups cgcreate failed to create a cgroup - disabling cgroups"); - } - } - } catch( Exception ee) { + String containerId = "test"; + String uid = "ducc"; + // validate cgroups by creating a dummy cgroup. The code checks if cgroup actually got created by + // verifying existence of test cgroup file. The second step in verification is to check if + // CPU control is working. Configured in cgconfig.conf, the CPU control allows for setting + // cpu.shares. The code will attempt to set the shares and subsequently tries to read the + // value from cpu.shares file to make sure the values match. Any exception in the above steps + // will cause cgroups to be disabled. 
+ // + cgroupsManager.validator(cgroupsBaseDir, containerId, uid,false) + .cgcreate() + .cgset(100); // write cpu.shares=100 and validate + + // cleanup dummy cgroup + cgroupsManager.destroyContainer(containerId, uid, SIGKILL); + useCgroups = true; + } catch( CGroupsManager.CGroupsException ee) { + logger.info("nodeAgent", null, ee); + cgroupFailureReason = ee.getMessage(); useCgroups = false; } } else { logger.info("nodeAgent", null, "------- CGroups Not Installed on this Machine"); + cgroupFailureReason = "------- CGroups Not Installed on this Machine"; } } } } } else { logger.info("nodeAgent", null, "------- CGroups Not Enabled on this Machine"); + cgroupFailureReason = "------- CGroups Not Enabled on this Machine - check ducc.properties: ducc.agent.launcher.cgroups.enable "; } logger.info("nodeAgent", null, "CGroup Support=" + useCgroups + " excludeNodeFromCGroups=" + excludeNodeFromCGroups + " excludeAPs=" + excludeAPs+" CGroups utils Dir:"+cgUtilsPath); Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java?rev=1707776&r1=1707775&r2=1707776&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java Fri Oct 9 17:32:49 2015 @@ -20,6 +20,7 @@ package org.apache.uima.ducc.agent.launc import java.io.BufferedReader; import java.io.File; +import java.io.FileNotFoundException; import java.io.FileReader; import java.io.InputStream; import java.io.InputStreamReader; @@ -47,7 +48,18 @@ import org.apache.uima.ducc.transport.ev */ public class CGroupsManager { private DuccLogger 
agentLogger = null; - + enum CGroupCommand { + CGSET("cgset"), + CGCREATE("cgcreate"); + + String cmd; + CGroupCommand(String cmd ) { + this.cmd = cmd; + } + public String cmd() { + return cmd; + } + }; private Set<String> containerIds = new LinkedHashSet<String>(); private String cgroupBaseDir = ""; private String cgroupUtilsDir=null; @@ -92,6 +104,9 @@ public class CGroupsManager { this.agentLogger = agentLogger; this.maxTimeToWaitForProcessToStop = maxTimeToWaitForProcessToStop; } + public Validator validator( String cgroupsBaseDir,String containerId, String uid, boolean useDuccling) { + return new Validator(this, cgroupsBaseDir, containerId, uid, useDuccling); + } public String[] getPidsInCgroup(String cgroupName) throws Exception { File f = new File(cgroupBaseDir + "/" + cgroupName + "/cgroup.procs"); // collect all pids @@ -463,7 +478,7 @@ public class CGroupsManager { return true; } else { agentLogger.info("setContainerCpuShares", null, ">>>>" - + "FAILURE - Unable To Create CGroup Container:" + + "FAILURE - Unable To Set CPU shares on CGroup Container:" + containerId); return false; } @@ -744,4 +759,106 @@ public class CGroupsManager { } } + public class CGroupsException extends RuntimeException { + private static final long serialVersionUID = 1L; + private String command; + private String msg; + + public CGroupsException() { + } + public CGroupsException(Exception e) { + super(e); + } + public CGroupsException addCommand(String command) { + this.command = command; + return this; + } + public CGroupsException addMessage(String msg) { + this.msg = msg; + return this; + } + public String getCommand() { + return command; + } + public String getMessage() { + return msg; + } + + } + public class Validator { + private CGroupsManager cgmgr=null; + String containerId; + String uid; + boolean useDuccling; + String cgroupsBaseDir; + + + + Validator(CGroupsManager instance, String cgroupsBaseDir,String containerId, String uid, boolean useDuccling) { + cgmgr = 
instance; + this.containerId = containerId; + this.uid = uid; + this.useDuccling = useDuccling; + this.cgroupsBaseDir = cgroupsBaseDir; + } + public Validator cgcreate() throws CGroupsException { + String msg1 = "------- CGroups cgcreate failed to create a cgroup - disabling cgroups"; + String msg2 = "------- CGroups cgcreate failed to validate a cgroup - disabling cgroups"; + String msg3 = "------- CGroups cgcreate failed - disabling cgroups"; + try { + if ( !cgmgr.createContainer(containerId, uid, useDuccling) ) { + throw new CGroupsException().addCommand(CGroupCommand.CGCREATE.cmd()) + .addMessage(msg1); + } + if (!cgmgr.cgroupExists(cgroupsBaseDir + "/" + containerId)) { + throw new CGroupsException().addCommand(CGroupCommand.CGCREATE.cmd()) + .addMessage(msg2); + } + } catch( Exception e) { + throw new CGroupsException(e).addCommand(CGroupCommand.CGCREATE.cmd()) + .addMessage(msg3); + } + return this; + } + public Validator cgset( long cpuShares) throws CGroupsException { + String msg1 = "------- Check cgconfig.conf CPU control. The cgset failed to set cpu.shares"; + String msg2 = "------- Check cgconfig.conf CPU control. The cgset failed to find cpu.shares file"; + String msg3 = "------- Check cgconfig.conf CPU control. The cgset failed to write to cpu.shares file. Expected 100 shares found "; + + BufferedReader reader = null; + String shares = ""; + try { + if (!cgmgr.setContainerCpuShares(containerId, uid, useDuccling, cpuShares) ) { + throw new CGroupsException().addCommand(CGroupCommand.CGSET.cmd()) + .addMessage(msg1); + } + // now try to read created file + File f = new File(cgroupsBaseDir + "/" + "test/cpu.shares"); + reader = new BufferedReader(new FileReader(f)); + // read 1st line. 
It should be equal to cpuShares + shares = reader.readLine().trim(); + System.out.println("----- Cgroup cgset verifier - cpu.shares read from file:"+shares); + if ( !String.valueOf(cpuShares).equals(shares)) { + throw new CGroupsException().addCommand(CGroupCommand.CGSET.cmd()) + .addMessage(msg3+shares); + } + } catch( FileNotFoundException e ) { + //e.printStackTrace(); + throw new CGroupsException(e).addCommand(CGroupCommand.CGSET.cmd()) + .addMessage(msg2); + } catch(Exception e) { + //e.printStackTrace(); + throw new CGroupsException(e).addCommand(CGroupCommand.CGSET.cmd()) + .addMessage(msg3+shares); + + } finally { + if ( reader != null ) { + try { + reader.close(); + } catch( Exception ee) {} + } + } + return this; + } + } } Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxNodeMetricsProcessor.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxNodeMetricsProcessor.java?rev=1707776&r1=1707775&r2=1707776&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxNodeMetricsProcessor.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxNodeMetricsProcessor.java Fri Oct 9 17:32:49 2015 @@ -24,6 +24,7 @@ import java.util.TreeMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicLong; import org.apache.camel.Exchange; import org.apache.uima.ducc.agent.Agent; @@ -46,7 +47,7 @@ public class LinuxNodeMetricsProcessor e NodeMetricsProcessor { DuccLogger logger = DuccLogger.getLogger(this.getClass(), Agent.COMPONENT_NAME); public static String[] MeminfoTargetFields = new String[] 
{"MemTotal:","MemFree:","SwapTotal:","SwapFree:"}; - + private NodeAgent agent; private final ExecutorService pool; private RandomAccessFile memInfoFile; @@ -94,7 +95,16 @@ public class LinuxNodeMetricsProcessor e public void process(Exchange e) { String methodName = "process"; try { - + // every 10th node metrics publication log the status of CGroups + if ( ( NodeAgent.logCounter.incrementAndGet() % 10 ) == 0 ) { + if ( agent.useCgroups ) { + logger.info(methodName, null, "\t****\n\t**** Agent CGroups status: enabled"); + + } else { + logger.info(methodName, null, "\t****\n\t**** Agent CGroups status: disabled. Reason:"+NodeAgent.cgroupFailureReason); + + } + } NodeMemInfoCollector memCollector = new NodeMemInfoCollector(MeminfoTargetFields); Future<NodeMemory> nmiFuture = pool.submit(memCollector);