Author: challngr Date: Mon Aug 24 19:25:05 2015 New Revision: 1697485 URL: http://svn.apache.org/r1697485 Log: UIMA-4569 RM should report allocations in GB instead of shares.
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/IRmJob.java uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/ISchedulerMain.java uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodepoolScheduler.java uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/ResourceClass.java uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/RmJob.java uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Share.java uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/User.java uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/RmStateDuccEvent.java uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/IRmJobState.java uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/Resource.java uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/RmJobState.java uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/JobFragment.java uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/Markup.java uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/NodeViz.java 
uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/VisualizedHost.java Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java Mon Aug 24 19:25:05 2015 @@ -124,7 +124,6 @@ public class JobManagerConverter { String methodName = "blacklistJob"; - int order = scheduler.calcShareOrder(memory); Map<DuccId, IResource> all_shares = null; Map<DuccId, IResource> shrunken_shares = null; Map<DuccId, IResource> expanded_shares = null; @@ -137,16 +136,18 @@ public class JobManagerConverter // first time - everything must go IDuccProcessMap pm = ((IDuccWorkExecutable)job).getProcessMap(); + int quantum = 0; for ( IDuccProcess proc : pm.values() ) { // build up Shares from the incoming state NodeIdentity ni = proc.getNodeIdentity(); Machine m = scheduler.getMachine(ni); int share_order = 1; + quantum = m.getQuantum(); if ( m != null ) { if ( proc.isActive() || (proc.getProcessState() == ProcessState.Undefined) ) { logger.info(methodName, job.getDuccId(), "blacklist", proc.getDuccId(), "state", proc.getProcessState(), "isActive", proc.isActive(), "isComplete", proc.isComplete()); - m.blacklist(job.getDuccId(), proc.getDuccId(), order); + m.blacklist(job.getDuccId(), proc.getDuccId(), memory); if ( evict ) { share_order = m.getShareOrder(); // best guess Resource r = new Resource(proc.getDuccId(), proc.getNode(), false, share_order, 0); @@ -162,7 +163,7 @@ public class JobManagerConverter } if ( 
evict && (shrunken_shares.size() > 0) ) { - RmJobState rjs = new RmJobState(job.getDuccId(), all_shares, shrunken_shares, expanded_shares); + RmJobState rjs = new RmJobState(job.getDuccId(), quantum, all_shares, shrunken_shares, expanded_shares); rjs.setDuccType(job.getDuccType()); blacklistedResources.put(job.getDuccId(), rjs); // to tell OR } @@ -1157,7 +1158,7 @@ public class JobManagerConverter } } - RmJobState rjs = new RmJobState(j.getId(), all_shares, shrunken_shares, expanded_shares); + RmJobState rjs = new RmJobState(j.getId(), (j.getShareQuantum() >> 20) * (j.getShareOrder()), all_shares, shrunken_shares, expanded_shares); rjs.setDuccType(j.getDuccType()); rjs.setReason(j.getReason()); rmJobState.put(j.getId(), rjs); Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/IRmJob.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/IRmJob.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/IRmJob.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/IRmJob.java Mon Aug 24 19:25:05 2015 @@ -49,6 +49,11 @@ public interface IRmJob public void setJobName(String name); /** + * Asks it's resource class what the share quantum is that it is being scheduled under. + */ + public int getShareQuantum(); + + /** * Save ref to the class we are in, and init class-based structures. 
*/ public void setResourceClass(ResourceClass cl); Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/ISchedulerMain.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/ISchedulerMain.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/ISchedulerMain.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/ISchedulerMain.java Mon Aug 24 19:25:05 2015 @@ -62,7 +62,7 @@ public interface ISchedulerMain IRmJob getJob(DuccId id); Share getShare(DuccId id); Machine getMachine(NodeIdentity id); - int calcShareOrder(long mem); // UIMA-4142 expose to JobManagerConverter + int calcShareOrder(IRmJob j); // UIMA-4142 expose to JobManagerConverter void queryMachines(); Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java Mon Aug 24 19:25:05 2015 @@ -39,7 +39,7 @@ public class Machine private int hbcounter = 0; // heartbeat counter private long memory; // in Kb - private int share_order = 0; + private int share_order = 1; private NodePool nodepool; @@ -94,13 +94,26 @@ public class Machine return node; } + /** + * Return the quantum for this machine - the actual amount allocated in a single share on 
this machine. + */ + public int getQuantum() + { + return (int) memory / share_order; + } + // UIMA-4142 // Black list some number of shres for a specific job and proc. This reduces the number of // schedulable shares until they are whitelisted. - public synchronized void blacklist(DuccId jobid, DuccId procid, int nshares) + //public synchronized void blacklist(DuccId jobid, DuccId procid, int nshares) + public synchronized void blacklist(DuccId jobid, DuccId procid, long jobmem) { String methodName = "blacklist"; - + + int q = getQuantum(); + int nshares = (int) jobmem / q; + if ( jobmem % q > 0 ) nshares++; + if ( ! blacklistedWork.containsKey(procid) ) { // already condemned? if ( nshares == -1 ) nshares = share_order; // whole machine - reservations Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java Mon Aug 24 19:25:05 2015 @@ -1,3 +1,4 @@ + /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -19,6 +20,7 @@ package org.apache.uima.ducc.rm.scheduler; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -42,6 +44,7 @@ class NodePool int depth; int updated = 0; int search_order = 100; + int share_quantum = 0; EvictionPolicy evictionPolicy = EvictionPolicy.SHRINK_BY_MACHINE; @@ -92,7 +95,7 @@ class NodePool // Indexed by available free shares, the specific machines with the indicated free space HashMap<Integer, Map<Node, Machine>> virtualMachinesByOrder = new HashMap<Integer, Map<Node, Machine>>(); // UIMA-4142 - static int maxorder = 0; + GlobalOrder maxorder = null; // NodePool(NodePool parent, String id, EvictionPolicy ep, int order) // { @@ -103,7 +106,7 @@ class NodePool // this.order = order; // } - NodePool(NodePool parent, String id, Map<String, String> nodes, EvictionPolicy ep, int depth, int search_order) + NodePool(NodePool parent, String id, Map<String, String> nodes, EvictionPolicy ep, int depth, int search_order, int share_quantum) { String methodName = "NodePool.<init>"; this.parent = parent; @@ -116,6 +119,13 @@ class NodePool this.evictionPolicy = ep; this.depth = depth; this.search_order = search_order; + this.share_quantum = share_quantum; + + if ( parent == null ) { + maxorder = new GlobalOrder(); + } else { + maxorder = parent.getGlobalOrder(); + } } void addResourceClass(ResourceClass cl) @@ -133,6 +143,11 @@ class NodePool return id; } + int getShareQuantum() + { + return share_quantum; + } + int getDepth() { return depth; @@ -481,22 +496,33 @@ class NodePool return cln; } - public static int getMaxOrder() + public GlobalOrder getGlobalOrder() { - return maxorder; // must always be the same for parent and all children + return maxorder; } - public static int[] makeArray() // common static code because getting this right everywhere is painful + public void updateMaxOrder(int order) { - return new int[getArraySize()]; + 
maxorder.update(order); + } + + public int getMaxOrder() + { + return maxorder.getOrder(); // must always be the same for parent and all children } - public static int getArraySize() + public int getArraySize() { - return getMaxOrder() + 1; // a bit bigger, because we're 1-indexed for easier counting - // same for parent and children + return getMaxOrder() + 1; // a bit bigger, because we're 1-indexed for easier counting + // same for parent and children } + public int[] makeArray() // common static code because getting this right everywhere is painful + { + return new int[getArraySize()]; + } + + int getSearchOrder() { return this.search_order; @@ -637,7 +663,7 @@ class NodePool // init nSharesByorder to the sum of 'n and 'v MachinesByOrder System.arraycopy(nMachinesByOrder, 0, nSharesByOrder, 0, len); - for ( int i = 0; i < maxorder + 1; i++ ) { + for ( int i = 0; i < getMaxOrder() + 1; i++ ) { nSharesByOrder[i] += vMachinesByOrder[i]; } @@ -715,6 +741,11 @@ class NodePool String methodName = "rearrangeVirtual"; if ( allMachines.containsKey(m.key()) ) { int v_order = m.getVirtualShareOrder(); + + if ( v_order < 0 ) { + int stop_here = 1; + stop_here++; + } int r_order = m.getShareOrder(); logger.trace(methodName, null, m.getId(), "order", order, "v_order", v_order, "r_order", r_order); @@ -783,14 +814,16 @@ class NodePool // reset code so it matches the cycles better. otoh, this isn't a performance-intensive // scheduler so do we care? 
// - maxorder = Math.max(order, maxorder); - nSharesByOrder = new int[maxorder + 1]; - nMachinesByOrder = new int[maxorder + 1]; - vMachinesByOrder = new int[maxorder + 1]; + updateMaxOrder(order); + logger.info(methodName, null, "Nodepool:", id, "Maxorder set to", getMaxOrder()); + + nSharesByOrder = makeArray(); + nMachinesByOrder = makeArray(); + vMachinesByOrder = makeArray(); //nFreeSharesByOrder = new int[maxorder + 1]; //neededByOrder = new int[maxorder + 1]; - nPendingByOrder = new int[maxorder + 1]; + nPendingByOrder = makeArray(); // UIMA-4142 Must set vMachinesByOrder and virtualMachinesByOrder independently of // machinesByOrder because blacklisting can cause v_order != r_order @@ -922,7 +955,7 @@ class NodePool */ NodePool createSubpool(String className, Map<String, String> names, int order) { - NodePool np = new NodePool(this, className, names, evictionPolicy, depth + 1, order); + NodePool np = new NodePool(this, className, names, evictionPolicy, depth + 1, order, share_quantum); children.put(className, np); return np; } @@ -965,8 +998,7 @@ class NodePool { String methodName = "nodeArrives"; - maxorder = Math.max(order, maxorder); - + updateMaxOrder(order); for ( NodePool np : children.values() ) { if ( np.containsPoolNode(node) ) { @@ -1211,7 +1243,7 @@ class NodePool { int order = j.getShareOrder(); int ret = 0; - for ( int i = order; i < maxorder; i++ ) { + for ( int i = order; i < getMaxOrder(); i++ ) { if ( machinesByOrder.containsKey(order) ) { ret += machinesByOrder.get(order).size(); } @@ -1304,7 +1336,7 @@ class NodePool int rem = 0; int low = order; - while ( (given < nrequested ) && ( low <= maxorder ) ) { + while ( (given < nrequested ) && ( low <= getMaxOrder() ) ) { int avail = vMachinesByOrder[low] + nMachinesByOrder[low]; if ( avail > 0 ) { @@ -1507,126 +1539,126 @@ class NodePool return null; // found nothing, heck } - // private void doEvictions(int[] neededByOrder, HashMap<Integer, HashMap<IRmJob, IRmJob>> candidates, boolean force) 
- // { + private void doEvictions(int[] neededByOrder, HashMap<Integer, HashMap<IRmJob, IRmJob>> candidates, boolean force) + { + + for ( int nbo = getMaxOrder(); nbo > 0; nbo-- ) { + + if ( neededByOrder[nbo] == 0 ) { // these are N-shares + continue; + } + for ( int oo = getMaxOrder(); oo > 0; oo-- ) { + HashMap<IRmJob, IRmJob> jobs = candidates.get(oo); + if ( jobs == null ) { + continue; + } + + Iterator<IRmJob> iter = jobs.values().iterator(); // he has something to give. is it enough? + while ( iter.hasNext() && (neededByOrder[nbo] > 0) ) { + IRmJob j = iter.next(); + int loss = 0; + + switch ( evictionPolicy ) { + case SHRINK_BY_MACHINE: + // minimize fragmentation + loss = j.shrinkByOrderByMachine(neededByOrder[nbo], nbo, force, this); // pass in number of N-shares of given order that we want + // returns number of quantum shares it had to relinquish + break; + case SHRINK_BY_INVESTMENT: + // minimize lost work + loss = j.shrinkByInvestment(neededByOrder[nbo], nbo, force, this); // pass in number of N-shares of given order that we want + // returns number of quantum shares it had to relinquish + break; + } - // for ( int nbo = maxorder; nbo > 0; nbo-- ) { + neededByOrder[nbo] -= loss; + neededByOrder[0] -= loss; + nPendingByOrder[oo] += loss; - // if ( neededByOrder[nbo] == 0 ) { // these are N-shares - // continue; - // } - // for ( int oo = maxorder; oo > 0; oo-- ) { - // HashMap<IRmJob, IRmJob> jobs = candidates.get(oo); - // if ( jobs == null ) { - // continue; - // } - - // Iterator<IRmJob> iter = jobs.values().iterator(); // he has something to give. is it enough? 
- // while ( iter.hasNext() && (neededByOrder[nbo] > 0) ) { - // IRmJob j = iter.next(); - // int loss = 0; - - // switch ( evictionPolicy ) { - // case SHRINK_BY_MACHINE: - // // minimize fragmentation - // loss = j.shrinkByOrderByMachine(neededByOrder[nbo], nbo, force, this); // pass in number of N-shares of given order that we want - // // returns number of quantum shares it had to relinquish - // break; - // case SHRINK_BY_INVESTMENT: - // // minimize lost work - // loss = j.shrinkByInvestment(neededByOrder[nbo], nbo, force, this); // pass in number of N-shares of given order that we want - // // returns number of quantum shares it had to relinquish - // break; - // } - - // neededByOrder[nbo] -= loss; - // neededByOrder[0] -= loss; - // nPendingByOrder[oo] += loss; - - // if ( j.countNShares() == 0 ) { // nothing left? don't look here any more - // iter.remove(); - // } - // } - - // } - // } - // } - - // /** - // * Here we tell the NP how much we need cleared up. It will look around and try to do that. - // * @deprecated No longer used, the doEvictions code in NodepoolScheduler handles evictions by itself. - // * Keeping this for a while for reference. UIMA-4275 - // */ - // void doEvictionsByMachine(int [] neededByOrder, boolean force) - // { - // String methodName = "doEvictions"; - // // - // // Collect losers that are also squatters, by order, and try them first - // // - // String type; - // type = force ? 
"forced" : "natural"; - - // logger.debug(methodName, null, getId(), "NeededByOrder", type, "on entrance eviction", Arrays.toString(neededByOrder)); - - // for ( NodePool np : getChildrenDescending() ) { - // logger.info(methodName, null, "Recurse to", np.getId(), "from", getId(), "force:", force); - // np.doEvictionsByMachine(neededByOrder, force); - // logger.info(methodName, null, "Recurse from", np.getId(), "proceed with logic for", getId(), "force", force); - // } - - // // - // // Adjust neededByOrder to reflect the number of shares that need to be preempted by subtracting the - // // number of shares that already are free - // // - // for ( int nbo = maxorder; nbo > 0; nbo-- ) { - // // UIMA-4065 - I think that subtracting countPendingSharesByOrder() amounts to double counting because it - // // will reflect any evictions from the depth-first recursion. Instead, we would subtract only - // // our own shares. - // // - // // int needed = Math.max(0, neededByOrder[nbo] - countNSharesByOrder(nbo) - countPendingSharesByOrder(nbo)); - // int needed = Math.max(0, neededByOrder[nbo] - countNSharesByOrder(nbo) - nPendingByOrder[nbo]); - // neededByOrder[nbo] = needed; - // neededByOrder[0] += needed; - // } - - // logger.debug(methodName, null, getId(), "NeededByOrder", type, "after adjustments for pending eviction:", Arrays.toString(neededByOrder)); - - // HashMap<Integer, HashMap<IRmJob, IRmJob>> squatters = new HashMap<Integer, HashMap<IRmJob, IRmJob>>(); - // HashMap<Integer, HashMap<IRmJob, IRmJob>> residents = new HashMap<Integer, HashMap<IRmJob, IRmJob>>(); - - // for ( Share s : allShares.values() ) { - // HashMap<Integer, HashMap<IRmJob, IRmJob>> map = null; - // boolean is_candidate = force ? 
s.isForceable() : s.isPreemptable(); - // if ( is_candidate ) { - // IRmJob j = s.getJob(); - // ResourceClass rc = j.getResourceClass(); - // if ( rc.getNodepoolName().equals(id) ) { - // map = residents; - // } else { - // map = squatters; - // } - - // int order = j.getShareOrder(); - // HashMap<IRmJob, IRmJob> jmap = null; - // if ( map.containsKey(order) ) { - // jmap = map.get(order); - // } else { - // jmap = new HashMap<IRmJob, IRmJob>(); - // map.put(order, jmap); - // } - // jmap.put(j, j); - // } - // } - - // doEvictions(neededByOrder, squatters, force); - // logger.debug(methodName, null, getId(), "NeededByOrder", type, "after eviction of squatters:", Arrays.toString(neededByOrder)); - // if ( neededByOrder[0] <= 0 ) { - // return; - // } - - // doEvictions(neededByOrder, residents, force); - // logger.debug(methodName, null, getId(), "NeededByOrder", type, "after eviction of residents:", Arrays.toString(neededByOrder)); - // } + if ( j.countNShares() == 0 ) { // nothing left? don't look here any more + iter.remove(); + } + } + + } + } + } + + /** + * Here we tell the NP how much we need cleared up. It will look around and try to do that. + * @deprecated No longer used, the doEvictions code in NodepoolScheduler handles evictions by itself. + * Keeping this for a while for reference. UIMA-4275 + */ + void doEvictionsByMachine(int [] neededByOrder, boolean force) + { + String methodName = "doEvictions"; + // + // Collect losers that are also squatters, by order, and try them first + // + String type; + type = force ? 
"forced" : "natural"; + + logger.debug(methodName, null, getId(), "NeededByOrder", type, "on entrance eviction", Arrays.toString(neededByOrder)); + + for ( NodePool np : getChildrenDescending() ) { + logger.info(methodName, null, "Recurse to", np.getId(), "from", getId(), "force:", force); + np.doEvictionsByMachine(neededByOrder, force); + logger.info(methodName, null, "Recurse from", np.getId(), "proceed with logic for", getId(), "force", force); + } + + // + // Adjust neededByOrder to reflect the number of shares that need to be preempted by subtracting the + // number of shares that already are free + // + for ( int nbo = getMaxOrder(); nbo > 0; nbo-- ) { + // UIMA-4065 - I think that subtracting countPendingSharesByOrder() amounts to double counting because it + // will reflect any evictions from the depth-first recursion. Instead, we would subtract only + // our own shares. + // + // int needed = Math.max(0, neededByOrder[nbo] - countNSharesByOrder(nbo) - countPendingSharesByOrder(nbo)); + int needed = Math.max(0, neededByOrder[nbo] - countNSharesByOrder(nbo) - nPendingByOrder[nbo]); + neededByOrder[nbo] = needed; + neededByOrder[0] += needed; + } + + logger.debug(methodName, null, getId(), "NeededByOrder", type, "after adjustments for pending eviction:", Arrays.toString(neededByOrder)); + + HashMap<Integer, HashMap<IRmJob, IRmJob>> squatters = new HashMap<Integer, HashMap<IRmJob, IRmJob>>(); + HashMap<Integer, HashMap<IRmJob, IRmJob>> residents = new HashMap<Integer, HashMap<IRmJob, IRmJob>>(); + + for ( Share s : allShares.values() ) { + HashMap<Integer, HashMap<IRmJob, IRmJob>> map = null; + boolean is_candidate = force ? 
s.isForceable() : s.isPreemptable(); + if ( is_candidate ) { + IRmJob j = s.getJob(); + ResourceClass rc = j.getResourceClass(); + if ( rc.getNodepoolName().equals(id) ) { + map = residents; + } else { + map = squatters; + } + + int order = j.getShareOrder(); + HashMap<IRmJob, IRmJob> jmap = null; + if ( map.containsKey(order) ) { + jmap = map.get(order); + } else { + jmap = new HashMap<IRmJob, IRmJob>(); + map.put(order, jmap); + } + jmap.put(j, j); + } + } + + doEvictions(neededByOrder, squatters, force); + logger.debug(methodName, null, getId(), "NeededByOrder", type, "after eviction of squatters:", Arrays.toString(neededByOrder)); + if ( neededByOrder[0] <= 0 ) { + return; + } + + doEvictions(neededByOrder, residents, force); + logger.debug(methodName, null, getId(), "NeededByOrder", type, "after eviction of residents:", Arrays.toString(neededByOrder)); + } int findShares( IRmJob j ) @@ -1938,4 +1970,28 @@ class NodePool } } + class GlobalOrder + { + int maxorder = 0; + + GlobalOrder() + { + this.maxorder = 0; + } + + synchronized void reset() + { + this.maxorder = 0; + } + + synchronized void update(int order) + { + this.maxorder = Math.max(maxorder, order); + } + + synchronized int getOrder() + { + return maxorder; + } + } } Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodepoolScheduler.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodepoolScheduler.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodepoolScheduler.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodepoolScheduler.java Mon Aug 24 19:25:05 2015 @@ -106,15 +106,18 @@ public class NodepoolScheduler } fragmentationThreshold = 
SystemPropertyResolver.getIntProperty("ducc.rm.fragmentation.threshold", fragmentationThreshold); - scheduling_quantum = SystemPropertyResolver.getIntProperty("ducc.rm.share.quantum", scheduling_quantum); do_defragmentation = SystemPropertyResolver.getBooleanProperty("ducc.rm.defragmentation", do_defragmentation); use_global_allotment = SystemPropertyResolver.getBooleanProperty("ducc.rm.use_global_allotment", use_global_allotment); global_allotment = SystemPropertyResolver.getIntProperty("ducc.rm.global_allotment", global_allotment); } + /** + * Sets the top-level np for this scheduler + */ public void setNodePool(NodePool np) { this.globalNodepool = np; + this.scheduling_quantum = np.getShareQuantum() >> 20; // to GB from KB } public void setEvictionPolicy(EvictionPolicy ep) @@ -385,8 +388,8 @@ public class NodepoolScheduler { String methodName = "apportion_qshares"; boolean shares_given = false; - int maxorder = NodePool.getMaxOrder(); - int[] nshares = NodePool.makeArray(); // nshares + int maxorder = globalNodepool.getMaxOrder(); + int[] nshares = globalNodepool.makeArray(); // nshares if ( entities.size() == 0 ) return; @@ -408,12 +411,12 @@ public class NodepoolScheduler StringBuffer enames = null; StringBuffer eweights = null; logger.info(methodName, null, descr, "RmCounter Start"); - logger.info(methodName, null, descr, "maxorder = ", NodePool.getMaxOrder()); + logger.info(methodName, null, descr, "maxorder = ", globalNodepool.getMaxOrder()); enames = new StringBuffer(); eweights = new StringBuffer(); for ( IEntity e : working ) { - int[] gbo = NodePool.makeArray(); + int[] gbo = globalNodepool.makeArray(); e.setGivenByOrder(gbo); given_by_order.put(e, gbo); @@ -1334,7 +1337,7 @@ public class NodepoolScheduler // The job passes; give the job a count // logger.info(methodName, j.getId(), "+++++ nodepool", np.getId(), "class", rc.getName(), "order", order, "shares", nSharesToString(granted, order)); - int[] gbo = NodePool.makeArray(); + int[] gbo = 
globalNodepool.makeArray(); gbo[order] = granted; // what we get j.setGivenByOrder(gbo); @@ -1359,7 +1362,7 @@ public class NodepoolScheduler // already accounted for as well, since it is a non-preemptable share logger.info(methodName, j.getId(), "[stable]", "assigned", j.countNShares(), "processes, ", (j.countNShares() * j.getShareOrder()), "QS"); - int[] gbo = NodePool.makeArray(); + int[] gbo = globalNodepool.makeArray(); gbo[j.getShareOrder()] = 1; // must set the allocation so eviction works right j.setGivenByOrder(gbo); @@ -1393,7 +1396,7 @@ public class NodepoolScheduler // The job passes. Assign it a count and get on with life ... // logger.info(methodName, j.getId(), "+++++ nodepool", np.getId(), "class", rc.getName(), "order", order, "shares", nSharesToString(1, order)); - int[] gbo = NodePool.makeArray(); + int[] gbo = globalNodepool.makeArray(); gbo[order] = 1; j.setGivenByOrder(gbo); @@ -1542,7 +1545,7 @@ public class NodepoolScheduler // The job passes; give the job a count // logger.info(methodName, j.getId(), "Request is granted a machine for reservation."); - int[] gbo = NodePool.makeArray(); + int[] gbo =globalNodepool.makeArray(); int order = j.getShareOrder(); // memory, coverted to order, so we can find stuff gbo[order] = freeable + j.countNShares(); // account for new stuff plus what it already has j.setGivenByOrder(gbo); @@ -1560,7 +1563,7 @@ public class NodepoolScheduler logger.info(methodName, j.getId(), "[stable]", "assigned", j.countNShares(), "processes, ", (j.countNShares() * j.getShareOrder()), "QS"); - int[] gbo = NodePool.makeArray(); + int[] gbo =globalNodepool.makeArray(); gbo[j.getShareOrder()] = 1; // UIMA4275 - only one j.setGivenByOrder(gbo); @@ -1585,7 +1588,7 @@ public class NodepoolScheduler } logger.info(methodName, j.getId(), "Request is granted a machine for reservation."); - int[] gbo = NodePool.makeArray(); + int[] gbo =globalNodepool.makeArray(); int order = j.getShareOrder(); // memory, coverted to order, so we can 
find stuff gbo[order] = 1; j.setGivenByOrder(gbo); @@ -1722,7 +1725,7 @@ public class NodepoolScheduler logger.info(methodName, null, "Return from", np.getId(), "proceeding with logic for", nodepool.getId()); } - int neededByOrder[] = NodePool.makeArray(); // for each order, how many N-shares do I want to add? + int neededByOrder[] =globalNodepool.makeArray(); // for each order, how many N-shares do I want to add? // int total_needed = 0; Map<IRmJob, Integer> overages = new HashMap<IRmJob, Integer>(); // UIMA-4275 @@ -2298,8 +2301,8 @@ public class NodepoolScheduler NodePool np = allPools[npi]; String id = np.getId(); - int[] vmach = NodePool.makeArray(); - int[] nmach = NodePool.makeArray(); + int[] vmach =globalNodepool.makeArray(); + int[] nmach =globalNodepool.makeArray(); Map<IRmJob, Integer> jobmap = new HashMap<IRmJob, Integer>(); vshares.put(id, vmach); nshares.put(id, nmach); Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/ResourceClass.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/ResourceClass.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/ResourceClass.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/ResourceClass.java Mon Aug 24 19:25:05 2015 @@ -84,7 +84,7 @@ public class ResourceClass private static Comparator<IEntity> apportionmentSorter = new ApportionmentSorterCl(); - public ResourceClass(DuccProperties props, long share_quantum) + public ResourceClass(DuccProperties props) { // // We can assume everything useful is here because the parser insured it @@ -92,7 +92,7 @@ public class ResourceClass this.id = props.getStringProperty("name"); this.policy = 
Policy.valueOf(props.getStringProperty("policy")); this.priority = props.getIntProperty("priority"); - this.share_quantum = (int) (share_quantum / ( 1024 * 1024 )); // KB back to GB + // (Note: the share quantum is set when the nodepool is set because it isn't known quite yet in the constructor.) String userset = props.getProperty("users"); if ( userset != null ) { @@ -111,7 +111,9 @@ public class ResourceClass this.enforce_memory = props.getBooleanProperty("enforce", true); } - // For now, R 2.0.0 not configurable, and not cappable. Hope to revive in future release. + // This is not used any more - we keep it for back-level compatibility, and because + // we may revive it in the future. It will therefore be referenced, but by making it + // Integer.MAX_VALUE it is essentially a no-op. this.fair_share_cap = Integer.MAX_VALUE; // UIMA-4275 if ( this.policy == Policy.FAIR_SHARE ) { @@ -153,9 +155,31 @@ public class ResourceClass return authorizedUsers.containsKey(user); } + /** + * Ask my nodepool to make an array of the right size for the caller. + */ + int[] makeArray() + { + return nodepool.makeArray(); + } + + /** + * Ask my nodepool what the largest order of job I supoprt is. 
+ */ + int getMaxOrder() + { + return nodepool.getMaxOrder(); + } + + public int getShareQuantum() + { + return share_quantum; + } + public void setNodepool(NodePool np) { this.nodepool = np; + this.share_quantum = np.getShareQuantum(); } public NodePool getNodepool() @@ -336,6 +360,7 @@ public class ResourceClass // UIMA-4275 public boolean fairShareCapExceeded(IRmJob j) { + // fair-share caps are deprecated, insure this never returns true return false; // if ( policy != Policy.FAIR_SHARE ) return false; @@ -407,7 +432,7 @@ public class ResourceClass if ( given_by_order == null ) return; // nothing given, nothing to adjust - for ( int o = NodePool.getMaxOrder(); o > 0; o-- ) { + for ( int o = nodepool.getMaxOrder(); o > 0; o-- ) { np.countOutNSharesByOrder(o, given_by_order[o]); } } @@ -505,8 +530,8 @@ public class ResourceClass public void initWantedByOrder(ResourceClass unused) { - int ord = NodePool.getMaxOrder(); - wanted_by_order = NodePool.makeArray(); + int ord = nodepool.getMaxOrder(); + wanted_by_order = nodepool.makeArray(); for ( int o = ord; o > 0; o-- ) { wanted_by_order[o] = countNSharesWanted(o); wanted_by_order[0] += wanted_by_order[o]; Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/RmJob.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/RmJob.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/RmJob.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/RmJob.java Mon Aug 24 19:25:05 2015 @@ -321,7 +321,7 @@ public class RmJob public void initWantedByOrder(ResourceClass unused) { - wanted_by_order = NodePool.makeArray(); + wanted_by_order = unused.makeArray(); wanted_by_order[share_order] = 
getJobCap(); wanted_by_order[0] = wanted_by_order[share_order]; } @@ -1228,6 +1228,11 @@ public class RmJob this.threads = th; } + public int getShareQuantum() + { + return resource_class.getShareQuantum(); + } + public int getMemory() { return memory; } Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java Mon Aug 24 19:25:05 2015 @@ -115,9 +115,6 @@ public class Scheduler String schedImplName; IScheduler[] schedulers; - long share_quantum = 15; // 15 GB - smallest share size - converted to KB during init because - // hosts report in KB - long share_free_dram = 0; // 0 GB in KB - minim memory after shares are allocated long dramOverride = 0; // if > 0, use this instead of amount reported by agents (modeling and testing) @@ -174,7 +171,6 @@ public class Scheduler evictionPolicy = EvictionPolicy.valueOf(ep); // nodepool = new NodePool(null, evictionPolicy, 0); // global nodepool - share_quantum = SystemPropertyResolver.getLongProperty("ducc.rm.share.quantum", share_quantum) * 1024 * 1024; // GB -> KB share_free_dram = SystemPropertyResolver.getLongProperty("ducc.rm.reserved.dram", share_free_dram) * 1024 * 1024; // GB -> KB ducc_home = SystemPropertyResolver.getStringProperty("DUCC_HOME"); @@ -227,7 +223,6 @@ public class Scheduler // scheduler.setClasses(resourceClasses); // scheduler.setNodePool(nodepools[0]); - logger.info(methodName, null, "Scheduler running with share quantum : ", (share_quantum / (1024*1024)), " GB"); 
logger.info(methodName, null, " reserved DRAM : ", (share_free_dram / (1024*1024)), " GB"); logger.info(methodName, null, " DRAM override : ", (dramOverride / (1024*1024)), " GB"); logger.info(methodName, null, " scheduler : ", schedImplName); @@ -427,10 +422,11 @@ public class Scheduler /** * Calculate share order, given some memory size in GB (as in from a job spec) */ - public int calcShareOrder(long mem) + public int calcShareOrder(IRmJob j) { // Calculate its share order - mem = mem * 1024 * 1024; // to GB + long mem = j.getMemory() << 20 ; // to KB from GB + int share_quantum = j.getShareQuantum(); int share_order = (int) (mem / share_quantum); // liberal calc, round UP if ( (mem % share_quantum) > 0 ) { @@ -439,6 +435,7 @@ public class Scheduler return share_order; } + /** * Collect all the classes served by the indicated nodepool (property set). This fills * in the 'ret' map from the parameter 'dp' and recursive calls to the children in dp. @@ -548,7 +545,7 @@ public class Scheduler logger.info(methodName, null, ResourceClass.getHeader()); logger.info(methodName, null, ResourceClass.getDashes()); for ( DuccProperties props : cls.values() ) { - ResourceClass rc = new ResourceClass(props, share_quantum); + ResourceClass rc = new ResourceClass(props); resourceClasses.put(rc, rc); resourceClassesByName.put(rc.getName(), rc); logger.info(methodName, null, rc.toString()); @@ -590,7 +587,8 @@ public class Scheduler @SuppressWarnings("unchecked") Map<String, String> nodes = (Map<String, String>) np.get("nodes"); int search_order = np.getIntProperty("search-order", 100); - nodepools[i] = new NodePool(null, id, nodes, evictionPolicy, 0, search_order); + int q = np.getIntProperty("share-quantum", 15) << 20 ; // to kB which is how the nodes report in + nodepools[i] = new NodePool(null, id, nodes, evictionPolicy, 0, search_order, q); schedulers[i].setNodePool(nodepools[i]); // set its top-level nodepool mapNodesToNodepool(nodes, nodepools[i]); @@ -614,6 +612,7 @@ public 
class Scheduler } schedulers[i].setClasses(classesForNp); + } // Here create or update Users with constraints from the registry @@ -973,7 +972,7 @@ public class Scheduler j.setUser(u); // Calculate its share order - int share_order = calcShareOrder(j.getMemory()); + int share_order = calcShareOrder(j); j.setShareOrder(share_order); // Assign it to its priority class @@ -1114,7 +1113,7 @@ public class Scheduler if ( dramOverride > 0 ) { allocatable_mem = dramOverride; } - share_order = (int) (allocatable_mem / share_quantum); // conservative - rounds down (this will always cast ok) + share_order = (int) (allocatable_mem / np.getShareQuantum()); // conservative - rounds down (this will always cast ok) } else { share_order = m.getShareOrder(); } @@ -1229,8 +1228,8 @@ public class Scheduler ret.setAllMachines(np.countAllLocalMachines()); - int[] onlineMachines = NodePool.makeArray(); - int[] freeMachines = NodePool.makeArray(); + int[] onlineMachines = np.makeArray(); + int[] freeMachines = np.makeArray(); for ( int i = 1; i < freeMachines.length; i++ ) { freeMachines[i] += np.countFreeMachines(i); // (these are local, as we want) } @@ -1272,20 +1271,22 @@ public class Scheduler break; } - int[] demanded = NodePool.makeArray(); - int[] awarded = NodePool.makeArray(); + // TODO MUST FIX THIS - HashMap<IRmJob, IRmJob> jobs = cl.getAllJobs(); - for ( IRmJob j : jobs.values() ) { - int o = j.getShareOrder(); - demanded[o] += j.queryDemand(); - awarded[o] += j.countNShares(); - } + // int[] demanded = NodePool.makeArray(); + // int[] awarded = NodePool.makeArray(); + + // HashMap<IRmJob, IRmJob> jobs = cl.getAllJobs(); + // for ( IRmJob j : jobs.values() ) { + // int o = j.getShareOrder(); + // demanded[o] += j.queryDemand(); + // awarded[o] += j.countNShares(); + // } - qcl.setName(cl.getName()); - qcl.setDemanded(demanded); - qcl.setAwarded(awarded); - reply.addClass(qcl); + // qcl.setName(cl.getName()); + // qcl.setDemanded(demanded); + // qcl.setAwarded(awarded); + // 
reply.addClass(qcl); } } @@ -1306,8 +1307,6 @@ public class Scheduler return ret; } - ret.setShareQuantum(share_quantum); - calculateLoad(ret); ArrayList<NodePool> allpools = new ArrayList<NodePool>(); @@ -1522,7 +1521,7 @@ public class Scheduler public void resetNodepools() { for ( NodePool np : nodepools ) { - np.reset(NodePool.getMaxOrder()); + np.reset(np.getMaxOrder()); } } @@ -1555,10 +1554,12 @@ public class Scheduler { String methodName = "processRecovery"; - int share_order = calcShareOrder(j.getMemory()); ResourceClass rc = resourceClassesByName.get(j.getClassName()); - j.setShareOrder(share_order); j.setResourceClass(rc); + + int share_order = calcShareOrder(j); + j.setShareOrder(share_order); + HashMap<Share, Share> shares = j.getRecoveredShares(); List<Share> sharesToShrink = new ArrayList<Share>(); // UIMA-4142 StringBuffer sharenames = new StringBuffer(); Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Share.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Share.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Share.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Share.java Mon Aug 24 19:25:05 2015 @@ -168,6 +168,7 @@ public class Share return machine; } + long getHostMemory() { if ( machine != null ) return machine.getMemory(); Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/User.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/User.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== 
--- uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/User.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/User.java Mon Aug 24 19:25:05 2015 @@ -172,8 +172,8 @@ public class User public void initWantedByOrder(ResourceClass rc) { - wanted_by_order = NodePool.makeArray(); - for ( int o = NodePool.getMaxOrder(); o > 0; o-- ) { + wanted_by_order = rc.makeArray(); + for ( int o = rc.getMaxOrder(); o > 0; o-- ) { wanted_by_order[o] = countNSharesWanted(o, rc); wanted_by_order[0] += wanted_by_order[o]; } Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/RmStateDuccEvent.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/RmStateDuccEvent.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/RmStateDuccEvent.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/RmStateDuccEvent.java Mon Aug 24 19:25:05 2015 @@ -105,9 +105,9 @@ public class RmStateDuccEvent Map<DuccId, IResource> existing = j.getResources(); if ( existing == null ) { - buf.append(String.format("%s %s %s\n\tExisting[0]", j.getDuccType(), j.getId().getFriendly(), reason)); + buf.append(String.format("%s %s %dGB %s\n\tExisting[0]", j.getDuccType(), j.getId().getFriendly(), j.memoryGbPerProcess(), reason)); } else { - buf.append(String.format("%s %s %s\n\tExisting[%d]: ", j.getDuccType(), j.getId().getFriendly(), reason, existing.size())); + buf.append(String.format("%s %s %dGB %s\n\tExisting[%d]: ", j.getDuccType(), j.getId().getFriendly(), j.memoryGbPerProcess(), reason, existing.size())); for ( IResource r : 
existing.values() ) { buf.append(r.toString()); buf.append(" "); Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/IRmJobState.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/IRmJobState.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/IRmJobState.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/IRmJobState.java Mon Aug 24 19:25:05 2015 @@ -77,6 +77,11 @@ public interface IRmJobState extends Ser boolean isRefused(); /** + * Actual memory allocated per process. + */ + public int memoryGbPerProcess(); + + /** * If isRefused() is true, this string contains a message explaining why. 
*/ String getReason(); Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/Resource.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/Resource.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/Resource.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/Resource.java Mon Aug 24 19:25:05 2015 @@ -29,7 +29,7 @@ public class Resource implements IResour private Node node; // Node id, assigned by Agent private boolean purged; // Purged, for node failure private int qShares; // Number of quantum shares this resource occupies - private transient long itime; // initialization time, for the toString, but not to be transmitted + private transient long itime; // initialization time, for the toString, but not to be transmitted // dissallow @SuppressWarnings("unused") private Resource() @@ -42,7 +42,7 @@ public class Resource implements IResour this.node = node; this.purged = purged; this.qShares = qShares; - this.itime = itime; + this.itime = itime; } public DuccId getId() Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/RmJobState.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/RmJobState.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/RmJobState.java (original) +++ 
uima/sandbox/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/RmJobState.java Mon Aug 24 19:25:05 2015 @@ -28,7 +28,9 @@ public class RmJobState implements IRmJo private static final long serialVersionUID = 1L; private DuccId duccId; // this job's DuccId as assigned by OR - private DuccType ducc_type; // for messages :( + private DuccType ducc_type; // for messages + private int memoryPerProcess; // in gb, actual memory allocated, usually > memory requested + // because of rounding to nearest multiple of the quantum // for all maps: // key: DuccId of a share, assigned by RM @@ -47,11 +49,13 @@ public class RmJobState implements IRmJo } public RmJobState(DuccId duccId, + int memoryPerProcess, Map<DuccId, IResource> resources, Map<DuccId, IResource> removals, Map<DuccId, IResource> additions) { this.duccId = duccId; + this.memoryPerProcess = memoryPerProcess; this.resources = resources; this.pendingRemovals = removals; this.pendingAdditions = additions; @@ -60,6 +64,7 @@ public class RmJobState implements IRmJo public RmJobState(DuccId duccId, String refusalReason) { this.duccId = duccId; + this.memoryPerProcess = 0; this.refused = true; this.reason = refusalReason; } @@ -113,4 +118,16 @@ public class RmJobState implements IRmJo { this.ducc_type = dt; } + + // in GB + public int memoryGbPerProcess() + { + return memoryPerProcess; + } + + public int setMemorPerProcess(int m) + { + return memoryPerProcess; + } + } Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/JobFragment.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/JobFragment.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/JobFragment.java (original) +++
uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/JobFragment.java Mon Aug 24 19:25:05 2015 @@ -33,19 +33,21 @@ class JobFragment int qshares; // total qshares represented by the node String service_endpoint; // for services only, the defined endpoint we we can link back to services page int mem; // Actual memory requested + int quantum; // The scheduling quantum used for this job String color; // color to draw this DuccType type; // Job, Service, Reservation, Pop. String textColor = "white"; String fillColor = "black"; - JobFragment(String user, DuccType type, String id, int mem, int qshares, String service_endpoint) + JobFragment(String user, DuccType type, String id, int mem, int qshares, int quantum, String service_endpoint) { this.user = user; this.type = type; this.id = id; this.qshares = qshares; this.mem = mem; + this.quantum = quantum; this.nprocesses = 1; this.service_endpoint = service_endpoint; setColors(); Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/Markup.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/Markup.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/Markup.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/Markup.java Mon Aug 24 19:25:05 2015 @@ -217,7 +217,7 @@ public class Markup out.append("<title>"); out.append(j.qshares); out.append(" unused shares ("); - out.append((j.qshares * NodeViz.quantum)); + out.append((j.qshares * j.quantum)); out.append("GB) on "); out.append(h.name); out.append("("); Modified: 
uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/NodeViz.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/NodeViz.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/NodeViz.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/NodeViz.java Mon Aug 24 19:25:05 2015 @@ -19,6 +19,8 @@ package org.apache.uima.ducc.ws.server.nodeviz; +import java.io.FileNotFoundException; +import java.io.IOException; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; @@ -26,8 +28,11 @@ import java.util.Map; import java.util.concurrent.ConcurrentSkipListMap; import org.apache.uima.ducc.common.Node; +import org.apache.uima.ducc.common.NodeConfiguration; import org.apache.uima.ducc.common.utils.DuccLogger; import org.apache.uima.ducc.common.utils.DuccLoggerComponents; +import org.apache.uima.ducc.common.utils.DuccPropertiesResolver; +import org.apache.uima.ducc.common.utils.IllegalConfigurationException; import org.apache.uima.ducc.common.utils.SystemPropertyResolver; import org.apache.uima.ducc.common.utils.Version; import org.apache.uima.ducc.transport.event.OrchestratorStateDuccEvent; @@ -61,8 +66,8 @@ public class NodeViz private String visualization; // cached visualization private long update_interval = 60000; // Only gen a new viz every 'this long' + static int default_quantum = 4; // for hosts with no jobs so we don't know or care that much private String version = "1.1.0"; - static int quantum = 4; static String wshost = ""; static String wsport = "42133"; static boolean strip_domain = true; @@ -72,17 +77,16 @@ public class NodeViz String methodName = "NodeViz"; update_interval = 
SystemPropertyResolver.getLongProperty("ducc.viz.update.interval", update_interval); - quantum = SystemPropertyResolver.getIntProperty("ducc.rm.share.quantum", quantum); - + default_quantum = SystemPropertyResolver.getIntProperty("ducc.rm.share.quantum", default_quantum); wshost = SystemPropertyResolver.getStringProperty("ducc.ws.node", System.getProperty("ducc.head")); wsport = SystemPropertyResolver.getStringProperty("ducc.ws.port", wsport); - strip_domain = SystemPropertyResolver.getBooleanProperty("ducc.ws.visualization.strip.domain", true); + strip_domain = SystemPropertyResolver.getBooleanProperty("ducc.ws.visualization.strip.domain", true); logger.info(methodName, null, "------------------------------------------------------------------------------------"); logger.info(methodName, null, "Node Visualization starting:"); logger.info(methodName, null, " DUCC home : ", System.getProperty("DUCC_HOME")); logger.info(methodName, null, " ActiveMQ URL : ", System.getProperty("ducc.broker.url")); - logger.info(methodName, null, "Using Share Quantum : ", quantum); + logger.info(methodName, null, "Default Quantum : ", default_quantum); logger.info(methodName, null, "Viz update Interval : ", update_interval); logger.info(methodName, null, "Web Server Host : ", wshost); logger.info(methodName, null, "Web Server Port : ", wsport); @@ -125,6 +129,33 @@ public class NodeViz int pop_shares = 0; int reservation_shares = 0; + int job_gb = 0; + int service_gb = 0; + int pop_gb = 0; + int reservation_gb = 0; + + // Must find nost configuration so we can work out the quantum used to schedule each job + String class_definitions = SystemPropertyResolver + .getStringProperty(DuccPropertiesResolver + .ducc_rm_class_definitions, "scheduler.classes"); + String user_registry = SystemPropertyResolver + .getStringProperty(DuccPropertiesResolver + .ducc_rm_user_registry, "ducc.users"); + class_definitions = System.getProperty("DUCC_HOME") + "/resources/" + class_definitions; + 
NodeConfiguration nc = new NodeConfiguration(class_definitions, null, user_registry, logger); // UIMA-4142 make the config global + try { + nc.readConfiguration(); + } catch (FileNotFoundException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } catch (IOException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } catch (IllegalConfigurationException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } + // first step, generate the viz from the OR map which seems to have everything we need // next stop, walk the machines list and generate empty node for any machine in that list // that had no work on it @@ -157,10 +188,20 @@ public class NodeViz IDuccStandardInfo si = w.getStandardInfo(); IDuccSchedulingInfo sti = w.getSchedulingInfo(); - String user = si.getUser(); - String duccid = service_id == null ? Long.toString(w.getDuccId().getFriendly()) : service_id; // UIMA-4209 - int jobmem = Integer.parseInt(sti.getShareMemorySize()); - int qshares = jobmem / quantum; + String user = si.getUser(); + String duccid = service_id == null ? Long.toString(w.getDuccId().getFriendly()) : service_id; // UIMA-4209 + int jobmem = Integer.parseInt(sti.getShareMemorySize()); + + String sclass = sti.getSchedulingClass(); + int quantum = default_quantum; + try { + quantum = nc.getShareQuantum(sclass); + } catch ( Exception e ) { + // this most likely caused by a reconfigure so that a job's class no longer exists. nothing to do about it + // but punt and try not to crash. + logger.warn(methodName, null, "Cannot find scheduling class or quantum for " + sclass + ". 
Using default quantum of " + default_quantum); + } + int qshares = jobmem / quantum; if ( jobmem % quantum != 0 ) qshares++; switch ( type ) { @@ -185,12 +226,15 @@ public class NodeViz switch ( type ) { case Job: job_shares += qshares; + job_gb += jobmem; break; case Pop: pop_shares += qshares; + pop_gb += jobmem; break; case Service: service_shares += qshares; + service_gb += jobmem; break; } @@ -216,6 +260,7 @@ public class NodeViz logger.debug(methodName, w.getDuccId(), "Receive:", type, w.getStateObject(), "processes[", rm.size(), "] Completed:", w.isCompleted()); reservation_shares += qshares; + reservation_gb += jobmem; for ( IDuccReservation r: rm.values()) { Node n = r.getNode(); @@ -244,9 +289,11 @@ public class NodeViz logger.debug(methodName, null, "Generateing visualizaiton"); ConcurrentSkipListMap<String,MachineInfo> m = machineData.getMachines(); - for (String s : m.keySet()) { - + // + // This is for hosts that have no work on them so they didn't come in the work map + // + MachineInfo mi = m.get(s); // NOTE: the map changes all the time so the value may be gone. This situation // will be fixed one day but for now just forget the node, it will show up @@ -258,24 +305,24 @@ public class NodeViz String key = strip(s); // our key, possibly with domain stripped if ( ! 
hosts.containsKey(key) ) { // System.out.println("Set host from MachineInfo with key :" + key + ":"); - VisualizedHost vh = new VisualizedHost(mi, quantum); + + VisualizedHost vh = new VisualizedHost(mi, default_quantum); hosts.put(key, vh); } } - int total_shares = 0; - int total_ram = 0; + int total_gb = 0; Markup markup = new Markup(); VisualizedHost[] sorted = hosts.values().toArray(new VisualizedHost[hosts.size()]); Arrays.sort(sorted, new HostSorter()); for ( VisualizedHost vh : sorted ) { vh.toSvg(markup); - total_shares += vh.countShares(); - total_ram += vh.countRam(); + total_gb += vh.countRam(); } String page = markup.close(); - int unoccupied_shares = total_shares - (job_shares + pop_shares + service_shares + reservation_shares); + int unoccupied_gb = total_gb - (job_gb + pop_gb + service_gb + reservation_gb); + int total_shares = job_shares + pop_shares + service_shares + reservation_shares; visualization = "<html>" + @@ -285,20 +332,19 @@ public class NodeViz "<i onclick=\"ducc_viz_node_sorter('size')\" id=\"ducc-viz-sort-size\" style=\"color:red\">Size </i>" + "<i onclick=\"ducc_viz_node_sorter('name')\" id=\"ducc-viz-sort-name\"\">Name</i>" + "</br>" + - "<b>Shares of size " + quantum + "GB: </b>" + total_shares + + "<b>Total shares: </b>" + total_shares + ", <b>Jobs: </b>" + job_shares + ", <b>Services: </b>" + service_shares + ", <b>Managed Reservations: </b>" + pop_shares + ", <b>Reservations: </b>" + reservation_shares + - ", <b>Unoccupied: </b>" + unoccupied_shares + + ", <b>Unoccupied: </b>" + unoccupied_gb + "<br><i><small>" + - "<b>RAM Total:</b> " + total_ram + - "GB, <b>For shares:</b> " + (total_shares * quantum) + - "GB, <b>Jobs:</b> " + (job_shares * quantum) + - "GB, <b>Services:</b> " + (service_shares * quantum) + - "GB, <b>Managed Reservations:</b> " + (pop_shares * quantum) + - "GB, <b>Reservations:</b> " + (reservation_shares * quantum) + - "GB, <b>Unoccupied:</b> " + (unoccupied_shares * quantum) + + "<b>RAM Total:</b> " + 
total_gb + + "GB, <b>Jobs:</b> " + (job_gb) + + "GB, <b>Services:</b> " + (service_gb) + + "GB, <b>Managed Reservations:</b> " + (pop_gb) + + "GB, <b>Reservations:</b> " + (pop_gb) + + "GB, <b>Unoccupied:</b> " + (unoccupied_gb) + "GB</small></i>" + "</div>" + "<br>" + Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/VisualizedHost.java URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/VisualizedHost.java?rev=1697485&r1=1697484&r2=1697485&view=diff ============================================================================== --- uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/VisualizedHost.java (original) +++ uima/sandbox/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/VisualizedHost.java Mon Aug 24 19:25:05 2015 @@ -132,7 +132,7 @@ class VisualizedHost } if ( ! found ) { logger.debug(methodName, null, name, "Create new job fragment for", user, "with", qshares, "qshare, type", type); - JobFragment j = new JobFragment(user, type, duccid, jobmem, qshares, service_endpoint); + JobFragment j = new JobFragment(user, type, duccid, jobmem, qshares, quantum, service_endpoint); fragments.add(j); } } @@ -166,7 +166,7 @@ class VisualizedHost m.tooltipEnd(); Collections.sort(fragments, sorter); - float height_one_share = (float) Math.sqrt(shares * NodeViz.quantum) / shares; + float height_one_share = (float) Math.sqrt(shares * quantum) / shares; float foo = (float) Math.sqrt(mem) / shares; logger.debug(methodName, null, name, "shares", shares, "height-one-share", height_one_share, "foo", foo); float top = 0f + TITLE_ADJUSTMENT; // the top of the box