Author: degenaro
Date: Tue Jun 25 20:59:44 2019
New Revision: 1862087
URL: http://svn.apache.org/viewvc?rev=1862087&view=rev
Log: UIMA-6077 DUCC stop_ducc -ag should cause RM to stop scheduling there
Modified: uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java Modified: uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java?rev=1862087&r1=1862086&r2=1862087&view=diff ============================================================================== --- uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java (original) +++ uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java Tue Jun 25 20:59:44 2019 @@ -24,6 +24,7 @@ import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; import java.util.Properties; +import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.camel.CamelContext; @@ -725,6 +726,42 @@ implements Orchestrator { return dh; } + private void diagnose(DuccHeadTransition dh_transition, String when) { + String methodName = "diagnose"; + DuccWorkMap dwm = orchestratorCommonArea.getWorkMap(); + Set<DuccId> keys = dwm.getJobKeySet(); + if(keys.isEmpty()) { + logger.debug(methodName, null, "no jobs"); + } + else { + for(DuccId key : keys) { + IDuccWork dw = dwm.findDuccWork(key); + IDuccWorkJob dwj = (IDuccWorkJob) dw; + if(dwj != null) { + IDuccProcessMap processMap = dwj.getProcessMap(); + if(processMap != null) { + for(IDuccProcess process : processMap.values()) { + StringBuffer sb = new StringBuffer(); + sb.append("head-state:"+dh_transition.name()+"="+when); + sb.append(" "); + sb.append("ducc-pid:"+process.getDuccId()); + sb.append(" "); + sb.append("user:"+dwj.getStandardInfo().getUser()); + sb.append(" 
"); + sb.append("node:"+process.getNode().getNodeIdentity().getCanonicalName()); + sb.append(" "); + sb.append("pid:"+process.getPID()); + logger.debug(methodName, dwj.getDuccId(), sb); + } + } + } + else { + logger.debug(methodName, key, "no job"); + } + } + } + } + /** * Publish Orchestrator State */ @@ -745,10 +782,12 @@ implements Orchestrator { logger.warn(methodName, jobid, "ducc head -> backup"); break; case backup_to_master: + diagnose(dh_transition,"before"); OrchestratorCommonArea.getInstance().restart(); SystemEventsLogger.warn(IDuccLoggerComponents.abbrv_orchestrator, EventType.SWITCH_TO_MASTER.name(), ""); orchestratorStateDuccEvent.setDuccHeadState(DuccHeadState.master); logger.warn(methodName, jobid, "ducc head -> master"); + diagnose(dh_transition,"after"); break; case master_to_master: orchestratorStateDuccEvent.setDuccHeadState(DuccHeadState.master); Modified: uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java?rev=1862087&r1=1862086&r2=1862087&view=diff ============================================================================== --- uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java (original) +++ uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java Tue Jun 25 20:59:44 2019 @@ -1262,8 +1262,13 @@ public class JobManagerConverter ProcessState state = proc.getProcessState(); Node n = proc.getNode(); if ( n == null ) { - logger.info(methodName, w.getDuccId(), " Process[", pid, "] state [", state, "] is complete[", proc.isComplete(), "] Node [N/A] mem[N/A"); - } else { + logger.info(methodName, w.getDuccId(), " Process[", pid, "] state [", state, "] is complete[", proc.isComplete(), "] Node [N/A] mem[N/A]"); + } + else if( proc.isComplete() ) { + long mem = n 
.getNodeMetrics().getNodeMemory().getMemTotal(); + logger.info(methodName, w.getDuccId(), " Process[", pid, "] state [", state, "] is complete[", proc.isComplete(), "] Node [",n.getNodeIdentity().getCanonicalName() + "." + proc.getDuccId(),"] mem[", mem, "]"); + } + else { long mem = n .getNodeMetrics().getNodeMemory().getMemTotal(); logger.info(methodName, w.getDuccId(), " Process[", pid, @@ -1297,6 +1302,10 @@ public class JobManagerConverter logger.info(methodName, w.getDuccId(), "Receive:", prefix, w.getDuccType(), w.getStateObject(), "processes[", rm.size(), "] Completed:", w.isCompleted()); + if(w.isCompleted()) { + continue; + } + for ( IDuccReservation r: rm.values()) { Node n = r.getNode(); if ( n == null ) {