Author: challngr
Date: Fri Feb  7 13:48:17 2014
New Revision: 1565655

URL: http://svn.apache.org/r1565655
Log:
UIMA-3405 Implement service instance failure/restart policy.

Modified:
    
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/AServicePing.java
    
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java
    
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
    
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
    
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
    
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java

Modified: 
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/AServicePing.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/AServicePing.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/AServicePing.java
 (original)
+++ 
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/AServicePing.java
 Fri Feb  7 13:48:17 2014
@@ -28,9 +28,21 @@ import org.apache.uima.ducc.common.IServ
 
 public abstract class AServicePing
 {
+    protected int[] failure_window = null;      // tracks failures (not necessarily consecutive) 
within a window
+    protected int failure_cursor = 0;           // cursor to track failures 
within the current window
+    protected int total_failures = 0;           // current total run failures. 
usually monotonically increasing.
+    protected int failure_max = 3;              // max allowed failures within 
any given window
+
+    protected int failure_window_size = 30;     // 30 minutes. overridden at 
first ping
+    protected int monitor_rate = 1;             // ping rate, in minutes, min 
1 used for calculations
+
+    protected boolean log_enabled = false;
+    protected long service_id = 0;    
 
     protected Properties smState;
 
+    private org.apache.uima.ducc.common.utils.DuccLogger duccLogger = null;
+
     /**
      * Called by the ping driver, to pass in useful things the pinger may want.
      * @param arguments This is passed in from the service specification's
@@ -42,6 +54,43 @@ public abstract class AServicePing
     public abstract void init(String arguments, String endpoint)  throws 
Exception;
 
     /**
+     * Called by the ping driver to initialize static information about the 
service and
+     * pinger.  This guy calls the public init() method and is not intended 
for public
+     * consumption.
+     *
+     * This guy initializes the default failure monitor, service id, whether 
the
+     * service log is enabled, and the monitor (ping) rate, all of which are
+     * free to be used by derived classes.
+     *
+     * @param arguments This is passed in from the service specification's
+     *                  service_ping_arguments string.
+     *
+     * @param endpoint This is the name of the service endpoint, as passed in
+     *                 at service registration.
+     *
+     * @param initProps Properties file with static data about the service and 
+     *                  pinger.
+     */
+    public void init(String arguments, String endpoint, Properties initProps)
+        throws Exception
+    {
+        failure_window_size = 
Integer.parseInt(initProps.getProperty("failure-window"));
+        failure_window = new int[failure_window_size];
+        failure_cursor = 0;
+        
+        monitor_rate = Integer.parseInt(initProps.getProperty("monitor-rate") 
) / 60000;       // convert to minutes
+        if (monitor_rate <= 0 ) monitor_rate = 1;                              
                  // minimum 1 minute allowed
+        
+        service_id = Long.parseLong(initProps.getProperty("service-id") );
+        
+        log_enabled = Boolean.parseBoolean(initProps.getProperty("do-log"));
+        
+        failure_max = Integer.parseInt(initProps.getProperty("failure-max"));  
                                          
+
+        init(arguments, endpoint);
+    }
+
+    /**
      * Stop is called by the ping wrapper when it is being killed.  
Implementors may optionally
      * override this method with conenction shutdown code.
      */
@@ -53,31 +102,135 @@ public abstract class AServicePing
      *     for correct management and display of the service.
      */
     public abstract IServiceStatistics getStatistics();
-    
 
+    /**
+     * Current state of the monitored service is passed in here.
+     */    
     public void setSmState(Properties props)
     {
         smState = props;
     }
 
+    /**
+     * Getter for the service state; implementors may just access it directly 
if they want.
+     */
     public Properties getSmState() 
     {
         return smState;
     }
 
+    /**
+     * Called by the service manager to query the number of additional needed 
service instances.
+     */
     public int getAdditions()
     {
         return 0;
     }
 
+    /**
+     * Called by the service manager to query the number of service instances 
to dump.
+     */
     public int getDeletions()
     {
         return 0;
     }
 
+    private String fmtArray(int[] array)
+    {
+        Object[] vals = new Object[array.length];
+        StringBuffer sb = new StringBuffer();
+        
+        for ( int i = 0; i < array.length; i++ ) {
+            sb.append("%3s ");
+            vals[i] = Integer.toString(array[i]);
+        }
+        return String.format(sb.toString(), vals);
+    }
+
+    /**
+     * This is intended for use by the SM when it drives a pinger in an 
internal thread.  External
+     * pingers won't have this set.  
+     *
+     * However, external pinger's stdout is picked up by DUCC so the logger 
will still print
+     * stuff to the service log without the use of the ducc logger.
+     */
+    public void setLogger(org.apache.uima.ducc.common.utils.DuccLogger logger)
+    {
+        this.duccLogger = logger;
+    }
+
+    private void doLog(String methodName, Object ... msg)
+    {        
+        if ( !log_enabled ) return;
+
+        StringBuffer buf = new StringBuffer(methodName);
+        buf.append(" ");
+        buf.append(Long.toString(service_id));
+        for ( Object o : msg ) {
+            buf.append(" ");
+            if ( o == null ) {
+                buf.append("<null>");
+            } else {
+                buf.append(o.toString());
+            }
+        }
+
+        if ( duccLogger != null ) {
+            duccLogger.info(methodName, null, buf);
+        } else {
+            System.out.println(buf);
+        }
+    }
+
+    /**
+     * This determines if there have been excessive service instance failures 
by tracking the 
+     * number of failures, not consecutive, but rather within a window of 
time.  It may be
+     * overridden by extending monitors.
+     */
     public boolean isExcessiveFailures()
     {
-        return false;
+        String methodName = "isExcessiveFailures";
+        boolean excessive_failures = false;
+
+        // Calculate total instance failures within some configured window.  
If we get a cluster
+        // of failures, signal excessive failures so SM stops spawning new 
ones.
+        int failures = Integer.parseInt(smState.getProperty("run-failures"));
+        doLog(methodName, "failures:", failures, "total_failures", 
total_failures);
+        if ( failures > 0 ) {
+            int diff = Math.max(0, failures - total_failures);  // nfailures 
since last update
+            if ( diff < 0 ) {
+                // This indicates an instance was restarted, which forces a 
cleaning of
+                // failure conditions.
+                total_failures = 0;
+                failure_cursor = 0;
+                for ( int i = 0; i < failure_window_size; i++ ) {
+                    failure_window[i] = 0;
+                }
+            } else if ( diff > 0 ) {
+                total_failures += diff;
+            }
+
+            if ( diff >= 0 ) {
+                failure_window[failure_cursor++] = diff;
+            }
+
+            doLog(methodName, "failures", failures, "total_failures", 
total_failures, 
+                  "failure_window", fmtArray(failure_window), 
"failure_cursor", failure_cursor);
+
+            failure_cursor = failure_cursor % failure_window_size;
+
+            int windowed_failures = 0;
+            excessive_failures = false;
+            for ( int i = 0; i < failure_window_size; i++ ) {
+                windowed_failures += failure_window[i];                    
+            }
+            if ( windowed_failures >= failure_max ) {
+                excessive_failures = true;
+            }
+            doLog(methodName, "windowed_failures", windowed_failures, 
"excessive_failures", excessive_failures);
+        }
+        return excessive_failures;
     }
 
+
 }

Modified: 
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java
 (original)
+++ 
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java
 Fri Feb  7 13:48:17 2014
@@ -45,18 +45,6 @@ public class UimaAsPing
 {
     String ep;
 
-    int failure_max = 5;            // max consecutive run failures before 
reporting excessive failures
-                                    // which prevents restart of instances
-    int current_failures = 0;       // current consecutive run failures
-    int consecutive_failures = 0;   // n failures in consecutive pings
-    int failure_window_size = 15;   // 15 minutes
-    int monitor_rate = 1;           // ping rate, in minutes, min 1 used for 
calculations
-    int fail_index = 0;
-    int[] failure_window = null;    // tracks consecutive failures within a 
window
-    int failure_cursor = 0;
-    long service_id = 0;
-    
-    boolean excessive_failures = false;
     
     String endpoint;
     String broker;
@@ -73,7 +61,6 @@ public class UimaAsPing
     String nodeIp;
     String pid;
     boolean gmfail = false;
-    boolean enable_log = false;
     
     public UimaAsPing()
     {
@@ -128,15 +115,8 @@ public class UimaAsPing
             }
             meta_timeout         = props.getIntProperty    ("meta-timeout"   , 
5000);
             broker_jmx_port      = props.getIntProperty    ("broker-jmx-port", 
1099);
-            enable_log           = props.getBooleanProperty("enable-log"     , 
false);
-            failure_max          = props.getIntProperty    ("max-failures"   , 
failure_max);
-            failure_window_size  = props.getIntProperty    ("failure-window" , 
failure_window_size);
-            failure_window = new int[failure_window_size];
-            failure_cursor = 0;
         }
 
-        doLog("<ctr>", null, "INIT: meta_timeout", meta_timeout, 
"broker-jmx-port", broker_jmx_port);
-
         this.monitor = new UimaAsServiceMonitor(endpoint, broker_host, 
broker_jmx_port);
     }
 
@@ -146,8 +126,8 @@ public class UimaAsPing
     }
 
     private void doLog(String methodName, Object ... msg)
-    {
-        if ( ! enable_log ) return;
+    {        
+        if ( !log_enabled ) return;
 
         StringBuffer buf = new StringBuffer(methodName);
         for ( Object o : msg ) {
@@ -161,17 +141,6 @@ public class UimaAsPing
         System.out.println(buf);
     }
 
-    private String fmtArray(int[] array)
-    {
-        Object[] vals = new Object[array.length];
-        StringBuffer sb = new StringBuffer();
-        
-        for ( int i = 0; i < array.length; i++ ) {
-            sb.append("%3s ");
-            vals[i] = Integer.toString(array[i]);
-        }
-        return String.format(sb.toString(), vals);
-    }
 
     void evaluateService(IServiceStatistics stats)
     {
@@ -182,55 +151,12 @@ public class UimaAsPing
             monitor.collect();
             stats.setHealthy(true);       // this pinger defines 'healthy' as
                                           // 'service responds to get-meta and 
broker returns jmx stats'
-
-
-            monitor_rate = 
Integer.parseInt(smState.getProperty("monitor-rate") ) / 60000;       // 
convert to minutes
-            service_id   = Long.parseLong(smState.getProperty("service-id"));  
          
-            if (monitor_rate <= 0 ) monitor_rate = 1;                          
                  // minimum 1 minute allowed
-
-            // Calculate total instance failures within some configured 
window.  If we get a cluster
-            // of failures, signal excessive failures so SM stops spawning new 
ones.
-            int failures = 
Integer.parseInt(smState.getProperty("run-failures"));
-            doLog(methodName, "run-failures:", failures);
-            if ( (failure_window != null) && (failures > 0) ) {
-                int diff = failures - current_failures;  // nfailures since 
last update
-                current_failures = failures;
-
-                if ( diff > 0 ) {
-                    failure_window[failure_cursor++] = diff;
-                } else {
-                    failure_window[failure_cursor++] = 0;                    
-                }
-
-                doLog(methodName, "failures", failures, "current_failures", 
current_failures, 
-                      "failure_window", fmtArray(failure_window), 
"failure_cursor", failure_cursor);
-
-                failure_cursor = failure_cursor % failure_window_size;
-
-
-
-                int windowed_failures = 0;
-                excessive_failures = false;
-                for ( int i = 0; i < failure_window_size; i++ ) {
-                    windowed_failures += failure_window[i];                    
-                }
-                if ( windowed_failures >= failure_max ) {
-                    excessive_failures = true;
-                }
-                doLog(methodName, "windowed_failures", windowed_failures, 
"excessive_failures", excessive_failures);
-            }
-
         } catch ( Throwable t ) {
             stats.setHealthy(false);
             monitor.setJmxFailure(t.getMessage());
         }
     }
 
-    public boolean isExcessiveFailures()
-    {
-        return excessive_failures;
-    }
-
     public IServiceStatistics getStatistics()
     {
         String methodName = "getStatistics";

Modified: 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
 (original)
+++ 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
 Fri Feb  7 13:48:17 2014
@@ -145,6 +145,7 @@ class PingDriver
 
         this.log_directory     = resolveStringProperty ("log_directory", 
ping_props, job_props, null);     // cli always puts this int job props, no 
default 
 
+
         jvm_args_str = jvm_args_str.trim();
         if ( jvm_args_str.equals("") ) {
             jvm_args = null;
@@ -357,12 +358,30 @@ class PingDriver
         }
     }
 
+    void setCommonInitProperties(Properties props)
+    {
+        props.setProperty("monitor-rate"    , "" + meta_ping_rate);
+        props.setProperty("service-id"      , "" + sset.getId().getFriendly());
+        props.setProperty("failure-max"     , "" + 
ServiceManagerComponent.failure_max);
+        props.setProperty("failure-window"  , "" + 
ServiceManagerComponent.failure_window);
+        props.setProperty("do-log"          , "" + do_log);
+    }
+
+    void setCommonProperties(Properties props)
+    {
+        props.setProperty("total-instances" , "" + sset.countImplementors());
+        props.setProperty("active-instances", "" + sset.getActiveInstances());
+        props.setProperty("references"      , "" + sset.countReferences());
+        props.setProperty("run-failures"    , "" + sset.getRunFailures());
+    }
+
     void runAsThread()
     {
         long tid = Thread.currentThread().getId();
 
        String methodName = "runAsThread[" + tid + "]";
         AServicePing pinger = null;
+        Properties initProps = new Properties();
         Properties props = new Properties();
 
                try {
@@ -381,18 +400,14 @@ class PingDriver
             return;            
                }
 
-        try {
-            props.setProperty("total-instances", "" + 
sset.countImplementors());
-            props.setProperty("active-instances", "" + 
sset.getActiveInstances());
-            props.setProperty("references", "" + sset.countReferences());
-            props.setProperty("run-failures", "" + sset.getRunFailures());
-            props.setProperty("monitor-rate", "" + meta_ping_rate);
-            props.setProperty("service-id", "" + sset.getId().getFriendly());
-
-            pinger.setSmState(props);
-            pinger.init(ping_arguments, endpoint);
-            while ( ! shutdown ) {
-                
+        try {            
+            setCommonInitProperties(initProps);
+            pinger.setLogger(logger);
+            pinger.init(ping_arguments, endpoint, initProps);
+            
+            while ( ! shutdown ) {                
+                setCommonProperties(props);
+                pinger.setSmState(props);
                 Pong pr = new Pong();
 
                 pr.setStatistics(pinger.getStatistics());
@@ -647,13 +662,7 @@ class PingDriver
                     // Ask for the ping
                     try {
                         logger.info(methodName, sset.getId(), 
"ExtrnPingDriver: ping OUT");
-                        props.setProperty("total-instances" , "" + 
sset.countImplementors());
-                        props.setProperty("active-instances", "" + 
sset.getActiveInstances());
-                        props.setProperty("references"      , "" + 
sset.countReferences());
-                        props.setProperty("run-failures"    , "" + 
sset.getRunFailures());
-                        props.setProperty("monitor-rate"    , "" + 
meta_ping_rate);
-                        props.setProperty("service-id"      , "" + 
sset.getId().getFriendly());
-
+                        setCommonProperties(props);
                         oos.writeObject(new Ping(false, props));
                         oos.flush();
                         oos.reset();

Modified: 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
 (original)
+++ 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
 Fri Feb  7 13:48:17 2014
@@ -688,8 +688,9 @@ public class ServiceHandler
             wanted = instances;
         }
 
+        sset.setNInstances(running + instances);
+        sset.resetRuntimeErrors();
         if ( update ) {
-            sset.setNInstances(running + instances);
             sset.saveMetaProperties();
         }
 
@@ -743,8 +744,8 @@ public class ServiceHandler
             tolose = Math.min(instances, running);
         }
 
+        sset.setNInstances(Math.max(0, running - instances)); // never persist 
< 0 registered instance
         if ( update ) {
-            sset.setNInstances(Math.max(0, running - instances)); // never 
persist < 0 registered instance
             sset.saveMetaProperties();
         }
         

Modified: 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
 (original)
+++ 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
 Fri Feb  7 13:48:17 2014
@@ -98,6 +98,7 @@ public class ServiceManagerComponent 
     static String default_ping_class;
 
     static int failure_max = 5;
+    static int failure_window = 30;
 
     private String state_dir = null;
     private String state_file = null;
@@ -253,6 +254,7 @@ public class ServiceManagerComponent 
                
DuccDaemonRuntimeProperties.getInstance().boot(DaemonName.ServiceManager,getProcessJmxUrl());
 
         failure_max = 
SystemPropertyResolver.getIntProperty("ducc.sm.instance.failure.max", 
failure_max);
+        failure_window = 
SystemPropertyResolver.getIntProperty("ducc.sm.instance.failure.window", 
failure_window);
         meta_ping_rate = 
SystemPropertyResolver.getIntProperty("ducc.sm.meta.ping.rate", meta_ping_rate);
         meta_ping_timeout = 
SystemPropertyResolver.getIntProperty("ducc.sm.meta.ping.timeout", 
meta_ping_timeout);
         meta_ping_stability = 
SystemPropertyResolver.getIntProperty("ducc.sm.meta.ping.stability", 
meta_ping_stability);

Modified: 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
 (original)
+++ 
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
 Fri Feb  7 13:48:17 2014
@@ -470,6 +470,12 @@ public class ServiceSet
         return ping_only;
     }
 
+    synchronized void resetRuntimeErrors()
+    {
+        run_failures = 0;
+        excessiveRunFailures = false;
+    }
+
     synchronized void setAutostart(boolean auto)
     {
         cancelLinger();


Reply via email to