Repository: incubator-eagle
Updated Branches:
  refs/heads/master 8d7f81e1c -> 929a21da7


[MINOR] health check optimize

Author: wujinhu <wujinhu...@126.com>

Closes #677 from wujinhu/healthCheckOptimize.


Project: http://git-wip-us.apache.org/repos/asf/incubator-eagle/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-eagle/commit/929a21da
Tree: http://git-wip-us.apache.org/repos/asf/incubator-eagle/tree/929a21da
Diff: http://git-wip-us.apache.org/repos/asf/incubator-eagle/diff/929a21da

Branch: refs/heads/master
Commit: 929a21da7182986dc62db953639e27cc983f6f23
Parents: 8d7f81e
Author: wujinhu <wujinhu...@126.com>
Authored: Wed Nov 23 20:09:32 2016 +0800
Committer: wujinhu <wujinhu...@126.com>
Committed: Wed Nov 23 20:09:32 2016 +0800

----------------------------------------------------------------------
 .../ApplicationHealthCheckPublisher.java        |  4 +-
 .../impl/ApplicationHealthCheckBase.java        | 12 ++++
 .../ApplicationHealthCheckEmailPublisher.java   | 20 ++++--
 .../impl/ApplicationHealthCheckServiceImpl.java | 68 +++++++++++++++-----
 .../eagle/app/spi/ApplicationProvider.java      |  1 -
 .../src/main/resources/HealthCheckTemplate.vm   | 18 +++++-
 ...adoopQueueRunningApplicationHealthCheck.java | 13 ++--
 .../MRHistoryJobApplicationHealthCheck.java     | 10 +--
 .../JobEntityCreationEagleServiceListener.java  |  5 +-
 .../SparkHistoryJobApplicationHealthCheck.java  | 12 ++--
 .../src/main/resources/application.conf         |  3 +-
 .../TopologyCheckApplicationHealthCheck.java    | 10 +--
 12 files changed, 127 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/ApplicationHealthCheckPublisher.java
----------------------------------------------------------------------
diff --git 
a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/ApplicationHealthCheckPublisher.java
 
b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/ApplicationHealthCheckPublisher.java
index 9469521..4a8d2e7 100644
--- 
a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/ApplicationHealthCheckPublisher.java
+++ 
b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/ApplicationHealthCheckPublisher.java
@@ -19,6 +19,8 @@ package org.apache.eagle.app.service;
 
 import com.codahale.metrics.health.HealthCheck;
 
+import java.util.Map;
+
 public interface ApplicationHealthCheckPublisher {
-    void onUnHealthApplication(String appId, HealthCheck.Result result);
+    void onUnHealthApplication(Map<String, HealthCheck.Result> results);
 }

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckBase.java
----------------------------------------------------------------------
diff --git 
a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckBase.java
 
b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckBase.java
index 104e76d..75b7c2d 100644
--- 
a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckBase.java
+++ 
b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckBase.java
@@ -23,6 +23,9 @@ import com.typesafe.config.Config;
 import org.apache.eagle.metadata.model.ApplicationEntity;
 import org.apache.eagle.metadata.service.ApplicationEntityService;
 
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
 public abstract class ApplicationHealthCheckBase extends HealthCheck {
     private static final String APP_ID_PATH = "appId";
     protected static final long DEFAULT_MAX_DELAY_TIME = 2 * 60 * 60 * 1000L;
@@ -41,4 +44,13 @@ public abstract class ApplicationHealthCheckBase extends 
HealthCheck {
         ApplicationEntity applicationEntity = 
applicationEntityService.getByUUIDOrAppId(null, config.getString(APP_ID_PATH));
         return applicationEntity.getStatus();
     }
+
+    protected String printMessages(String ... messages) {
+        StringWriter sw = new StringWriter();
+        PrintWriter pw = new PrintWriter(sw, true);
+        for (int i = 0; i < messages.length; i++) {
+            pw.println(messages[i]);
+        }
+        return sw.getBuffer().toString();
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckEmailPublisher.java
----------------------------------------------------------------------
diff --git 
a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckEmailPublisher.java
 
b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckEmailPublisher.java
index 033d326..f3d543a 100644
--- 
a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckEmailPublisher.java
+++ 
b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckEmailPublisher.java
@@ -39,6 +39,9 @@ public class ApplicationHealthCheckEmailPublisher implements 
ApplicationHealthCh
     private static final String CONF_MAIL_CC = "mail.smtp.cc";
     private static final String CONF_MAIL_TEMPLATE = "mail.smtp.template";
     private static final String UNHEALTHY_CONTEXT = "unHealthyContext";
+    private static final Integer HEALTH_CHECK_PORT = 9091;
+    private static final String SERVICE_HOST = "host";
+    private static final String SERVICE_PORT = "port";
 
     private Config config;
 
@@ -47,7 +50,7 @@ public class ApplicationHealthCheckEmailPublisher implements 
ApplicationHealthCh
     }
 
     @Override
-    public void onUnHealthApplication(String appId, HealthCheck.Result result) 
{
+    public void onUnHealthApplication(Map<String, HealthCheck.Result> results) 
{
         Properties properties = parseMailClientConfig();
         if (properties == null) {
             return;
@@ -65,16 +68,21 @@ public class ApplicationHealthCheckEmailPublisher 
implements ApplicationHealthCh
                 }
 
                 final VelocityContext context = new VelocityContext();
+                Map<String, String> appMsgs = new HashMap<>();
+                for (String appId : results.keySet()) {
+                    appMsgs.put(appId, results.get(appId).getMessage());
+                }
                 Map<String, Object> unHealthyContext = new HashMap<>();
-                unHealthyContext.put("appId", appId);
-                unHealthyContext.put("unHealthyMessage", result.getMessage());
+                unHealthyContext.put("appMsgs", appMsgs);
+                unHealthyContext.put("appMgmtUrl", "http://" + 
config.getString(SERVICE_HOST) + ":" + config.getInt(SERVICE_PORT) + 
"/#/integration/site");
+                unHealthyContext.put("healthCheckUrl", "http://" + 
config.getString(SERVICE_HOST) + ":" + HEALTH_CHECK_PORT + "/healthcheck");
                 context.put(UNHEALTHY_CONTEXT, unHealthyContext);
 
                 EagleMailClient client = new EagleMailClient(properties);
                 success = client.send(config.getString(CONF_MAIL_SENDER),
                         recipients,
                         config.hasPath(CONF_MAIL_CC) ? 
config.getString(CONF_MAIL_CC) : null,
-                        config.getString(CONF_MAIL_SUBJECT) + ": " + appId,
+                        config.getString(CONF_MAIL_SUBJECT),
                         config.getString(CONF_MAIL_TEMPLATE),
                         context,
                         null);
@@ -89,9 +97,9 @@ public class ApplicationHealthCheckEmailPublisher implements 
ApplicationHealthCh
             }
         }
         if (success) {
-            LOG.info("Successfully send unhealthy email of application {}", 
appId);
+            LOG.info("Successfully send unhealthy email");
         } else {
-            LOG.warn("Fail sending unhealthy email of application {} after 
tries {} times", appId, MAX_RETRY_COUNT);
+            LOG.warn("Fail sending unhealthy email after tries {} times", 
MAX_RETRY_COUNT);
         }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckServiceImpl.java
----------------------------------------------------------------------
diff --git 
a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckServiceImpl.java
 
b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckServiceImpl.java
index 4aea963..b292fa1 100644
--- 
a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckServiceImpl.java
+++ 
b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckServiceImpl.java
@@ -33,9 +33,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.lang.reflect.Constructor;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
+import java.util.*;
 import java.util.concurrent.TimeUnit;
 
 public class ApplicationHealthCheckServiceImpl extends 
ApplicationHealthCheckService {
@@ -55,6 +53,13 @@ public class ApplicationHealthCheckServiceImpl extends 
ApplicationHealthCheckSer
     private static final String HEALTH_PERIOD_PATH = 
"application.healthCheck.period";
     private static final String HEALTH_PUBLISHER_PATH = 
"application.healthCheck.publisher";
     private static final String HEALTH_PUBLISHER_IMPL_PATH = 
"application.healthCheck.publisher.publisherImpl";
+    private static final String HEALTH_CHECK_DAILY_SEND_HOUR_PATH = 
"application.healthCheck.publisher.dailySendHour";
+    private static final String SERVICE_PATH = "service";
+    private static final String TIMEZONE_PATH = "service.timezone";
+    private static final String HEALTHY = "OK";
+    private boolean hasSendDaily = false;
+
+    private TimeZone timeZone;
 
     @Inject
     private Injector currentInjector;
@@ -82,7 +87,8 @@ public class ApplicationHealthCheckServiceImpl extends 
ApplicationHealthCheckSer
                 clz = 
Thread.currentThread().getContextClassLoader().loadClass(className);
                 if 
(ApplicationHealthCheckPublisher.class.isAssignableFrom(clz)) {
                     Constructor<?> cotr = clz.getConstructor(Config.class);
-                    this.applicationHealthCheckPublisher = 
(ApplicationHealthCheckPublisher)cotr.newInstance(this.config.getConfig(HEALTH_PUBLISHER_PATH));
+                    this.applicationHealthCheckPublisher = 
(ApplicationHealthCheckPublisher)cotr.newInstance(
+                            
this.config.getConfig(HEALTH_PUBLISHER_PATH).withFallback(this.config.getConfig(SERVICE_PATH)));
                 }
             } catch (Exception e) {
                 LOG.warn("exception found when create 
ApplicationHealthCheckPublisher instance {}", e.getCause());
@@ -94,6 +100,7 @@ public class ApplicationHealthCheckServiceImpl extends 
ApplicationHealthCheckSer
     public void init(Environment environment) {
         this.environment = environment;
         registerAll();
+        this.timeZone = TimeZone.getTimeZone(config.getString(TIMEZONE_PATH));
     }
 
     private void registerAll() {
@@ -140,23 +147,54 @@ public class ApplicationHealthCheckServiceImpl extends 
ApplicationHealthCheckSer
     protected void runOneIteration() throws Exception {
         LOG.info("start application health check");
         registerAll();
+
+        boolean isDaily = false;
+        int dailySendHour = config.getInt(HEALTH_CHECK_DAILY_SEND_HOUR_PATH);
+
+        GregorianCalendar cal = new GregorianCalendar(timeZone);
+        if (cal.get(Calendar.HOUR_OF_DAY) % dailySendHour == 0 && 
!hasSendDaily) {
+            isDaily = true;
+        }
+
+        if (cal.get(Calendar.HOUR_OF_DAY) % dailySendHour != 0) {
+            hasSendDaily = false;
+        }
+        Map<String, HealthCheck> copyAppHealthChecks = new HashMap<>();
         synchronized (lock) {
             for (String appId : appHealthChecks.keySet()) {
-                HealthCheck.Result result = 
appHealthChecks.get(appId).execute();
-                if (result.isHealthy()) {
-                    LOG.info("application {} is healthy", appId);
-                } else {
-                    LOG.warn("application {} is not healthy, {}", appId, 
result.getMessage(), result.getError());
-                    if (this.applicationHealthCheckPublisher != null) {
-                        try {
-                            
this.applicationHealthCheckPublisher.onUnHealthApplication(appId, result);
-                        } catch (Exception e) {
-                            LOG.warn("failed to send email for unhealthy 
application {}", appId, e.getCause());
-                        }
+                copyAppHealthChecks.put(appId, appHealthChecks.get(appId));
+            }
+        }
+
+        Map<String, HealthCheck.Result> results = new HashMap<>();
+        for (String appId : copyAppHealthChecks.keySet()) {
+            HealthCheck.Result result = 
copyAppHealthChecks.get(appId).execute();
+            if (result.isHealthy()) {
+                if (isDaily) {
+                    if (result.getMessage() == null || 
result.getMessage().isEmpty()) {
+                        results.put(appId, 
HealthCheck.Result.healthy(HEALTHY));
+                    } else {
+                        results.put(appId, result);
                     }
                 }
+                LOG.info("application {} is healthy", appId);
+            } else {
+                results.put(appId, result);
+                LOG.warn("application {} is not healthy, {}", appId, 
result.getMessage(), result.getError());
+            }
+        }
+
+        if (this.applicationHealthCheckPublisher != null) {
+            try {
+                
this.applicationHealthCheckPublisher.onUnHealthApplication(results);
+                if (isDaily) {
+                    hasSendDaily = true;
+                }
+            } catch (Exception e) {
+                LOG.warn("failed to send email for unhealthy applications", e);
             }
         }
+
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/spi/ApplicationProvider.java
----------------------------------------------------------------------
diff --git 
a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/spi/ApplicationProvider.java
 
b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/spi/ApplicationProvider.java
index d9c1eff..143e026 100644
--- 
a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/spi/ApplicationProvider.java
+++ 
b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/spi/ApplicationProvider.java
@@ -23,7 +23,6 @@ import org.apache.eagle.app.Application;
 import org.apache.eagle.app.service.ApplicationListener;
 import org.apache.eagle.common.module.ModuleRegistry;
 import org.apache.eagle.metadata.model.ApplicationDesc;
-import org.apache.eagle.metadata.service.ApplicationEntityService;
 
 import java.lang.reflect.ParameterizedType;
 import java.util.Optional;

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-core/eagle-app/eagle-app-base/src/main/resources/HealthCheckTemplate.vm
----------------------------------------------------------------------
diff --git 
a/eagle-core/eagle-app/eagle-app-base/src/main/resources/HealthCheckTemplate.vm 
b/eagle-core/eagle-app/eagle-app-base/src/main/resources/HealthCheckTemplate.vm
index 51c1186..74f401a 100644
--- 
a/eagle-core/eagle-app/eagle-app-base/src/main/resources/HealthCheckTemplate.vm
+++ 
b/eagle-core/eagle-app/eagle-app-base/src/main/resources/HealthCheckTemplate.vm
@@ -23,8 +23,22 @@
 <body>
     #set ( $elem = $unHealthyContext )
 
-<p><b>Message: </b>$elem["appId"] has delayed</p>
-<p><b>Detail: </b>$elem["unHealthyMessage"]</p>
+
+<p><b>Health Check: </b>$elem["healthCheckUrl"]</p>
+<p><b>Appliaction Management: </b>$elem["appMgmtUrl"]</p>
+
+<table border="1">
+    <tr>
+        <th><b>Application ID</b></th>
+        <td><b>Health Check Message</b></td>
+    </tr>
+    #foreach($appId in ${elem["appMsgs"].keySet()})
+    <tr>
+        <th>$appId</th>
+        <td>${elem["appMsgs"].get($appId)}</td>
+    </tr>
+    #end
+</table>
 
 </body>
 </html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-jpm/eagle-hadoop-queue/src/main/java/org/apache/eagle/hadoop/queue/HadoopQueueRunningApplicationHealthCheck.java
----------------------------------------------------------------------
diff --git 
a/eagle-jpm/eagle-hadoop-queue/src/main/java/org/apache/eagle/hadoop/queue/HadoopQueueRunningApplicationHealthCheck.java
 
b/eagle-jpm/eagle-hadoop-queue/src/main/java/org/apache/eagle/hadoop/queue/HadoopQueueRunningApplicationHealthCheck.java
index 02308c0..bc5b7c4 100644
--- 
a/eagle-jpm/eagle-hadoop-queue/src/main/java/org/apache/eagle/hadoop/queue/HadoopQueueRunningApplicationHealthCheck.java
+++ 
b/eagle-jpm/eagle-hadoop-queue/src/main/java/org/apache/eagle/hadoop/queue/HadoopQueueRunningApplicationHealthCheck.java
@@ -53,13 +53,14 @@ public class HadoopQueueRunningApplicationHealthCheck 
extends ApplicationHealthC
 
         client.getJerseyClient().setReadTimeout(60000);
 
+        String message = "";
         try {
             ApplicationEntity.Status status = getApplicationStatus();
             if 
(!status.toString().equals(ApplicationEntity.Status.RUNNING.toString())) {
-                String message = String.format("Application is not running, 
status is %s", status.toString());
-                return Result.unhealthy(message);
+                message += String.format("Application is not RUNNING, status 
is %s. ", status.toString());
             }
 
+
             String query = 
String.format("%s[@site=\"%s\"]<@site>{max(timestamp)}",
                     Constants.GENERIC_METRIC_SERVICE,
                     hadoopQueueRunningAppConfig.eagleProps.site);
@@ -67,7 +68,7 @@ public class HadoopQueueRunningApplicationHealthCheck extends 
ApplicationHealthC
             GenericServiceAPIResponseEntity response = client
                     .search(query)
                     
.metricName(HadoopClusterConstants.MetricName.HADOOP_CLUSTER_ALLOCATED_MEMORY)
-                    .startTime(System.currentTimeMillis() - 2 * 60 * 60000L)
+                    .startTime(System.currentTimeMillis() - 24 * 60 * 60000L)
                     .endTime(System.currentTimeMillis())
                     .pageSize(Integer.MAX_VALUE)
                     .send();
@@ -79,15 +80,15 @@ public class HadoopQueueRunningApplicationHealthCheck 
extends ApplicationHealthC
                 maxDelayTime = 
hadoopQueueRunningAppConfig.getConfig().getLong(MAX_DELAY_TIME_KEY);
             }
 
-            if (currentTimeStamp - currentProcessTimeStamp > maxDelayTime) {
-                String message = String.format("Current process time is %sms, 
delay %s hours",
+            if (!message.isEmpty() || currentTimeStamp - 
currentProcessTimeStamp > maxDelayTime) {
+                message += String.format("Current process time is %sms, delay 
%s hours.",
                         currentProcessTimeStamp, (currentTimeStamp - 
currentProcessTimeStamp) * 1.0 / 60000L / 60);
                 return Result.unhealthy(message);
             } else {
                 return Result.healthy();
             }
         } catch (Exception e) {
-            return Result.unhealthy(ExceptionUtils.getStackTrace(e));
+            return Result.unhealthy(printMessages(message, "An exception was 
caught when fetch application current process time: ", 
ExceptionUtils.getStackTrace(e)));
         } finally {
             client.getJerseyClient().destroy();
             try {

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/MRHistoryJobApplicationHealthCheck.java
----------------------------------------------------------------------
diff --git 
a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/MRHistoryJobApplicationHealthCheck.java
 
b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/MRHistoryJobApplicationHealthCheck.java
index 96d8672..20506c0 100644
--- 
a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/MRHistoryJobApplicationHealthCheck.java
+++ 
b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/MRHistoryJobApplicationHealthCheck.java
@@ -52,11 +52,11 @@ public class MRHistoryJobApplicationHealthCheck extends 
ApplicationHealthCheckBa
 
         
client.getJerseyClient().setReadTimeout(eagleServiceConfig.readTimeoutSeconds * 
1000);
 
+        String message = "";
         try {
             ApplicationEntity.Status status = getApplicationStatus();
             if 
(!status.toString().equals(ApplicationEntity.Status.RUNNING.toString())) {
-                String message = String.format("Application is not running, 
status is %s", status.toString());
-                return Result.unhealthy(message);
+                message += String.format("Application is not RUNNING, status 
is %s. ", status.toString());
             }
 
             String query = 
String.format("%s[@site=\"%s\"]<@site>{max(currentTimeStamp)}",
@@ -78,15 +78,15 @@ public class MRHistoryJobApplicationHealthCheck extends 
ApplicationHealthCheckBa
                 maxDelayTime = 
mrHistoryJobConfig.getConfig().getLong(MAX_DELAY_TIME_KEY);
             }
 
-            if (currentTimeStamp - currentProcessTimeStamp > maxDelayTime) {
-                String message = String.format("Current process time is %sms, 
delay %s hours",
+            if (!message.isEmpty() || currentTimeStamp - 
currentProcessTimeStamp > maxDelayTime) {
+                message += String.format("Current process time is %sms, delay 
%s hours.",
                         currentProcessTimeStamp, (currentTimeStamp - 
currentProcessTimeStamp) * 1.0 / 60000L / 60);
                 return Result.unhealthy(message);
             } else {
                 return Result.healthy();
             }
         } catch (Exception e) {
-            return Result.unhealthy(ExceptionUtils.getStackTrace(e));
+            return Result.unhealthy(printMessages(message, "An exception was 
caught when fetch application current process time: ", 
ExceptionUtils.getStackTrace(e)));
         } finally {
             client.getJerseyClient().destroy();
             try {

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JobEntityCreationEagleServiceListener.java
----------------------------------------------------------------------
diff --git 
a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JobEntityCreationEagleServiceListener.java
 
b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JobEntityCreationEagleServiceListener.java
index 902590b..c802442 100644
--- 
a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JobEntityCreationEagleServiceListener.java
+++ 
b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JobEntityCreationEagleServiceListener.java
@@ -27,6 +27,7 @@ import org.apache.eagle.jpm.mr.historyentity.*;
 import org.apache.eagle.jpm.mr.historyentity.JobExecutionAPIEntity;
 import org.apache.eagle.jpm.mr.historyentity.TaskAttemptExecutionAPIEntity;
 import org.apache.eagle.jpm.mr.historyentity.TaskExecutionAPIEntity;
+import org.apache.eagle.jpm.util.Constants;
 import org.apache.eagle.jpm.util.MRJobTagName;
 import org.apache.eagle.log.entity.GenericMetricEntity;
 import org.apache.eagle.log.entity.GenericServiceAPIResponseEntity;
@@ -107,7 +108,9 @@ public class JobEntityCreationEagleServiceListener 
implements HistoryJobEntityCr
                     ((JobExecutionAPIEntity) entity).getCurrentState());
 
                 
metricEntities.addAll(jobExecutionMetricsCreationListener.generateMetrics((JobExecutionAPIEntity)entity));
-                emitFailedJob((JobExecutionAPIEntity)entity);
+                if 
(((JobExecutionAPIEntity)entity).getCurrentState().equals(Constants.JobState.FAILED.toString()))
 {
+                    emitFailedJob((JobExecutionAPIEntity) entity);
+                }
             } else if (entity instanceof JobEventAPIEntity) {
                 jobEvents.add((JobEventAPIEntity) entity);
             } else if (entity instanceof TaskExecutionAPIEntity) {

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-jpm/eagle-jpm-spark-history/src/main/java/org/apache/eagle/jpm/spark/history/SparkHistoryJobApplicationHealthCheck.java
----------------------------------------------------------------------
diff --git 
a/eagle-jpm/eagle-jpm-spark-history/src/main/java/org/apache/eagle/jpm/spark/history/SparkHistoryJobApplicationHealthCheck.java
 
b/eagle-jpm/eagle-jpm-spark-history/src/main/java/org/apache/eagle/jpm/spark/history/SparkHistoryJobApplicationHealthCheck.java
index bb6f390..fe6bd5a 100644
--- 
a/eagle-jpm/eagle-jpm-spark-history/src/main/java/org/apache/eagle/jpm/spark/history/SparkHistoryJobApplicationHealthCheck.java
+++ 
b/eagle-jpm/eagle-jpm-spark-history/src/main/java/org/apache/eagle/jpm/spark/history/SparkHistoryJobApplicationHealthCheck.java
@@ -52,11 +52,11 @@ public class SparkHistoryJobApplicationHealthCheck extends 
ApplicationHealthChec
 
         client.getJerseyClient().setReadTimeout(eagleServiceConfig.timeout * 
1000);
 
+        String message = "";
         try {
             ApplicationEntity.Status status = getApplicationStatus();
             if 
(!status.toString().equals(ApplicationEntity.Status.RUNNING.toString())) {
-                String message = String.format("Application is not running, 
status is %s", status.toString());
-                return Result.unhealthy(message);
+                message += String.format("Application is not RUNNING, status 
is %s. ", status.toString());
             }
 
             String query = 
String.format("%s[@site=\"%s\"]<@site>{max(endTime)}",
@@ -65,7 +65,7 @@ public class SparkHistoryJobApplicationHealthCheck extends 
ApplicationHealthChec
 
             GenericServiceAPIResponseEntity response = client
                     .search(query)
-                    .startTime(System.currentTimeMillis() - 12 * 60 * 60000L)
+                    .startTime(System.currentTimeMillis() - 24 * 60 * 60000L)
                     .endTime(System.currentTimeMillis())
                     .pageSize(10)
                     .send();
@@ -78,15 +78,15 @@ public class SparkHistoryJobApplicationHealthCheck extends 
ApplicationHealthChec
                 maxDelayTime = 
sparkHistoryJobAppConfig.getConfig().getLong(MAX_DELAY_TIME_KEY);
             }
 
-            if (currentTimeStamp - currentProcessTimeStamp > maxDelayTime * 3) 
{
-                String message = String.format("Current process time is %sms, 
delay %s hours",
+            if (!message.isEmpty() || currentTimeStamp - 
currentProcessTimeStamp > maxDelayTime * 3) {
+                message += String.format("Current process time is %sms, delay 
%s hours.",
                         currentProcessTimeStamp, (currentTimeStamp - 
currentProcessTimeStamp) * 1.0 / 60000L / 60);
                 return Result.unhealthy(message);
             } else {
                 return Result.healthy();
             }
         } catch (Exception e) {
-            return Result.unhealthy(ExceptionUtils.getStackTrace(e));
+            return Result.unhealthy(printMessages(message, "An exception was 
caught when fetch application current process time: ", 
ExceptionUtils.getStackTrace(e)));
         } finally {
             client.getJerseyClient().destroy();
             try {

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-server/src/main/resources/application.conf
----------------------------------------------------------------------
diff --git a/eagle-server/src/main/resources/application.conf 
b/eagle-server/src/main/resources/application.conf
index db973c8..da0c7c9 100644
--- a/eagle-server/src/main/resources/application.conf
+++ b/eagle-server/src/main/resources/application.conf
@@ -101,11 +101,12 @@ application {
     period = 60
     publisher {
       publisherImpl = 
org.apache.eagle.app.service.impl.ApplicationHealthCheckEmailPublisher
+      dailySendHour = 11
       mail.smtp.host = "mail.host.com"
       mail.smtp.port = 25
       mail.smtp.recipients = "some...@email.com"
       mail.smtp.sender = "some...@email.com"
-      mail.smtp.subject = "Eagle Application is unhealthy"
+      mail.smtp.subject = "Eagle Application Health Check"
       mail.smtp.template = "HealthCheckTemplate.vm"
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/929a21da/eagle-topology-check/eagle-topology-app/src/main/java/org/apache/eagle/topology/TopologyCheckApplicationHealthCheck.java
----------------------------------------------------------------------
diff --git 
a/eagle-topology-check/eagle-topology-app/src/main/java/org/apache/eagle/topology/TopologyCheckApplicationHealthCheck.java
 
b/eagle-topology-check/eagle-topology-app/src/main/java/org/apache/eagle/topology/TopologyCheckApplicationHealthCheck.java
index 67669f8..4b740db 100644
--- 
a/eagle-topology-check/eagle-topology-app/src/main/java/org/apache/eagle/topology/TopologyCheckApplicationHealthCheck.java
+++ 
b/eagle-topology-check/eagle-topology-app/src/main/java/org/apache/eagle/topology/TopologyCheckApplicationHealthCheck.java
@@ -51,11 +51,11 @@ public class TopologyCheckApplicationHealthCheck extends 
ApplicationHealthCheckB
 
         
client.getJerseyClient().setReadTimeout(topologyCheckAppConfig.getConfig().getInt("service.readTimeOutSeconds")
 * 1000);
 
+        String message = "";
         try {
             ApplicationEntity.Status status = getApplicationStatus();
             if 
(!status.toString().equals(ApplicationEntity.Status.RUNNING.toString())) {
-                String message = String.format("Application is not running, 
status is %s", status.toString());
-                return Result.unhealthy(message);
+                message += String.format("Application is not RUNNING, status 
is %s. ", status.toString());
             }
 
             String query = 
String.format("%s[@site=\"%s\"]<@site>{max(timestamp)}",
@@ -77,15 +77,15 @@ public class TopologyCheckApplicationHealthCheck extends 
ApplicationHealthCheckB
                 maxDelayTime = 
topologyCheckAppConfig.getConfig().getLong(MAX_DELAY_TIME_KEY);
             }
 
-            if (currentTimeStamp - currentProcessTimeStamp > maxDelayTime) {
-                String message = String.format("Current process time is %sms, 
delay %s hours",
+            if (!message.isEmpty() || currentTimeStamp - 
currentProcessTimeStamp > maxDelayTime) {
+                message += String.format("Current process time is %sms, delay 
%s hours.",
                         currentProcessTimeStamp, (currentTimeStamp - 
currentProcessTimeStamp) * 1.0 / 60000L / 60);
                 return Result.unhealthy(message);
             } else {
                 return Result.healthy();
             }
         } catch (Exception e) {
-            return Result.unhealthy(ExceptionUtils.getStackTrace(e));
+            return Result.unhealthy(printMessages(message, "An exception was 
caught when fetch application current process time: ", 
ExceptionUtils.getStackTrace(e)));
         } finally {
             client.getJerseyClient().destroy();
             try {

Reply via email to