Repository: incubator-eagle Updated Branches: refs/heads/master 87feb883c -> 84ceeb150
[EAGLE-839] add job diagnostics Author: wujinhu <wujinhu...@126.com> Closes #735 from wujinhu/EAGLE-835. Project: http://git-wip-us.apache.org/repos/asf/incubator-eagle/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-eagle/commit/84ceeb15 Tree: http://git-wip-us.apache.org/repos/asf/incubator-eagle/tree/84ceeb15 Diff: http://git-wip-us.apache.org/repos/asf/incubator-eagle/diff/84ceeb15 Branch: refs/heads/master Commit: 84ceeb150a2cdbc9a44be001fb5a86c4079644e1 Parents: 87feb88 Author: wujinhu <wujinhu...@126.com> Authored: Tue Dec 13 18:59:19 2016 +0800 Committer: Hao Chen <h...@apache.org> Committed: Tue Dec 13 18:59:19 2016 +0800 ---------------------------------------------------------------------- .../mr/historyentity/JobExecutionAPIEntity.java | 12 +++---- .../mr/history/parser/JHFEventReaderBase.java | 36 +++++++++++++------- .../mr/history/parser/JHFMRVer2EventReader.java | 3 ++ 3 files changed, 33 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/84ceeb15/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java b/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java index 0e40099..55233aa 100644 --- a/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java +++ b/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java @@ -94,7 +94,7 @@ public class JobExecutionAPIEntity extends JobBaseAPIEntity { @Column("ad") private String trackingUrl; @Column("ae") - private Map<String, Map<String, String>> failedTasks; + private String diagnostics; public String getTrackingUrl() { return trackingUrl; @@ -348,12 +348,12 @@ public class JobExecutionAPIEntity extends JobBaseAPIEntity { valueChanged("failedReduceAttempts"); } - public Map<String, Map<String, String>> getFailedTasks() { - return failedTasks; + public String getDiagnostics() { + return diagnostics; } - public void setFailedTasks(Map<String, Map<String, String>> failedTasks) { - this.failedTasks = failedTasks; - valueChanged("failedTasks"); + public void setDiagnostics(String diagnostics) { + this.diagnostics = diagnostics; + valueChanged("diagnostics"); } } http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/84ceeb15/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java index 80cdb1c..3a9e147 100644 --- a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java @@ -18,6 +18,7 @@ package org.apache.eagle.jpm.mr.history.parser; +import org.apache.commons.lang3.tuple.Pair; import org.apache.eagle.jpm.mr.history.MRHistoryJobConfig; import org.apache.eagle.jpm.mr.history.crawler.JobHistoryContentFilter; import org.apache.eagle.jpm.mr.history.metrics.JobCounterMetricsGenerator; @@ -60,6 +61,8 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl protected Map<String, String> taskRunningHosts; // hostname to rack mapping protected Map<String, String> host2RackMapping; + // taskattempt to error msg, attemptId, taskId, error + protected Map<String, Pair<String, String>> attempt2ErrorMsg; protected String jobId; protected String jobName; @@ -105,11 +108,11 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl jobExecutionEntity.setTags(new HashMap<>(baseTags)); jobExecutionEntity.setNumFailedMaps(0); jobExecutionEntity.setNumFailedReduces(0); - jobExecutionEntity.setFailedTasks(new HashMap<>()); taskRunningHosts = new HashMap<>(); host2RackMapping = new HashMap<>(); + attempt2ErrorMsg = new HashMap<>(); taskStartTime = new HashMap<>(); taskAttemptStartTime = new HashMap<>(); @@ -295,10 +298,28 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl jobExecutionEntity.setAvgReduceTaskDuration(this.sumReduceTaskDuration * 1.0 / numTotalReduces); } this.jobCounterMetricsGenerator.setBaseTags(jobExecutionEntity.getTags()); + + formatDiagnostics(values.get(Keys.DIAGNOSTICS)); + entityCreated(jobExecutionEntity); } } + private void formatDiagnostics(String diagnostics) { + String formatDiagnostics = ""; + if (diagnostics != null) { + for (String attemptId : attempt2ErrorMsg.keySet()) { + String taskId = attempt2ErrorMsg.get(attemptId).getLeft(); + String error = attempt2ErrorMsg.get(attemptId).getRight(); + if (diagnostics.contains(taskId)) { + formatDiagnostics = error; + break; + } + } + } + jobExecutionEntity.setDiagnostics(formatDiagnostics); + } + private void entityCreated(JobBaseAPIEntity entity) throws Exception { for (HistoryJobEntityLifecycleListener lifecycleListener : this.jobEntityLifecycleListeners) { lifecycleListener.jobEntityCreated(entity); @@ -440,16 +461,7 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl } entityCreated(entity); - if (entity.getTags().get(MRJobTagName.ERROR_CATEGORY.toString()) != null) { - jobExecutionEntity.getFailedTasks().put(taskID, - new HashMap<String, String>() { - { - put(entity.getTags().get(MRJobTagName.ERROR_CATEGORY.toString()), - entity.getTags().get(MRJobTagName.ERROR_CATEGORY.toString()));//decide later - } - } - ); - } + attempt2ErrorMsg.put(taskAttemptID, Pair.of(taskID, entity.getError())); taskAttemptStartTime.remove(taskAttemptID); } else { // silently ignore @@ -544,7 +556,7 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl ERROR, TASK_ATTEMPT_ID, TASK_STATUS, COPY_PHASE, SORT_PHASE, REDUCE_PHASE, SHUFFLE_FINISHED, SORT_FINISHED, COUNTERS, SPLITS, JOB_PRIORITY, HTTP_PORT, TRACKER_NAME, STATE_STRING, VERSION, MAP_COUNTERS, REDUCE_COUNTERS, - VIEW_JOB, MODIFY_JOB, JOB_QUEUE, RACK, + VIEW_JOB, MODIFY_JOB, JOB_QUEUE, RACK, DIAGNOSTICS, UBERISED, SPLIT_LOCATIONS, FAILED_DUE_TO_ATTEMPT, MAP_FINISH_TIME, PORT, RACK_NAME, http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/84ceeb15/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java index 6e0e3aa..8184f90 100644 --- a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java @@ -255,6 +255,9 @@ public class JHFMRVer2EventReader extends JHFEventReaderBase { if (js.getJobStatus() != null) { values.put(Keys.JOB_STATUS, js.getJobStatus().toString()); } + if (js.getDiagnostics() != null) { + values.put(Keys.DIAGNOSTICS, js.getDiagnostics().toString()); + } handleJob(wrapper.getType(), values, null); }