[ 
https://issues.apache.org/jira/browse/GOBBLIN-1552?focusedWorklogId=677304&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-677304
 ]

ASF GitHub Bot logged work on GOBBLIN-1552:
-------------------------------------------

                Author: ASF GitHub Bot
            Created on: 05/Nov/21 19:35
            Start Date: 05/Nov/21 19:35
    Worklog Time Spent: 10m 
      Work Description: arjun4084346 commented on a change in pull request 
#3403:
URL: https://github.com/apache/gobblin/pull/3403#discussion_r743146832



##########
File path: 
gobblin-runtime/src/main/java/org/apache/gobblin/service/monitoring/JobStatusRetriever.java
##########
@@ -218,4 +228,42 @@ public static boolean 
isFlowStatus(org.apache.gobblin.service.monitoring.JobStat
     return jobStatus.getJobName() != null && jobStatus.getJobGroup() != null
         && jobStatus.getJobName().equals(JobStatusRetriever.NA_KEY) && 
jobStatus.getJobGroup().equals(JobStatusRetriever.NA_KEY);
   }
+
+  public static ExecutionStatus getFlowStatusFromJobStatuses(boolean 
dagManagerEnabled, Iterator<JobStatus> jobStatusIterator) {
+    ExecutionStatus flowExecutionStatus = ExecutionStatus.$UNKNOWN;
+
+    if (dagManagerEnabled) {
+      while (jobStatusIterator.hasNext()) {
+        JobStatus jobStatus = jobStatusIterator.next();
+        // Check if this is the flow status instead of a single job status
+        if (JobStatusRetriever.isFlowStatus(jobStatus)) {
+          flowExecutionStatus = 
ExecutionStatus.valueOf(jobStatus.getEventName());
+        }
+      }
+    } else {
+      Set<ExecutionStatus> jobStatuses = new HashSet<>();
+      while (jobStatusIterator.hasNext()) {
+        JobStatus jobStatus = jobStatusIterator.next();
+        // because in absence of DagManager we do not get all flow level 
events, we will ignore the flow level events
+        // we actually get and purely calculate flow status based on flow 
statuses.

Review comment:
       In the future, I actually plan to remove the 'dagManagerDisabled' blocks :)

##########
File path: 
gobblin-runtime/src/main/java/org/apache/gobblin/service/monitoring/JobStatusRetriever.java
##########
@@ -218,4 +228,42 @@ public static boolean 
isFlowStatus(org.apache.gobblin.service.monitoring.JobStat
     return jobStatus.getJobName() != null && jobStatus.getJobGroup() != null
         && jobStatus.getJobName().equals(JobStatusRetriever.NA_KEY) && 
jobStatus.getJobGroup().equals(JobStatusRetriever.NA_KEY);
   }
+
+  public static ExecutionStatus getFlowStatusFromJobStatuses(boolean 
dagManagerEnabled, Iterator<JobStatus> jobStatusIterator) {
+    ExecutionStatus flowExecutionStatus = ExecutionStatus.$UNKNOWN;
+
+    if (dagManagerEnabled) {
+      while (jobStatusIterator.hasNext()) {
+        JobStatus jobStatus = jobStatusIterator.next();
+        // Check if this is the flow status instead of a single job status
+        if (JobStatusRetriever.isFlowStatus(jobStatus)) {
+          flowExecutionStatus = 
ExecutionStatus.valueOf(jobStatus.getEventName());
+        }
+      }
+    } else {
+      Set<ExecutionStatus> jobStatuses = new HashSet<>();
+      while (jobStatusIterator.hasNext()) {
+        JobStatus jobStatus = jobStatusIterator.next();
+        // because in absence of DagManager we do not get all flow level 
events, we will ignore the flow level events
+        // we actually get and purely calculate flow status based on flow 
statuses.
+        if (!JobStatusRetriever.isFlowStatus(jobStatus)) {
+          jobStatuses.add(ExecutionStatus.valueOf(jobStatus.getEventName()));
+        }
+      }
+
+      if (jobStatuses.contains(ExecutionStatus.FAILED)) {
+        flowExecutionStatus = ExecutionStatus.FAILED;
+      } else if (jobStatuses.contains(ExecutionStatus.CANCELLED)) {
+        flowExecutionStatus = ExecutionStatus.CANCELLED;
+      } else if (jobStatuses.contains(ExecutionStatus.ORCHESTRATED)) {
+        flowExecutionStatus = ExecutionStatus.ORCHESTRATED;
+      } else if (jobStatuses.contains(ExecutionStatus.RUNNING)) {
+        flowExecutionStatus = ExecutionStatus.RUNNING;
+      } else if (jobStatuses.contains(ExecutionStatus.COMPLETE)) {
+        flowExecutionStatus = ExecutionStatus.COMPLETE;

Review comment:
       Yes, `KafkaJobStatusMonitor` should update the job status

##########
File path: 
gobblin-runtime/src/test/java/org/apache/gobblin/service/monitoring/FlowStatusGeneratorTest.java
##########
@@ -134,8 +143,9 @@ public void testGetFlowStatusesAcrossGroup() {
         Arrays.asList(f0jsmDep2)));
   }
 
-  private FlowStatus createFlowStatus(String flowGroup, String flowName, long 
flowExecutionId, List<JobStatus> jobStatuses) {
-    return new FlowStatus(flowName, flowGroup, flowExecutionId, 
jobStatuses.iterator());
+  private FlowStatus createFlowStatus(String flowGroup, String flowName, long 
flowExecutionId, List<JobStatus> jobStatuses, JobStatusRetriever 
jobStatusRetriever) {
+    return new FlowStatus(flowName, flowGroup, flowExecutionId, 
jobStatuses.iterator(),
+        
jobStatusRetriever.getFlowStatusFromJobStatuses(jobStatusRetriever.dagManagerEnabled,
 jobStatuses.iterator()));

Review comment:
       final param?

##########
File path: 
gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/FsJobStatusRetriever.java
##########
@@ -60,7 +62,8 @@
 
   @Inject
   public FsJobStatusRetriever(Config config, MultiContextIssueRepository 
issueRepository) {
-    super(issueRepository);
+    super(ConfigUtils.getBoolean(config, 
ServiceConfigKeys.GOBBLIN_SERVICE_DAG_MANAGER_ENABLED_KEY,
+        ServiceConfigKeys.DEFAULT_GOBBLIN_SERVICE_DAG_MANAGER_ENABLED), 
issueRepository);

Review comment:
       Yes, I would prefer to leave it like this. If in future we need more 
params to be passed, we again have to change the signature/DI logic. We already 
need other configs in the next line to create a state store

##########
File path: 
gobblin-service/src/test/java/org/apache/gobblin/service/monitoring/JobStatusRetrieverTest.java
##########
@@ -103,8 +103,8 @@ public void testGetJobStatusesForFlowExecution() throws 
IOException {
     long flowExecutionId = 1234L;
     addJobStatusToStateStore(flowExecutionId, JobStatusRetriever.NA_KEY, 
ExecutionStatus.COMPILED.name());
 
-    Iterator<JobStatus>
-        jobStatusIterator = 
this.jobStatusRetriever.getJobStatusesForFlowExecution(FLOW_NAME, FLOW_GROUP, 
flowExecutionId);
+    List<JobStatus> jobStatuses = 
ImmutableList.copyOf(this.jobStatusRetriever.getJobStatusesForFlowExecution(FLOW_NAME,
 FLOW_GROUP, flowExecutionId));

Review comment:
       getJobStatusesForFlowExecution is still non-static

##########
File path: 
gobblin-service/src/test/java/org/apache/gobblin/service/monitoring/MysqlJobStatusRetrieverTestWithoutDagManager.java
##########
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.monitoring;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Properties;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import com.google.common.base.Strings;
+
+import org.apache.gobblin.config.ConfigBuilder;
+import org.apache.gobblin.configuration.ConfigurationKeys;
+import org.apache.gobblin.configuration.State;
+import org.apache.gobblin.metastore.MysqlJobStatusStateStore;
+import org.apache.gobblin.metastore.testing.ITestMetastoreDatabase;
+import org.apache.gobblin.metastore.testing.TestMetastoreDatabaseFactory;
+import org.apache.gobblin.metrics.event.TimingEvent;
+import org.apache.gobblin.runtime.troubleshooter.MultiContextIssueRepository;
+import org.apache.gobblin.service.ExecutionStatus;
+import org.apache.gobblin.service.ServiceConfigKeys;
+
+import static org.mockito.Mockito.mock;
+
+
+public class MysqlJobStatusRetrieverTestWithoutDagManager extends 
JobStatusRetrieverTest {
+  private MysqlJobStatusStateStore<State> dbJobStateStore;
+  private static final String TEST_USER = "testUser";
+  private static final String TEST_PASSWORD = "testPassword";
+
+  @BeforeClass
+  @Override
+  public void setUp() throws Exception {
+    ITestMetastoreDatabase testMetastoreDatabase = 
TestMetastoreDatabaseFactory.get();
+    String jdbcUrl = testMetastoreDatabase.getJdbcUrl();
+
+    ConfigBuilder configBuilder = ConfigBuilder.create();
+    
configBuilder.addPrimitive(MysqlJobStatusRetriever.MYSQL_JOB_STATUS_RETRIEVER_PREFIX
 + "." + ConfigurationKeys.STATE_STORE_DB_URL_KEY, jdbcUrl);
+    
configBuilder.addPrimitive(MysqlJobStatusRetriever.MYSQL_JOB_STATUS_RETRIEVER_PREFIX
 + "." + ConfigurationKeys.STATE_STORE_DB_USER_KEY, TEST_USER);
+    
configBuilder.addPrimitive(MysqlJobStatusRetriever.MYSQL_JOB_STATUS_RETRIEVER_PREFIX
 + "." + ConfigurationKeys.STATE_STORE_DB_PASSWORD_KEY, TEST_PASSWORD);
+
+    this.jobStatusRetriever =
+        new MysqlJobStatusRetriever(configBuilder.build(), 
mock(MultiContextIssueRepository.class));
+    
configBuilder.addPrimitive(ServiceConfigKeys.GOBBLIN_SERVICE_DAG_MANAGER_ENABLED_KEY,
 "true");
+    this.dbJobStateStore = ((MysqlJobStatusRetriever) 
this.jobStatusRetriever).getStateStore();
+    cleanUpDir();
+  }
+
+  @Test
+  public void testGetJobStatusesForFlowExecution() throws IOException {
+    super.testGetJobStatusesForFlowExecution();
+  }
+
+  @Test (dependsOnMethods = "testGetJobStatusesForFlowExecution")
+  public void testJobTiming() throws Exception {
+    super.testJobTiming();
+  }
+
+  @Test (dependsOnMethods = "testJobTiming")
+  public void testOutOfOrderJobTimingEvents() throws IOException {
+    super.testOutOfOrderJobTimingEvents();
+  }
+
+  @Test (dependsOnMethods = "testJobTiming")
+  public void testGetJobStatusesForFlowExecution1() {
+    super.testGetJobStatusesForFlowExecution1();
+  }
+
+  @Test (dependsOnMethods = "testGetJobStatusesForFlowExecution1")
+  public void testGetLatestExecutionIdsForFlow() throws Exception {
+    super.testGetLatestExecutionIdsForFlow();
+  }
+
+  @Test (dependsOnMethods = "testGetLatestExecutionIdsForFlow")
+  public void testGetFlowStatusFromJobStatuses() throws Exception {
+    long flowExecutionId = 1237L;
+
+    addJobStatusToStateStore(flowExecutionId, JobStatusRetriever.NA_KEY, 
ExecutionStatus.COMPILED.name());
+    Assert.assertEquals(ExecutionStatus.$UNKNOWN,
+        
jobStatusRetriever.getFlowStatusFromJobStatuses(jobStatusRetriever.dagManagerEnabled,
 jobStatusRetriever.getJobStatusesForFlowExecution(FLOW_NAME, FLOW_GROUP, 
flowExecutionId)));

Review comment:
       Maybe it was static in between changes, but it is not now.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 677304)
    Time Spent: 3h 20m  (was: 3h 10m)

> fix flow status reporting when dag manager is not enabled
> ---------------------------------------------------------
>
>                 Key: GOBBLIN-1552
>                 URL: https://issues.apache.org/jira/browse/GOBBLIN-1552
>             Project: Apache Gobblin
>          Issue Type: Bug
>            Reporter: Arjun Singh Bora
>            Priority: Major
>          Time Spent: 3h 20m
>  Remaining Estimate: 0h
>
> Flow status is determined by looking at the flow-level events, but flow-level 
> events are not emitted outside of the DAG manager. As a result, flow status is 
> currently not determined correctly when the DAG manager is disabled.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to