[ 
https://issues.apache.org/jira/browse/GOBBLIN-2022?focusedWorklogId=911375&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-911375
 ]

ASF GitHub Bot logged work on GOBBLIN-2022:
-------------------------------------------

                Author: ASF GitHub Bot
            Created on: 25/Mar/24 20:32
            Start Date: 25/Mar/24 20:32
    Worklog Time Spent: 10m 
      Work Description: phet commented on code in PR #3896:
URL: https://github.com/apache/gobblin/pull/3896#discussion_r1538144200


##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagProcFactory.java:
##########
@@ -39,16 +42,24 @@
 public class DagProcFactory implements DagTaskVisitor<DagProc> {
 
   private final FlowCompilationValidationHelper 
flowCompilationValidationHelper;
+  private final JobStatusRetriever jobStatusRetriever;
 
   @Inject
-  public DagProcFactory(FlowCompilationValidationHelper 
flowCompilationValidationHelper) {
+  public DagProcFactory(FlowCompilationValidationHelper 
flowCompilationValidationHelper,
+      JobStatusRetriever jobStatusRetriever) {
     this.flowCompilationValidationHelper = flowCompilationValidationHelper;
+    this.jobStatusRetriever = jobStatusRetriever;
   }
 
   @Override
   public LaunchDagProc meet(LaunchDagTask launchDagTask) {
     return new LaunchDagProc(launchDagTask, 
this.flowCompilationValidationHelper);
   }
+
+  @Override
+  public ReevaluateDagProc meet(ReevaluateDagTask reEvaluateDagTask) {
+    return new ReevaluateDagProc(reEvaluateDagTask, this.jobStatusRetriever);

Review Comment:
   why have the factory provide this, rather than passing it later as part of 
`DagManagementStateStore`, which is already given to `DagProc::process`?
   
   overall, "job status" seems reasonably part of "DAG mgmt state"



##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/LaunchDagProc.java:
##########
@@ -70,16 +53,16 @@ public class LaunchDagProc extends 
DagProc<Optional<Dag<JobExecutionPlan>>, Opti
         
metricContext.newContextAwareGauge(ServiceMetricNames.FLOW_ORCHESTRATION_DELAY, 
orchestrationDelayCounter::get));
   }
 
-  @Override
-  protected DagManager.DagId getDagId() {
-    return this.launchDagTask.getDagId();
+  public LaunchDagProc(LaunchDagTask dagTask, FlowCompilationValidationHelper 
flowCompilationValidationHelper) {
+    this.dagTask = dagTask;

Review Comment:
   `super(dagTask)`



##########
gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/DagManagerTest.java:
##########
@@ -154,6 +154,11 @@ static Dag<JobExecutionPlan> buildDag(String id, Long 
flowExecutionId, String fl
 
   public static Dag<JobExecutionPlan> buildDag(String id, Long 
flowExecutionId, String flowFailureOption, int numNodes, String proxyUser, 
Config additionalConfig)
       throws URISyntaxException {
+    if (additionalConfig.hasPath(ConfigurationKeys.JOB_NAME_KEY)) {
+      throw new RuntimeException("Please do not set " + 
ConfigurationKeys.JOB_NAME_KEY + " because this method is "
+          + "is using hard coded job names in setting " + 
ConfigurationKeys.JOB_DEPENDENCIES);

Review Comment:
   repeated "is"



##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProc.java:
##########
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.orchestration.proc;
+
+import java.io.IOException;
+import java.util.Optional;
+import java.util.Set;
+
+import com.codahale.metrics.Timer;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.gobblin.metrics.ServiceMetricNames;
+import org.apache.gobblin.metrics.event.TimingEvent;
+import org.apache.gobblin.runtime.api.DagActionStore;
+import org.apache.gobblin.service.ExecutionStatus;
+import org.apache.gobblin.service.modules.flowgraph.Dag;
+import org.apache.gobblin.service.modules.flowgraph.DagNodeId;
+import 
org.apache.gobblin.service.modules.orchestration.DagManagementStateStore;
+import org.apache.gobblin.service.modules.orchestration.DagManagerUtils;
+import org.apache.gobblin.service.modules.orchestration.task.ReevaluateDagTask;
+import org.apache.gobblin.service.modules.spec.JobExecutionPlan;
+import org.apache.gobblin.service.monitoring.FlowStatusGenerator;
+import org.apache.gobblin.service.monitoring.JobStatus;
+import org.apache.gobblin.service.monitoring.JobStatusRetriever;
+
+
+/**
+ * An implementation for {@link DagProc} that launches a new job if there 
exists a job whose pre-requisite jobs are
+ * completed successfully. If there are no more jobs to run and no job is 
running for the Dag, it cleans up the Dag.
+ */
+@Slf4j
+public class ReevaluateDagProc extends 
DagProc<Optional<Dag.DagNode<JobExecutionPlan>>, Void> {
+  private final JobStatusRetriever jobStatusRetriever;
+  private final Timer jobStatusPolledTimer;
+  private final DagNodeId dagNodeId;
+  private JobStatus jobStatus;
+
+  public ReevaluateDagProc(ReevaluateDagTask reEvaluateDagTask, 
JobStatusRetriever jobStatusRetriever) {
+    this.dagTask = reEvaluateDagTask;
+    this.jobStatusRetriever = jobStatusRetriever;
+    this.jobStatusPolledTimer = 
metricContext.timer(ServiceMetricNames.JOB_STATUS_POLLED_TIMER);
+    this.dagNodeId = new DagNodeId(this.dagTask.getDagAction().getFlowGroup(), 
this.dagTask.getDagAction().getFlowName(),
+        Long.parseLong(this.dagTask.getDagAction().getFlowExecutionId()),
+        this.dagTask.getDagAction().getFlowGroup(), 
this.dagTask.getDagAction().getJobName());
+  }
+
+  @Override
+  protected Optional<Dag.DagNode<JobExecutionPlan>> 
initialize(DagManagementStateStore dagManagementStateStore)
+      throws IOException {
+    Optional<Dag.DagNode<JobExecutionPlan>> dagNode = 
dagManagementStateStore.getDagNode(this.dagNodeId);
+    if (!dagNode.isPresent()) {
+      log.error("DagNode not found for a ReEvaluate DagAction with dag node id 
" + this.dagNodeId);

Review Comment:
   capitalized as "Reevaluate DagAction"
   
   also, it is more canonical to use `{}` placeholders for logging, rather than `+` concatenation (also below)



##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/LaunchDagProc.java:
##########
@@ -70,16 +53,16 @@ public class LaunchDagProc extends 
DagProc<Optional<Dag<JobExecutionPlan>>, Opti
         
metricContext.newContextAwareGauge(ServiceMetricNames.FLOW_ORCHESTRATION_DELAY, 
orchestrationDelayCounter::get));
   }
 
-  @Override
-  protected DagManager.DagId getDagId() {
-    return this.launchDagTask.getDagId();
+  public LaunchDagProc(LaunchDagTask dagTask, FlowCompilationValidationHelper 
flowCompilationValidationHelper) {
+    this.dagTask = dagTask;
+    this.flowCompilationValidationHelper = flowCompilationValidationHelper;
   }
 
   @Override
   protected Optional<Dag<JobExecutionPlan>> initialize(DagManagementStateStore 
dagManagementStateStore)
       throws IOException {
     try {
-      DagActionStore.DagAction dagAction = this.launchDagTask.getDagAction();
+      DagActionStore.DagAction dagAction = this.dagTask.getDagAction();

Review Comment:
   pulling out the `DagAction` breaks encapsulation and seems best avoided.  
could we rework this by adding a `DagId::getFlowId` method?
   
   then, all we need is `DagProc::getDagId`:
   ```
   DagId dagId = this.getDagId();
   FlowSpec flowSpec = loadFlowSpec(dmss, dagId);
   flowSpec.addProperty(..., dagId.getFlowExecutionId());
   ```



##########
gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagManagementDagActionStoreChangeMonitor.java:
##########
@@ -62,28 +59,18 @@ protected void handleDagAction(DagActionStore.DagAction 
dagAction, boolean isSta
     LaunchSubmissionMetricProxy launchSubmissionMetricProxy = isStartup ? 
ON_STARTUP : POST_STARTUP;
     try {
       // todo - add actions for other other type of dag actions
-      if 
(dagAction.getDagActionType().equals(DagActionStore.DagActionType.LAUNCH)) {
-        // If multi-active scheduler is NOT turned on we should not receive 
these type of events
-        if (!this.isMultiActiveSchedulerEnabled) {

Review Comment:
   just recently you were checking for `!this.isMultiActiveSchedulerEnabled`.  
why no longer?



##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/task/ReevaluateDagTask.java:
##########
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.orchestration.task;
+
+import org.apache.gobblin.runtime.api.DagActionStore;
+import org.apache.gobblin.runtime.api.MultiActiveLeaseArbiter;
+import org.apache.gobblin.service.modules.orchestration.DagTaskVisitor;
+
+
+/**
+ * A {@link DagTask} responsible to handle launch tasks.

Review Comment:
   needs update: this javadoc still says "launch tasks", but this class is `ReevaluateDagTask`



##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProc.java:
##########
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.orchestration.proc;
+
+import java.io.IOException;
+import java.util.Optional;
+import java.util.Set;
+
+import com.codahale.metrics.Timer;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.gobblin.metrics.ServiceMetricNames;
+import org.apache.gobblin.metrics.event.TimingEvent;
+import org.apache.gobblin.runtime.api.DagActionStore;
+import org.apache.gobblin.service.ExecutionStatus;
+import org.apache.gobblin.service.modules.flowgraph.Dag;
+import org.apache.gobblin.service.modules.flowgraph.DagNodeId;
+import 
org.apache.gobblin.service.modules.orchestration.DagManagementStateStore;
+import org.apache.gobblin.service.modules.orchestration.DagManagerUtils;
+import org.apache.gobblin.service.modules.orchestration.task.ReevaluateDagTask;
+import org.apache.gobblin.service.modules.spec.JobExecutionPlan;
+import org.apache.gobblin.service.monitoring.FlowStatusGenerator;
+import org.apache.gobblin.service.monitoring.JobStatus;
+import org.apache.gobblin.service.monitoring.JobStatusRetriever;
+
+
+/**
+ * An implementation for {@link DagProc} that launches a new job if there 
exists a job whose pre-requisite jobs are
+ * completed successfully. If there are no more jobs to run and no job is 
running for the Dag, it cleans up the Dag.
+ */
+@Slf4j
+public class ReevaluateDagProc extends 
DagProc<Optional<Dag.DagNode<JobExecutionPlan>>, Void> {
+  private final JobStatusRetriever jobStatusRetriever;
+  private final Timer jobStatusPolledTimer;
+  private final DagNodeId dagNodeId;
+  private JobStatus jobStatus;
+
+  public ReevaluateDagProc(ReevaluateDagTask reEvaluateDagTask, 
JobStatusRetriever jobStatusRetriever) {
+    this.dagTask = reEvaluateDagTask;
+    this.jobStatusRetriever = jobStatusRetriever;
+    this.jobStatusPolledTimer = 
metricContext.timer(ServiceMetricNames.JOB_STATUS_POLLED_TIMER);
+    this.dagNodeId = new DagNodeId(this.dagTask.getDagAction().getFlowGroup(), 
this.dagTask.getDagAction().getFlowName(),
+        Long.parseLong(this.dagTask.getDagAction().getFlowExecutionId()),
+        this.dagTask.getDagAction().getFlowGroup(), 
this.dagTask.getDagAction().getJobName());

Review Comment:
   `DagAction` has always had `getDagId()`.  now that we added `jobName`, it 
probably also deserves a `getDagNodeId()` method.





Issue Time Tracking
-------------------

    Worklog Id:     (was: 911375)
    Time Spent: 20m  (was: 10m)

> create dag proc for taking actions on job completion
> ----------------------------------------------------
>
>                 Key: GOBBLIN-2022
>                 URL: https://issues.apache.org/jira/browse/GOBBLIN-2022
>             Project: Apache Gobblin
>          Issue Type: Task
>            Reporter: Arjun Singh Bora
>            Priority: Major
>          Time Spent: 20m
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to