[
https://issues.apache.org/jira/browse/GOBBLIN-2022?focusedWorklogId=911375&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-911375
]
ASF GitHub Bot logged work on GOBBLIN-2022:
-------------------------------------------
Author: ASF GitHub Bot
Created on: 25/Mar/24 20:32
Start Date: 25/Mar/24 20:32
Worklog Time Spent: 10m
Work Description: phet commented on code in PR #3896:
URL: https://github.com/apache/gobblin/pull/3896#discussion_r1538144200
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagProcFactory.java:
##########
@@ -39,16 +42,24 @@
public class DagProcFactory implements DagTaskVisitor<DagProc> {
private final FlowCompilationValidationHelper
flowCompilationValidationHelper;
+ private final JobStatusRetriever jobStatusRetriever;
@Inject
- public DagProcFactory(FlowCompilationValidationHelper
flowCompilationValidationHelper) {
+ public DagProcFactory(FlowCompilationValidationHelper
flowCompilationValidationHelper,
+ JobStatusRetriever jobStatusRetriever) {
this.flowCompilationValidationHelper = flowCompilationValidationHelper;
+ this.jobStatusRetriever = jobStatusRetriever;
}
@Override
public LaunchDagProc meet(LaunchDagTask launchDagTask) {
return new LaunchDagProc(launchDagTask,
this.flowCompilationValidationHelper);
}
+
+ @Override
+ public ReevaluateDagProc meet(ReevaluateDagTask reEvaluateDagTask) {
+ return new ReevaluateDagProc(reEvaluateDagTask, this.jobStatusRetriever);
Review Comment:
why have the factory provide this, rather than passing it later as part of
`DagManagementStateStore`, which is already given to `DagProc::process`?
overall, "job status" seems reasonably part of "DAG management state"
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/LaunchDagProc.java:
##########
@@ -70,16 +53,16 @@ public class LaunchDagProc extends
DagProc<Optional<Dag<JobExecutionPlan>>, Opti
metricContext.newContextAwareGauge(ServiceMetricNames.FLOW_ORCHESTRATION_DELAY,
orchestrationDelayCounter::get));
}
- @Override
- protected DagManager.DagId getDagId() {
- return this.launchDagTask.getDagId();
+ public LaunchDagProc(LaunchDagTask dagTask, FlowCompilationValidationHelper
flowCompilationValidationHelper) {
+ this.dagTask = dagTask;
Review Comment:
call `super(dagTask)` here instead of assigning `this.dagTask` directly
##########
gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/DagManagerTest.java:
##########
@@ -154,6 +154,11 @@ static Dag<JobExecutionPlan> buildDag(String id, Long
flowExecutionId, String fl
public static Dag<JobExecutionPlan> buildDag(String id, Long
flowExecutionId, String flowFailureOption, int numNodes, String proxyUser,
Config additionalConfig)
throws URISyntaxException {
+ if (additionalConfig.hasPath(ConfigurationKeys.JOB_NAME_KEY)) {
+ throw new RuntimeException("Please do not set " +
ConfigurationKeys.JOB_NAME_KEY + " because this method is "
+ + "is using hard coded job names in setting " +
ConfigurationKeys.JOB_DEPENDENCIES);
Review Comment:
repeated "is" in the exception message ("because this method is is using ...")
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProc.java:
##########
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.orchestration.proc;
+
+import java.io.IOException;
+import java.util.Optional;
+import java.util.Set;
+
+import com.codahale.metrics.Timer;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.gobblin.metrics.ServiceMetricNames;
+import org.apache.gobblin.metrics.event.TimingEvent;
+import org.apache.gobblin.runtime.api.DagActionStore;
+import org.apache.gobblin.service.ExecutionStatus;
+import org.apache.gobblin.service.modules.flowgraph.Dag;
+import org.apache.gobblin.service.modules.flowgraph.DagNodeId;
+import
org.apache.gobblin.service.modules.orchestration.DagManagementStateStore;
+import org.apache.gobblin.service.modules.orchestration.DagManagerUtils;
+import org.apache.gobblin.service.modules.orchestration.task.ReevaluateDagTask;
+import org.apache.gobblin.service.modules.spec.JobExecutionPlan;
+import org.apache.gobblin.service.monitoring.FlowStatusGenerator;
+import org.apache.gobblin.service.monitoring.JobStatus;
+import org.apache.gobblin.service.monitoring.JobStatusRetriever;
+
+
+/**
+ * An implementation for {@link DagProc} that launches a new job if there
exists a job whose pre-requisite jobs are
+ * completed successfully. If there are no more jobs to run and no job is
running for the Dag, it cleans up the Dag.
+ */
+@Slf4j
+public class ReevaluateDagProc extends
DagProc<Optional<Dag.DagNode<JobExecutionPlan>>, Void> {
+ private final JobStatusRetriever jobStatusRetriever;
+ private final Timer jobStatusPolledTimer;
+ private final DagNodeId dagNodeId;
+ private JobStatus jobStatus;
+
+ public ReevaluateDagProc(ReevaluateDagTask reEvaluateDagTask,
JobStatusRetriever jobStatusRetriever) {
+ this.dagTask = reEvaluateDagTask;
+ this.jobStatusRetriever = jobStatusRetriever;
+ this.jobStatusPolledTimer =
metricContext.timer(ServiceMetricNames.JOB_STATUS_POLLED_TIMER);
+ this.dagNodeId = new DagNodeId(this.dagTask.getDagAction().getFlowGroup(),
this.dagTask.getDagAction().getFlowName(),
+ Long.parseLong(this.dagTask.getDagAction().getFlowExecutionId()),
+ this.dagTask.getDagAction().getFlowGroup(),
this.dagTask.getDagAction().getJobName());
+ }
+
+ @Override
+ protected Optional<Dag.DagNode<JobExecutionPlan>>
initialize(DagManagementStateStore dagManagementStateStore)
+ throws IOException {
+ Optional<Dag.DagNode<JobExecutionPlan>> dagNode =
dagManagementStateStore.getDagNode(this.dagNodeId);
+ if (!dagNode.isPresent()) {
+ log.error("DagNode not found for a ReEvaluate DagAction with dag node id
" + this.dagNodeId);
Review Comment:
this should be capitalized as "Reevaluate DagAction"
also, it is more canonical to use `{}` placeholders for logging, rather than `+` concatenation (also below)
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/LaunchDagProc.java:
##########
@@ -70,16 +53,16 @@ public class LaunchDagProc extends
DagProc<Optional<Dag<JobExecutionPlan>>, Opti
metricContext.newContextAwareGauge(ServiceMetricNames.FLOW_ORCHESTRATION_DELAY,
orchestrationDelayCounter::get));
}
- @Override
- protected DagManager.DagId getDagId() {
- return this.launchDagTask.getDagId();
+ public LaunchDagProc(LaunchDagTask dagTask, FlowCompilationValidationHelper
flowCompilationValidationHelper) {
+ this.dagTask = dagTask;
+ this.flowCompilationValidationHelper = flowCompilationValidationHelper;
}
@Override
protected Optional<Dag<JobExecutionPlan>> initialize(DagManagementStateStore
dagManagementStateStore)
throws IOException {
try {
- DagActionStore.DagAction dagAction = this.launchDagTask.getDagAction();
+ DagActionStore.DagAction dagAction = this.dagTask.getDagAction();
Review Comment:
pulling out the `DagAction` breaks encapsulation and seems best avoided.
could we rework to add a method to `DagId` for `DagId::getFlowId`?
then, all we need is `DagProc::getDagId`:
```
DagId dagId = this.getDagId();
FlowSpec flowSpec = loadFlowSpec(dmss, dagId);
flowSpec.addProperty(..., dagId.getFlowExecutionId());
```
##########
gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagManagementDagActionStoreChangeMonitor.java:
##########
@@ -62,28 +59,18 @@ protected void handleDagAction(DagActionStore.DagAction
dagAction, boolean isSta
LaunchSubmissionMetricProxy launchSubmissionMetricProxy = isStartup ?
ON_STARTUP : POST_STARTUP;
try {
// todo - add actions for other other type of dag actions
- if
(dagAction.getDagActionType().equals(DagActionStore.DagActionType.LAUNCH)) {
- // If multi-active scheduler is NOT turned on we should not receive
these type of events
- if (!this.isMultiActiveSchedulerEnabled) {
Review Comment:
just recently you were checking for `!this.isMultiActiveSchedulerEnabled`.
why no longer?
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/task/ReevaluateDagTask.java:
##########
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.orchestration.task;
+
+import org.apache.gobblin.runtime.api.DagActionStore;
+import org.apache.gobblin.runtime.api.MultiActiveLeaseArbiter;
+import org.apache.gobblin.service.modules.orchestration.DagTaskVisitor;
+
+
+/**
+ * A {@link DagTask} responsible to handle launch tasks.
Review Comment:
needs update: this Javadoc still says "launch tasks", but the class is `ReevaluateDagTask`
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProc.java:
##########
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.orchestration.proc;
+
+import java.io.IOException;
+import java.util.Optional;
+import java.util.Set;
+
+import com.codahale.metrics.Timer;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.gobblin.metrics.ServiceMetricNames;
+import org.apache.gobblin.metrics.event.TimingEvent;
+import org.apache.gobblin.runtime.api.DagActionStore;
+import org.apache.gobblin.service.ExecutionStatus;
+import org.apache.gobblin.service.modules.flowgraph.Dag;
+import org.apache.gobblin.service.modules.flowgraph.DagNodeId;
+import
org.apache.gobblin.service.modules.orchestration.DagManagementStateStore;
+import org.apache.gobblin.service.modules.orchestration.DagManagerUtils;
+import org.apache.gobblin.service.modules.orchestration.task.ReevaluateDagTask;
+import org.apache.gobblin.service.modules.spec.JobExecutionPlan;
+import org.apache.gobblin.service.monitoring.FlowStatusGenerator;
+import org.apache.gobblin.service.monitoring.JobStatus;
+import org.apache.gobblin.service.monitoring.JobStatusRetriever;
+
+
+/**
+ * An implementation for {@link DagProc} that launches a new job if there
exists a job whose pre-requisite jobs are
+ * completed successfully. If there are no more jobs to run and no job is
running for the Dag, it cleans up the Dag.
+ */
+@Slf4j
+public class ReevaluateDagProc extends
DagProc<Optional<Dag.DagNode<JobExecutionPlan>>, Void> {
+ private final JobStatusRetriever jobStatusRetriever;
+ private final Timer jobStatusPolledTimer;
+ private final DagNodeId dagNodeId;
+ private JobStatus jobStatus;
+
+ public ReevaluateDagProc(ReevaluateDagTask reEvaluateDagTask,
JobStatusRetriever jobStatusRetriever) {
+ this.dagTask = reEvaluateDagTask;
+ this.jobStatusRetriever = jobStatusRetriever;
+ this.jobStatusPolledTimer =
metricContext.timer(ServiceMetricNames.JOB_STATUS_POLLED_TIMER);
+ this.dagNodeId = new DagNodeId(this.dagTask.getDagAction().getFlowGroup(),
this.dagTask.getDagAction().getFlowName(),
+ Long.parseLong(this.dagTask.getDagAction().getFlowExecutionId()),
+ this.dagTask.getDagAction().getFlowGroup(),
this.dagTask.getDagAction().getJobName());
Review Comment:
`DagAction` has always had `getDagId()`. now that we added `jobName`, it
probably also deserves a `getDagNodeId()` method.
Issue Time Tracking
-------------------
Worklog Id: (was: 911375)
Time Spent: 20m (was: 10m)
> create dag proc for taking actions on job completion
> ----------------------------------------------------
>
> Key: GOBBLIN-2022
> URL: https://issues.apache.org/jira/browse/GOBBLIN-2022
> Project: Apache Gobblin
> Issue Type: Task
> Reporter: Arjun Singh Bora
> Priority: Major
> Time Spent: 20m
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)