[
https://issues.apache.org/jira/browse/GOBBLIN-2142?focusedWorklogId=931692&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-931692
]
ASF GitHub Bot logged work on GOBBLIN-2142:
-------------------------------------------
Author: ASF GitHub Bot
Created on: 26/Aug/24 09:07
Start Date: 26/Aug/24 09:07
Worklog Time Spent: 10m
Work Description: arjun4084346 commented on code in PR #4037:
URL: https://github.com/apache/gobblin/pull/4037#discussion_r1730943749
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/DagProcUtils.java:
##########
@@ -291,4 +291,79 @@ public static void
removeFlowFinishDeadlineDagAction(DagManagementStateStore dag
log.warn("Failed to delete dag action {}",
enforceFlowFinishDeadlineDagAction);
}
}
+
+ /**
+ * Returns true if all dag nodes are finished, and it is not possible to run
any new dag node.
+ */
+ public static boolean isDagFinished(Dag<JobExecutionPlan> dag) {
+ List<Dag.DagNode<JobExecutionPlan>> nodes = dag.getNodes();
+ Set<Dag.DagNode<JobExecutionPlan>> canRun = new HashSet<>(nodes);
+ Set<Dag.DagNode<JobExecutionPlan>> completed = new HashSet<>();
+ DagManager.FailureOption failureOption =
DagManagerUtils.getFailureOption(dag);
+ boolean anyFailure = false;
+
+ for (Dag.DagNode<JobExecutionPlan> node : nodes) {
+ if (!canRun.contains(node)) {
+ continue;
+ }
+ ExecutionStatus status = node.getValue().getExecutionStatus();
+ if (status == ExecutionStatus.FAILED || status ==
ExecutionStatus.CANCELLED) {
+ anyFailure = true;
+ removeChildrenFromCanRun(node, dag, canRun);
+ completed.add(node);
+ } else if (status == ExecutionStatus.COMPLETE) {
+ completed.add(node);
+ } else if (status == ExecutionStatus.PENDING) {
+ // A PENDING node whose parents are not in canRun cannot run either,
+ // so remove it from the canRun set as well
+ if (!areParentsInCanRun(node, canRun)) {
+ canRun.remove(node);
+ }
+ }
+ }
+
+ // In the end, canRun must contain at least as many nodes as completed
+ assert canRun.size() >= completed.size();
+
+ if (!anyFailure || failureOption ==
DagManager.FailureOption.FINISH_ALL_POSSIBLE) {
+ return canRun.size() == completed.size();
+ } else if (failureOption == DagManager.FailureOption.FINISH_RUNNING) {
+ //if all the remaining jobs are pending return true
+ canRun.removeAll(completed);
+ boolean isFinished = true;
+ for (Dag.DagNode<JobExecutionPlan> remainingNode : canRun) {
+ if (remainingNode.getValue().getExecutionStatus() ==
ExecutionStatus.RUNNING ||
+ remainingNode.getValue().getExecutionStatus() ==
ExecutionStatus.PENDING_RESUME ||
+ remainingNode.getValue().getExecutionStatus() ==
ExecutionStatus.ORCHESTRATED ||
+ remainingNode.getValue().getExecutionStatus() ==
ExecutionStatus.PENDING_RETRY) {
+ isFinished = false;
+ break;
+ }
+ }
+ return isFinished;
Review Comment:
Consider replacing this loop with `return canRun.stream().noneMatch(node ->
unfinishedStatuses.contains(node.getValue().getExecutionStatus()));`
Issue Time Tracking
-------------------
Worklog Id: (was: 931692)
Time Spent: 2h (was: 1h 50m)
> find if the dag is running or not correctly
> -------------------------------------------
>
> Key: GOBBLIN-2142
> URL: https://issues.apache.org/jira/browse/GOBBLIN-2142
> Project: Apache Gobblin
> Issue Type: Bug
> Reporter: Arjun Singh Bora
> Priority: Major
> Time Spent: 2h
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)