phet commented on code in PR #3965:
URL: https://github.com/apache/gobblin/pull/3965#discussion_r1635234210
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProc.java:
##########
@@ -90,9 +70,28 @@ protected void act(DagManagementStateStore
dagManagementStateStore, Pair<Optiona
}
Dag.DagNode<JobExecutionPlan> dagNode =
dagNodeWithJobStatus.getLeft().get();
- JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
- ExecutionStatus executionStatus = dagNode.getValue().getExecutionStatus();
+
+ if (!dagNodeWithJobStatus.getRight().isPresent()) {
+ // Usually reevaluate dag action is created by JobStatusMonitor when a
finished job status is available,
+ // but when reevaluate/resume/launch dag proc found multiple parallel
jobs to run next, it creates reevaluate
+ // dag actions for each of those parallel job and in this scenario there
is no job status available.
+ // If the job status is not present, this job was never launched, submit
it now.
+ DagProcUtils.submitJobToExecutor(dagManagementStateStore, dagNode,
getDagId());
+ return;
+ }
+
Dag<JobExecutionPlan> dag =
dagManagementStateStore.getDag(getDagId()).get();
+ JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
+ ExecutionStatus executionStatus =
ExecutionStatus.valueOf(jobStatus.getEventName());
+ setStatus(dagManagementStateStore, dagNodeWithJobStatus.getLeft().get(),
executionStatus);
+
+ if
(!FlowStatusGenerator.FINISHED_STATUSES.contains(executionStatus.name())) {
+ // this may happen if adding job status in the store failed after adding
a ReevaluateDagAction in KafkaJobStatusMonitor
+ throw new RuntimeException(String.format("Job status for dagNode %s is
%s. Re-evaluate dag action are created for"
Review Comment:
there used to be a `log.warn` in addition to the `RuntimeException`. seems
like worth continuing unless you have a specific objection
##########
gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProcTest.java:
##########
@@ -206,4 +195,105 @@ public void testNoNextJobToRun() throws Exception {
Assert.assertEquals(Mockito.mockingDetails(this.dagManagementStateStore).getInvocations().stream()
.filter(a ->
a.getMethod().getName().equals("deleteDagAction")).count(), 1);
}
+
+ @Test
+ public void testCurrentJobToRun() throws Exception {
+ String flowName = "fn3";
+ Dag<JobExecutionPlan> dag = DagManagerTest.buildDag("1", flowExecutionId,
DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(),
+ 2, "user5", ConfigFactory.empty()
+ .withValue(ConfigurationKeys.FLOW_GROUP_KEY,
ConfigValueFactory.fromAnyRef(flowGroup))
+ .withValue(ConfigurationKeys.FLOW_NAME_KEY,
ConfigValueFactory.fromAnyRef(flowName))
+ .withValue(ConfigurationKeys.JOB_GROUP_KEY,
ConfigValueFactory.fromAnyRef(flowGroup))
+ );
+ List<Dag.DagNode<JobExecutionPlan>> startDagNodes = dag.getStartNodes();
+ List<SpecProducer<Spec>> specProducers = getDagSpecProducers(dag);
+
+ doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+ doReturn(new ImmutablePair<>(Optional.of(startDagNodes.get(0)),
Optional.empty()))
+ .when(dagManagementStateStore).getDagNodeWithJobStatus(any());
+
+ ReevaluateDagProc
+ reEvaluateDagProc = new ReevaluateDagProc(new ReevaluateDagTask(new
DagActionStore.DagAction(flowGroup, flowName,
+ String.valueOf(flowExecutionId), "job0",
DagActionStore.DagActionType.REEVALUATE), null,
+ dagManagementStateStore));
+ reEvaluateDagProc.process(dagManagementStateStore);
+
+ long addSpecCount = specProducers.stream()
+ .mapToLong(p -> Mockito.mockingDetails(p)
+ .getInvocations()
+ .stream()
+ .filter(a -> a.getMethod().getName().equals("addSpec"))
+ .count())
+ .sum();
+
+ int numOfLaunchedJobs = 1; // only the current job
+ // only the current job should have run
+ Mockito.verify(specProducers.get(0), Mockito.times(1)).addSpec(any());
+ Assert.assertEquals(numOfLaunchedJobs, addSpecCount);
+
+ // no job's state is deleted because that happens when the job finishes
triggered the reevaluate dag proc
+ Mockito.verify(dagManagementStateStore,
Mockito.never()).deleteDagNodeState(any(), any());
+ Mockito.verify(dagManagementStateStore,
Mockito.never()).deleteDagAction(any());
+ Mockito.verify(dagManagementStateStore,
Mockito.never()).addJobDagAction(any(), any(), any(), any(),
+ eq(DagActionStore.DagActionType.REEVALUATE));
+ Mockito.verify(dagActionReminderScheduler,
Mockito.never()).unscheduleReminderJob(any());
+ }
+
+ @Test
+ public void testMultipleNextJobToRun() throws Exception {
+ String flowName = "fn4";
+ Dag<JobExecutionPlan> dag =
LaunchDagProcTest.buildDagWithMultipleNodesAtDifferentLevels("1",
String.valueOf(flowExecutionId),
+ DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(), "user5",
ConfigFactory.empty()
+ .withValue(ConfigurationKeys.FLOW_GROUP_KEY,
ConfigValueFactory.fromAnyRef(flowGroup))
+ .withValue(ConfigurationKeys.FLOW_NAME_KEY,
ConfigValueFactory.fromAnyRef(flowName))
+ .withValue(ConfigurationKeys.JOB_GROUP_KEY,
ConfigValueFactory.fromAnyRef(flowGroup))
+ );
+ JobStatus jobStatus =
JobStatus.builder().flowName(flowName).flowGroup(flowGroup).jobGroup(flowGroup)
+ .jobName("job3").flowExecutionId(flowExecutionId).message("Test
message").eventName(ExecutionStatus.COMPLETE.name())
+
.startTime(flowExecutionId).shouldRetry(false).orchestratedTime(flowExecutionId).build();
+
+ doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+ doReturn(new ImmutablePair<>(Optional.of(dag.getStartNodes().get(0)),
Optional.of(jobStatus)))
+ .when(dagManagementStateStore).getDagNodeWithJobStatus(any());
+
+ // mocked job status for the first four jobs
+
dag.getNodes().get(0).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+
dag.getNodes().get(1).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+
dag.getNodes().get(2).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+
dag.getNodes().get(3).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+
+ doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+
+ ReevaluateDagProc
+ reEvaluateDagProc = new ReevaluateDagProc(new ReevaluateDagTask(new
DagActionStore.DagAction(flowGroup, flowName,
+ String.valueOf(flowExecutionId), "job3",
DagActionStore.DagActionType.REEVALUATE), null, dagManagementStateStore));
+ List<SpecProducer<Spec>> specProducers = getDagSpecProducers(dag);
+ // process 4th job
+ reEvaluateDagProc.process(dagManagementStateStore);
+
+ int numOfLaunchedJobs = 2; // = number of jobs that should launch when 4th
job passes, i.e. 5th and 6th job
+ // parallel jobs are launched through reevaluate dag action
+ Mockito.verify(dagManagementStateStore, Mockito.times(numOfLaunchedJobs))
+ .addJobDagAction(eq(flowGroup), eq(flowName),
eq(String.valueOf(flowExecutionId)), any(),
eq(DagActionStore.DagActionType.REEVALUATE));
+
+ long addSpecCount = specProducers.stream()
+ .mapToLong(p -> Mockito.mockingDetails(p)
+ .getInvocations()
+ .stream()
+ .filter(a -> a.getMethod().getName().equals("addSpec"))
+ .count())
+ .sum();
+ // when there are parallel jobs to launch, they are not directly sent to
spec producers, instead reevaluate dag action is created
+ Assert.assertEquals(addSpecCount, 0L);
Review Comment:
similar here...
could we:
```
specProducers.stream()
.forEach(sp -> Mockito.verify(sp, Mockito.never()).addSpec(any()));
```
?
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProc.java:
##########
@@ -90,9 +70,28 @@ protected void act(DagManagementStateStore
dagManagementStateStore, Pair<Optiona
}
Dag.DagNode<JobExecutionPlan> dagNode =
dagNodeWithJobStatus.getLeft().get();
- JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
- ExecutionStatus executionStatus = dagNode.getValue().getExecutionStatus();
+
+ if (!dagNodeWithJobStatus.getRight().isPresent()) {
+ // Usually reevaluate dag action is created by JobStatusMonitor when a
finished job status is available,
+ // but when reevaluate/resume/launch dag proc found multiple parallel
jobs to run next, it creates reevaluate
+ // dag actions for each of those parallel job and in this scenario there
is no job status available.
+ // If the job status is not present, this job was never launched, submit
it now.
+ DagProcUtils.submitJobToExecutor(dagManagementStateStore, dagNode,
getDagId());
+ return;
+ }
+
Dag<JobExecutionPlan> dag =
dagManagementStateStore.getDag(getDagId()).get();
+ JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
+ ExecutionStatus executionStatus =
ExecutionStatus.valueOf(jobStatus.getEventName());
+ setStatus(dagManagementStateStore, dagNodeWithJobStatus.getLeft().get(),
executionStatus);
Review Comment:
nit: already `dagNode = dagNodeWJS.getLeft().get()` above
##########
gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProcTest.java:
##########
@@ -206,4 +195,105 @@ public void testNoNextJobToRun() throws Exception {
Assert.assertEquals(Mockito.mockingDetails(this.dagManagementStateStore).getInvocations().stream()
.filter(a ->
a.getMethod().getName().equals("deleteDagAction")).count(), 1);
}
+
+ @Test
+ public void testCurrentJobToRun() throws Exception {
+ String flowName = "fn3";
+ Dag<JobExecutionPlan> dag = DagManagerTest.buildDag("1", flowExecutionId,
DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(),
+ 2, "user5", ConfigFactory.empty()
+ .withValue(ConfigurationKeys.FLOW_GROUP_KEY,
ConfigValueFactory.fromAnyRef(flowGroup))
+ .withValue(ConfigurationKeys.FLOW_NAME_KEY,
ConfigValueFactory.fromAnyRef(flowName))
+ .withValue(ConfigurationKeys.JOB_GROUP_KEY,
ConfigValueFactory.fromAnyRef(flowGroup))
+ );
+ List<Dag.DagNode<JobExecutionPlan>> startDagNodes = dag.getStartNodes();
+ List<SpecProducer<Spec>> specProducers = getDagSpecProducers(dag);
+
+ doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+ doReturn(new ImmutablePair<>(Optional.of(startDagNodes.get(0)),
Optional.empty()))
+ .when(dagManagementStateStore).getDagNodeWithJobStatus(any());
+
+ ReevaluateDagProc
+ reEvaluateDagProc = new ReevaluateDagProc(new ReevaluateDagTask(new
DagActionStore.DagAction(flowGroup, flowName,
+ String.valueOf(flowExecutionId), "job0",
DagActionStore.DagActionType.REEVALUATE), null,
+ dagManagementStateStore));
+ reEvaluateDagProc.process(dagManagementStateStore);
+
+ long addSpecCount = specProducers.stream()
+ .mapToLong(p -> Mockito.mockingDetails(p)
+ .getInvocations()
+ .stream()
+ .filter(a -> a.getMethod().getName().equals("addSpec"))
+ .count())
+ .sum();
Review Comment:
does this boil down to an alternative to sth like:
```
specProducers.stream()
.skip(1) // separately verified `specProducers.get(0)`
.forEach(sp -> Mockito.verify(sp, Mockito.never()).addSpec(any()));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]