[
https://issues.apache.org/jira/browse/GOBBLIN-2017?focusedWorklogId=923006&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-923006
]
ASF GitHub Bot logged work on GOBBLIN-2017:
-------------------------------------------
Author: ASF GitHub Bot
Created on: 11/Jun/24 17:31
Start Date: 11/Jun/24 17:31
Worklog Time Spent: 10m
Work Description: phet commented on code in PR #3965:
URL: https://github.com/apache/gobblin/pull/3965#discussion_r1635246463
##########
gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProcTest.java:
##########
@@ -206,4 +195,105 @@ public void testNoNextJobToRun() throws Exception {
Assert.assertEquals(Mockito.mockingDetails(this.dagManagementStateStore).getInvocations().stream()
.filter(a ->
a.getMethod().getName().equals("deleteDagAction")).count(), 1);
}
+
+  @Test
+  public void testCurrentJobToRun() throws Exception {
+    String flowName = "fn3";
+    Dag<JobExecutionPlan> dag = DagManagerTest.buildDag("1", flowExecutionId, DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(),
+        2, "user5", ConfigFactory.empty()
+        .withValue(ConfigurationKeys.FLOW_GROUP_KEY, ConfigValueFactory.fromAnyRef(flowGroup))
+        .withValue(ConfigurationKeys.FLOW_NAME_KEY, ConfigValueFactory.fromAnyRef(flowName))
+        .withValue(ConfigurationKeys.JOB_GROUP_KEY, ConfigValueFactory.fromAnyRef(flowGroup))
+    );
+    List<Dag.DagNode<JobExecutionPlan>> startDagNodes = dag.getStartNodes();
+    List<SpecProducer<Spec>> specProducers = getDagSpecProducers(dag);
+
+    doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+    doReturn(new ImmutablePair<>(Optional.of(startDagNodes.get(0)), Optional.empty()))
+        .when(dagManagementStateStore).getDagNodeWithJobStatus(any());
+
+    ReevaluateDagProc
+        reEvaluateDagProc = new ReevaluateDagProc(new ReevaluateDagTask(new DagActionStore.DagAction(flowGroup, flowName,
+        String.valueOf(flowExecutionId), "job0", DagActionStore.DagActionType.REEVALUATE), null,
+        dagManagementStateStore));
+    reEvaluateDagProc.process(dagManagementStateStore);
+
+    long addSpecCount = specProducers.stream()
+        .mapToLong(p -> Mockito.mockingDetails(p)
+            .getInvocations()
+            .stream()
+            .filter(a -> a.getMethod().getName().equals("addSpec"))
+            .count())
+        .sum();
+
+    int numOfLaunchedJobs = 1; // only the current job
+    // only the current job should have run
+    Mockito.verify(specProducers.get(0), Mockito.times(1)).addSpec(any());
+    Assert.assertEquals(numOfLaunchedJobs, addSpecCount);
+
+    // no job's state is deleted, because that happens only when the job that triggered the reevaluate dag proc has finished
+    Mockito.verify(dagManagementStateStore, Mockito.never()).deleteDagNodeState(any(), any());
+    Mockito.verify(dagManagementStateStore, Mockito.never()).deleteDagAction(any());
+    Mockito.verify(dagManagementStateStore, Mockito.never()).addJobDagAction(any(), any(), any(), any(),
+        eq(DagActionStore.DagActionType.REEVALUATE));
+    Mockito.verify(dagActionReminderScheduler, Mockito.never()).unscheduleReminderJob(any());
+  }
+
+  @Test
+  public void testMultipleNextJobToRun() throws Exception {
+    String flowName = "fn4";
+    Dag<JobExecutionPlan> dag = LaunchDagProcTest.buildDagWithMultipleNodesAtDifferentLevels("1", String.valueOf(flowExecutionId),
+        DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(), "user5", ConfigFactory.empty()
+        .withValue(ConfigurationKeys.FLOW_GROUP_KEY, ConfigValueFactory.fromAnyRef(flowGroup))
+        .withValue(ConfigurationKeys.FLOW_NAME_KEY, ConfigValueFactory.fromAnyRef(flowName))
+        .withValue(ConfigurationKeys.JOB_GROUP_KEY, ConfigValueFactory.fromAnyRef(flowGroup))
+    );
+    JobStatus jobStatus = JobStatus.builder().flowName(flowName).flowGroup(flowGroup).jobGroup(flowGroup)
+        .jobName("job3").flowExecutionId(flowExecutionId).message("Test message").eventName(ExecutionStatus.COMPLETE.name())
+        .startTime(flowExecutionId).shouldRetry(false).orchestratedTime(flowExecutionId).build();
+
+    doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+    doReturn(new ImmutablePair<>(Optional.of(dag.getStartNodes().get(0)), Optional.of(jobStatus)))
+        .when(dagManagementStateStore).getDagNodeWithJobStatus(any());
+
+    // mocked job status for the first four jobs
+    dag.getNodes().get(0).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+    dag.getNodes().get(1).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+    dag.getNodes().get(2).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+    dag.getNodes().get(3).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+
+    doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+
+    ReevaluateDagProc
+        reEvaluateDagProc = new ReevaluateDagProc(new ReevaluateDagTask(new DagActionStore.DagAction(flowGroup, flowName,
+        String.valueOf(flowExecutionId), "job3", DagActionStore.DagActionType.REEVALUATE), null, dagManagementStateStore));
+    List<SpecProducer<Spec>> specProducers = getDagSpecProducers(dag);
+    // process 4th job
+    reEvaluateDagProc.process(dagManagementStateStore);
+
+    int numOfLaunchedJobs = 2; // = number of jobs that should launch when the 4th job passes, i.e. the 5th and 6th jobs
+    // parallel jobs are launched through reevaluate dag actions
+    Mockito.verify(dagManagementStateStore, Mockito.times(numOfLaunchedJobs))
+        .addJobDagAction(eq(flowGroup), eq(flowName), eq(String.valueOf(flowExecutionId)), any(), eq(DagActionStore.DagActionType.REEVALUATE));
+
+    long addSpecCount = specProducers.stream()
+        .mapToLong(p -> Mockito.mockingDetails(p)
+            .getInvocations()
+            .stream()
+            .filter(a -> a.getMethod().getName().equals("addSpec"))
+            .count())
+        .sum();
+    // when there are parallel jobs to launch, they are not sent directly to spec producers; instead a reevaluate dag action is created
+    Assert.assertEquals(addSpecCount, 0L);
Review Comment:
similar here...
could we:
```
specProducers.forEach(sp -> Mockito.verify(sp, Mockito.never()).addSpec(any()));
```
?
actually likely even `.forEach(Mockito::verifyNoInteractions)`
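For reference, a minimal sketch of how either suggestion could read in this test, assuming the `specProducers` mocks and the statically imported `any()` from the diff above:
```
// verify per producer rather than summing addSpec invocation counts
specProducers.forEach(sp -> Mockito.verify(sp, Mockito.never()).addSpec(any()));

// stricter variant: fails if the producer mocks received any call at all, not just addSpec
specProducers.forEach(Mockito::verifyNoInteractions);
```
`verifyNoInteractions` is the stronger check; it only fits when the scenario expects the producers to be untouched entirely.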
Issue Time Tracking
-------------------
Worklog Id: (was: 923006)
Time Spent: 7h 40m (was: 7.5h)
> divide multiple job launches in a LaunchDagProc into multiple LaunchDagActions
> ------------------------------------------------------------------------------
>
> Key: GOBBLIN-2017
> URL: https://issues.apache.org/jira/browse/GOBBLIN-2017
> Project: Apache Gobblin
> Issue Type: Task
> Reporter: Arjun Singh Bora
> Priority: Major
> Time Spent: 7h 40m
> Remaining Estimate: 0h
>
> divide multiple job launches in a LaunchDagProc into multiple
> LaunchDagActions, for two reasons:
> 1) each dag proc then spends less time processing and has a better chance
> of completing the operation within the lease time
> 2) handling partial job submissions when one LaunchDagProc sends N jobs is
> difficult
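As a rough sketch of the proposed split (an illustration only, not the actual implementation): rather than calling addSpec for every ready job inside a single dag proc, the proc could enqueue one dag action per job, mirroring the addJobDagAction calls verified in the test above. The `nextNodes` list and the job-name lookup below are assumed for the example.
```
// hypothetical: enqueue one dag action per ready job instead of submitting all specs in one proc
for (Dag.DagNode<JobExecutionPlan> node : nextNodes) {
  String jobName = node.getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY);
  dagManagementStateStore.addJobDagAction(flowGroup, flowName, String.valueOf(flowExecutionId), jobName,
      DagActionStore.DagActionType.REEVALUATE);
}
```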
--
This message was sent by Atlassian Jira
(v8.20.10#820010)