Asquator commented on code in PR #61274:
URL: https://github.com/apache/airflow/pull/61274#discussion_r2807960289


##########
airflow-core/src/airflow/models/dagrun.py:
##########
@@ -1410,24 +1416,44 @@ def notify_dagrun_state_changed(self, msg: str):
         # or LocalTaskJob, so we don't want to "falsely advertise" we notify about that
 
     @provide_session
-    def get_last_ti(self, dag: SerializedDAG, session: Session = NEW_SESSION) -> TI | None:
-        """Get Last TI from the dagrun to build and pass Execution context object from server to then run callbacks."""
+    def get_first_ti_causing_failure(self, dag: SerializedDAG, session: Session = NEW_SESSION) -> TI | None:
+        """
+        Get the first task instance that would cause a leaf task to fail the run.
+        """
+
         tis = self.get_task_instances(session=session)
-        # tis from a dagrun may not be a part of dag.partial_subset,
-        # since dag.partial_subset is a subset of the dag.
-        # This ensures that we will only use the accessible TI
-        # context for the callback.
+
+        failed_leaf_tis = [  
+            ti for ti in self._tis_for_dagrun_state(dag=dag, tis=tis)  
+            if ti.state in State.failed_states  
+        ]
+          
+        if not failed_leaf_tis:
+            return None  
+
         if dag.partial:
-            tis = [ti for ti in tis if not ti.state == State.NONE]
-        # filter out removed tasks
-        tis = natsorted(
-            (ti for ti in tis if ti.state != TaskInstanceState.REMOVED),
-            key=lambda ti: ti.task_id,
-        )
-        if not tis:
-            return None
-        ti = tis[-1]  # get last TaskInstance of DagRun
-        return ti
+            tis = [
+                ti for ti in tis if not ti.state in (
+                    State.NONE, TaskInstanceState.REMOVED
+                )
+            ]
+
+        # Collect all task IDs on failure paths
+        failure_path_task_ids = set()
+        for failed_leaf in failed_leaf_tis:
+            leaf_task = dag.get_task(failed_leaf.task_id)
+            upstream_ids = leaf_task.get_flat_relative_ids(upstream=True)
+            failure_path_task_ids.update(upstream_ids)
+            failure_path_task_ids.add(failed_leaf.task_id)
+
+        # Find failed tasks on possible failure paths
+        failed_on_paths = [  
+            ti for ti in tis
+            if ti.task_id in failure_path_task_ids and ti.state == State.FAILED
+        ]

Review Comment:
   The thing I wanted to do is to return the latest failed task that actually 
caused the DAG run to fail.
   
   The simplest way is just returning the latest failed task in the DAG run. I switched to this logic now, but we have to acknowledge an edge case: a task that is marked as failed just after the DAG run fails would be the one sent to the callback.
   
   The previous logic wouldn't catch this case either, so I removed it. Do you think we should consider this edge case?
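   
   For illustration only, here is a minimal sketch of that "latest failed task" idea. The helper name `latest_failed_ti` and the ordering by `end_date` are assumptions made for the example, not what the diff above does; `tis` stands for the list returned by `self.get_task_instances(...)`:
   
   ```python
   from airflow.utils.state import TaskInstanceState
   
   
   def latest_failed_ti(tis):
       """Hypothetical helper: return the failed TI that finished last, or None."""
       failed = [ti for ti in tis if ti.state == TaskInstanceState.FAILED]
       if not failed:
           return None
       # Order by end_date (TIs without one sort first) and break ties on task_id
       # so the choice is deterministic.
       return max(failed, key=lambda ti: (ti.end_date is not None, ti.end_date, ti.task_id))
   ```
   
   To cover the edge case above, one option would be to also skip TIs whose `end_date` falls after the run's own `end_date`, but that is a design decision rather than something the current diff implements.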


