HsiuChuanHsu commented on code in PR #54115:
URL: https://github.com/apache/airflow/pull/54115#discussion_r2255536315
##########
providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py:
##########
@@ -417,6 +417,46 @@ def _change_state(
if TYPE_CHECKING:
assert self.kube_scheduler
+ if state == TaskInstanceState.FAILED:
+ try:
+ if self.kube_client:
+ pod = self.kube_client.read_namespaced_pod(name=pod_name, namespace=namespace)
+ pod_status = getattr(pod.status, "phase", None)
+ pod_reason = getattr(pod.status, "reason", None)
+ pod_message = getattr(pod.status, "message", None)
+
+ # If containerStatuses has detailed reasons, print them as well.
+ container_statuses = getattr(pod.status, "container_statuses", None)
+ container_state = None
+ container_reason = None
+ container_message = None
+ if container_statuses:
+ for cs in container_statuses:
+ state_obj = cs.state
+ if state_obj.terminated:
+ container_state = "terminated"
+ container_reason = getattr(state_obj.terminated, "reason", None)
Review Comment:
Good point. Fixed: only containers that actually failed (non-zero exit code
AND reason != "Completed") are now reported as failures.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]