HsiuChuanHsu commented on code in PR #54115:
URL: https://github.com/apache/airflow/pull/54115#discussion_r2255541525
##########
providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py:
##########
@@ -417,6 +417,46 @@ def _change_state(
if TYPE_CHECKING:
assert self.kube_scheduler
+ if state == TaskInstanceState.FAILED:
+ try:
+ if self.kube_client:
+ pod = self.kube_client.read_namespaced_pod(name=pod_name,
namespace=namespace)
+ pod_status = getattr(pod.status, "phase", None)
+ pod_reason = getattr(pod.status, "reason", None)
+ pod_message = getattr(pod.status, "message", None)
+
+ # If containerStatuses has detailed reasons, print them as
well.
+ container_statuses = getattr(pod.status,
"container_statuses", None)
+ container_state = None
+ container_reason = None
+ container_message = None
+ if container_statuses:
+ for cs in container_statuses:
+ state_obj = cs.state
+ if state_obj.terminated:
+ container_state = "terminated"
+ container_reason =
getattr(state_obj.terminated, "reason", None)
+ container_message =
getattr(state_obj.terminated, "message", None)
+ break
+ if state_obj.waiting:
+ container_state = "waiting"
+ container_reason = getattr(state_obj.waiting,
"reason", None)
+ container_message = getattr(state_obj.waiting,
"message", None)
+ break
+ self.log.error(
Review Comment:
I've changed the log level to warning for now. Yes, from the scheduler's
perspective it's just normal business logic where a task didn't succeed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]