eladkal commented on code in PR #63946:
URL: https://github.com/apache/airflow/pull/63946#discussion_r2962141989
##########
providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py:
##########
@@ -629,6 +654,29 @@ def get_or_create_pod(self, pod_request_obj: k8s.V1Pod,
context: Context) -> k8s
self.log.info("Deleted pod to handle rerun and create new
pod!")
self.log.debug("Starting pod:\n%s",
yaml.safe_dump(pod_request_obj.to_dict()))
+
+ # Check resource quotas before attempting to create the pod
+ if self.check_resource_quotas and self.on_quota_exceeded != "ignore":
+ try:
+ check_pod_quota_compliance(self.client, pod_request_obj,
pod_request_obj.metadata.namespace)
+ except PodResourceQuotaExceededException as e:
+ if self.on_quota_exceeded == "queue":
+ self.log.warning(
+ "Pod creation would exceed resource quota. Task will
be rescheduled to retry later. %s",
+ str(e),
+ )
+ reschedule_time =
datetime.datetime.now(datetime.timezone.utc) + timedelta(
+ seconds=self.quota_check_interval
+ )
+ raise AirflowRescheduleException(reschedule_time)
+ if self.on_quota_exceeded == "fail":
+ self.log.error(
+ "Pod creation blocked due to resource quota violation.
"
+ "Set on_quota_exceeded='queue' to retry or 'ignore' to
skip this check."
+ )
+ raise
Review Comment:
The quota issue is not a trivial problem, and I think this somewhat
oversimplifies it.
I also find this somewhat confusing, as reschedule is something reserved for
the scheduler, and this is not about that.
There is also the question of how the K8s executor should act on quota issues.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]