Re: [PR] Bugfix/fix spark k8s pod duplicate issue [airflow]

via GitHub Mon, 02 Feb 2026 12:49:01 -0800


SameerMesiah97 commented on code in PR #61110:
URL: https://github.com/apache/airflow/pull/61110#discussion_r2756079622



##########
providers/cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_spark_kubernetes.py:
##########
@@ -983,21 +991,141 @@ def test_find_spark_job_picks_running_pod(
         running_pod.metadata.labels = {"try_number": "1"}
         running_pod.status.phase = "Running"
 
-        # Pending pod should not be selected.
+        # Terminating pod should not be selected.
+        terminating_pod = mock.MagicMock()
+        terminating_pod.metadata.creation_timestamp = timezone.datetime(2025, 
1, 1, tzinfo=timezone.utc)
+        terminating_pod.metadata.deletion_timestamp = timezone.datetime(2025, 
1, 2, tzinfo=timezone.utc)
+        terminating_pod.metadata.name = "spark-driver-pending"
+        terminating_pod.metadata.labels = {"try_number": "1"}
+        terminating_pod.status.phase = "Running"
+
+        mock_get_kube_client.list_namespaced_pod.return_value.items = [
+            running_pod,
+            terminating_pod,
+        ]
+
+        returned_pod = op.find_spark_job(context)
+
+        assert returned_pod is running_pod
+
+    def test_find_spark_job_picks_pending_pod(
+        self,
+        mock_is_in_cluster,
+        mock_parent_execute,
+        mock_create_namespaced_crd,
+        mock_get_namespaced_custom_object_status,
+        mock_cleanup,
+        mock_create_job_name,
+        mock_get_kube_client,
+        mock_create_pod,
+        mock_await_pod_completion,
+        mock_fetch_requested_container_logs,
+        data_file,
+    ):
+        """
+        Verifies that find_spark_job picks a Running Spark driver pod over a 
non-Running pod.
+        """
+
+        task_name = "test_find_spark_job_prefers_running_pod"
+        job_spec = 
yaml.safe_load(data_file("spark/application_template.yaml").read_text())
+
+        mock_create_job_name.return_value = task_name
+        op = SparkKubernetesOperator(
+            template_spec=job_spec,
+            kubernetes_conn_id="kubernetes_default_kube_config",
+            task_id=task_name,
+            get_logs=True,
+            reattach_on_restart=True,
+        )
+        context = create_context(op)
+
+        # Pending pod should be selected.
         pending_pod = mock.MagicMock()
         pending_pod.metadata.creation_timestamp = timezone.datetime(2025, 1, 
1, tzinfo=timezone.utc)
-        pending_pod.metadata.name = "spark-driver-pending"
+        pending_pod.metadata.name = "spark-driver-running"
         pending_pod.metadata.labels = {"try_number": "1"}
         pending_pod.status.phase = "Pending"
 
+        # Terminating pod should not be selected.
+        terminating_pod = mock.MagicMock()
+        terminating_pod.metadata.creation_timestamp = timezone.datetime(2025, 
1, 1, tzinfo=timezone.utc)
+        terminating_pod.metadata.deletion_timestamp = timezone.datetime(2025, 
1, 2, tzinfo=timezone.utc)
+        terminating_pod.metadata.name = "spark-driver-pending"
+        terminating_pod.metadata.labels = {"try_number": "1"}
+        terminating_pod.status.phase = "Running"
+
         mock_get_kube_client.list_namespaced_pod.return_value.items = [
-            running_pod,
             pending_pod,
+            terminating_pod,
         ]
 
         returned_pod = op.find_spark_job(context)
 
-        assert returned_pod is running_pod
+        assert returned_pod is pending_pod
+

Review Comment:
   What is the intention behind this test? My understanding is that this is not 
testing phase-based selection but whether the ordering key based on 
`deletion_timestamp` prioritizes pods that are not terminating over those that 
are terminating. If you wish to keep the naming of the variables the same, I 
would advise adding a short comment explicitly stating that 'Pending' pods have 
no `deletion_timestamp`. This is not obvious. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Bugfix/fix spark k8s pod duplicate issue [airflow]

Reply via email to