Github user mccheah commented on a diff in the pull request: https://github.com/apache/spark/pull/21241#discussion_r186825312 --- Diff: resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala --- @@ -320,50 +322,83 @@ private[spark] class KubernetesClusterSchedulerBackend( override def eventReceived(action: Action, pod: Pod): Unit = { val podName = pod.getMetadata.getName val podIP = pod.getStatus.getPodIP - + val podPhase = pod.getStatus.getPhase action match { - case Action.MODIFIED if (pod.getStatus.getPhase == "Running" + case Action.MODIFIED if (podPhase == "Running" && pod.getMetadata.getDeletionTimestamp == null) => val clusterNodeName = pod.getSpec.getNodeName logInfo(s"Executor pod $podName ready, launched at $clusterNodeName as IP $podIP.") executorPodsByIPs.put(podIP, pod) - case Action.DELETED | Action.ERROR => + case Action.MODIFIED if (podPhase == "Init:Error" || podPhase == "Init:CrashLoopBackoff") --- End diff -- The Kubernetes client doesn't use any enumerations from the underlying API; it only takes the raw strings in the response body. So if the response gives us those values, we should be fine.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org