Github user mccheah commented on a diff in the pull request: https://github.com/apache/spark/pull/21067#discussion_r194862373 --- Diff: resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala --- @@ -67,12 +68,19 @@ private[spark] class BasicExecutorFeatureStep( } private val executorLimitCores = kubernetesConf.get(KUBERNETES_EXECUTOR_LIMIT_CORES) - override def configurePod(pod: SparkPod): SparkPod = { - val name = s"$executorPodNamePrefix-exec-${kubernetesConf.roleSpecificConf.executorId}" + // If the driver pod is killed, the new driver pod will try to + // create new executors with the same name, but it will fail + // and hangs indefinitely because a terminating executors blocks + // the creation of the new ones, so to avoid that apply salt + private val executorNameSalt = Random.alphanumeric.take(4).mkString("").toLowerCase --- End diff -- 4 alphanumeric characters do not have enough randomness to be a reliable salt, which is why I suggest this to avoid collisions.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org