[GitHub] spark pull request #19468: [SPARK-18278] [Scheduler] Spark on Kubernetes - B...

mccheah Wed, 25 Oct 2017 15:15:42 -0700

Github user mccheah commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19468#discussion_r147001912
  
    --- Diff: resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactory.scala ---
    @@ -0,0 +1,229 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.spark.scheduler.cluster.k8s
    +
    +import scala.collection.JavaConverters._
    +
    +import io.fabric8.kubernetes.api.model._
    +
    +import org.apache.spark.{SparkConf, SparkException}
    +import org.apache.spark.deploy.k8s.ConfigurationUtils
    +import org.apache.spark.deploy.k8s.config._
    +import org.apache.spark.deploy.k8s.constants._
    +import org.apache.spark.util.Utils
    +
    +/**
    + * Configures executor pods. Construct one of these with a SparkConf to set up properties that are
    + * common across all executors. Then, pass in dynamic parameters into createExecutorPod.
    + */
    +private[spark] trait ExecutorPodFactory {
    +  def createExecutorPod(
    +      executorId: String,
    +      applicationId: String,
    +      driverUrl: String,
    +      executorEnvs: Seq[(String, String)],
    +      driverPod: Pod,
    +      nodeToLocalTaskCount: Map[String, Int]): Pod
    +}
    +
    +private[spark] class ExecutorPodFactoryImpl(sparkConf: SparkConf)
    +  extends ExecutorPodFactory {
    +
    +  import ExecutorPodFactoryImpl._
    +
    +  private val executorExtraClasspath = sparkConf.get(
    +    org.apache.spark.internal.config.EXECUTOR_CLASS_PATH)
    +  private val executorJarsDownloadDir = 
sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION)
    +
    +  private val executorLabels = 
ConfigurationUtils.parsePrefixedKeyValuePairs(
    +    sparkConf,
    +    KUBERNETES_EXECUTOR_LABEL_PREFIX,
    +    "executor label")
    +  require(
    +    !executorLabels.contains(SPARK_APP_ID_LABEL),
    +    s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is 
reserved for Spark.")
    +  require(
    +    !executorLabels.contains(SPARK_EXECUTOR_ID_LABEL),
    +    s"Custom executor labels cannot contain $SPARK_EXECUTOR_ID_LABEL as it 
is reserved for" +
    +      s" Spark.")
    +
    +  private val executorAnnotations =
    +    ConfigurationUtils.parsePrefixedKeyValuePairs(
    +      sparkConf,
    +      KUBERNETES_EXECUTOR_ANNOTATION_PREFIX,
    +      "executor annotation")
    +  private val nodeSelector =
    +    ConfigurationUtils.parsePrefixedKeyValuePairs(
    +      sparkConf,
    +      KUBERNETES_NODE_SELECTOR_PREFIX,
    +      "node selector")
    +
    +  private val executorDockerImage = sparkConf.get(EXECUTOR_DOCKER_IMAGE)
    +  private val dockerImagePullPolicy = 
sparkConf.get(DOCKER_IMAGE_PULL_POLICY)
    +  private val executorPort = sparkConf.getInt("spark.executor.port", 
DEFAULT_STATIC_PORT)
    +  private val blockmanagerPort = sparkConf
    +    .getInt("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT)
    +  private val kubernetesDriverPodName = sparkConf
    +    .get(KUBERNETES_DRIVER_POD_NAME)
    +    .getOrElse(throw new SparkException("Must specify the driver pod 
name"))
    +
    +  private val executorPodNamePrefix = 
sparkConf.get(KUBERNETES_EXECUTOR_POD_NAME_PREFIX)
    +
    +  private val executorMemoryMiB = 
sparkConf.get(org.apache.spark.internal.config.EXECUTOR_MEMORY)
    +  private val executorMemoryString = sparkConf.get(
    +    org.apache.spark.internal.config.EXECUTOR_MEMORY.key,
    +    org.apache.spark.internal.config.EXECUTOR_MEMORY.defaultValueString)
    +
    +  private val memoryOverheadMiB = sparkConf
    +    .get(KUBERNETES_EXECUTOR_MEMORY_OVERHEAD)
    +    .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * executorMemoryMiB).toInt,
    +      MEMORY_OVERHEAD_MIN_MIB))
    +  private val executorMemoryWithOverhead = executorMemoryMiB + 
memoryOverheadMiB
    +
    +  private val executorCores = sparkConf.getDouble("spark.executor.cores", 
1d)
    +  private val executorLimitCores = 
sparkConf.getOption(KUBERNETES_EXECUTOR_LIMIT_CORES.key)
    +
    +  override def createExecutorPod(
    +      executorId: String,
    +      applicationId: String,
    +      driverUrl: String,
    +      executorEnvs: Seq[(String, String)],
    +      driverPod: Pod,
    +      nodeToLocalTaskCount: Map[String, Int]): Pod = {
    +    val name = s"$executorPodNamePrefix-exec-$executorId"
    +
    +    // hostname must be no longer than 63 characters, so take the last 63 
characters of the pod
    +    // name as the hostname.  This preserves uniqueness since the end of 
name contains
    +    // executorId and applicationId
    +    val hostname = name.substring(Math.max(0, name.length - 63))
    +    val resolvedExecutorLabels = Map(
    +      SPARK_EXECUTOR_ID_LABEL -> executorId,
    +      SPARK_APP_ID_LABEL -> applicationId,
    +      SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++
    +      executorLabels
    +    val executorMemoryQuantity = new QuantityBuilder(false)
    +      .withAmount(s"${executorMemoryMiB}Mi")
    +      .build()
    +    val executorMemoryLimitQuantity = new QuantityBuilder(false)
    +      .withAmount(s"${executorMemoryWithOverhead}Mi")
    +      .build()
    +    val executorCpuQuantity = new QuantityBuilder(false)
    +      .withAmount(executorCores.toString)
    +      .build()
    +    val executorExtraClasspathEnv = executorExtraClasspath.map { cp =>
    +      new EnvVarBuilder()
    +        .withName(ENV_EXECUTOR_EXTRA_CLASSPATH)
    +        .withValue(cp)
    +        .build()
    +    }
    +    val executorExtraJavaOptionsEnv = sparkConf
    +      .get(org.apache.spark.internal.config.EXECUTOR_JAVA_OPTIONS)
    +      .map { opts =>
    +        val delimitedOpts = Utils.splitCommandString(opts)
    +        delimitedOpts.zipWithIndex.map {
    +          case (opt, index) =>
    +            new 
EnvVarBuilder().withName(s"$ENV_JAVA_OPT_PREFIX$index").withValue(opt).build()
    +        }
    +      }.getOrElse(Seq.empty[EnvVar])
    +    val executorEnv = (Seq(
    +      (ENV_EXECUTOR_PORT, executorPort.toString),
    +      (ENV_DRIVER_URL, driverUrl),
    +      // Executor backend expects integral value for executor cores, so 
round it up to an int.
    +      (ENV_EXECUTOR_CORES, math.ceil(executorCores).toInt.toString),
    +      (ENV_EXECUTOR_MEMORY, executorMemoryString),
    +      (ENV_APPLICATION_ID, applicationId),
    +      (ENV_EXECUTOR_ID, executorId),
    +      (ENV_MOUNTED_CLASSPATH, s"$executorJarsDownloadDir/*")) ++ 
executorEnvs)
    +      .map(env => new EnvVarBuilder()
    +        .withName(env._1)
    +        .withValue(env._2)
    +        .build()
    +      ) ++ Seq(
    +      new EnvVarBuilder()
    +        .withName(ENV_EXECUTOR_POD_IP)
    +        .withValueFrom(new EnvVarSourceBuilder()
    +          .withNewFieldRef("v1", "status.podIP")
    +          .build())
    +        .build()
    --- End diff --
    
    Hm, the above comment should be more precise.
    
    DNS is handled by kube-dns in any given Kubernetes context. (kube-dns is an optional component, but in future commits it will become clear that Spark will require kube-dns to be installed on the cluster, and we will document it as such.) kube-dns creates a DNS entry for each service that routes to the IPs of pods in the system. However, kube-dns does not create a DNS entry for an individual pod unless there is a service that maps to it.



---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #19468: [SPARK-18278] [Scheduler] Spark on Kubernetes - B...

Reply via email to