[ https://issues.apache.org/jira/browse/SPARK-24491?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16727003#comment-16727003 ]

ASF GitHub Bot commented on SPARK-24491:
----------------------------------------

vanzin closed pull request #21511: [SPARK-24491][Kubernetes] Configuration 
support for requesting GPUs on k8s
URL: https://github.com/apache/spark/pull/21511
 
 
   

This pull request comes from a fork, so GitHub does not show the original
diff here; it is reproduced below for the sake of provenance:

diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
index bf33179ae3dab..6ba78f75689bf 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
@@ -104,6 +104,20 @@ private[spark] object Config extends Logging {
       .stringConf
       .createOptional
 
+  val KUBERNETES_EXECUTOR_LIMIT_GPUS =
+    ConfigBuilder("spark.kubernetes.executor.limit.gpus")
+      .doc("Specify the gpu request for each executor pod")
+      .stringConf
+      .createOptional
+
+  val KUBERNETES_EXECUTOR_GPU_PROVIDER =
+    ConfigBuilder("spark.kubernetes.executor.gpu.provider")
+      .doc("Specify the gpu provider for each executor pod")
+      .stringConf
+      .createWithDefault("nvidia.com")
+
+
+
   val KUBERNETES_DRIVER_POD_NAME =
     ConfigBuilder("spark.kubernetes.driver.pod.name")
       .doc("Name of the driver pod.")
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala
index 91c54a9776982..1b82b29b7baa0 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala
@@ -67,6 +67,8 @@ private[spark] class BasicExecutorFeatureStep(
       executorCores.toString
     }
   private val executorLimitCores = kubernetesConf.get(KUBERNETES_EXECUTOR_LIMIT_CORES)
+  private val executorLimitGpus = kubernetesConf.get(KUBERNETES_EXECUTOR_LIMIT_GPUS)
+  private val gpuProvider = kubernetesConf.get(KUBERNETES_EXECUTOR_GPU_PROVIDER)
 
   override def configurePod(pod: SparkPod): SparkPod = {
     val name = s"$executorPodNamePrefix-exec-${kubernetesConf.roleSpecificConf.executorId}"
@@ -151,6 +153,16 @@ private[spark] class BasicExecutorFeatureStep(
           .endResources()
         .build()
     }.getOrElse(executorContainer)
+    val containerWithLimitGpus = executorLimitGpus.map { limitGpus =>
+      val executorGpuLimitQuantity = new QuantityBuilder(false)
+        .withAmount(limitGpus)
+        .build()
+      new ContainerBuilder(containerWithLimitCores)
+        .editResources()
+          .addToLimits(gpuProvider + "/gpu", executorGpuLimitQuantity)
+          .endResources()
+        .build()
+    }.getOrElse(containerWithLimitCores)
     val driverPod = kubernetesConf.roleSpecificConf.driverPod
     val executorPod = new PodBuilder(pod.pod)
       .editOrNewMetadata()
@@ -173,7 +185,7 @@ private[spark] class BasicExecutorFeatureStep(
         .addToImagePullSecrets(kubernetesConf.imagePullSecrets(): _*)
         .endSpec()
       .build()
-    SparkPod(executorPod, containerWithLimitCores)
+    SparkPod(executorPod, containerWithLimitGpus)
   }
 
   override def getAdditionalPodSystemProperties(): Map[String, String] = Map.empty
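
For context, the two keys added in the diff would be ordinary Spark configuration entries. The following is a minimal usage sketch only, assuming the exact key names and default value proposed in the closed PR above (the object name is illustrative):

import org.apache.spark.SparkConf

object GpuConfSketch {
  // Sketch: setting the configuration keys proposed in the diff above.
  // With these values the executor container's resources.limits would gain a
  // "<provider>/gpu" entry, i.e. "nvidia.com/gpu" -> 1 here.
  val conf = new SparkConf()
    .set("spark.kubernetes.executor.limit.gpus", "1")
    // Optional: override the device-plugin vendor prefix (default "nvidia.com").
    .set("spark.kubernetes.executor.gpu.provider", "nvidia.com")
}

The same settings could equally be passed as --conf arguments to spark-submit, which is the usage proposed in the issue description below.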


 


> Support for requesting GPU resources on K8S
> -------------------------------------------
>
>                 Key: SPARK-24491
>                 URL: https://issues.apache.org/jira/browse/SPARK-24491
>             Project: Spark
>          Issue Type: New Feature
>          Components: Kubernetes
>    Affects Versions: 2.3.0
>            Reporter: Alex Milowski
>            Priority: Minor
>
> If GPU resources are required for executor pods in a multi-tenant k8s 
> cluster, the resource limits section needs to specify that GPUs are required 
> [1]. This is a simple request:
> {{resources:}}
> {{  limits:}}
> {{    nvidia.com/gpu: 1 # requesting 1 GPU}}
>  A simple configuration limit could be specified (just like executor memory):
> {{  --conf spark.kubernetes.executor.limit.gpus=1}}
> [1] [https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/]
> I have an implementation I will submit via a pull request.
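
For illustration, the mechanism the reporter describes is simply an extended-resource entry in the executor container's resources.limits map, which the diff above builds with the fabric8 Kubernetes client. A minimal self-contained sketch of that composition (the object and container names are assumptions for the example; "nvidia.com/gpu" mirrors the NVIDIA device plugin's resource name):

import io.fabric8.kubernetes.api.model.{ContainerBuilder, QuantityBuilder}

object GpuLimitSketch {
  // A GPU request on Kubernetes is an extended-resource entry in the container's
  // resources.limits map; other vendors expose their own "<vendor>/gpu" name.
  val oneGpu = new QuantityBuilder(false)
    .withAmount("1") // requesting 1 GPU, as in the issue's example
    .build()

  val executorContainer = new ContainerBuilder()
    .withName("spark-kubernetes-executor") // hypothetical name, for the sketch only
    .withNewResources()
      .addToLimits("nvidia.com/gpu", oneGpu)
    .endResources()
    .build()
}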


