This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c7b4306c7c [SPARK-43540][K8S][CORE] Add working directory into classpath on the driver in K8S cluster mode
0c7b4306c7c is described below

commit 0c7b4306c7c5fbdd6c577774f8172f82e1d23e3b
Author: fwang12 <fwan...@ebay.com>
AuthorDate: Wed Jun 7 15:38:46 2023 -0700

    [SPARK-43540][K8S][CORE] Add working directory into classpath on the driver in K8S cluster mode
    
    ### What changes were proposed in this pull request?
    
    Add the working directory to the classpath on the driver in K8S cluster mode.
    
    ### Why are the changes needed?
    
    After #37417, the files from `spark.files` and `spark.jars` are placed in the working directory.
    However, it seems the Spark context classloader cannot access them, because the working directory is not on the classpath by default.
    This PR adds the current working directory to the classpath, so that the `spark.files` and `spark.jars` placed there become accessible to the classloader.
    For example, a `hive-site.xml` uploaded via `spark.files`.
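
    As a rough sketch (illustrative only, not taken from this patch), this is the kind of driver-side lookup that fails when the working directory is missing from the classpath:

    ```scala
    // Resolve a file shipped via spark.files from the driver's classloader.
    // Without "." on the classpath, hive-site.xml sits in the working
    // directory but the lookup below returns null in K8S cluster mode.
    val url = Thread.currentThread().getContextClassLoader.getResource("hive-site.xml")
    require(url != null, "hive-site.xml is not visible to the classloader")
    ```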
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. Users no longer need to add the working directory to the Spark classpath manually.
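
    Previously, one common workaround (a hypothetical sketch using the standard `spark.driver.extraClassPath` option; not taken from this patch) was:

    ```scala
    import org.apache.spark.SparkConf

    // Manually put the working directory on the driver classpath.
    // After SPARK-43540 this is no longer necessary in K8S cluster mode.
    val conf = new SparkConf().set("spark.driver.extraClassPath", ".")
    ```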
    
    ### How was this patch tested?
    
    Unit test added in `SparkSubmitSuite`.
    
    Closes #41201 from turboFei/work_dir_classpath.
    
    Authored-by: fwang12 <fwan...@ebay.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 12 +++++++-----
 .../scala/org/apache/spark/deploy/SparkSubmitSuite.scala     | 12 ++++++++++++
 .../docker/src/main/dockerfiles/spark/entrypoint.sh          |  3 +++
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index e1d616b9b83..8f9477385e7 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -414,6 +414,9 @@ private[spark] class SparkSubmit extends Logging {
         // directory too.
         // SPARK-33782 : This downloads all the files , jars , archiveFiles and pyfiles to current
         // working directory
+        // SPARK-43540: add current working directory into driver classpath
+        val workingDirectory = "."
+        childClasspath += workingDirectory
         def downloadResourcesToCurrentDirectory(uris: String, isArchive: Boolean = false):
         String = {
           val resolvedUris = Utils.stringToSeq(uris).map(Utils.resolveURI)
@@ -423,13 +426,12 @@ private[spark] class SparkSubmit extends Logging {
             targetDir, sparkConf, hadoopConf)
           Utils.stringToSeq(localResources).map(Utils.resolveURI).zip(resolvedUris).map {
             case (localResources, resolvedUri) =>
-              val source = new File(localResources.getPath)
+              val source = new File(localResources.getPath).getCanonicalFile
               val dest = new File(
-                ".",
+                workingDirectory,
                 if (resolvedUri.getFragment != null) resolvedUri.getFragment else source.getName)
-              logInfo(
-                s"Files  $resolvedUri " +
-                  s"from ${source.getAbsolutePath} to ${dest.getAbsolutePath}")
+                .getCanonicalFile
+              logInfo(s"Files $resolvedUri from $source to $dest")
               Utils.deleteRecursively(dest)
               if (isArchive) {
                 Utils.unpack(source, dest)
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 44c35ed70e0..8e2d6e6cf5f 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -1618,6 +1618,18 @@ class SparkSubmitSuite
       conf.get(k) should be (v)
     }
   }
+
+  test("SPARK-43540: Add working directory into classpath on the driver in K8S 
cluster mode") {
+    val clArgs = Seq(
+      "--deploy-mode", "client",
+      "--master", "k8s://host:port",
+      "--class", "org.SomeClass",
+      "--conf", "spark.kubernetes.submitInDriver=true",
+      "/home/thejar.jar")
+    val appArgs = new SparkSubmitArguments(clArgs)
+    val (_, classpath, _, _) = submit.prepareSubmitEnvironment(appArgs)
+    assert(classpath.contains("."))
+  }
 }
 
 object JarCreationTest extends Logging {
diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
index 42f4df88f3d..f9561b9aa4e 100755
--- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
+++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
@@ -75,6 +75,9 @@ elif ! [ -z ${SPARK_HOME+x} ]; then
   SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
 fi
 
+# SPARK-43540: add current working directory into executor classpath
+SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"
+
 case "$1" in
   driver)
     shift 1


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
