This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 0c7b4306c7c [SPARK-43540][K8S][CORE] Add working directory into classpath on the driver in K8S cluster mode 0c7b4306c7c is described below commit 0c7b4306c7c5fbdd6c577774f8172f82e1d23e3b Author: fwang12 <fwan...@ebay.com> AuthorDate: Wed Jun 7 15:38:46 2023 -0700 [SPARK-43540][K8S][CORE] Add working directory into classpath on the driver in K8S cluster mode ### What changes were proposed in this pull request? Adding the working directory into the classpath on the driver in K8S cluster mode. ### Why are the changes needed? After #37417, the spark.files and spark.jars are placed in the working directory. But it seems that the spark context classloader cannot access them because they are not in the classpath by default. This PR adds the current working directory into the classpath, so that the spark.files and spark.jars placed in the working directory can be accessed by the classloader. For example, the `hive-site.xml` uploaded by `spark.files`. ### Does this PR introduce _any_ user-facing change? Yes, users do not need to add the working directory into the spark classpath manually. ### How was this patch tested? UT. Closes #41201 from turboFei/work_dir_classpath. Authored-by: fwang12 <fwan...@ebay.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 12 +++++++----- .../scala/org/apache/spark/deploy/SparkSubmitSuite.scala | 12 ++++++++++++ .../docker/src/main/dockerfiles/spark/entrypoint.sh | 3 +++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index e1d616b9b83..8f9477385e7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -414,6 +414,9 @@ private[spark] class SparkSubmit extends Logging { // directory too. 
// SPARK-33782 : This downloads all the files , jars , archiveFiles and pyfiles to current // working directory + // SPARK-43540: add current working directory into driver classpath + val workingDirectory = "." + childClasspath += workingDirectory def downloadResourcesToCurrentDirectory(uris: String, isArchive: Boolean = false): String = { val resolvedUris = Utils.stringToSeq(uris).map(Utils.resolveURI) @@ -423,13 +426,12 @@ private[spark] class SparkSubmit extends Logging { targetDir, sparkConf, hadoopConf) Utils.stringToSeq(localResources).map(Utils.resolveURI).zip(resolvedUris).map { case (localResources, resolvedUri) => - val source = new File(localResources.getPath) + val source = new File(localResources.getPath).getCanonicalFile val dest = new File( - ".", + workingDirectory, if (resolvedUri.getFragment != null) resolvedUri.getFragment else source.getName) - logInfo( - s"Files $resolvedUri " + - s"from ${source.getAbsolutePath} to ${dest.getAbsolutePath}") + .getCanonicalFile + logInfo(s"Files $resolvedUri from $source to $dest") Utils.deleteRecursively(dest) if (isArchive) { Utils.unpack(source, dest) diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 44c35ed70e0..8e2d6e6cf5f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -1618,6 +1618,18 @@ class SparkSubmitSuite conf.get(k) should be (v) } } + + test("SPARK-43540: Add working directory into classpath on the driver in K8S cluster mode") { + val clArgs = Seq( + "--deploy-mode", "client", + "--master", "k8s://host:port", + "--class", "org.SomeClass", + "--conf", "spark.kubernetes.submitInDriver=true", + "/home/thejar.jar") + val appArgs = new SparkSubmitArguments(clArgs) + val (_, classpath, _, _) = submit.prepareSubmitEnvironment(appArgs) + assert(classpath.contains(".")) + } } object 
JarCreationTest extends Logging { diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh index 42f4df88f3d..f9561b9aa4e 100755 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh @@ -75,6 +75,9 @@ elif ! [ -z ${SPARK_HOME+x} ]; then SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH"; fi +# SPARK-43540: add current working directory into executor classpath +SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD" + case "$1" in driver) shift 1 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org