This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b303eced7f86 [SPARK-46530][PYTHON][SQL][FOLLOW-UP] Uses path separator instead of file separator to correctly check PySpark library existence b303eced7f86 is described below commit b303eced7f8639887278db34e0080ffa0c19bd0c Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Thu Jan 4 15:49:45 2024 +0900 [SPARK-46530][PYTHON][SQL][FOLLOW-UP] Uses path separator instead of file separator to correctly check PySpark library existence ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/44519 that fixes a mistake of separating the paths. It should use `File.pathSeparator`. ### Why are the changes needed? It works with testing mode, but it doesn't work with production mode otherwise. ### Does this PR introduce _any_ user-facing change? No, because the main change has not been released. ### How was this patch tested? Manually as described in "How was this patch tested?" at https://github.com/apache/spark/pull/44504. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44590 from HyukjinKwon/SPARK-46530-followup. 
Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala | 6 ++++-- .../apache/spark/sql/execution/datasources/DataSourceManager.scala | 4 +--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 26c790a12447..929058fb7185 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -36,7 +36,7 @@ private[spark] object PythonUtils extends Logging { val PY4J_ZIP_NAME = "py4j-0.10.9.7-src.zip" /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */ - def sparkPythonPath: String = { + def sparkPythonPaths: Seq[String] = { val pythonPath = new ArrayBuffer[String] for (sparkHome <- sys.env.get("SPARK_HOME")) { pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator) @@ -44,9 +44,11 @@ private[spark] object PythonUtils extends Logging { Seq(sparkHome, "python", "lib", PY4J_ZIP_NAME).mkString(File.separator) } pythonPath ++= SparkContext.jarOfObject(this) - pythonPath.mkString(File.pathSeparator) + pythonPath.toSeq } + def sparkPythonPath: String = sparkPythonPaths.mkString(File.pathSeparator) + /** Merge PYTHONPATHS with the appropriate separator. Ignores blank strings. 
*/ def mergePythonPaths(paths: String*): String = { paths.filter(_ != "").mkString(File.pathSeparator) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala index 4fc636a59e5a..236ab98969e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.execution.datasources import java.io.File import java.util.Locale import java.util.concurrent.ConcurrentHashMap -import java.util.regex.Pattern import scala.jdk.CollectionConverters._ @@ -91,8 +90,7 @@ object DataSourceManager extends Logging { private lazy val shouldLoadPythonDataSources: Boolean = { Utils.checkCommandAvailable(PythonUtils.defaultPythonExec) && // Make sure PySpark zipped files also exist. - PythonUtils.sparkPythonPath - .split(Pattern.quote(File.separator)).forall(new File(_).exists()) + PythonUtils.sparkPythonPaths.forall(new File(_).exists()) } private def initialDataSourceBuilders: Map[String, UserDefinedPythonDataSource] = { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org