xkrogen commented on a change in pull request #29966: URL: https://github.com/apache/spark/pull/29966#discussion_r529840209
########## File path: core/src/main/scala/org/apache/spark/util/Utils.scala ########## @@ -2980,6 +2980,77 @@ private[spark] object Utils extends Logging { metadata.toString } + /** + * Download the jars of the dependencies specified by an Ivy URI. + * + * @param uri Ivy URI of the dependency to be downloaded. + * @return Comma-separated list of URIs of the downloaded jars + */ + def resolveMavenDependencies(uri: URI): String = { + val Seq(repositories, ivyRepoPath, ivySettingsPath) = + Seq( + "spark.jars.repositories", + "spark.jars.ivy", + "spark.jars.ivySettings" + ).map(sys.props.get(_).orNull) + // Create the IvySettings, either load from file or build defaults + val ivySettings = Option(ivySettingsPath) match { + case Some(path) => + SparkSubmitUtils.loadIvySettings(path, Option(repositories), Option(ivyRepoPath)) + + case None => + SparkSubmitUtils.buildIvySettings(Option(repositories), Option(ivyRepoPath)) + } + SparkSubmitUtils.resolveMavenCoordinates(uri.getAuthority, ivySettings, + parseExcludeList(uri.getQuery), parseTransitive(uri.getQuery)) + } + + private def parseURLQueryParameter(queryString: String, queryTag: String): Array[String] = { + if (queryString == null || queryString.isEmpty) { + Array.empty[String] + } else { + val mapTokens = queryString.split("&") + assert(mapTokens.forall(_.split("=").length == 2), "Invalid query string: " + queryString) + mapTokens.map(_.split("=")).map(kv => (kv(0), kv(1))).filter(_._1 == queryTag).map(_._2) + } + } + + /** + * Parse the exclude list in an Ivy URI. When downloading the jars for an Ivy URI, Spark won't + * download the transitive jars listed in the exclude list. + * + * @param queryString Ivy URI query part string. + * @return Exclude list parsed from the `exclude` query parameters. 
+ * Example: Input: exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http + * Output: [org.mortbay.jetty:jetty, org.eclipse.jetty:jetty-http] */ + private def parseExcludeList(queryString: String): Array[String] = { + parseURLQueryParameter(queryString, "exclude") + .flatMap { excludeString => + val excludes: Array[String] = excludeString.split(",") + assert(excludes.forall(_.split(":").length == 2), + "Invalid exclude string: expected 'org:module,org:module,..', found " + excludeString) + excludes + } + } + + /** + * Parse the transitive parameter in an Ivy URI; the default value is false. + * + * @param queryString Ivy URI query part string. + * @return Boolean value of the transitive query parameter. + * Example: Input: exclude=org.mortbay.jetty:jetty&transitive=true + * Output: true + */ + private def parseTransitive(queryString: String): Boolean = { + val transitive = parseURLQueryParameter(queryString, "transitive") + if (transitive.isEmpty) { + false + } else { + transitive.last.toBoolean Review comment: Maybe print a warning if it is specified multiple times? This is not expected. ########## File path: core/src/main/scala/org/apache/spark/util/Utils.scala ########## @@ -2980,6 +2980,77 @@ private[spark] object Utils extends Logging { metadata.toString } + /** + * Download the jars of the dependencies specified by an Ivy URI. + * + * @param uri Ivy URI of the dependency to be downloaded. 
+ * @return Comma-separated list of URIs of the downloaded jars + */ + def resolveMavenDependencies(uri: URI): String = { + val Seq(repositories, ivyRepoPath, ivySettingsPath) = + Seq( + "spark.jars.repositories", + "spark.jars.ivy", + "spark.jars.ivySettings" + ).map(sys.props.get(_).orNull) + // Create the IvySettings, either load from file or build defaults + val ivySettings = Option(ivySettingsPath) match { + case Some(path) => + SparkSubmitUtils.loadIvySettings(path, Option(repositories), Option(ivyRepoPath)) Review comment: Some of this logic is duplicated from `DependencyUtils.resolveMavenDependencies`; it seems it would be better if we could unify them. `DependencyUtils` seems like a more suitable place for this logic anyway. With the addition of `Utils.resolveMavenDependencies` we have three identically-named methods in 3 different utility classes... I worry this will become very confusing. Better to consolidate. ########## File path: sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala ########## @@ -159,6 +161,13 @@ class SessionResourceLoader(session: SparkSession) extends FunctionResourceLoade } } + protected def resolveJars(path: String): List[String] = { + new Path(path).toUri.getScheme match { Review comment: Similar comment for `addJar` below ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org