Github user JoshRosen commented on a diff in the pull request: https://github.com/apache/spark/pull/4215#discussion_r23878382 --- Diff: core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala --- @@ -431,6 +458,155 @@ object SparkSubmit { } } +/** Provides utility functions to be used inside SparkSubmit. */ +private[spark] object SparkSubmitUtils extends Logging { + + // Directories for caching downloads through ivy and storing the jars when maven coordinates are + // supplied to spark-submit + private var PACKAGES_DIRECTORY: File = null + + /** + * Represents a Maven Coordinate + * @param groupId the groupId of the coordinate + * @param artifactId the artifactId of the coordinate + * @param version the version of the coordinate + */ + private[spark] case class MavenCoordinate(groupId: String, artifactId: String, version: String) + + /** + * Resolves any dependencies that were supplied through maven coordinates + * @param coordinates Comma-delimited string of maven coordinates + * @param remoteRepos Comma-delimited string of remote repositories other than maven central + * @param ivyPath The path to the local ivy repository + * @return The comma-delimited path to the jars of the given maven artifacts including their + * transitive dependencies + */ + private[spark] def resolveMavenCoordinates( + coordinates: String, + remoteRepos: String, + ivyPath: String, + isTest: Boolean = false): String = { + if (coordinates == null || coordinates.trim.isEmpty) { + "" + } else { + val artifacts = coordinates.split(",").map { p => + val splits = p.split(":") + require(splits.length == 3, s"Provided Maven Coordinates must be in the form " + + s"'groupId:artifactId:version'. The coordinate provided is: $p") + require(splits(0) != null && splits(0).trim.nonEmpty, s"The groupId cannot be null or " + + s"be whitespace. The groupId provided is: ${splits(0)}") + require(splits(1) != null && splits(1).trim.nonEmpty, s"The artifactId cannot be null or " + + s"be whitespace. 
The artifactId provided is: ${splits(1)}") + require(splits(2) != null && splits(2).trim.nonEmpty, s"The version cannot be null or " + + s"be whitespace. The version provided is: ${splits(2)}") + new MavenCoordinate(splits(0), splits(1), splits(2)) + } + // Default configuration name for ivy + val conf = "default" + // set ivy settings for location of cache + val ivySettings: IvySettings = new IvySettings + if (ivyPath == null || ivyPath.trim.isEmpty) { + PACKAGES_DIRECTORY = new File(ivySettings.getDefaultIvyUserDir, "jars") + } else { + ivySettings.setDefaultCache(new File(ivyPath, "cache")) + PACKAGES_DIRECTORY = new File(ivyPath, "jars") + } + logInfo(s"Ivy Default Cache set to: ${ivySettings.getDefaultCache.getAbsolutePath}") + logInfo(s"The jars for the packages stored in: $PACKAGES_DIRECTORY") + + // create a pattern matcher + ivySettings.addMatcher(new GlobPatternMatcher) + + // the biblio resolver resolves POM declared dependencies + val br: IBiblioResolver = new IBiblioResolver + br.setM2compatible(true) + br.setUsepoms(true) + br.setName("central") + + // We need a chain resolver if we want to check multiple repositories + val cr = new ChainResolver + cr.setName("list") + cr.add(br) + + // Add an exclusion rule for Spark + val sparkArtifacts = new ArtifactId(new ModuleId("org.apache.spark", "*"), "*", "*", "*") + val sparkDependencyExcludeRule = + new DefaultExcludeRule(sparkArtifacts, ivySettings.getMatcher("glob"), null) + sparkDependencyExcludeRule.addConfiguration(conf) + + // add any other remote repositories other than maven central + if (remoteRepos != null && remoteRepos.trim.nonEmpty) { + var i = 1 --- End diff -- Minor nit, but I think you can use `zipWithIndex` to avoid defining a var for counting in this loop.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastructure@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org