spark git commit: [SPARK-9263] Added flags to exclude dependencies when using --packages
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 73c863ac8 -> 34335719a


[SPARK-9263] Added flags to exclude dependencies when using --packages

While the functionality to exclude packages already exists, there is no
flag that lets users exclude specific dependencies in case of dependency
conflicts. We should provide users with a flag to add dependency
exclusions for cases where packages are not resolved properly (or are
not available due to licensing).

The flag I added was --packages-exclude, but I'm open to renaming it. I
also added property flags in case people would like to use a conf file
to provide dependencies, which helps when there is a long list of
dependencies or exclusions.

cc andrewor14 vanzin pwendell

Author: Burak Yavuz

Closes #7599 from brkyvz/packages-exclusions and squashes the following commits:

636f410 [Burak Yavuz] addressed nits
6e54ede [Burak Yavuz] is this the culprit
b5e508e [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into packages-exclusions
154f5db [Burak Yavuz] addressed initial comments
1536d7a [Burak Yavuz] Added flags to exclude packages using --packages-exclude

(cherry picked from commit 1633d0a2612d94151f620c919425026150e69ae1)
Signed-off-by: Marcelo Vanzin
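A hedged usage sketch, not part of the commit: the commit message names
the flag --packages-exclude, while released Spark builds expose it as
--exclude-packages alongside the spark.jars.packages and
spark.jars.excludes properties, so check spark-submit --help on your
version for the exact names. Assuming the released names (the Maven
coordinates below are illustrative, not from this commit), excluding a
conflicting transitive artifact from a --packages resolution would look
like:

    spark-submit \
      --packages com.databricks:spark-csv_2.10:1.2.0 \
      --exclude-packages org.apache.commons:commons-csv \
      my_app.jar

The same thing through a conf file passed with --properties-file:

    spark.jars.packages  com.databricks:spark-csv_2.10:1.2.0
    spark.jars.excludes  org.apache.commons:commons-csv

Exclusions are plain groupId:artifactId pairs, i.e. the
groupId:artifactId:version form that --packages takes, minus the version.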
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/34335719
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/34335719
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/34335719

Branch: refs/heads/branch-1.5
Commit: 34335719a372c1951fdb4dd25b75b086faf1076f
Parents: 73c863a
Author: Burak Yavuz
Authored: Mon Aug 3 17:42:03 2015 -0700
Committer: Marcelo Vanzin
Committed: Mon Aug 3 17:42:35 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/deploy/SparkSubmit.scala   | 29 ++++++++++++---------------
 .../spark/deploy/SparkSubmitArguments.scala     | 11 +++++++++++
 .../spark/deploy/SparkSubmitUtilsSuite.scala    | 30 ++++++++++++++++++++++++++
 .../spark/launcher/SparkSubmitOptionParser.java |  2 ++
 4 files changed, 57 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/34335719/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 0b39ee8..31185c8 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -24,6 +24,7 @@ import java.security.PrivilegedExceptionAction
 
 import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
 
+import org.apache.commons.lang3.StringUtils
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.security.UserGroupInformation
 import org.apache.ivy.Ivy
@@ -37,6 +38,7 @@ import org.apache.ivy.core.settings.IvySettings
 import org.apache.ivy.plugins.matcher.GlobPatternMatcher
 import org.apache.ivy.plugins.repository.file.FileRepository
 import org.apache.ivy.plugins.resolver.{FileSystemResolver, ChainResolver, IBiblioResolver}
+
 import org.apache.spark.api.r.RUtils
 import org.apache.spark.SPARK_VERSION
 import org.apache.spark.deploy.rest._
@@ -275,21 +277,18 @@ object SparkSubmit {
     // Resolve maven dependencies if there are any and add classpath to jars. Add them to py-files
     // too for packages that include Python code
-    val resolvedMavenCoordinates =
-      SparkSubmitUtils.resolveMavenCoordinates(
-        args.packages, Option(args.repositories), Option(args.ivyRepoPath))
-    if (!resolvedMavenCoordinates.trim.isEmpty) {
-      if (args.jars == null || args.jars.trim.isEmpty) {
-        args.jars = resolvedMavenCoordinates
+    val exclusions: Seq[String] =
+      if (!StringUtils.isBlank(args.packagesExclusions)) {
+        args.packagesExclusions.split(",")
       } else {
-        args.jars += s",$resolvedMavenCoordinates"
+        Nil
       }
+    val resolvedMavenCoordinates = SparkSubmitUtils.resolveMavenCoordinates(args.packages,
+      Some(args.repositories), Some(args.ivyRepoPath), exclusions = exclusions)
+    if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
+      args.jars = mergeFileLists(args.jars, resolvedMavenCoordinates)
       if (args.isPython) {
-        if (args.pyFiles == null || args.pyFiles.trim.isEmpty) {
-          args.pyFiles = resolvedMavenCoordinates
-        } else {
-          args.pyFiles += s",$resolvedMavenCoordinates"
-        }
+        args.pyFiles = mergeFileLists(args.pyFiles, resolvedMavenCoordinates)
       }
     }
 
@@ -736,7 +735,7 @@ object SparkSubmit {
    * no files, into a single comma-separated string.
    */
   private def mergeFileLists(lists: String*): String = {
-    val merged = lists.filter(_ != null)
+    val merged = lists.filterNot(StringUtils.isBlank)
       .flatMap(_.split(","))
       .mkString(",")
     if (merged == "") null else merged
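The two behavioral changes above are small enough to model standalone.
Below is a minimal Scala sketch, editorially added and not code from the
patch: it mirrors the exclusion parsing and the post-patch
mergeFileLists, substituting a plain blank check for commons-lang3's
StringUtils.isBlank so it runs without extra jars.

    // Standalone sketch of the logic this patch adds to SparkSubmit.
    object PackagesMergeSketch {
      // Stand-in for org.apache.commons.lang3.StringUtils.isBlank.
      private def isBlank(s: String): Boolean = s == null || s.trim.isEmpty

      // "g1:a1,g2:a2" -> Seq("g1:a1", "g2:a2"); null or blank input -> Nil.
      def parseExclusions(packagesExclusions: String): Seq[String] =
        if (!isBlank(packagesExclusions)) packagesExclusions.split(",").toSeq
        else Nil

      // Merge comma-separated file lists, dropping blank inputs, as the
      // patched mergeFileLists does; returns null when nothing remains.
      def mergeFileLists(lists: String*): String = {
        val merged = lists.filterNot(isBlank).flatMap(_.split(",")).mkString(",")
        if (merged == "") null else merged
      }

      def main(args: Array[String]): Unit = {
        println(parseExclusions("com.google.guava:guava,org.slf4j:slf4j-api"))
        // Pre-patch callers had to special-case null or empty jar lists;
        // now blank inputs are simply filtered out before merging.
        println(mergeFileLists(null, "a.jar,b.jar", ""))  // prints a.jar,b.jar
      }
    }

Routing both args.jars and args.pyFiles through mergeFileLists is what
lets the patch delete the duplicated null/empty special-casing at each
call site.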