spark git commit: [SPARK-9263] Added flags to exclude dependencies when using --packages

2015-08-03 Thread vanzin
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 73c863ac8 -> 34335719a


[SPARK-9263] Added flags to exclude dependencies when using --packages

While the functionality is there to exclude packages, there is no flag that
lets users exclude specific dependencies in case of dependency conflicts. We
should provide users with a flag to add dependency exclusions in case the
packages are not resolved properly (or are not available due to licensing).

The flag I added was --packages-exclude, but I'm open to renaming it. I also
added matching configuration properties in case people would like to provide
dependencies and exclusions through a conf file, which is useful when the
list of dependencies or exclusions is long.
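
For illustration, a hypothetical invocation could look like the following
(the coordinates are made up, exclusions are groupId:artifactId pairs, and
the flag name is the one proposed above, so it may still change):

    ./bin/spark-submit \
      --packages a.group:example-artifact:1.0 \
      --packages-exclude a.conflicting.group:unwanted-dep \
      --class org.example.MyApp myApp.jar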

cc andrewor14 vanzin pwendell

Author: Burak Yavuz 

Closes #7599 from brkyvz/packages-exclusions and squashes the following commits:

636f410 [Burak Yavuz] addressed nits
6e54ede [Burak Yavuz] is this the culprit
b5e508e [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into packages-exclusions
154f5db [Burak Yavuz] addressed initial comments
1536d7a [Burak Yavuz] Added flags to exclude packages using --packages-exclude

(cherry picked from commit 1633d0a2612d94151f620c919425026150e69ae1)
Signed-off-by: Marcelo Vanzin 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/34335719
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/34335719
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/34335719

Branch: refs/heads/branch-1.5
Commit: 34335719a372c1951fdb4dd25b75b086faf1076f
Parents: 73c863a
Author: Burak Yavuz 
Authored: Mon Aug 3 17:42:03 2015 -0700
Committer: Marcelo Vanzin 
Committed: Mon Aug 3 17:42:35 2015 -0700

--
 .../org/apache/spark/deploy/SparkSubmit.scala   | 29 +--
 .../spark/deploy/SparkSubmitArguments.scala | 11 +++
 .../spark/deploy/SparkSubmitUtilsSuite.scala| 30 
 .../spark/launcher/SparkSubmitOptionParser.java |  2 ++
 4 files changed, 57 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/34335719/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 0b39ee8..31185c8 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -24,6 +24,7 @@ import java.security.PrivilegedExceptionAction
 
 import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
 
+import org.apache.commons.lang3.StringUtils
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.security.UserGroupInformation
 import org.apache.ivy.Ivy
@@ -37,6 +38,7 @@ import org.apache.ivy.core.settings.IvySettings
 import org.apache.ivy.plugins.matcher.GlobPatternMatcher
 import org.apache.ivy.plugins.repository.file.FileRepository
 import org.apache.ivy.plugins.resolver.{FileSystemResolver, ChainResolver, IBiblioResolver}
+
 import org.apache.spark.api.r.RUtils
 import org.apache.spark.SPARK_VERSION
 import org.apache.spark.deploy.rest._
@@ -275,21 +277,18 @@ object SparkSubmit {
 
     // Resolve maven dependencies if there are any and add classpath to jars. Add them to py-files
     // too for packages that include Python code
-    val resolvedMavenCoordinates =
-      SparkSubmitUtils.resolveMavenCoordinates(
-        args.packages, Option(args.repositories), Option(args.ivyRepoPath))
-    if (!resolvedMavenCoordinates.trim.isEmpty) {
-      if (args.jars == null || args.jars.trim.isEmpty) {
-        args.jars = resolvedMavenCoordinates
+    val exclusions: Seq[String] =
+      if (!StringUtils.isBlank(args.packagesExclusions)) {
+        args.packagesExclusions.split(",")
       } else {
-        args.jars += s",$resolvedMavenCoordinates"
+        Nil
       }
+    val resolvedMavenCoordinates = SparkSubmitUtils.resolveMavenCoordinates(args.packages,
+      Some(args.repositories), Some(args.ivyRepoPath), exclusions = exclusions)
+    if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
+      args.jars = mergeFileLists(args.jars, resolvedMavenCoordinates)
       if (args.isPython) {
-        if (args.pyFiles == null || args.pyFiles.trim.isEmpty) {
-          args.pyFiles = resolvedMavenCoordinates
-        } else {
-          args.pyFiles += s",$resolvedMavenCoordinates"
-        }
+        args.pyFiles = mergeFileLists(args.pyFiles, resolvedMavenCoordinates)
       }
     }
 
@@ -736,7 +735,7 @@ object SparkSubmit {
    * no files, into a single comma-separated string.
    */
   private def mergeFileLists(lists: String*): String = {
-    val merged = lists.filter(_ != null)
+    val merged = lists.filterNot(StringUtils.isBlank)
       .flatMap(_.split(","))
       .mkString(",")
     if (merged == "") null else merged

spark git commit: [SPARK-9263] Added flags to exclude dependencies when using --packages

2015-08-03 Thread vanzin
Repository: spark
Updated Branches:
  refs/heads/master b79b4f5f2 -> 1633d0a26


[SPARK-9263] Added flags to exclude dependencies when using --packages

While the functionality is there to exclude packages, there is no flag that
lets users exclude specific dependencies in case of dependency conflicts. We
should provide users with a flag to add dependency exclusions in case the
packages are not resolved properly (or are not available due to licensing).

The flag I added was --packages-exclude, but I'm open to renaming it. I also
added matching configuration properties in case people would like to provide
dependencies and exclusions through a conf file, which is useful when the
list of dependencies or exclusions is long.
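
As a sketch of the conf-file route mentioned above (the property names here,
spark.jars.packages and spark.jars.excludes, are taken from the Spark
configuration docs and are an assumption for this exact patch; the
coordinates are made up):

    spark.jars.packages  a.group:example-artifact:1.0
    spark.jars.excludes  a.conflicting.group:unwanted-dep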

cc andrewor14 vanzin pwendell

Author: Burak Yavuz 

Closes #7599 from brkyvz/packages-exclusions and squashes the following commits:

636f410 [Burak Yavuz] addressed nits
6e54ede [Burak Yavuz] is this the culprit
b5e508e [Burak Yavuz] Merge branch 'master' of github.com:apache/spark into packages-exclusions
154f5db [Burak Yavuz] addressed initial comments
1536d7a [Burak Yavuz] Added flags to exclude packages using --packages-exclude


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1633d0a2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1633d0a2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1633d0a2

Branch: refs/heads/master
Commit: 1633d0a2612d94151f620c919425026150e69ae1
Parents: b79b4f5
Author: Burak Yavuz 
Authored: Mon Aug 3 17:42:03 2015 -0700
Committer: Marcelo Vanzin 
Committed: Mon Aug 3 17:42:03 2015 -0700

--
 .../org/apache/spark/deploy/SparkSubmit.scala   | 29 +--
 .../spark/deploy/SparkSubmitArguments.scala | 11 +++
 .../spark/deploy/SparkSubmitUtilsSuite.scala| 30 
 .../spark/launcher/SparkSubmitOptionParser.java |  2 ++
 4 files changed, 57 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1633d0a2/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 0b39ee8..31185c8 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -24,6 +24,7 @@ import java.security.PrivilegedExceptionAction
 
 import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
 
+import org.apache.commons.lang3.StringUtils
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.security.UserGroupInformation
 import org.apache.ivy.Ivy
@@ -37,6 +38,7 @@ import org.apache.ivy.core.settings.IvySettings
 import org.apache.ivy.plugins.matcher.GlobPatternMatcher
 import org.apache.ivy.plugins.repository.file.FileRepository
 import org.apache.ivy.plugins.resolver.{FileSystemResolver, ChainResolver, IBiblioResolver}
+
 import org.apache.spark.api.r.RUtils
 import org.apache.spark.SPARK_VERSION
 import org.apache.spark.deploy.rest._
@@ -275,21 +277,18 @@ object SparkSubmit {
 
     // Resolve maven dependencies if there are any and add classpath to jars. Add them to py-files
     // too for packages that include Python code
-    val resolvedMavenCoordinates =
-      SparkSubmitUtils.resolveMavenCoordinates(
-        args.packages, Option(args.repositories), Option(args.ivyRepoPath))
-    if (!resolvedMavenCoordinates.trim.isEmpty) {
-      if (args.jars == null || args.jars.trim.isEmpty) {
-        args.jars = resolvedMavenCoordinates
+    val exclusions: Seq[String] =
+      if (!StringUtils.isBlank(args.packagesExclusions)) {
+        args.packagesExclusions.split(",")
       } else {
-        args.jars += s",$resolvedMavenCoordinates"
+        Nil
       }
+    val resolvedMavenCoordinates = SparkSubmitUtils.resolveMavenCoordinates(args.packages,
+      Some(args.repositories), Some(args.ivyRepoPath), exclusions = exclusions)
+    if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
+      args.jars = mergeFileLists(args.jars, resolvedMavenCoordinates)
       if (args.isPython) {
-        if (args.pyFiles == null || args.pyFiles.trim.isEmpty) {
-          args.pyFiles = resolvedMavenCoordinates
-        } else {
-          args.pyFiles += s",$resolvedMavenCoordinates"
-        }
+        args.pyFiles = mergeFileLists(args.pyFiles, resolvedMavenCoordinates)
       }
     }
 
@@ -736,7 +735,7 @@ object SparkSubmit {
    * no files, into a single comma-separated string.
    */
   private def mergeFileLists(lists: String*): String = {
-    val merged = lists.filter(_ != null)
+    val merged = lists.filterNot(StringUtils.isBlank)
       .flatMap(_.split(","))
       .mkString(",")
     if (merged == "") null else merged
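
For reference, a minimal standalone sketch of the merge helper above (not
the Spark source itself; the object wrapper is added here so the snippet
compiles on its own):

    import org.apache.commons.lang3.StringUtils

    object MergeSketch {
      // Merge comma-separated lists, skipping null/blank entries,
      // mirroring the mergeFileLists change in the patch above.
      def mergeFileLists(lists: String*): String = {
        val merged = lists.filterNot(StringUtils.isBlank)
          .flatMap(_.split(","))
          .mkString(",")
        if (merged == "") null else merged
      }
    }

    // MergeSketch.mergeFileLists("a.jar", null, "b.jar,c.jar")
    // returns "a.jar,b.jar,c.jar"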