spark git commit: [SPARK-18160][CORE][YARN] spark.files & spark.jars should not be passed to driver in yarn mode
Repository: spark Updated Branches: refs/heads/branch-2.0 eb790c5b1 -> 1696bcfad [SPARK-18160][CORE][YARN] spark.files & spark.jars should not be passed to driver in yarn mode ## What changes were proposed in this pull request? spark.files is still passed to the driver in yarn mode, so SparkContext will still handle it, which causes the error described in the JIRA. ## How was this patch tested? Tested manually in a 5-node cluster. As this issue only happens in a multi-node cluster, I didn't write a test for it. Author: Jeff Zhang Closes #15669 from zjffdu/SPARK-18160. (cherry picked from commit 3c24299b71e23e159edbb972347b13430f92a465) Signed-off-by: Marcelo Vanzin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1696bcfa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1696bcfa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1696bcfa Branch: refs/heads/branch-2.0 Commit: 1696bcfadabb91693bf1ab556a321949d1e4fe45 Parents: eb790c5 Author: Jeff Zhang Authored: Wed Nov 2 11:47:45 2016 -0700 Committer: Marcelo Vanzin Committed: Wed Nov 2 11:48:25 2016 -0700 -- .../scala/org/apache/spark/SparkContext.scala | 29 .../org/apache/spark/deploy/yarn/Client.scala | 5 +++- 2 files changed, 10 insertions(+), 24 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1696bcfa/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index e9f9d72..43cec70 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1705,29 +1705,12 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli key = uri.getScheme match { // A JAR file which exists only on the driver node case null | "file" => -if (master == "yarn" && deployMode == "cluster") { - // In order for this to work in yarn cluster mode the user must specify the - // --addJars option to the client to upload the file into the distributed cache - // of the AM to make it show up in the current working directory. - val fileName = new Path(uri.getPath).getName() - try { -env.rpcEnv.fileServer.addJar(new File(fileName)) - } catch { -case e: Exception => - // For now just log an error but allow to go through so spark examples work. - // The spark examples don't really need the jar distributed since its also - // the app jar. - logError("Error adding jar (" + e + "), was the --addJars option used?") - null - } -} else { - try { -env.rpcEnv.fileServer.addJar(new File(uri.getPath)) - } catch { -case exc: FileNotFoundException => - logError(s"Jar not found at $path") - null - } +try { + env.rpcEnv.fileServer.addJar(new File(uri.getPath)) +} catch { + case exc: FileNotFoundException => +logError(s"Jar not found at $path") +null } // A JAR file which exists locally on every worker node case "local" => http://git-wip-us.apache.org/repos/asf/spark/blob/1696bcfa/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index a47a64c..981da4b 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1170,7 +1170,10 @@ private object Client extends Logging { // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes System.setProperty("SPARK_YARN_MODE", "true") val sparkConf = new SparkConf - +// SparkSubmit would use yarn cache to distribute files & jars in yarn mode, +// so remove them from sparkConf here for yarn mode. +sparkConf.remove("spark.jars") +sparkConf.remove("spark.files") val args = new ClientArguments(argStrings) new Client(args, sparkConf).run() } - To unsubscribe, e-mail:
spark git commit: [SPARK-18160][CORE][YARN] spark.files & spark.jars should not be passed to driver in yarn mode
Repository: spark Updated Branches: refs/heads/branch-2.1 0093257ea -> bd3ea6595 [SPARK-18160][CORE][YARN] spark.files & spark.jars should not be passed to driver in yarn mode ## What changes were proposed in this pull request? spark.files is still passed to the driver in yarn mode, so SparkContext will still handle it, which causes the error described in the JIRA. ## How was this patch tested? Tested manually in a 5-node cluster. As this issue only happens in a multi-node cluster, I didn't write a test for it. Author: Jeff Zhang Closes #15669 from zjffdu/SPARK-18160. (cherry picked from commit 3c24299b71e23e159edbb972347b13430f92a465) Signed-off-by: Marcelo Vanzin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bd3ea659 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bd3ea659 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bd3ea659 Branch: refs/heads/branch-2.1 Commit: bd3ea6595788a4fe5399e6c6c18d8cb6872c Parents: 0093257 Author: Jeff Zhang Authored: Wed Nov 2 11:47:45 2016 -0700 Committer: Marcelo Vanzin Committed: Wed Nov 2 11:48:09 2016 -0700 -- .../scala/org/apache/spark/SparkContext.scala | 29 .../org/apache/spark/deploy/yarn/Client.scala | 5 +++- 2 files changed, 10 insertions(+), 24 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bd3ea659/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 4694790..63478c8 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1716,29 +1716,12 @@ class SparkContext(config: SparkConf) extends Logging { key = uri.getScheme match { // A JAR file which exists only on the driver node case null | "file" => -if (master == "yarn" && deployMode == "cluster") { - // In order for this to work in yarn cluster mode the user must specify the - // --addJars option to the client to upload the file into the distributed cache - // of the AM to make it show up in the current working directory. - val fileName = new Path(uri.getPath).getName() - try { -env.rpcEnv.fileServer.addJar(new File(fileName)) - } catch { -case e: Exception => - // For now just log an error but allow to go through so spark examples work. - // The spark examples don't really need the jar distributed since its also - // the app jar. - logError("Error adding jar (" + e + "), was the --addJars option used?") - null - } -} else { - try { -env.rpcEnv.fileServer.addJar(new File(uri.getPath)) - } catch { -case exc: FileNotFoundException => - logError(s"Jar not found at $path") - null - } +try { + env.rpcEnv.fileServer.addJar(new File(uri.getPath)) +} catch { + case exc: FileNotFoundException => +logError(s"Jar not found at $path") +null } // A JAR file which exists locally on every worker node case "local" => http://git-wip-us.apache.org/repos/asf/spark/blob/bd3ea659/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 55e4a83..053a786 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1202,7 +1202,10 @@ private object Client extends Logging { // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes System.setProperty("SPARK_YARN_MODE", "true") val sparkConf = new SparkConf - +// SparkSubmit would use yarn cache to distribute files & jars in yarn mode, +// so remove them from sparkConf here for yarn mode. +sparkConf.remove("spark.jars") +sparkConf.remove("spark.files") val args = new ClientArguments(argStrings) new Client(args, sparkConf).run() } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional
spark git commit: [SPARK-18160][CORE][YARN] spark.files & spark.jars should not be passed to driver in yarn mode
Repository: spark Updated Branches: refs/heads/master 02f203107 -> 3c24299b7 [SPARK-18160][CORE][YARN] spark.files & spark.jars should not be passed to driver in yarn mode ## What changes were proposed in this pull request? spark.files is still passed to the driver in yarn mode, so SparkContext will still handle it, which causes the error described in the JIRA. ## How was this patch tested? Tested manually in a 5-node cluster. As this issue only happens in a multi-node cluster, I didn't write a test for it. Author: Jeff Zhang Closes #15669 from zjffdu/SPARK-18160. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3c24299b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3c24299b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3c24299b Branch: refs/heads/master Commit: 3c24299b71e23e159edbb972347b13430f92a465 Parents: 02f2031 Author: Jeff Zhang Authored: Wed Nov 2 11:47:45 2016 -0700 Committer: Marcelo Vanzin Committed: Wed Nov 2 11:47:45 2016 -0700 -- .../scala/org/apache/spark/SparkContext.scala | 29 .../org/apache/spark/deploy/yarn/Client.scala | 5 +++- 2 files changed, 10 insertions(+), 24 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3c24299b/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 4694790..63478c8 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1716,29 +1716,12 @@ class SparkContext(config: SparkConf) extends Logging { key = uri.getScheme match { // A JAR file which exists only on the driver node case null | "file" => -if (master == "yarn" && deployMode == "cluster") { - // In order for this to work in yarn cluster mode the user must specify the - // --addJars option to the client to upload the file into the distributed cache - // of the AM to make it show up in the current working directory. - val fileName = new Path(uri.getPath).getName() - try { -env.rpcEnv.fileServer.addJar(new File(fileName)) - } catch { -case e: Exception => - // For now just log an error but allow to go through so spark examples work. - // The spark examples don't really need the jar distributed since its also - // the app jar. - logError("Error adding jar (" + e + "), was the --addJars option used?") - null - } -} else { - try { -env.rpcEnv.fileServer.addJar(new File(uri.getPath)) - } catch { -case exc: FileNotFoundException => - logError(s"Jar not found at $path") - null - } +try { + env.rpcEnv.fileServer.addJar(new File(uri.getPath)) +} catch { + case exc: FileNotFoundException => +logError(s"Jar not found at $path") +null } // A JAR file which exists locally on every worker node case "local" => http://git-wip-us.apache.org/repos/asf/spark/blob/3c24299b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 55e4a83..053a786 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1202,7 +1202,10 @@ private object Client extends Logging { // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes System.setProperty("SPARK_YARN_MODE", "true") val sparkConf = new SparkConf - +// SparkSubmit would use yarn cache to distribute files & jars in yarn mode, +// so remove them from sparkConf here for yarn mode. +sparkConf.remove("spark.jars") +sparkConf.remove("spark.files") val args = new ClientArguments(argStrings) new Client(args, sparkConf).run() } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org