Repository: spark Updated Branches: refs/heads/master 695d9a0fd -> a313a5ae7
[SPARK-15405][YARN] Remove unnecessary upload of config archive. We only need one copy of it. The client code that was uploading the second copy just needs to be modified to update the metadata in the cache, so that the AM knows where to find the configuration. Tested by running app on YARN and verifying in the logs only one archive is uploaded. Author: Marcelo Vanzin <van...@cloudera.com> Closes #13232 from vanzin/SPARK-15405. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a313a5ae Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a313a5ae Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a313a5ae Branch: refs/heads/master Commit: a313a5ae74ae4e7686283657ba56076222317595 Parents: 695d9a0 Author: Marcelo Vanzin <van...@cloudera.com> Authored: Tue May 24 10:26:55 2016 -0700 Committer: Marcelo Vanzin <van...@cloudera.com> Committed: Tue May 24 10:26:55 2016 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/deploy/yarn/Client.scala | 9 +++++---- .../org/apache/spark/deploy/yarn/ClientSuite.scala | 12 ++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a313a5ae/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala ---------------------------------------------------------------------- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 3f6d7b2..a12391d 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -591,10 +591,11 @@ private[spark] class Client( copyFileToRemote(destDir, localConfArchive, replication, force = true, destName = Some(LOCALIZED_CONF_ARCHIVE)) - val (_, confLocalizedPath) = distribute(createConfArchive().toURI().getPath(), - resType = LocalResourceType.ARCHIVE, - destName = Some(LOCALIZED_CONF_DIR)) - require(confLocalizedPath != null) + // Manually add the config archive to the cache manager so that the AM is launched with + // the proper files set up. + distCacheMgr.addResource( + remoteFs, hadoopConf, remoteConfArchivePath, localResources, LocalResourceType.ARCHIVE, + LOCALIZED_CONF_DIR, statCache, appMasterOnly = false) // Clear the cache-related entries from the configuration to avoid them polluting the // UI's environment page. This works for client mode; for cluster mode, this is handled http://git-wip-us.apache.org/repos/asf/spark/blob/a313a5ae/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala ---------------------------------------------------------------------- diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index a408c48..0a4f291 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -144,9 +144,16 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll .set(SPARK_JARS, Seq(SPARK)) .set("spark.yarn.dist.jars", ADDED) val client = createClient(sparkConf, args = Array("--jar", USER)) + doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]), + any(classOf[Path]), anyShort(), anyBoolean(), any()) val tempDir = Utils.createTempDir() try { + // Because we mocked "copyFileToRemote" above to avoid having to create fake local files, + // we need to create a fake config archive in the temp dir to avoid having + // prepareLocalResources throw an exception. + new FileOutputStream(new File(tempDir, LOCALIZED_CONF_ARCHIVE)).close() + client.prepareLocalResources(new Path(tempDir.getAbsolutePath()), Nil) sparkConf.get(APP_JAR) should be (Some(USER)) @@ -384,10 +391,7 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll conf: Configuration = new Configuration(), args: Array[String] = Array()): Client = { val clientArgs = new ClientArguments(args) - val client = spy(new Client(clientArgs, conf, sparkConf)) - doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]), - any(classOf[Path]), anyShort(), anyBoolean(), any()) - client + spy(new Client(clientArgs, conf, sparkConf)) } private def classpath(client: Client): Array[String] = { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org