Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/19130#discussion_r139575943 --- Diff: core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala --- @@ -367,6 +368,54 @@ object SparkSubmit extends CommandLineUtils with Logging { }.orNull } + // When running in YARN, for some remote resources with scheme: + // 1. Hadoop FileSystem doesn't support them. + // 2. We explicitly bypass Hadoop FileSystem with "spark.yarn.dist.forceDownloadSchemes". + // We will download them to local disk prior to add to YARN's distributed cache. + // For yarn client mode, since we already download them with above code, so we only need to + // figure out the local path and replace the remote one. + if (clusterManager == YARN) { + sparkConf.setIfMissing(SecurityManager.SPARK_AUTH_SECRET_CONF, "unused") + val secMgr = new SecurityManager(sparkConf) + val forceDownloadSchemes = sparkConf.get(FORCE_DOWNLOAD_SCHEMES) + + def shouldDownload(scheme: String): Boolean = { + val isFsAvailable = () => { --- End diff -- why make it a function? Can't we just inline it?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org