mridulm commented on code in PR #42357:
URL: https://github.com/apache/spark/pull/42357#discussion_r1329512411
##########
resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala:
##########
@@ -533,9 +536,12 @@ private[spark] class Client(
     // If preload is enabled, preload the statCache with the files in the directories
     val statCache = if (statCachePreloadEnabled) {
       // Consider only following configurations, as they involve the distribution of multiple files
-      val files = sparkConf.get(SPARK_JARS).orNull ++ sparkConf.get(JARS_TO_DISTRIBUTE) ++
-        sparkConf.get(FILES_TO_DISTRIBUTE) ++ sparkConf.get(ARCHIVES_TO_DISTRIBUTE) ++
-        sparkConf.get(PY_FILES) ++ pySparkArchives
+      var files = sparkConf.get(JARS_TO_DISTRIBUTE) ++ sparkConf.get(FILES_TO_DISTRIBUTE) ++
+        sparkConf.get(ARCHIVES_TO_DISTRIBUTE) ++ sparkConf.get(PY_FILES) ++ pySparkArchives
+      if (!sparkConf.get(SPARK_JARS).isEmpty) {

Review Comment:
   Why this `if` condition? Can't we add it directly to `files` in the previous line itself?

##########
resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala:
##########
@@ -494,11 +494,14 @@ private[spark] class Client(
       fsLookup: URI => FileSystem = FileSystem.get(_, hadoopConf)): HashMap[URI, FileStatus] = {
     val statCache = HashMap[URI, FileStatus]()
     directoriesToBePreloaded(files).foreach { case (dir: URI, filesInDir: HashSet[String]) =>
-      fsLookup(dir).listStatus(new Path(dir)).filter(_.isFile()).
-        filter(f => filesInDir.contains(f.getPath.getName)).foreach { fileStatus =>
-          val uri = fileStatus.getPath.toUri
+      fsLookup(dir).listStatus(new Path(dir), new PathFilter() {
+        override def accept(path: Path): Boolean = filesInDir.contains(path.getName)
+      }).filter(_.isFile()).foreach { fileStatus =>
+        val uri = fileStatus.getPath.toUri
+        if (uri != null) {

Review Comment:
   IIRC `uri` can't be `null` - why was this condition added?
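For context on the first comment, here is a minimal, self-contained sketch of what the suggestion seems to amount to: an optional config entry can be folded into the concatenation directly via `getOrElse(Nil)` instead of being appended in a separate `if` branch. The object and parameter names below are hypothetical stand-ins for the `sparkConf.get(...)` lookups in `Client.scala`, assuming `SPARK_JARS` resolves to an `Option[Seq[String]]` and the other entries to `Seq[String]`; this is not the actual patch.

```scala
// Illustrative only: fold an optional config into the concatenation with
// getOrElse(Nil) rather than appending it under a separate `if`.
object PreloadFilesSketch {
  // Stand-ins for sparkConf.get(SPARK_JARS), sparkConf.get(JARS_TO_DISTRIBUTE), etc.
  def collectPreloadFiles(
      sparkJars: Option[Seq[String]],
      jarsToDistribute: Seq[String],
      filesToDistribute: Seq[String],
      archivesToDistribute: Seq[String],
      pyFiles: Seq[String],
      pySparkArchives: Seq[String]): Seq[String] = {
    sparkJars.getOrElse(Nil) ++ jarsToDistribute ++ filesToDistribute ++
      archivesToDistribute ++ pyFiles ++ pySparkArchives
  }

  def main(args: Array[String]): Unit = {
    // When the optional entry is unset, it simply contributes nothing.
    println(collectPreloadFiles(None, Seq("dep.jar"), Nil, Nil, Seq("lib.py"), Nil))
    // List(dep.jar, lib.py)
  }
}
```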
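For the second comment, a rough sketch of how the loop could read if the `uri != null` guard were dropped, assuming `Path.toUri` never returns `null` and hadoop-common is on the classpath. The wrapper object and method name are hypothetical; only the Hadoop calls (`FileSystem.listStatus(Path, PathFilter)`, `FileStatus.isFile`, `Path.toUri`) mirror the diff above.

```scala
import java.net.URI
import scala.collection.mutable.{HashMap, HashSet}

import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter}

object StatCachePreloadSketch {
  // Hypothetical helper, not the actual Client.scala method: list only the
  // files of interest via a PathFilter and cache their FileStatus, with no
  // null check on the URI returned by Path.toUri.
  def preloadStatCache(
      dir: URI,
      filesInDir: HashSet[String],
      fsLookup: URI => FileSystem,
      statCache: HashMap[URI, FileStatus]): Unit = {
    fsLookup(dir).listStatus(new Path(dir), new PathFilter {
      override def accept(path: Path): Boolean = filesInDir.contains(path.getName)
    }).filter(_.isFile()).foreach { fileStatus =>
      statCache.put(fileStatus.getPath.toUri, fileStatus)
    }
  }
}
```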