mridulm commented on code in PR #42357:
URL: https://github.com/apache/spark/pull/42357#discussion_r1329512411


##########
resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala:
##########
@@ -533,9 +536,12 @@ private[spark] class Client(
     // If preload is enabled, preload the statCache with the files in the 
directories
     val statCache = if (statCachePreloadEnabled) {
       // Consider only following configurations, as they involve the 
distribution of multiple files
-      val files = sparkConf.get(SPARK_JARS).orNull ++ 
sparkConf.get(JARS_TO_DISTRIBUTE) ++
-        sparkConf.get(FILES_TO_DISTRIBUTE) ++ 
sparkConf.get(ARCHIVES_TO_DISTRIBUTE) ++
-        sparkConf.get(PY_FILES) ++ pySparkArchives
+      var files = sparkConf.get(JARS_TO_DISTRIBUTE) ++ 
sparkConf.get(FILES_TO_DISTRIBUTE) ++
+        sparkConf.get(ARCHIVES_TO_DISTRIBUTE) ++ sparkConf.get(PY_FILES) ++ 
pySparkArchives
+      if (!sparkConf.get(SPARK_JARS).isEmpty) {

Review Comment:
   Why is this `if` condition needed? Can't this be added directly to `files` in 
the previous line itself?



##########
resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala:
##########
@@ -494,11 +494,14 @@ private[spark] class Client(
       fsLookup: URI => FileSystem = FileSystem.get(_, hadoopConf)): 
HashMap[URI, FileStatus] = {
     val statCache = HashMap[URI, FileStatus]()
     directoriesToBePreloaded(files).foreach { case (dir: URI, filesInDir: 
HashSet[String]) =>
-      fsLookup(dir).listStatus(new Path(dir)).filter(_.isFile()).
-        filter(f => filesInDir.contains(f.getPath.getName)).foreach { 
fileStatus =>
-          val uri = fileStatus.getPath.toUri
+      fsLookup(dir).listStatus(new Path(dir), new PathFilter() {
+        override def accept(path: Path): Boolean = 
filesInDir.contains(path.getName)
+      }).filter(_.isFile()).foreach { fileStatus =>
+        val uri = fileStatus.getPath.toUri
+        if (uri != null) {

Review Comment:
   IIRC, `uri` can't be `null` - why was this condition added?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to