Github user berngp commented on a diff in the pull request: https://github.com/apache/spark/pull/433#discussion_r11789723 --- Diff: yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala --- @@ -361,55 +361,48 @@ object ClientBase { val LOG4J_CONF_ENV_KEY: String = "SPARK_LOG4J_CONF" val LOCAL_SCHEME = "local" - // Based on code from org.apache.hadoop.mapreduce.v2.util.MRApps - def populateHadoopClasspath(conf: Configuration, env: HashMap[String, String]) { - val classpathEntries = Option(conf.getStrings( - YarnConfiguration.YARN_APPLICATION_CLASSPATH)).getOrElse( - getDefaultYarnApplicationClasspath()) - for (c <- classpathEntries) { - YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, c.trim, + def populateHadoopClasspath(conf: Configuration, env: HashMap[String, String]) = { + val classPathElementsToAdd = getYarnAppClasspath(conf) ++ getMRAppClasspath(conf) + for (c <- classPathElementsToAdd.flatten) { + YarnSparkHadoopUtil.addToEnvironment( + env, + Environment.CLASSPATH.name, + c.trim, File.pathSeparator) } + classPathElementsToAdd + } - val mrClasspathEntries = Option(conf.getStrings( - "mapreduce.application.classpath")).getOrElse( - getDefaultMRApplicationClasspath()) - if (mrClasspathEntries != null) { - for (c <- mrClasspathEntries) { - YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, c.trim, - File.pathSeparator) - } + private def getYarnAppClasspath(conf: Configuration): Option[Array[String]] = + Option(conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH)) match { + case s: Some[Array[String]] => s + case None => getDefaultYarnApplicationClasspath } - } - def getDefaultYarnApplicationClasspath(): Array[String] = { - try { - val field = classOf[MRJobConfig].getField("DEFAULT_YARN_APPLICATION_CLASSPATH") - field.get(null).asInstanceOf[Array[String]] - } catch { - case err: NoSuchFieldError => null - case err: NoSuchFieldException => null + private def getMRAppClasspath(conf: 
Configuration): Option[Array[String]] = + Option(conf.getStrings("mapreduce.application.classpath")) match { + case s: Some[Array[String]] => s + case None => getDefaultMRApplicationClasspath } - } + + def getDefaultYarnApplicationClasspath: Option[Array[String]] = Try[Array[String]] { + val field = classOf[YarnConfiguration].getField("DEFAULT_YARN_APPLICATION_CLASSPATH") + field.get(null).asInstanceOf[Array[String]] + }.toOption /** * In Hadoop 0.23, the MR application classpath comes with the YARN application * classpath. In Hadoop 2.0, it's an array of Strings, and in 2.2+ it's a String. * So we need to use reflection to retrieve it. */ - def getDefaultMRApplicationClasspath(): Array[String] = { - try { - val field = classOf[MRJobConfig].getField("DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH") - if (field.getType == classOf[String]) { - StringUtils.getStrings(field.get(null).asInstanceOf[String]) - } else { - field.get(null).asInstanceOf[Array[String]] - } - } catch { - case err: NoSuchFieldError => null - case err: NoSuchFieldException => null + def getDefaultMRApplicationClasspath: Option[Array[String]] = Try[Array[String]] { + val field = classOf[MRJobConfig].getField("DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH") + if (field.getType == classOf[String]) { + StringUtils.getStrings(field.get(null).asInstanceOf[String]).toArray + } else { + field.get(null).asInstanceOf[Array[String]] } - } + }.toOption --- End diff -- Thanks @pwendell, the easiest thing could be to avoid a longer discussion and just use the `try-catch` block but I do think that the `Try` monad is a good option. It won't swallow _fatal_ errors based on [NonFatal](https://github.com/scala/scala/blob/v2.10.3/src/library/scala/util/control/NonFatal.scala#L1) and can handle specific error recovery scenarios through `recoverWith`. 
What it avoids is returning `null`, which is generally not good practice, and the accidental _bubbling_ of errors that should not bring down a program/application — all while enabling _functional composition_. In the end, I will align with the view of the **Apache Spark Project**. Let me know and I'll make the changes without further argument. However, if we decide to use the `try-catch` block, please let me know how `populateHadoopClasspath` should handle the exception: 1. _bubble_ it up whenever it is neither a `NoSuchFieldError` nor a `NoSuchFieldException`; or 2. ignore every exception type, potentially just logging warnings. We also need to decide how the returned value is handled: 1. treat `null` as empty; or 2. make no assumption that `null` will be returned, and treat it as the original implementation did.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---