spark git commit: [SPARK-24446][YARN] Properly quote library path for YARN.

vanzin Wed, 27 Jun 2018 10:58:32 -0700

Repository: spark
Updated Branches:
  refs/heads/master 6a0b77a55 -> 78ecb6d45



[SPARK-24446][YARN] Properly quote library path for YARN.

Because the way YARN executes commands via bash -c, everything needs
to be quoted so that the whole command is fully contained inside a
bash string and is interpreted correctly when the string is read by
bash. This is a bit different than the quoting done when executing
things as if typing in a bash shell.

Tweaked unit tests to exercise the bad behavior, which would cause
existing tests to time out without the fix. Also tested on a real
cluster, verifying the shell script created by YARN to run the
container.

Author: Marcelo Vanzin <van...@cloudera.com>

Closes #21476 from vanzin/SPARK-24446.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78ecb6d4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78ecb6d4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78ecb6d4

Branch: refs/heads/master
Commit: 78ecb6d457970b136a2e0e0e27d170c84ea28eac
Parents: 6a0b77a
Author: Marcelo Vanzin <van...@cloudera.com>
Authored: Wed Jun 27 10:57:29 2018 -0700
Committer: Marcelo Vanzin <van...@cloudera.com>
Committed: Wed Jun 27 10:57:29 2018 -0700

----------------------------------------------------------------------
 .../org/apache/spark/deploy/yarn/Client.scala   | 22 ++++++++++++++++++--
 .../spark/deploy/yarn/ExecutorRunnable.scala    | 11 +++++-----
 .../deploy/yarn/BaseYarnClusterSuite.scala      |  9 ++++++++
 3 files changed, 34 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/78ecb6d4/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------
diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 7225ff0..793d012 100644
--- 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -899,7 +899,8 @@ private[spark] class Client(
       val libraryPaths = Seq(sparkConf.get(DRIVER_LIBRARY_PATH),
         sys.props.get("spark.driver.libraryPath")).flatten
       if (libraryPaths.nonEmpty) {
-        prefixEnv = Some(getClusterPath(sparkConf, 
Utils.libraryPathEnvPrefix(libraryPaths)))
+        prefixEnv = 
Some(createLibraryPathPrefix(libraryPaths.mkString(File.pathSeparator),
+          sparkConf))
       }
       if (sparkConf.get(AM_JAVA_OPTIONS).isDefined) {
         logWarning(s"${AM_JAVA_OPTIONS.key} will not take effect in cluster 
mode")
@@ -921,7 +922,7 @@ private[spark] class Client(
           .map(YarnSparkHadoopUtil.escapeForShell)
       }
       sparkConf.get(AM_LIBRARY_PATH).foreach { paths =>
-        prefixEnv = Some(getClusterPath(sparkConf, 
Utils.libraryPathEnvPrefix(Seq(paths))))
+        prefixEnv = Some(createLibraryPathPrefix(paths, sparkConf))
       }
     }
 
@@ -1485,6 +1486,23 @@ private object Client extends Logging {
     YarnAppReport(report.getYarnApplicationState(), 
report.getFinalApplicationStatus(), diagsOpt)
   }
 
+  /**
+   * Create a properly quoted and escaped library path string to be added as a 
prefix to the command
+   * executed by YARN. This is different from normal quoting / escaping due to 
YARN executing the
+   * command through "bash -c".
+   */
+  def createLibraryPathPrefix(libpath: String, conf: SparkConf): String = {
+    val cmdPrefix = if (Utils.isWindows) {
+      Utils.libraryPathEnvPrefix(Seq(libpath))
+    } else {
+      val envName = Utils.libraryPathEnvName
+      // For quotes, escape both the quote and the escape character when 
encoding in the command
+      // string.
+      val quoted = libpath.replace("\"", "\\\\\\\"")
+      envName + "=\\\"" + quoted + File.pathSeparator + "$" + envName + "\\\""
+    }
+    getClusterPath(conf, cmdPrefix)
+  }
 }
 
 private[spark] class YarnClusterApplication extends SparkApplication {

http://git-wip-us.apache.org/repos/asf/spark/blob/78ecb6d4/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
----------------------------------------------------------------------
diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
index a2a18cd..49a0b93 100644
--- 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
+++ 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
@@ -131,10 +131,6 @@ private[yarn] class ExecutorRunnable(
     // Extra options for the JVM
     val javaOpts = ListBuffer[String]()
 
-    // Set the environment variable through a command prefix
-    // to append to the existing value of the variable
-    var prefixEnv: Option[String] = None
-
     // Set the JVM memory
     val executorMemoryString = executorMemory + "m"
     javaOpts += "-Xmx" + executorMemoryString
@@ -144,8 +140,11 @@ private[yarn] class ExecutorRunnable(
       val subsOpt = Utils.substituteAppNExecIds(opts, appId, executorId)
       javaOpts ++= 
Utils.splitCommandString(subsOpt).map(YarnSparkHadoopUtil.escapeForShell)
     }
-    sparkConf.get(EXECUTOR_LIBRARY_PATH).foreach { p =>
-      prefixEnv = Some(Client.getClusterPath(sparkConf, 
Utils.libraryPathEnvPrefix(Seq(p))))
+
+    // Set the library path through a command prefix to append to the existing 
value of the
+    // env variable.
+    val prefixEnv = sparkConf.get(EXECUTOR_LIBRARY_PATH).map { libPath =>
+      Client.createLibraryPathPrefix(libPath, sparkConf)
     }
 
     javaOpts += "-Djava.io.tmpdir=" +

http://git-wip-us.apache.org/repos/asf/spark/blob/78ecb6d4/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
----------------------------------------------------------------------
diff --git 
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
 
b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
index ac67f21..b0abcc9 100644
--- 
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
+++ 
b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
@@ -36,6 +36,7 @@ import org.scalatest.concurrent.Eventually._
 import org.apache.spark._
 import org.apache.spark.deploy.yarn.config._
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._
 import org.apache.spark.launcher._
 import org.apache.spark.util.Utils
 
@@ -216,6 +217,14 @@ abstract class BaseYarnClusterSuite
     props.setProperty("spark.driver.extraJavaOptions", "-Dfoo=\"one two 
three\"")
     props.setProperty("spark.executor.extraJavaOptions", "-Dfoo=\"one two 
three\"")
 
+    // SPARK-24446: make sure special characters in the library path do not 
break containers.
+    if (!Utils.isWindows) {
+      val libPath = """/tmp/does not 
exist:$PWD/tmp:/tmp/quote":/tmp/ampersand&"""
+      props.setProperty(AM_LIBRARY_PATH.key, libPath)
+      props.setProperty(DRIVER_LIBRARY_PATH.key, libPath)
+      props.setProperty(EXECUTOR_LIBRARY_PATH.key, libPath)
+    }
+
     yarnCluster.getConfig().asScala.foreach { e =>
       props.setProperty("spark.hadoop." + e.getKey(), e.getValue())
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-24446][YARN] Properly quote library path for YARN.

Reply via email to