vanzin commented on a change in pull request #23260: [SPARK-26311][YARN] New 
feature: custom log URL for stdout/stderr
URL: https://github.com/apache/spark/pull/23260#discussion_r240401528
 
 

 ##########
 File path: 
resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
 ##########
 @@ -246,13 +246,56 @@ private[yarn] class ExecutorRunnable(
       sys.env.get("SPARK_USER").foreach { user =>
         val containerId = ConverterUtils.toString(c.getId)
         val address = c.getNodeHttpAddress
-        val baseUrl = 
s"$httpScheme$address/node/containerlogs/$containerId/$user"
 
-        env("SPARK_LOG_URL_STDERR") = s"$baseUrl/stderr?start=-4096"
-        env("SPARK_LOG_URL_STDOUT") = s"$baseUrl/stdout?start=-4096"
+        sparkConf.get(config.CUSTOM_LOG_URL) match {
+          case Some(customUrl) =>
+            val pathVariables = 
ExecutorRunnable.buildPathVariables(httpScheme, address,
+              YarnConfiguration.getClusterId(conf), containerId, user)
+            val envNameToFileNameMap = Map("SPARK_LOG_URL_STDERR" -> "stderr",
+              "SPARK_LOG_URL_STDOUT" -> "stdout")
+            val logUrls = ExecutorRunnable.replaceLogUrls(customUrl, 
pathVariables,
+              envNameToFileNameMap)
+
+            logUrls.foreach { case (envName, url) =>
+              env(envName) = url
+            }
+          case None =>
+            val baseUrl = 
s"$httpScheme$address/node/containerlogs/$containerId/$user"
+            env("SPARK_LOG_URL_STDERR") = s"$baseUrl/stderr?start=-4096"
+            env("SPARK_LOG_URL_STDOUT") = s"$baseUrl/stdout?start=-4096"
+          }
       }
     }
 
     env
   }
 }
+
+private[yarn] object ExecutorRunnable {
+  val LOG_URL_PATTERN_HTTP_SCHEME = "{{HttpScheme}}"
 
 Review comment:
   These constants are only used in the methods below, and those methods are 
only called from a single place.
   
   It seems to me you should have a single method that implements all of this 
logic. For the same reasons, you could also avoid adding this new object.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to