HeartSaVioR commented on a change in pull request #23260: [SPARK-26311][YARN] 
New feature: custom log URL for stdout/stderr
URL: https://github.com/apache/spark/pull/23260#discussion_r240446659
 
 

 ##########
 File path: 
resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
 ##########
 @@ -246,13 +246,56 @@ private[yarn] class ExecutorRunnable(
       sys.env.get("SPARK_USER").foreach { user =>
         val containerId = ConverterUtils.toString(c.getId)
         val address = c.getNodeHttpAddress
-        val baseUrl = 
s"$httpScheme$address/node/containerlogs/$containerId/$user"
 
-        env("SPARK_LOG_URL_STDERR") = s"$baseUrl/stderr?start=-4096"
-        env("SPARK_LOG_URL_STDOUT") = s"$baseUrl/stdout?start=-4096"
+        sparkConf.get(config.CUSTOM_LOG_URL) match {
+          case Some(customUrl) =>
+            val pathVariables = 
ExecutorRunnable.buildPathVariables(httpScheme, address,
+              YarnConfiguration.getClusterId(conf), containerId, user)
+            val envNameToFileNameMap = Map("SPARK_LOG_URL_STDERR" -> "stderr",
+              "SPARK_LOG_URL_STDOUT" -> "stdout")
+            val logUrls = ExecutorRunnable.replaceLogUrls(customUrl, 
pathVariables,
+              envNameToFileNameMap)
+
+            logUrls.foreach { case (envName, url) =>
+              env(envName) = url
+            }
+          case None =>
+            val baseUrl = 
s"$httpScheme$address/node/containerlogs/$containerId/$user"
+            env("SPARK_LOG_URL_STDERR") = s"$baseUrl/stderr?start=-4096"
+            env("SPARK_LOG_URL_STDOUT") = s"$baseUrl/stdout?start=-4096"
+          }
       }
     }
 
     env
   }
 }
+
+private[yarn] object ExecutorRunnable {
+  val LOG_URL_PATTERN_HTTP_SCHEME = "{{HttpScheme}}"
 
 Review comment:
   Ah OK. I'm in favor of avoiding the direct use of string constants, but I 
don't have a strong opinion on it. Will address.
   
   And yes, I can put them in a single method, but placing a new method in the 
class would put an unnecessary burden on the test code, since ExecutorRunnable 
requires a lot of parameters to be instantiated.
   
   If we want to add an end-to-end test (instantiating a YARN cluster and 
running executors), we still need to instantiate ExecutorRunnable (I think we 
are already covering that here [1]), but if we just want to make sure the logic 
works properly, we might want to keep this as a new object and add a test 
against the object to avoid instantiating ExecutorRunnable. WDYT?
   
   1. 
https://github.com/apache/spark/blob/05cf81e6de3d61ddb0af81cd179665693f23351f/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala#L442-L461

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to