vanzin commented on a change in pull request #23260: [SPARK-26311][YARN] New feature: custom log URL for stdout/stderr URL: https://github.com/apache/spark/pull/23260#discussion_r240401528
########## File path: resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala ########## @@ -246,13 +246,56 @@ private[yarn] class ExecutorRunnable( sys.env.get("SPARK_USER").foreach { user => val containerId = ConverterUtils.toString(c.getId) val address = c.getNodeHttpAddress - val baseUrl = s"$httpScheme$address/node/containerlogs/$containerId/$user" - env("SPARK_LOG_URL_STDERR") = s"$baseUrl/stderr?start=-4096" - env("SPARK_LOG_URL_STDOUT") = s"$baseUrl/stdout?start=-4096" + sparkConf.get(config.CUSTOM_LOG_URL) match { + case Some(customUrl) => + val pathVariables = ExecutorRunnable.buildPathVariables(httpScheme, address, + YarnConfiguration.getClusterId(conf), containerId, user) + val envNameToFileNameMap = Map("SPARK_LOG_URL_STDERR" -> "stderr", + "SPARK_LOG_URL_STDOUT" -> "stdout") + val logUrls = ExecutorRunnable.replaceLogUrls(customUrl, pathVariables, + envNameToFileNameMap) + + logUrls.foreach { case (envName, url) => + env(envName) = url + } + case None => + val baseUrl = s"$httpScheme$address/node/containerlogs/$containerId/$user" + env("SPARK_LOG_URL_STDERR") = s"$baseUrl/stderr?start=-4096" + env("SPARK_LOG_URL_STDOUT") = s"$baseUrl/stdout?start=-4096" + } } } env } } + +private[yarn] object ExecutorRunnable { + val LOG_URL_PATTERN_HTTP_SCHEME = "{{HttpScheme}}" Review comment: These constants are only used in the methods below. Also, the methods below are only called from a single place. It seems to me that you should have a single method that implements all this logic. For the same reasons, you could also avoid introducing this new object. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org