[ https://issues.apache.org/jira/browse/SPARK-23155?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16726287#comment-16726287 ]
ASF GitHub Bot commented on SPARK-23155: ---------------------------------------- vanzin closed pull request #20326: [SPARK-23155][DEPLOY] log.server.url links in SHS URL: https://github.com/apache/spark/pull/20326 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala index 974697890dd03..79a81d3c74713 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala @@ -17,6 +17,7 @@ package org.apache.spark.status.api.v1 import java.io.OutputStream +import java.net.URI import java.util.{List => JList} import java.util.zip.ZipOutputStream import javax.ws.rs._ @@ -49,7 +50,7 @@ private[v1] class AbstractApplicationResource extends BaseAppResource { @GET @Path("executors") - def executorList(): Seq[ExecutorSummary] = withUI(_.store.executorList(true)) + def executorList(): Seq[ExecutorSummary] = fetchExecutors(true) @GET @Path("executors/{executorId}/threads") @@ -76,7 +77,7 @@ private[v1] class AbstractApplicationResource extends BaseAppResource { @GET @Path("allexecutors") - def allExecutorList(): Seq[ExecutorSummary] = withUI(_.store.executorList(false)) + def allExecutorList(): Seq[ExecutorSummary] = fetchExecutors(false) @Path("stages") def stages(): Class[StagesResource] = classOf[StagesResource] @@ -160,6 +161,62 @@ private[v1] class AbstractApplicationResource extends BaseAppResource { classOf[OneApplicationAttemptResource] } + private def fetchExecutors(activeOnly: Boolean): Seq[ExecutorSummary] = { + withUI(ui => { + val tmpExecutorList = 
ui.store.executorList(activeOnly) + ui.yarnLogServerUrl.map(lurl => + tmpExecutorList.map(withYarnLogServerLogs(toYarnLogServerUrl(lurl, ui.nmRpcPort))) + ).getOrElse(tmpExecutorList) + }) + } + + private def toYarnLogServerUrl(logServerUrl: String, nmPort: Int)(nmLogUrl: String): String = { + val containerSuffixPos = nmLogUrl.indexOf("container_") + if (containerSuffixPos >= 0) { + val nodeId = URI.create(nmLogUrl).getHost + ":" + nmPort + val containerSuffix = nmLogUrl.substring(containerSuffixPos) + val containerEndPos = containerSuffix.indexOf("/") + if (containerEndPos >= 0) { + val container = containerSuffix.substring(0, containerEndPos) + s"$logServerUrl/$nodeId/$container/$containerSuffix" + } else { + nmLogUrl + } + } else { + nmLogUrl + } + } + + private def withYarnLogServerLogs( + logRewrite: String => String)( + info: ExecutorSummary): ExecutorSummary = { + new ExecutorSummary( + id = info.id, + hostPort = info.hostPort, + isActive = info.isActive, + rddBlocks = info.rddBlocks, + memoryUsed = info.memoryUsed, + diskUsed = info.diskUsed, + totalCores = info.totalCores, + maxTasks = info.maxTasks, + activeTasks = info.activeTasks, + failedTasks = info.failedTasks, + completedTasks = info.completedTasks, + totalTasks = info.totalTasks, + totalDuration = info.totalDuration, + totalGCTime = info.totalGCTime, + totalInputBytes = info.totalInputBytes, + totalShuffleRead = info.totalShuffleRead, + totalShuffleWrite = info.totalShuffleWrite, + isBlacklisted = info.isBlacklisted, + maxMemory = info.maxMemory, + addTime = info.addTime, + removeTime = info.removeTime, + removeReason = info.removeReason, + executorLogs = info.executorLogs.mapValues(logRewrite), + memoryMetrics = info.memoryMetrics + ) + } } private[v1] class OneApplicationResource extends AbstractApplicationResource { diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index b44ac0ea1febc..8acfe7161ff51 100644 --- 
a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -17,11 +17,12 @@ package org.apache.spark.ui -import java.util.{Date, List => JList, ServiceLoader} +import java.util.Date -import scala.collection.JavaConverters._ +import org.apache.hadoop.yarn.conf.YarnConfiguration -import org.apache.spark.{JobExecutionStatus, SecurityManager, SparkConf, SparkContext} +import org.apache.spark.{SecurityManager, SparkConf, SparkContext} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.scheduler._ import org.apache.spark.status.AppStatusStore @@ -31,7 +32,6 @@ import org.apache.spark.ui.env.EnvironmentTab import org.apache.spark.ui.exec.ExecutorsTab import org.apache.spark.ui.jobs.{JobsTab, StagesTab} import org.apache.spark.ui.storage.StorageTab -import org.apache.spark.util.Utils /** * Top level user interface for a Spark application. @@ -52,6 +52,22 @@ private[spark] class SparkUI private ( val killEnabled = sc.map(_.conf.getBoolean("spark.ui.killEnabled", true)).getOrElse(false) + private val yarnConf = SparkHadoopUtil.get.newConfiguration(conf) + private val portReg = "^.*:([0-9]+)$".r + private[spark] val nmRpcPort = Option(yarnConf.get(YarnConfiguration.NM_ADDRESS)) + .map { case portReg(port) => port.toInt } + .getOrElse(0) + + private val isHistoryUI = sc.isEmpty + private val useAggregatedLogs = isHistoryUI && nmRpcPort > 0 && + yarnConf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, false) + + private[spark] val yarnLogServerUrl = if (useAggregatedLogs) { + Option(yarnConf.get(YarnConfiguration.YARN_LOG_SERVER_URL)) + } else { + None + } + var appId: String = _ private var streamingJobProgressListener: Option[SparkListener] = None ---------------------------------------------------------------- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > YARN-aggregated executor/driver logs appear unavailable when NM is down > ----------------------------------------------------------------------- > > Key: SPARK-23155 > URL: https://issues.apache.org/jira/browse/SPARK-23155 > Project: Spark > Issue Type: Improvement > Components: Deploy > Affects Versions: 2.2.1 > Reporter: Gera Shegalov > Priority: Major > > Unlike the MapReduce JobHistory Server, the Spark history server does not rewrite > container log URLs to point to the aggregated yarn.log.server.url location > and relies on the NodeManager webUI to trigger a redirect. This fails when > the NM is down. Note that the NM may be down permanently after decommissioning in > traditional environments or when used in a cloud environment such as AWS EMR > where either worker nodes are taken away by autoscaling or the whole cluster is > used to run a single job. -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org