Ron Hu created SPARK-34488: ------------------------------ Summary: Support task Metrics Distributions and executor Metrics Distributions in the REST API entry for a specified stage Key: SPARK-34488 URL: https://issues.apache.org/jira/browse/SPARK-34488 Project: Spark Issue Type: Sub-task Components: Spark Core Affects Versions: 3.0.2 Reporter: Ron Hu
For a specific stage, it is useful to show the task metrics as a percentile distribution. This information can help users know whether or not there is skew or a bottleneck among tasks in a given stage. Here is an example: "taskMetricsDistributions" : { "quantiles" : [ 0.0, 0.25, 0.5, 0.75, 1.0 ], "executorDeserializeTime" : [ 1.0, 1.0, 1.0, 1.0, 1.0 ], "executorDeserializeCpuTime" : [ 1195000.0, 1195000.0, 1195000.0, 1195000.0, 1195000.0 ], "executorRunTime" : [ 3.0, 3.0, 3.0, 3.0, 3.0 ], "executorCpuTime" : [ 3210000.0, 3210000.0, 3210000.0, 3210000.0, 3210000.0 ], "resultSize" : [ 2648.0, 2648.0, 2648.0, 2648.0, 2648.0 ], "jvmGcTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "resultSerializationTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "gettingResultTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "schedulerDelay" : [ 2.0, 2.0, 2.0, 2.0, 2.0 ], "peakExecutionMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "memoryBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "diskBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "inputMetrics" : { "bytesRead" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "recordsRead" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ] }, "outputMetrics" : { "bytesWritten" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "recordsWritten" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ] }, "shuffleReadMetrics" : { "readBytes" : [ 236.0, 236.0, 236.0, 236.0, 236.0 ], "readRecords" : [ 4.0, 4.0, 4.0, 4.0, 4.0 ], "remoteBlocksFetched" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "localBlocksFetched" : [ 4.0, 4.0, 4.0, 4.0, 4.0 ], "fetchWaitTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "remoteBytesRead" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "remoteBytesReadToDisk" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "totalBlocksFetched" : [ 4.0, 4.0, 4.0, 4.0, 4.0 ] }, "shuffleWriteMetrics" : { "writeBytes" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "writeRecords" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "writeTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ] } } Similarly, it is useful to show the executor metrics as a percentile distribution for a specific stage. 
Here is an example: "executorMetricsDistributions" : { "quantiles" : [ 0.0, 0.25, 0.5, 0.75, 1.0 ], "taskTime" : [ 6.0, 6.0, 6.0, 6.0, 6.0 ], "failedTasks" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "succeededTasks" : [ 1.0, 1.0, 1.0, 1.0, 1.0 ], "killedTasks" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "inputBytes" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "inputRecords" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "outputBytes" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "outputRecords" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "shuffleRead" : [ 236.0, 236.0, 236.0, 236.0, 236.0 ], "shuffleReadRecords" : [ 4.0, 4.0, 4.0, 4.0, 4.0 ], "shuffleWrite" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "shuffleWriteRecords" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "memoryBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "diskBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "peakMemoryMetrics" : { "JVMHeapMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "JVMOffHeapMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "OnHeapExecutionMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "OffHeapExecutionMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "OnHeapStorageMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "OffHeapStorageMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "OnHeapUnifiedMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "OffHeapUnifiedMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "DirectPoolMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "MappedPoolMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "ProcessTreeJVMVMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "ProcessTreeJVMRSSMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "ProcessTreePythonVMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "ProcessTreePythonRSSMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "ProcessTreeOtherVMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "ProcessTreeOtherRSSMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "MinorGCCount" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "MinorGCTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "MajorGCCount" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "MajorGCTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ] } } We use the withSummaries query parameter in the REST API for a specific stage as follows: 
applications/<application_id>/<application_attempt>/stages/<stage_id>/<stage_attempt>?withSummaries=[true|false] -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org