This is an automated email from the ASF dual-hosted git repository. irashid pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 387ce89 [SPARK-27324][DOC][CORE] Document configurations related to executor metrics and modify a configuration 387ce89 is described below commit 387ce89a0631f1a4c6668b90ff2a7bbcf11919cd Author: Wing Yew Poon <wyp...@cloudera.com> AuthorDate: Fri Jan 31 14:28:02 2020 -0600 [SPARK-27324][DOC][CORE] Document configurations related to executor metrics and modify a configuration ### What changes were proposed in this pull request? Add a section to the Configuration page to document configurations for executor metrics. At the same time, rename spark.eventLog.logStageExecutorProcessTreeMetrics.enabled to spark.executor.processTreeMetrics.enabled and make it independent of spark.eventLog.logStageExecutorMetrics.enabled. ### Why are the changes needed? Executor metrics are new in Spark 3.0. They lack documentation. Memory metrics as a whole are always collected, but the ones obtained from the process tree have to be optionally enabled. Making this depend on a single configuration makes for more intuitive behavior. Given this, the configuration property is renamed to better reflect its meaning. ### Does this PR introduce any user-facing change? Yes, only in that the configurations are all new to 3.0. ### How was this patch tested? Not necessary. Closes #27329 from wypoon/SPARK-27324. 
Authored-by: Wing Yew Poon <wyp...@cloudera.com> Signed-off-by: Imran Rashid <iras...@cloudera.com> --- .../spark/executor/ExecutorMetricsSource.scala | 3 +- .../spark/executor/ProcfsMetricsGetter.scala | 8 ++--- .../org/apache/spark/internal/config/package.scala | 17 +++++++--- .../spark/deploy/history/HistoryServerSuite.scala | 2 +- docs/configuration.md | 37 ++++++++++++++++++++++ docs/monitoring.md | 20 ++++++------ 6 files changed, 65 insertions(+), 22 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala index b052e43..14645f7 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala @@ -32,8 +32,7 @@ import org.apache.spark.metrics.source.Source * spark.executor.metrics.pollingInterval=<interval in ms>. * (2) Procfs metrics are gathered all in one-go and only conditionally: * if the /proc filesystem exists - * and spark.eventLog.logStageExecutorProcessTreeMetrics.enabled=true - * and spark.eventLog.logStageExecutorMetrics.enabled=true. + * and spark.executor.processTreeMetrics.enabled=true. 
*/ private[spark] class ExecutorMetricsSource extends Source { diff --git a/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala b/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala index 0d5dcfb4..80ef757 100644 --- a/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala +++ b/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala @@ -58,11 +58,9 @@ private[spark] class ProcfsMetricsGetter(procfsDir: String = "/proc/") extends L logWarning("Exception checking for procfs dir", ioe) false } - val shouldLogStageExecutorMetrics = - SparkEnv.get.conf.get(config.EVENT_LOG_STAGE_EXECUTOR_METRICS) - val shouldLogStageExecutorProcessTreeMetrics = - SparkEnv.get.conf.get(config.EVENT_LOG_PROCESS_TREE_METRICS) - procDirExists.get && shouldLogStageExecutorProcessTreeMetrics && shouldLogStageExecutorMetrics + val shouldPollProcessTreeMetrics = + SparkEnv.get.conf.get(config.EXECUTOR_PROCESS_TREE_METRICS_ENABLED) + procDirExists.get && shouldPollProcessTreeMetrics } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 40b05cf..e68368f 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -148,11 +148,8 @@ package object config { private[spark] val EVENT_LOG_STAGE_EXECUTOR_METRICS = ConfigBuilder("spark.eventLog.logStageExecutorMetrics.enabled") - .booleanConf - .createWithDefault(false) - - private[spark] val EVENT_LOG_PROCESS_TREE_METRICS = - ConfigBuilder("spark.eventLog.logStageExecutorProcessTreeMetrics.enabled") + .doc("Whether to write per-stage peaks of executor metrics (for each executor) " + + "to the event log.") .booleanConf .createWithDefault(false) @@ -215,8 +212,18 @@ package object config { private[spark] val EXECUTOR_HEARTBEAT_MAX_FAILURES = 
ConfigBuilder("spark.executor.heartbeat.maxFailures").internal().intConf.createWithDefault(60) + private[spark] val EXECUTOR_PROCESS_TREE_METRICS_ENABLED = + ConfigBuilder("spark.executor.processTreeMetrics.enabled") + .doc("Whether to collect process tree metrics (from the /proc filesystem) when collecting " + + "executor metrics.") + .booleanConf + .createWithDefault(false) + private[spark] val EXECUTOR_METRICS_POLLING_INTERVAL = ConfigBuilder("spark.executor.metrics.pollingInterval") + .doc("How often to collect executor metrics (in milliseconds). " + + "If 0, the polling is done on executor heartbeats. " + + "If positive, the polling is done at this interval.") .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("0") diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 06554e5..206db0f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -84,7 +84,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers .set(IS_TESTING, true) .set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) .set(EVENT_LOG_STAGE_EXECUTOR_METRICS, true) - .set(EVENT_LOG_PROCESS_TREE_METRICS, true) + .set(EXECUTOR_PROCESS_TREE_METRICS_ENABLED, true) conf.setAll(extraConf) provider = new FsHistoryProvider(conf) provider.checkForLogs() diff --git a/docs/configuration.md b/docs/configuration.md index 559c5cd..8164ed4 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1624,6 +1624,43 @@ Apart from these, the following properties are also available, and may be useful </tr> </table> +### Executor Metrics + +<table class="table"> +<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr> +<tr> + <td><code>spark.eventLog.logStageExecutorMetrics.enabled</code></td> + <td>false</td> + <td> + Whether to write 
per-stage peaks of executor metrics (for each executor) to the event log. + <br /> + <em>Note:</em> The metrics are polled (collected) and sent in the executor heartbeat, + and this is always done; this configuration is only to determine if aggregated metric peaks + are written to the event log. + </td> +</tr> +<tr> + <td><code>spark.executor.processTreeMetrics.enabled</code></td> + <td>false</td> + <td> + Whether to collect process tree metrics (from the /proc filesystem) when collecting + executor metrics. + <br /> + <em>Note:</em> The process tree metrics are collected only if the /proc filesystem + exists. + </td> +</tr> +<tr> + <td><code>spark.executor.metrics.pollingInterval</code></td> + <td>0</td> + <td> + How often to collect executor metrics (in milliseconds). + <br /> + If 0, the polling is done on executor heartbeats (thus at the heartbeat interval, + specified by <code>spark.executor.heartbeatInterval</code>). + If positive, the polling is done at this interval. + </td> +</tr> +</table> + ### Networking <table class="table"> diff --git a/docs/monitoring.md b/docs/monitoring.md index 31bf1eb..fc95f6a 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -659,7 +659,10 @@ A list of the available metrics, with a short description: ### Executor Metrics -Executor-level metrics are sent from each executor to the driver as part of the Heartbeat to describe the performance metrics of Executor itself like JVM heap memory, GC information. +Executor-level metrics are sent from each executor to the driver as part of the Heartbeat to describe the performance metrics of Executor itself like JVM heap memory, GC information. 
+In addition, aggregated per-stage peak values of the executor metrics are written to the event log if `spark.eventLog.logStageExecutorMetrics.enabled` is true. +Executor metrics are also exposed via the Spark metrics system based on the Dropwizard metrics library. A list of the available metrics, with a short description: <table class="table"> @@ -755,7 +758,7 @@ A list of the available metrics, with a short description: </tr> <tr> <td> .ProcessTreeJVMVMemory</td> - <td>Virtual memory size in bytes. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td> + <td>Virtual memory size in bytes. Enabled if spark.executor.processTreeMetrics.enabled is true.</td> </tr> <tr> <td> .ProcessTreeJVMRSSMemory</td> @@ -763,23 +766,23 @@ A list of the available metrics, with a short description: in real memory. This is just the pages which count toward text, data, or stack space. This does not include pages which have not been demand-loaded in, - or which are swapped out. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td> + or which are swapped out. Enabled if spark.executor.processTreeMetrics.enabled is true.</td> </tr> <tr> <td> .ProcessTreePythonVMemory</td> - <td>Virtual memory size for Python in bytes. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td> + <td>Virtual memory size for Python in bytes. Enabled if spark.executor.processTreeMetrics.enabled is true.</td> </tr> <tr> <td> .ProcessTreePythonRSSMemory</td> - <td>Resident Set Size for Python. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td> + <td>Resident Set Size for Python. Enabled if spark.executor.processTreeMetrics.enabled is true.</td> </tr> <tr> <td> .ProcessTreeOtherVMemory</td> - <td>Virtual memory size for other kind of process in bytes. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td> + <td>Virtual memory size for other kind of process in bytes. 
Enabled if spark.executor.processTreeMetrics.enabled is true.</td> </tr> <tr> <td> .ProcessTreeOtherRSSMemory</td> - <td>Resident Set Size for other kind of process. Enabled if spark.eventLog.logStageExecutorProcessTreeMetrics.enabled is true.</td> + <td>Resident Set Size for other kind of process. Enabled if spark.executor.processTreeMetrics.enabled is true.</td> </tr> <tr> <td> .MinorGCCount</td> @@ -1102,8 +1105,7 @@ when running in local mode. - ProcessTreeOtherRSSMemory - **note:** "ProcessTree*" metrics are collected only under certain conditions. The conditions are the logical AND of the following: `/proc` filesystem exists, - `spark.eventLog.logStageExecutorProcessTreeMetrics.enabled=true`, - `spark.eventLog.logStageExecutorMetrics.enabled=true`. + `spark.executor.processTreeMetrics.enabled=true`. "ProcessTree*" metrics report 0 when those conditions are not met. - namespace=JVMCPU --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org