[ https://issues.apache.org/jira/browse/SPARK-26644?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Takeshi Yamamuro updated SPARK-26644:
-------------------------------------

    Description:
To help Spark users understand SQL metrics better, it would be nice to explain them in the documentation (see https://github.com/apache/spark/pull/23551#issuecomment-454561158).

This is a list of the current SQL metrics I roughly checked:
{code:java}
- Aggregate/Join
./execution/aggregate/HashAggregateExec.scala:63: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
./execution/aggregate/HashAggregateExec.scala:64: "peakMemory" -> SQLMetrics.createSizeMetric(sparkContext, "peak memory"),
./execution/aggregate/HashAggregateExec.scala:65: "spillSize" -> SQLMetrics.createSizeMetric(sparkContext, "spill size"),
./execution/aggregate/HashAggregateExec.scala:66: "aggTime" -> SQLMetrics.createTimingMetric(sparkContext, "aggregate time"),
./execution/aggregate/HashAggregateExec.scala:67: "avgHashProbe" -> SQLMetrics.createAverageMetric(sparkContext, "avg hash probe"))
./execution/aggregate/ObjectHashAggregateExec.scala:79: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
./execution/aggregate/ObjectHashAggregateExec.scala:80: "aggTime" -> SQLMetrics.createTimingMetric(sparkContext, "aggregate time")
./execution/aggregate/SortAggregateExec.scala:53: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/joins/BroadcastHashJoinExec.scala:50: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/joins/BroadcastNestedLoopJoinExec.scala:40: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/joins/CartesianProductExec.scala:67: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/joins/ShuffledHashJoinExec.scala:43: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
./execution/joins/ShuffledHashJoinExec.scala:44: "buildDataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size of build side"),
./execution/joins/ShuffledHashJoinExec.scala:45: "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build hash map"))
./execution/joins/SortMergeJoinExec.scala:46: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))

- Scan/Write
./execution/ColumnarBatchScan.scala:38: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
./execution/ColumnarBatchScan.scala:39: "scanTime" -> SQLMetrics.createTimingMetric(sparkContext, "scan time"))
./execution/DataSourceScanExec.scala:87: Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/DataSourceScanExec.scala:321: Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
./execution/DataSourceScanExec.scala:322: "numFiles" -> SQLMetrics.createMetric(sparkContext, "number of files"),
./execution/DataSourceScanExec.scala:323: "metadataTime" -> SQLMetrics.createMetric(sparkContext, "metadata time"),
./execution/DataSourceScanExec.scala:324: "scanTime" -> SQLMetrics.createTimingMetric(sparkContext, "scan time"))
./execution/LocalTableScanExec.scala:37: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/HiveTableScanExec.scala:68: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/datasources/BasicWriteStatsTracker.scala:175: NUM_FILES_KEY -> SQLMetrics.createMetric(sparkContext, "number of written files"),
./execution/datasources/BasicWriteStatsTracker.scala:176: NUM_OUTPUT_BYTES_KEY -> SQLMetrics.createMetric(sparkContext, "bytes of written output"),
./execution/datasources/BasicWriteStatsTracker.scala:177: NUM_OUTPUT_ROWS_KEY -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
./execution/datasources/BasicWriteStatsTracker.scala:178: NUM_PARTS_KEY -> SQLMetrics.createMetric(sparkContext, "number of dynamic part")

- Streaming
./execution/streaming/statefulOperators.scala:72: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/streaming/statefulOperators.scala:79: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
./execution/streaming/statefulOperators.scala:80: "numTotalStateRows" -> SQLMetrics.createMetric(sparkContext, "number of total state rows"),
./execution/streaming/statefulOperators.scala:81: "numUpdatedStateRows" -> SQLMetrics.createMetric(sparkContext, "number of updated state rows"),
./execution/streaming/statefulOperators.scala:82: "allUpdatesTimeMs" -> SQLMetrics.createTimingMetric(sparkContext, "total time to update rows"),
./execution/streaming/statefulOperators.scala:83: "allRemovalsTimeMs" -> SQLMetrics.createTimingMetric(sparkContext, "total time to remove rows"),
./execution/streaming/statefulOperators.scala:84: "commitTimeMs" -> SQLMetrics.createTimingMetric(sparkContext, "time to commit changes"),
./execution/streaming/statefulOperators.scala:85: "stateMemory" -> SQLMetrics.createSizeMetric(sparkContext, "memory used by state")
./execution/streaming/statefulOperators.scala:127: name -> SQLMetrics.createMetric(sparkContext, desc)
./execution/streaming/statefulOperators.scala:129: name -> SQLMetrics.createSizeMetric(sparkContext, desc)
./execution/streaming/statefulOperators.scala:131: name -> SQLMetrics.createTimingMetric(sparkContext, desc)

- Shuffles
./execution/exchange/BroadcastExchangeExec.scala:47: "dataSize" -> SQLMetrics.createMetric(sparkContext, "data size (bytes)"),
./execution/exchange/BroadcastExchangeExec.scala:48: "collectTime" -> SQLMetrics.createMetric(sparkContext, "time to collect (ms)"),
./execution/exchange/BroadcastExchangeExec.scala:49: "buildTime" -> SQLMetrics.createMetric(sparkContext, "time to build (ms)"),
./execution/exchange/BroadcastExchangeExec.scala:50: "broadcastTime" -> SQLMetrics.createMetric(sparkContext, "time to broadcast (ms)"))
./execution/exchange/ShuffleExchangeExec.scala:55: "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size")
./execution/metric/SQLShuffleMetricsReporter.scala:91: REMOTE_BLOCKS_FETCHED -> SQLMetrics.createMetric(sc, "remote blocks read"),
./execution/metric/SQLShuffleMetricsReporter.scala:92: LOCAL_BLOCKS_FETCHED -> SQLMetrics.createMetric(sc, "local blocks read"),
./execution/metric/SQLShuffleMetricsReporter.scala:93: REMOTE_BYTES_READ -> SQLMetrics.createSizeMetric(sc, "remote bytes read"),
./execution/metric/SQLShuffleMetricsReporter.scala:94: REMOTE_BYTES_READ_TO_DISK -> SQLMetrics.createSizeMetric(sc, "remote bytes read to disk"),
./execution/metric/SQLShuffleMetricsReporter.scala:95: LOCAL_BYTES_READ -> SQLMetrics.createSizeMetric(sc, "local bytes read"),
./execution/metric/SQLShuffleMetricsReporter.scala:96: FETCH_WAIT_TIME -> SQLMetrics.createTimingMetric(sc, "fetch wait time"),
./execution/metric/SQLShuffleMetricsReporter.scala:97: RECORDS_READ -> SQLMetrics.createMetric(sc, "records read"))
./execution/metric/SQLShuffleMetricsReporter.scala:147: SQLMetrics.createSizeMetric(sc, "shuffle bytes written"),
./execution/metric/SQLShuffleMetricsReporter.scala:149: SQLMetrics.createMetric(sc, "shuffle records written"),
./execution/metric/SQLShuffleMetricsReporter.scala:151: SQLMetrics.createNanoTimingMetric(sc, "shuffle write time"))

- Other operators
./execution/basicPhysicalOperators.scala:118: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/basicPhysicalOperators.scala:250: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/basicPhysicalOperators.scala:363: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/basicPhysicalOperators.scala:666: "dataSize" -> SQLMetrics.createMetric(sparkContext, "data size (bytes)"),
./execution/basicPhysicalOperators.scala:667: "collectTime" -> SQLMetrics.createMetric(sparkContext, "time to collect (ms)"))
./execution/SortExec.scala:58: "sortTime" -> SQLMetrics.createTimingMetric(sparkContext, "sort time"),
./execution/SortExec.scala:59: "peakMemory" -> SQLMetrics.createSizeMetric(sparkContext, "peak memory"),
./execution/SortExec.scala:60: "spillSize" -> SQLMetrics.createSizeMetric(sparkContext, "spill size"))
./execution/ExistingRDD.scala:64: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/ExistingRDD.scala:145: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/ExpandExec.scala:44: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/GenerateExec.scala:69: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
./execution/WholeStageCodegenExec.scala:620: "pipelineTime" -> SQLMetrics.createTimingMetric(sparkContext,
{code}

  was:
To make Spark users understand SQL metrics more, it would be nice to explain about them in documents.
time")) - Other oprators ./execution/basicPhysicalOperators.scala:118: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) ./execution/basicPhysicalOperators.scala:250: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) ./execution/basicPhysicalOperators.scala:363: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) ./execution/basicPhysicalOperators.scala:666: "dataSize" -> SQLMetrics.createMetric(sparkContext, "data size (bytes)"), ./execution/basicPhysicalOperators.scala:667: "collectTime" -> SQLMetrics.createMetric(sparkContext, "time to collect (ms)")) ./execution/SortExec.scala:58: "sortTime" -> SQLMetrics.createTimingMetric(sparkContext, "sort time"), ./execution/SortExec.scala:59: "peakMemory" -> SQLMetrics.createSizeMetric(sparkContext, "peak memory"), ./execution/SortExec.scala:60: "spillSize" -> SQLMetrics.createSizeMetric(sparkContext, "spill size")) ./execution/ExistingRDD.scala:64: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) ./execution/ExistingRDD.scala:145: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) ./execution/ExpandExec.scala:44: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) ./execution/GenerateExec.scala:69: "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) ./execution/WholeStageCodegenExec.scala:620: "pipelineTime" -> SQLMetrics.createTimingMetric(sparkContext, {code} > Add documents for SQL metrics > ----------------------------- > > Key: SPARK-26644 > URL: https://issues.apache.org/jira/browse/SPARK-26644 > Project: Spark > Issue Type: Documentation > Components: SQL > Affects Versions: 2.4.0 > Reporter: Takeshi Yamamuro > Priority: Major > > To make Spark users understand SQL metrics more, it would be nice to explain > about them in documents (See > https://github.com/apache/spark/pull/23551#issuecomment-454561158). 
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)