Zhongwei Zhu created SPARK-32314:
------------------------------------

             Summary: [SHS] Add config to control whether log old format of stacktrace
                 Key: SPARK-32314
                 URL: https://issues.apache.org/jira/browse/SPARK-32314
             Project: Spark
          Issue Type: Improvement
          Components: Spark Core
    Affects Versions: 3.0.0
            Reporter: Zhongwei Zhu
Currently, EventLoggingListener writes both "Stack Trace" and "Full Stack Trace" in the TaskEndReason of an ExceptionFailure to the event log. Both fields contain the same information; the former is kept only for backward compatibility with Spark history from before version 1.2.0. We can omit the first field by default and add a config to control whether it is logged. This would significantly reduce event log size when many tasks fail with an ExceptionFailure. A sample of the current JSON format is below; a rough sketch of the proposed gating follows the sample.

{noformat}
{
  "Event": "SparkListenerTaskEnd",
  "Stage ID": 1237,
  "Stage Attempt ID": 0,
  "Task Type": "ShuffleMapTask",
  "Task End Reason": {
    "Reason": "ExceptionFailure",
    "Class Name": "java.io.IOException",
    "Description": "org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of broadcast_1405",
    "Stack Trace": [
      { "Declaring Class": "org.apache.spark.util.Utils$", "Method Name": "tryOrIOException", "File Name": "Utils.scala", "Line Number": 1350 },
      { "Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast", "Method Name": "readBroadcastBlock", "File Name": "TorrentBroadcast.scala", "Line Number": 218 },
      { "Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast", "Method Name": "getValue", "File Name": "TorrentBroadcast.scala", "Line Number": 103 },
      { "Declaring Class": "org.apache.spark.broadcast.Broadcast", "Method Name": "value", "File Name": "Broadcast.scala", "Line Number": 70 },
      { "Declaring Class": "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9", "Method Name": "wholestagecodegen_init_0_0$", "File Name": "generated.java", "Line Number": 466 },
      { "Declaring Class": "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9", "Method Name": "init", "File Name": "generated.java", "Line Number": 33 },
      { "Declaring Class": "org.apache.spark.sql.execution.WholeStageCodegenExec", "Method Name": "$anonfun$doExecute$4", "File Name": "WholeStageCodegenExec.scala", "Line Number": 750 },
      { "Declaring Class": "org.apache.spark.sql.execution.WholeStageCodegenExec", "Method Name": "$anonfun$doExecute$4$adapted", "File Name": "WholeStageCodegenExec.scala", "Line Number": 747 },
      { "Declaring Class": "org.apache.spark.rdd.RDD", "Method Name": "$anonfun$mapPartitionsWithIndex$2", "File Name": "RDD.scala", "Line Number": 915 },
      { "Declaring Class": "org.apache.spark.rdd.RDD", "Method Name": "$anonfun$mapPartitionsWithIndex$2$adapted", "File Name": "RDD.scala", "Line Number": 915 },
      { "Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD", "Method Name": "compute", "File Name": "MapPartitionsRDD.scala", "Line Number": 52 },
      { "Declaring Class": "org.apache.spark.rdd.RDD", "Method Name": "computeOrReadCheckpoint", "File Name": "RDD.scala", "Line Number": 373 },
      { "Declaring Class": "org.apache.spark.rdd.RDD", "Method Name": "iterator", "File Name": "RDD.scala", "Line Number": 337 },
      { "Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD", "Method Name": "compute", "File Name": "MapPartitionsRDD.scala", "Line Number": 52 },
      { "Declaring Class": "org.apache.spark.rdd.RDD", "Method Name": "computeOrReadCheckpoint", "File Name": "RDD.scala", "Line Number": 373 },
      { "Declaring Class": "org.apache.spark.rdd.RDD", "Method Name": "iterator", "File Name": "RDD.scala", "Line Number": 337 },
      { "Declaring Class": "org.apache.spark.shuffle.ShuffleWriteProcessor", "Method Name": "write", "File Name": "ShuffleWriteProcessor.scala", "Line Number": 59 },
"org.apache.spark.scheduler.ShuffleMapTask", "Method Name": "runTask", "File Name": "ShuffleMapTask.scala", "Line Number": 99 }, { "Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask", "Method Name": "runTask", "File Name": "ShuffleMapTask.scala", "Line Number": 52 }, { "Declaring Class": "org.apache.spark.scheduler.Task", "Method Name": "run", "File Name": "Task.scala", "Line Number": 127 }, { "Declaring Class": "org.apache.spark.executor.Executor$TaskRunner", "Method Name": "$anonfun$run$3", "File Name": "Executor.scala", "Line Number": 464 }, { "Declaring Class": "org.apache.spark.util.Utils$", "Method Name": "tryWithSafeFinally", "File Name": "Utils.scala", "Line Number": 1377 }, { "Declaring Class": "org.apache.spark.executor.Executor$TaskRunner", "Method Name": "run", "File Name": "Executor.scala", "Line Number": 467 }, { "Declaring Class": "java.util.concurrent.ThreadPoolExecutor", "Method Name": "runWorker", "File Name": "ThreadPoolExecutor.java", "Line Number": 1142 }, { "Declaring Class": "java.util.concurrent.ThreadPoolExecutor$Worker", "Method Name": "run", "File Name": "ThreadPoolExecutor.java", "Line Number": 617 }, { "Declaring Class": "java.lang.Thread", "Method Name": "run", "File Name": "Thread.java", "Line Number": 745 } ], "Full Stack Trace": "java.io.IOException: org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of broadcast_1405\r\n\tat org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1350)\r\n\tat org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:218)\r\n\tat org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:103)\r\n\tat org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)\r\n\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.wholestagecodegen_init_0_0$(generated.java:466)\r\n\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.init(generated.java:33)\r\n\tat org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4(WholeStageCodegenExec.scala:750)\r\n\tat org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4$adapted(WholeStageCodegenExec.scala:747)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915)\r\n\tat org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat org.apache.spark.scheduler.Task.run(Task.scala:127)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:464)\r\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)\r\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467)\r\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\r\n\tat 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\r\n\tat java.lang.Thread.run(Thread.java:745)\r\nCaused by: org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of broadcast_1405\r\n\tat org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBlocks$1(TorrentBroadcast.scala:189)\r\n\tat scala.runtime.java8.JFunction1$mcVI$sp.apply(JFunction1$mcVI$sp.java:23)\r\n\tat scala.collection.immutable.List.foreach(List.scala:392)\r\n\tat org.apache.spark.broadcast.TorrentBroadcast.readBlocks(TorrentBroadcast.scala:161)\r\n\tat org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$4(TorrentBroadcast.scala:245)\r\n\tat scala.Option.getOrElse(Option.scala:189)\r\n\tat org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$2(TorrentBroadcast.scala:223)\r\n\tat org.apache.spark.util.KeyLock.withLock(KeyLock.scala:64)\r\n\tat org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$1(TorrentBroadcast.scala:218)\r\n\tat org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1343)\r\n\t... 25 more\r\n", "Accumulator Updates": [ { "ID": 108750, "Update": "185972", "Internal": false, "Count Failed Values": true }, { "ID": 108752, "Update": "0", "Internal": false, "Count Failed Values": true }, { "ID": 108757, "Update": "32768", "Internal": false, "Count Failed Values": true }, { "ID": 108759, "Update": "0", "Internal": false, "Count Failed Values": true }, { "ID": 108760, "Update": "1", "Internal": false, "Count Failed Values": true }, { "ID": 108761, "Update": "0", "Internal": false, "Count Failed Values": true }, { "ID": 108762, "Update": "0", "Internal": false, "Count Failed Values": true }, { "ID": 108763, "Update": "21283988", "Internal": false, "Count Failed Values": true }, { "ID": 108764, "Update": "0", "Internal": false, "Count Failed Values": true }, { "ID": 108765, "Update": "0", "Internal": false, "Count Failed Values": true } ] }, "Task Info": { "Task ID": 922051, "Index": 2030, "Attempt": 0, "Launch Time": 1593809400002, "Executor ID": "513", "Host": "BN01AP9EB5816D8", "Locality": "NODE_LOCAL", "Speculative": false, "Getting Result Time": 0, "Finish Time": 1593809585998, "Failed": true, "Killed": false, "Accumulables": [] }, "Task Executor Metrics": { "JVMHeapMemory": 3186191224, "JVMOffHeapMemory": 169078760, "OnHeapExecutionMemory": 131072, "OffHeapExecutionMemory": 0, "OnHeapStorageMemory": 843822088, "OffHeapStorageMemory": 0, "OnHeapUnifiedMemory": 843953160, "OffHeapUnifiedMemory": 0, "DirectPoolMemory": 47740224, "MappedPoolMemory": 0, "ProcessTreeJVMVMemory": 0, "ProcessTreeJVMRSSMemory": 0, "ProcessTreePythonVMemory": 0, "ProcessTreePythonRSSMemory": 0, "ProcessTreeOtherVMemory": 0, "ProcessTreeOtherRSSMemory": 0, "MinorGCCount": 550, "MinorGCTime": 28316, "MajorGCCount": 28, "MajorGCTime": 18669 }, "Task Metrics": { "Executor Deserialize Time": 0, "Executor Deserialize CPU Time": 0, "Executor Run Time": 185972, "Executor CPU Time": 0, "Peak Execution Memory": 32768, "Result Size": 0, "JVM GC Time": 0, "Result Serialization Time": 0, "Memory Bytes Spilled": 0, "Disk Bytes Spilled": 0, "Shuffle Read Metrics": { "Remote Blocks Fetched": 0, "Local Blocks Fetched": 1, "Fetch Wait Time": 0, "Remote Bytes Read": 0, "Remote Bytes Read To Disk": 0, "Local Bytes Read": 21283988, "Total Records Read": 0 }, "Shuffle Write Metrics": { "Shuffle Bytes Written": 0, "Shuffle Write Time": 0, "Shuffle Records Written": 0 }, "Input Metrics": { "Bytes Read": 0, "Records Read": 0 }, "Output Metrics": { 
"Bytes Written": 0, "Records Written": 0 }, "Updated Blocks": [] } } {noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org