Zhongwei Zhu created SPARK-32314:
------------------------------------

             Summary: [SHS] Add config to control whether to log the old stack trace format
                 Key: SPARK-32314
                 URL: https://issues.apache.org/jira/browse/SPARK-32314
             Project: Spark
          Issue Type: Improvement
          Components: Spark Core
    Affects Versions: 3.0.0
            Reporter: Zhongwei Zhu


Currently, EventLoggingListener writes both "Stack Trace" and "Full Stack
Trace" in the TaskEndReason of ExceptionFailure to the event log. Both
fields contain the same information; the former is kept only for backward
compatibility with Spark History Server versions before 1.2.0. We can stop
writing the first field by default and add a config to control whether it
is logged. This will reduce event log size significantly when many tasks
fail with ExceptionFailure.
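
For illustration, here is a minimal sketch of what the switch could look
like as a Spark config entry. The key name, default value, and placement
below are assumptions for discussion, not a committed API:

{code:scala}
// Sketch only: assumes it lives under org.apache.spark.internal.config,
// where ConfigBuilder (Spark's internal config DSL) is accessible.
// The key name and the false default are hypothetical.
package org.apache.spark.internal.config

private[spark] object EventLogStackTrace {
  val EVENT_LOG_LEGACY_STACK_TRACE =
    ConfigBuilder("spark.eventLog.legacyStackTrace.enabled")
      .doc("Whether to also write the legacy 'Stack Trace' array in the " +
        "TaskEndReason of ExceptionFailure. Only needed when event logs " +
        "must remain readable by a Spark History Server older than 1.2.0.")
      .booleanConf
      .createWithDefault(false)
}
{code}

With the default off, newly written event logs would drop the duplicated
array, while users who still replay logs on a pre-1.2.0 history server
could opt back in.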


A sample JSON event in the current format:

{noformat}
{
  "Event": "SparkListenerTaskEnd",
  "Stage ID": 1237,
  "Stage Attempt ID": 0,
  "Task Type": "ShuffleMapTask",
  "Task End Reason": {
    "Reason": "ExceptionFailure",
    "Class Name": "java.io.IOException",
    "Description": "org.apache.spark.SparkException: Failed to get 
broadcast_1405_piece10 of broadcast_1405",
    "Stack Trace": [
      {
        "Declaring Class": "org.apache.spark.util.Utils$",
        "Method Name": "tryOrIOException",
        "File Name": "Utils.scala",
        "Line Number": 1350
      },
      {
        "Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast",
        "Method Name": "readBroadcastBlock",
        "File Name": "TorrentBroadcast.scala",
        "Line Number": 218
      },
      {
        "Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast",
        "Method Name": "getValue",
        "File Name": "TorrentBroadcast.scala",
        "Line Number": 103
      },
      {
        "Declaring Class": "org.apache.spark.broadcast.Broadcast",
        "Method Name": "value",
        "File Name": "Broadcast.scala",
        "Line Number": 70
      },
      {
        "Declaring Class": 
"org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9",
        "Method Name": "wholestagecodegen_init_0_0$",
        "File Name": "generated.java",
        "Line Number": 466
      },
      {
        "Declaring Class": 
"org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9",
        "Method Name": "init",
        "File Name": "generated.java",
        "Line Number": 33
      },
      {
        "Declaring Class": 
"org.apache.spark.sql.execution.WholeStageCodegenExec",
        "Method Name": "$anonfun$doExecute$4",
        "File Name": "WholeStageCodegenExec.scala",
        "Line Number": 750
      },
      {
        "Declaring Class": 
"org.apache.spark.sql.execution.WholeStageCodegenExec",
        "Method Name": "$anonfun$doExecute$4$adapted",
        "File Name": "WholeStageCodegenExec.scala",
        "Line Number": 747
      },
      {
        "Declaring Class": "org.apache.spark.rdd.RDD",
        "Method Name": "$anonfun$mapPartitionsWithIndex$2",
        "File Name": "RDD.scala",
        "Line Number": 915
      },
      {
        "Declaring Class": "org.apache.spark.rdd.RDD",
        "Method Name": "$anonfun$mapPartitionsWithIndex$2$adapted",
        "File Name": "RDD.scala",
        "Line Number": 915
      },
      {
        "Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD",
        "Method Name": "compute",
        "File Name": "MapPartitionsRDD.scala",
        "Line Number": 52
      },
      {
        "Declaring Class": "org.apache.spark.rdd.RDD",
        "Method Name": "computeOrReadCheckpoint",
        "File Name": "RDD.scala",
        "Line Number": 373
      },
      {
        "Declaring Class": "org.apache.spark.rdd.RDD",
        "Method Name": "iterator",
        "File Name": "RDD.scala",
        "Line Number": 337
      },
      {
        "Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD",
        "Method Name": "compute",
        "File Name": "MapPartitionsRDD.scala",
        "Line Number": 52
      },
      {
        "Declaring Class": "org.apache.spark.rdd.RDD",
        "Method Name": "computeOrReadCheckpoint",
        "File Name": "RDD.scala",
        "Line Number": 373
      },
      {
        "Declaring Class": "org.apache.spark.rdd.RDD",
        "Method Name": "iterator",
        "File Name": "RDD.scala",
        "Line Number": 337
      },
      {
        "Declaring Class": "org.apache.spark.shuffle.ShuffleWriteProcessor",
        "Method Name": "write",
        "File Name": "ShuffleWriteProcessor.scala",
        "Line Number": 59
      },
      {
        "Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask",
        "Method Name": "runTask",
        "File Name": "ShuffleMapTask.scala",
        "Line Number": 99
      },
      {
        "Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask",
        "Method Name": "runTask",
        "File Name": "ShuffleMapTask.scala",
        "Line Number": 52
      },
      {
        "Declaring Class": "org.apache.spark.scheduler.Task",
        "Method Name": "run",
        "File Name": "Task.scala",
        "Line Number": 127
      },
      {
        "Declaring Class": "org.apache.spark.executor.Executor$TaskRunner",
        "Method Name": "$anonfun$run$3",
        "File Name": "Executor.scala",
        "Line Number": 464
      },
      {
        "Declaring Class": "org.apache.spark.util.Utils$",
        "Method Name": "tryWithSafeFinally",
        "File Name": "Utils.scala",
        "Line Number": 1377
      },
      {
        "Declaring Class": "org.apache.spark.executor.Executor$TaskRunner",
        "Method Name": "run",
        "File Name": "Executor.scala",
        "Line Number": 467
      },
      {
        "Declaring Class": "java.util.concurrent.ThreadPoolExecutor",
        "Method Name": "runWorker",
        "File Name": "ThreadPoolExecutor.java",
        "Line Number": 1142
      },
      {
        "Declaring Class": "java.util.concurrent.ThreadPoolExecutor$Worker",
        "Method Name": "run",
        "File Name": "ThreadPoolExecutor.java",
        "Line Number": 617
      },
      {
        "Declaring Class": "java.lang.Thread",
        "Method Name": "run",
        "File Name": "Thread.java",
        "Line Number": 745
      }
    ],
    "Full Stack Trace": "java.io.IOException: org.apache.spark.SparkException: 
Failed to get broadcast_1405_piece10 of broadcast_1405\r\n\tat 
org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1350)\r\n\tat 
org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:218)\r\n\tat
 
org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:103)\r\n\tat
 org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)\r\n\tat 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.wholestagecodegen_init_0_0$(generated.java:466)\r\n\tat
 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.init(generated.java:33)\r\n\tat
 
org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4(WholeStageCodegenExec.scala:750)\r\n\tat
 
org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4$adapted(WholeStageCodegenExec.scala:747)\r\n\tat
 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915)\r\n\tat
 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915)\r\n\tat
 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat
 org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat 
org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat
 org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat 
org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat 
org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat
 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat
 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat
 org.apache.spark.scheduler.Task.run(Task.scala:127)\r\n\tat 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:464)\r\n\tat
 org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)\r\n\tat 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467)\r\n\tat 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\r\n\tat
 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\r\n\tat
 java.lang.Thread.run(Thread.java:745)\r\nCaused by: 
org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of 
broadcast_1405\r\n\tat 
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBlocks$1(TorrentBroadcast.scala:189)\r\n\tat
 
scala.runtime.java8.JFunction1$mcVI$sp.apply(JFunction1$mcVI$sp.java:23)\r\n\tat
 scala.collection.immutable.List.foreach(List.scala:392)\r\n\tat 
org.apache.spark.broadcast.TorrentBroadcast.readBlocks(TorrentBroadcast.scala:161)\r\n\tat
 
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$4(TorrentBroadcast.scala:245)\r\n\tat
 scala.Option.getOrElse(Option.scala:189)\r\n\tat 
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$2(TorrentBroadcast.scala:223)\r\n\tat
 org.apache.spark.util.KeyLock.withLock(KeyLock.scala:64)\r\n\tat 
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$1(TorrentBroadcast.scala:218)\r\n\tat
 org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1343)\r\n\t... 25 
more\r\n",
    "Accumulator Updates": [
      {
        "ID": 108750,
        "Update": "185972",
        "Internal": false,
        "Count Failed Values": true
      },
      {
        "ID": 108752,
        "Update": "0",
        "Internal": false,
        "Count Failed Values": true
      },
      {
        "ID": 108757,
        "Update": "32768",
        "Internal": false,
        "Count Failed Values": true
      },
      {
        "ID": 108759,
        "Update": "0",
        "Internal": false,
        "Count Failed Values": true
      },
      {
        "ID": 108760,
        "Update": "1",
        "Internal": false,
        "Count Failed Values": true
      },
      {
        "ID": 108761,
        "Update": "0",
        "Internal": false,
        "Count Failed Values": true
      },
      {
        "ID": 108762,
        "Update": "0",
        "Internal": false,
        "Count Failed Values": true
      },
      {
        "ID": 108763,
        "Update": "21283988",
        "Internal": false,
        "Count Failed Values": true
      },
      {
        "ID": 108764,
        "Update": "0",
        "Internal": false,
        "Count Failed Values": true
      },
      {
        "ID": 108765,
        "Update": "0",
        "Internal": false,
        "Count Failed Values": true
      }
    ]
  },
  "Task Info": {
    "Task ID": 922051,
    "Index": 2030,
    "Attempt": 0,
    "Launch Time": 1593809400002,
    "Executor ID": "513",
    "Host": "BN01AP9EB5816D8",
    "Locality": "NODE_LOCAL",
    "Speculative": false,
    "Getting Result Time": 0,
    "Finish Time": 1593809585998,
    "Failed": true,
    "Killed": false,
    "Accumulables": []
  },
  "Task Executor Metrics": {
    "JVMHeapMemory": 3186191224,
    "JVMOffHeapMemory": 169078760,
    "OnHeapExecutionMemory": 131072,
    "OffHeapExecutionMemory": 0,
    "OnHeapStorageMemory": 843822088,
    "OffHeapStorageMemory": 0,
    "OnHeapUnifiedMemory": 843953160,
    "OffHeapUnifiedMemory": 0,
    "DirectPoolMemory": 47740224,
    "MappedPoolMemory": 0,
    "ProcessTreeJVMVMemory": 0,
    "ProcessTreeJVMRSSMemory": 0,
    "ProcessTreePythonVMemory": 0,
    "ProcessTreePythonRSSMemory": 0,
    "ProcessTreeOtherVMemory": 0,
    "ProcessTreeOtherRSSMemory": 0,
    "MinorGCCount": 550,
    "MinorGCTime": 28316,
    "MajorGCCount": 28,
    "MajorGCTime": 18669
  },
  "Task Metrics": {
    "Executor Deserialize Time": 0,
    "Executor Deserialize CPU Time": 0,
    "Executor Run Time": 185972,
    "Executor CPU Time": 0,
    "Peak Execution Memory": 32768,
    "Result Size": 0,
    "JVM GC Time": 0,
    "Result Serialization Time": 0,
    "Memory Bytes Spilled": 0,
    "Disk Bytes Spilled": 0,
    "Shuffle Read Metrics": {
      "Remote Blocks Fetched": 0,
      "Local Blocks Fetched": 1,
      "Fetch Wait Time": 0,
      "Remote Bytes Read": 0,
      "Remote Bytes Read To Disk": 0,
      "Local Bytes Read": 21283988,
      "Total Records Read": 0
    },
    "Shuffle Write Metrics": {
      "Shuffle Bytes Written": 0,
      "Shuffle Write Time": 0,
      "Shuffle Records Written": 0
    },
    "Input Metrics": {
      "Bytes Read": 0,
      "Records Read": 0
    },
    "Output Metrics": {
      "Bytes Written": 0,
      "Records Written": 0
    },
    "Updated Blocks": []
  }
}
{noformat}
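
In the sample above, the "Stack Trace" array is a structured duplicate of
the "Full Stack Trace" string and accounts for most of the event's size.
Below is a sketch of how the writer could omit it when the flag is off,
using json4s (the library JsonProtocol renders with). The method shape and
the logLegacy parameter are illustrative, not the actual JsonProtocol
signature:

{code:scala}
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

object ExceptionFailureJsonSketch {
  // Illustrative only: the real writer would thread the config value in
  // rather than take a boolean parameter.
  def exceptionFailureToJson(
      className: String,
      description: String,
      legacyStackTrace: JValue, // the structured "Stack Trace" array
      fullStackTrace: String,
      logLegacy: Boolean): JValue = {
    ("Reason" -> "ExceptionFailure") ~
    ("Class Name" -> className) ~
    ("Description" -> description) ~
    // Fields whose value is JNothing are dropped at render time, so the
    // legacy array vanishes from the output unless the flag is set.
    ("Stack Trace" -> (if (logLegacy) legacyStackTrace else JNothing)) ~
    ("Full Stack Trace" -> fullStackTrace)
  }

  def main(args: Array[String]): Unit = {
    val legacy: JValue = JArray(List(
      ("Declaring Class" -> "org.apache.spark.util.Utils$") ~
      ("Method Name" -> "tryOrIOException")))
    val slim = exceptionFailureToJson(
      "java.io.IOException", "...", legacy, "java.io.IOException: ...",
      logLegacy = false)
    println(compact(render(slim))) // no "Stack Trace" field in the output
  }
}
{code}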
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
