Re: how to debug spark app?
Related question: what are good profiling tools other than watching along the application master with the running code? Are there things that can be logged during the run? If I have say 2 ways of accomplishing the same thing, and I want to learn about the time/memory/general resource blocking performance of both, what is the best way of doing that? What tic, toc does in Matlab, or profile on, profile report. > On Aug 4, 2016, at 3:19 AM, Sumit Khanna wrote: > > Am not really sure of the best practices on this , but I either consult the > localhost:4040/jobs/ etc > or better this : > > val customSparkListener: CustomSparkListener = new CustomSparkListener() > sc.addSparkListener(customSparkListener) > class CustomSparkListener extends SparkListener { > override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd) { > debug(s"application ended at time : ${applicationEnd.time}") > } > override def onApplicationStart(applicationStart: > SparkListenerApplicationStart): Unit ={ > debug(s"[SPARK LISTENER DEBUGS] application Start app attempt id : > ${applicationStart.appAttemptId}") > debug(s"[SPARK LISTENER DEBUGS] application Start app id : > ${applicationStart.appId}") > debug(s"[SPARK LISTENER DEBUGS] application start app name : > ${applicationStart.appName}") > debug(s"[SPARK LISTENER DEBUGS] applicaton start driver logs : > ${applicationStart.driverLogs}") > debug(s"[SPARK LISTENER DEBUGS] application start spark user : > ${applicationStart.sparkUser}") > debug(s"[SPARK LISTENER DEBUGS] application start time : > ${applicationStart.time}") > } > override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): > Unit = { > debug(s"[SPARK LISTENER DEBUGS] ${executorAdded.executorId}") > debug(s"[SPARK LISTENER DEBUGS] ${executorAdded.executorInfo}") > debug(s"[SPARK LISTENER DEBUGS] ${executorAdded.time}") > } > override def onExecutorRemoved(executorRemoved: > SparkListenerExecutorRemoved): Unit = { > debug(s"[SPARK LISTENER DEBUGS] the executor 
removed Id : > ${executorRemoved.executorId}") > debug(s"[SPARK LISTENER DEBUGS] the executor removed reason : > ${executorRemoved.reason}") > debug(s"[SPARK LISTENER DEBUGS] the executor temoved at time : > ${executorRemoved.time}") > } > > override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { > debug(s"[SPARK LISTENER DEBUGS] job End id : ${jobEnd.jobId}") > debug(s"[SPARK LISTENER DEBUGS] job End job Result : ${jobEnd.jobResult}") > debug(s"[SPARK LISTENER DEBUGS] job End time : ${jobEnd.time}") > } > override def onJobStart(jobStart: SparkListenerJobStart) { > debug(s"[SPARK LISTENER DEBUGS] Job started with properties > ${jobStart.properties}") > debug(s"[SPARK LISTENER DEBUGS] Job started with time ${jobStart.time}") > debug(s"[SPARK LISTENER DEBUGS] Job started with job id > ${jobStart.jobId.toString}") > debug(s"[SPARK LISTENER DEBUGS] Job started with stage ids > ${jobStart.stageIds.toString()}") > debug(s"[SPARK LISTENER DEBUGS] Job started with stages > ${jobStart.stageInfos.size} : $jobStart") > } > > override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): > Unit = { > debug(s"[SPARK LISTENER DEBUGS] Stage ${stageCompleted.stageInfo.stageId} > completed with ${stageCompleted.stageInfo.numTasks} tasks.") > debug(s"[SPARK LISTENER DEBUGS] Stage details : > ${stageCompleted.stageInfo.details.toString}") > debug(s"[SPARK LISTENER DEBUGS] Stage completion time : > ${stageCompleted.stageInfo.completionTime}") > debug(s"[SPARK LISTENER DEBUGS] Stage details : > ${stageCompleted.stageInfo.rddInfos.toString()}") > } > override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): > Unit = { > debug(s"[SPARK LISTENER DEBUGS] Stage properties : > ${stageSubmitted.properties}") > debug(s"[SPARK LISTENER DEBUGS] Stage rddInfos : > ${stageSubmitted.stageInfo.rddInfos.toString()}") > debug(s"[SPARK LISTENER DEBUGS] Stage submission Time : > ${stageSubmitted.stageInfo.submissionTime}") > debug(s"[SPARK LISTENER DEBUGS] Stage 
submission details : > ${stageSubmitted.stageInfo.details.toString()}") > } > override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { > debug(s"[SPARK LISTENER DEBUGS] task ended reason : ${taskEnd.reason}") > debug(s"[SPARK LISTENER DEBUGS] task type : ${taskEnd.taskType}") > debug(s"[SPARK LISTENER DEBUGS] task Metrics : ${taskEnd.taskMetrics}") > debug(s"[SPARK LISTENER DEBUGS] task Info : ${taskEnd.taskInfo}") > debug(s"[SPARK LISTENER DEBUGS] task stage Id : ${taskEnd.stageId}") > debug(s"[SPARK LISTENER DEBUGS] task stage attempt Id : > ${taskEnd.stageAttemptId}") > debug(s"[SPARK LISTENER DEBUGS] task ended reason : ${taskEnd.reason}") > } > override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { > debug(s"[SPARK LISTENER DEBUGS] stage Attempt id : > ${taskStart.stageAttemptId}") > debug(s"[SPARK LISTENER DEBUGS] stage Id : ${
Re: how to debug spark app?
Am not really sure of the best practices on this , but I either consult the localhost:4040/jobs/ etc or better this : val customSparkListener: CustomSparkListener = new CustomSparkListener() sc.addSparkListener(customSparkListener) class CustomSparkListener extends SparkListener { override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd) { debug(s"application ended at time : ${applicationEnd.time}") } override def onApplicationStart(applicationStart: SparkListenerApplicationStart): Unit ={ debug(s"[SPARK LISTENER DEBUGS] application Start app attempt id : ${applicationStart.appAttemptId}") debug(s"[SPARK LISTENER DEBUGS] application Start app id : ${applicationStart.appId}") debug(s"[SPARK LISTENER DEBUGS] application start app name : ${applicationStart.appName}") debug(s"[SPARK LISTENER DEBUGS] applicaton start driver logs : ${applicationStart.driverLogs}") debug(s"[SPARK LISTENER DEBUGS] application start spark user : ${applicationStart.sparkUser}") debug(s"[SPARK LISTENER DEBUGS] application start time : ${applicationStart.time}") } override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = { debug(s"[SPARK LISTENER DEBUGS] ${executorAdded.executorId}") debug(s"[SPARK LISTENER DEBUGS] ${executorAdded.executorInfo}") debug(s"[SPARK LISTENER DEBUGS] ${executorAdded.time}") } override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = { debug(s"[SPARK LISTENER DEBUGS] the executor removed Id : ${executorRemoved.executorId}") debug(s"[SPARK LISTENER DEBUGS] the executor removed reason : ${executorRemoved.reason}") debug(s"[SPARK LISTENER DEBUGS] the executor temoved at time : ${executorRemoved.time}") } override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { debug(s"[SPARK LISTENER DEBUGS] job End id : ${jobEnd.jobId}") debug(s"[SPARK LISTENER DEBUGS] job End job Result : ${jobEnd.jobResult}") debug(s"[SPARK LISTENER DEBUGS] job End time : ${jobEnd.time}") } override def onJobStart(jobStart: 
SparkListenerJobStart) { debug(s"[SPARK LISTENER DEBUGS] Job started with properties ${jobStart.properties}") debug(s"[SPARK LISTENER DEBUGS] Job started with time ${jobStart.time}") debug(s"[SPARK LISTENER DEBUGS] Job started with job id ${jobStart.jobId.toString}") debug(s"[SPARK LISTENER DEBUGS] Job started with stage ids ${jobStart.stageIds.toString()}") debug(s"[SPARK LISTENER DEBUGS] Job started with stages ${jobStart.stageInfos.size} : $jobStart") } override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = { debug(s"[SPARK LISTENER DEBUGS] Stage ${stageCompleted.stageInfo.stageId} completed with ${stageCompleted.stageInfo.numTasks} tasks.") debug(s"[SPARK LISTENER DEBUGS] Stage details : ${stageCompleted.stageInfo.details.toString}") debug(s"[SPARK LISTENER DEBUGS] Stage completion time : ${stageCompleted.stageInfo.completionTime}") debug(s"[SPARK LISTENER DEBUGS] Stage details : ${stageCompleted.stageInfo.rddInfos.toString()}") } override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { debug(s"[SPARK LISTENER DEBUGS] Stage properties : ${stageSubmitted.properties}") debug(s"[SPARK LISTENER DEBUGS] Stage rddInfos : ${stageSubmitted.stageInfo.rddInfos.toString()}") debug(s"[SPARK LISTENER DEBUGS] Stage submission Time : ${stageSubmitted.stageInfo.submissionTime}") debug(s"[SPARK LISTENER DEBUGS] Stage submission details : ${stageSubmitted.stageInfo.details.toString()}") } override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { debug(s"[SPARK LISTENER DEBUGS] task ended reason : ${taskEnd.reason}") debug(s"[SPARK LISTENER DEBUGS] task type : ${taskEnd.taskType}") debug(s"[SPARK LISTENER DEBUGS] task Metrics : ${taskEnd.taskMetrics}") debug(s"[SPARK LISTENER DEBUGS] task Info : ${taskEnd.taskInfo}") debug(s"[SPARK LISTENER DEBUGS] task stage Id : ${taskEnd.stageId}") debug(s"[SPARK LISTENER DEBUGS] task stage attempt Id : ${taskEnd.stageAttemptId}") debug(s"[SPARK LISTENER DEBUGS] task ended reason : 
${taskEnd.reason}") } override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { debug(s"[SPARK LISTENER DEBUGS] stage Attempt id : ${taskStart.stageAttemptId}") debug(s"[SPARK LISTENER DEBUGS] stage Id : ${taskStart.stageId}") debug(s"[SPARK LISTENER DEBUGS] task Info : ${taskStart.taskInfo}") } override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = { debug(s"[SPARK LISTENER DEBUGS] the unpersist RDD id : ${unpersistRDD.rddId}") } } and then usually check for logs. P.S :I am running it as a jar. Thanks, On Thu, Aug 4, 2016 at 6:46 AM, Ted Yu wrote: > Have you looked at: > > https://spark.apache.org/docs/latest/running-on-yarn.html#debugging-your-application > > If you use Mesos: > > https://spark.apache.org/docs/latest/running-on-mesos.html#troubleshooting-and-debugging > > On Wed, Aug 3, 2016 at 6:13 PM, glen wrote: > >> Any tool like gdb? Which
Re: how to debug spark app?
Have you looked at: https://spark.apache.org/docs/latest/running-on-yarn.html#debugging-your-application If you use Mesos: https://spark.apache.org/docs/latest/running-on-mesos.html#troubleshooting-and-debugging On Wed, Aug 3, 2016 at 6:13 PM, glen wrote: > Any tool like gdb? Which support break point at some line or some function? > > > > > >
how to debug spark app?
Is there any tool like gdb for Spark — one that supports setting a breakpoint at a specific line or function?