I am not really sure of the best practices on this, but I either consult localhost:4040/jobs/ etc., or — better — use this:
val customSparkListener: CustomSparkListener = new CustomSparkListener() sc.addSparkListener(customSparkListener) class CustomSparkListener extends SparkListener { override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd) { debug(s"application ended at time : ${applicationEnd.time}") } override def onApplicationStart(applicationStart: SparkListenerApplicationStart): Unit ={ debug(s"[SPARK LISTENER DEBUGS] application Start app attempt id : ${applicationStart.appAttemptId}") debug(s"[SPARK LISTENER DEBUGS] application Start app id : ${applicationStart.appId}") debug(s"[SPARK LISTENER DEBUGS] application start app name : ${applicationStart.appName}") debug(s"[SPARK LISTENER DEBUGS] applicaton start driver logs : ${applicationStart.driverLogs}") debug(s"[SPARK LISTENER DEBUGS] application start spark user : ${applicationStart.sparkUser}") debug(s"[SPARK LISTENER DEBUGS] application start time : ${applicationStart.time}") } override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = { debug(s"[SPARK LISTENER DEBUGS] ${executorAdded.executorId}") debug(s"[SPARK LISTENER DEBUGS] ${executorAdded.executorInfo}") debug(s"[SPARK LISTENER DEBUGS] ${executorAdded.time}") } override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = { debug(s"[SPARK LISTENER DEBUGS] the executor removed Id : ${executorRemoved.executorId}") debug(s"[SPARK LISTENER DEBUGS] the executor removed reason : ${executorRemoved.reason}") debug(s"[SPARK LISTENER DEBUGS] the executor temoved at time : ${executorRemoved.time}") } override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { debug(s"[SPARK LISTENER DEBUGS] job End id : ${jobEnd.jobId}") debug(s"[SPARK LISTENER DEBUGS] job End job Result : ${jobEnd.jobResult}") debug(s"[SPARK LISTENER DEBUGS] job End time : ${jobEnd.time}") } override def onJobStart(jobStart: SparkListenerJobStart) { debug(s"[SPARK LISTENER DEBUGS] Job started with properties ${jobStart.properties}") debug(s"[SPARK 
LISTENER DEBUGS] Job started with time ${jobStart.time}") debug(s"[SPARK LISTENER DEBUGS] Job started with job id ${jobStart.jobId.toString}") debug(s"[SPARK LISTENER DEBUGS] Job started with stage ids ${jobStart.stageIds.toString()}") debug(s"[SPARK LISTENER DEBUGS] Job started with stages ${jobStart.stageInfos.size} : $jobStart") } override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = { debug(s"[SPARK LISTENER DEBUGS] Stage ${stageCompleted.stageInfo.stageId} completed with ${stageCompleted.stageInfo.numTasks} tasks.") debug(s"[SPARK LISTENER DEBUGS] Stage details : ${stageCompleted.stageInfo.details.toString}") debug(s"[SPARK LISTENER DEBUGS] Stage completion time : ${stageCompleted.stageInfo.completionTime}") debug(s"[SPARK LISTENER DEBUGS] Stage details : ${stageCompleted.stageInfo.rddInfos.toString()}") } override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { debug(s"[SPARK LISTENER DEBUGS] Stage properties : ${stageSubmitted.properties}") debug(s"[SPARK LISTENER DEBUGS] Stage rddInfos : ${stageSubmitted.stageInfo.rddInfos.toString()}") debug(s"[SPARK LISTENER DEBUGS] Stage submission Time : ${stageSubmitted.stageInfo.submissionTime}") debug(s"[SPARK LISTENER DEBUGS] Stage submission details : ${stageSubmitted.stageInfo.details.toString()}") } override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { debug(s"[SPARK LISTENER DEBUGS] task ended reason : ${taskEnd.reason}") debug(s"[SPARK LISTENER DEBUGS] task type : ${taskEnd.taskType}") debug(s"[SPARK LISTENER DEBUGS] task Metrics : ${taskEnd.taskMetrics}") debug(s"[SPARK LISTENER DEBUGS] task Info : ${taskEnd.taskInfo}") debug(s"[SPARK LISTENER DEBUGS] task stage Id : ${taskEnd.stageId}") debug(s"[SPARK LISTENER DEBUGS] task stage attempt Id : ${taskEnd.stageAttemptId}") debug(s"[SPARK LISTENER DEBUGS] task ended reason : ${taskEnd.reason}") } override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { debug(s"[SPARK LISTENER DEBUGS] 
stage Attempt id : ${taskStart.stageAttemptId}") debug(s"[SPARK LISTENER DEBUGS] stage Id : ${taskStart.stageId}") debug(s"[SPARK LISTENER DEBUGS] task Info : ${taskStart.taskInfo}") } override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = { debug(s"[SPARK LISTENER DEBUGS] the unpersist RDD id : ${unpersistRDD.rddId}") } } and then usually check for logs. P.S :I am running it as a jar. Thanks, On Thu, Aug 4, 2016 at 6:46 AM, Ted Yu <yuzhih...@gmail.com> wrote: > Have you looked at: > > https://spark.apache.org/docs/latest/running-on-yarn.html#debugging-your-application > > If you use Mesos: > > https://spark.apache.org/docs/latest/running-on-mesos.html#troubleshooting-and-debugging > > On Wed, Aug 3, 2016 at 6:13 PM, glen <cng...@126.com> wrote: > >> Any tool like gdb? Which support break point at some line or some >> function? >> >> >> >> >> >> >