Related question: what are good profiling tools, beyond watching the application master alongside the running code?
Are there things that can be logged during the run? If I have, say, two ways of
accomplishing the same thing, and I want to compare the time, memory, and general
resource usage of both, what is the best way of doing that? Something like what
tic/toc does in MATLAB, or profile on / profile report.
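For a quick tic/toc-style wall-clock comparison, a small helper is usually enough. This is a minimal sketch in plain Scala; rddA and rddB are hypothetical stand-ins for the two implementations, and the count() actions are there because Spark transformations are lazy, so evaluation has to be forced inside the timed block:

def time[T](label: String)(block: => T): T = {
  val start = System.nanoTime()
  val result = block  // the work being measured runs here
  println(s"$label took ${(System.nanoTime() - start) / 1e6} ms")
  result
}

// Hypothetical usage: time two implementations of the same computation.
val nA = time("approach A") { rddA.count() }
val nB = time("approach B") { rddB.count() }

For memory and per-task numbers, the listener and web UI approaches below go further than wall-clock timing.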

> On Aug 4, 2016, at 3:19 AM, Sumit Khanna <sumit.kha...@askme.in> wrote:
> 
> I'm not really sure of the best practices on this, but I either consult the web UI
> at localhost:4040/jobs/ etc.,
> or, better, use this:
> 
> import org.apache.spark.scheduler._
> 
> // debug() stands in for whatever logger the application uses;
> // println is enough for a quick check.
> class CustomSparkListener extends SparkListener {
>   private def debug(msg: String): Unit = println(msg)
> 
>   override def onApplicationStart(applicationStart: SparkListenerApplicationStart): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] application start app attempt id : ${applicationStart.appAttemptId}")
>     debug(s"[SPARK LISTENER DEBUGS] application start app id : ${applicationStart.appId}")
>     debug(s"[SPARK LISTENER DEBUGS] application start app name : ${applicationStart.appName}")
>     debug(s"[SPARK LISTENER DEBUGS] application start driver logs : ${applicationStart.driverLogs}")
>     debug(s"[SPARK LISTENER DEBUGS] application start spark user : ${applicationStart.sparkUser}")
>     debug(s"[SPARK LISTENER DEBUGS] application start time : ${applicationStart.time}")
>   }
> 
>   override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] application ended at time : ${applicationEnd.time}")
>   }
> 
>   override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] executor added id : ${executorAdded.executorId}")
>     debug(s"[SPARK LISTENER DEBUGS] executor added info : ${executorAdded.executorInfo}")
>     debug(s"[SPARK LISTENER DEBUGS] executor added at time : ${executorAdded.time}")
>   }
> 
>   override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] executor removed id : ${executorRemoved.executorId}")
>     debug(s"[SPARK LISTENER DEBUGS] executor removed reason : ${executorRemoved.reason}")
>     debug(s"[SPARK LISTENER DEBUGS] executor removed at time : ${executorRemoved.time}")
>   }
> 
>   override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] job started with properties ${jobStart.properties}")
>     debug(s"[SPARK LISTENER DEBUGS] job started with time ${jobStart.time}")
>     debug(s"[SPARK LISTENER DEBUGS] job started with job id ${jobStart.jobId}")
>     debug(s"[SPARK LISTENER DEBUGS] job started with stage ids ${jobStart.stageIds}")
>     debug(s"[SPARK LISTENER DEBUGS] job started with ${jobStart.stageInfos.size} stages : $jobStart")
>   }
> 
>   override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] job end id : ${jobEnd.jobId}")
>     debug(s"[SPARK LISTENER DEBUGS] job end result : ${jobEnd.jobResult}")
>     debug(s"[SPARK LISTENER DEBUGS] job end time : ${jobEnd.time}")
>   }
> 
>   override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] stage properties : ${stageSubmitted.properties}")
>     debug(s"[SPARK LISTENER DEBUGS] stage rddInfos : ${stageSubmitted.stageInfo.rddInfos}")
>     debug(s"[SPARK LISTENER DEBUGS] stage submission time : ${stageSubmitted.stageInfo.submissionTime}")
>     debug(s"[SPARK LISTENER DEBUGS] stage submission details : ${stageSubmitted.stageInfo.details}")
>   }
> 
>   override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] stage ${stageCompleted.stageInfo.stageId} completed with ${stageCompleted.stageInfo.numTasks} tasks.")
>     debug(s"[SPARK LISTENER DEBUGS] stage details : ${stageCompleted.stageInfo.details}")
>     debug(s"[SPARK LISTENER DEBUGS] stage completion time : ${stageCompleted.stageInfo.completionTime}")
>     debug(s"[SPARK LISTENER DEBUGS] stage rddInfos : ${stageCompleted.stageInfo.rddInfos}")
>   }
> 
>   override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] stage attempt id : ${taskStart.stageAttemptId}")
>     debug(s"[SPARK LISTENER DEBUGS] stage id : ${taskStart.stageId}")
>     debug(s"[SPARK LISTENER DEBUGS] task info : ${taskStart.taskInfo}")
>   }
> 
>   override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] task ended reason : ${taskEnd.reason}")
>     debug(s"[SPARK LISTENER DEBUGS] task type : ${taskEnd.taskType}")
>     debug(s"[SPARK LISTENER DEBUGS] task metrics : ${taskEnd.taskMetrics}")
>     debug(s"[SPARK LISTENER DEBUGS] task info : ${taskEnd.taskInfo}")
>     debug(s"[SPARK LISTENER DEBUGS] task stage id : ${taskEnd.stageId}")
>     debug(s"[SPARK LISTENER DEBUGS] task stage attempt id : ${taskEnd.stageAttemptId}")
>   }
> 
>   override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = {
>     debug(s"[SPARK LISTENER DEBUGS] unpersisted RDD id : ${unpersistRDD.rddId}")
>   }
> }
> 
> // Register the listener on the SparkContext before the jobs run:
> val customSparkListener = new CustomSparkListener()
> sc.addSparkListener(customSparkListener)
> and then I usually check the logs. P.S.: I am running it as a jar.
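> Since it runs as a jar, an alternative to calling sc.addSparkListener in code is
> Spark's spark.extraListeners setting, which instantiates listener classes (they
> need a zero-argument constructor) when the context starts. A sketch; the package,
> main class, and jar names are hypothetical:
> 
> spark-submit \
>   --conf spark.extraListeners=com.example.CustomSparkListener \
>   --class com.example.MyApp \
>   my-app.jar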
> Thanks,
> 
> On Thu, Aug 4, 2016 at 6:46 AM, Ted Yu <yuzhih...@gmail.com> wrote:
> Have you looked at:
> https://spark.apache.org/docs/latest/running-on-yarn.html#debugging-your-application
> 
> If you use Mesos:
> https://spark.apache.org/docs/latest/running-on-mesos.html#troubleshooting-and-debugging
> 
> On Wed, Aug 3, 2016 at 6:13 PM, glen <cng...@126.com> wrote:
> Is there any tool like gdb that supports setting a breakpoint at a particular line or function?
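> Not gdb itself, but the driver is an ordinary JVM process, so breakpoint-style
> debugging works through standard JDWP remote debugging: start the driver with the
> debug agent, attach an IDE debugger, and set breakpoints by line or function. A
> sketch for the driver only (executors would each need their own agent and port via
> spark.executor.extraJavaOptions); the main class and jar names are hypothetical:
> 
> # suspend=y makes the driver wait until a debugger attaches on port 5005
> spark-submit \
>   --conf "spark.driver.extraJavaOptions=-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005" \
>   --class com.example.MyApp \
>   my-app.jar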
