Re: how to debug spark app?

2016-08-04 Thread Ben Teeuwen
Related question: what are good profiling tools beyond watching the application master alongside the running code?
Are there things that can be logged during the run? If I have, say, two ways of
accomplishing the same thing, and I want to compare the time/memory/general
resource consumption of both, what is the best way of doing that? Something like
what tic, toc does in Matlab, or profile on / profile report.
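For a rough tic/toc equivalent, a minimal timing helper like the sketch below works. (This is only a sketch with System.nanoTime, not a real profiler; the time helper and the rddA/rddB names are illustrative. Since Spark evaluates lazily, you have to wrap an action such as count() so the work actually runs inside the timed block.)

def time[T](label: String)(body: => T): T = {
  val start = System.nanoTime()
  val result = body  // evaluates the block; use an action (e.g. count) to force Spark to do the work
  println(s"$label took ${(System.nanoTime() - start) / 1e6} ms")
  result
}

// Compare two ways of computing the same thing:
// time("variant A") { rddA.count() }
// time("variant B") { rddB.count() }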

> On Aug 4, 2016, at 3:19 AM, Sumit Khanna  wrote:
> 
> [quoted listener code trimmed; see Sumit Khanna's full message below]

Re: how to debug spark app?

2016-08-03 Thread Sumit Khanna
I'm not really sure of the best practices on this, but I either consult the
Spark UI at localhost:4040/jobs/ etc.,
or, better, use a custom SparkListener like this:

import org.apache.spark.scheduler._

// Note: `debug(...)` below stands for whatever logging call you use
// (e.g. an slf4j/log4j logger's debug method); it is not a Spark API.
class CustomSparkListener extends SparkListener {
  override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = {
    debug(s"application ended at time : ${applicationEnd.time}")
  }
  override def onApplicationStart(applicationStart: SparkListenerApplicationStart): Unit = {
    debug(s"[SPARK LISTENER DEBUGS] application start app attempt id : ${applicationStart.appAttemptId}")
    debug(s"[SPARK LISTENER DEBUGS] application start app id : ${applicationStart.appId}")
    debug(s"[SPARK LISTENER DEBUGS] application start app name : ${applicationStart.appName}")
    debug(s"[SPARK LISTENER DEBUGS] application start driver logs : ${applicationStart.driverLogs}")
    debug(s"[SPARK LISTENER DEBUGS] application start spark user : ${applicationStart.sparkUser}")
    debug(s"[SPARK LISTENER DEBUGS] application start time : ${applicationStart.time}")
  }
  override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = {
    debug(s"[SPARK LISTENER DEBUGS] executor added id : ${executorAdded.executorId}")
    debug(s"[SPARK LISTENER DEBUGS] executor added info : ${executorAdded.executorInfo}")
    debug(s"[SPARK LISTENER DEBUGS] executor added at time : ${executorAdded.time}")
  }
  override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = {
    debug(s"[SPARK LISTENER DEBUGS] the executor removed id : ${executorRemoved.executorId}")
    debug(s"[SPARK LISTENER DEBUGS] the executor removed reason : ${executorRemoved.reason}")
    debug(s"[SPARK LISTENER DEBUGS] the executor removed at time : ${executorRemoved.time}")
  }

  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    debug(s"[SPARK LISTENER DEBUGS] job end id : ${jobEnd.jobId}")
    debug(s"[SPARK LISTENER DEBUGS] job end result : ${jobEnd.jobResult}")
    debug(s"[SPARK LISTENER DEBUGS] job end time : ${jobEnd.time}")
  }
  override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
    debug(s"[SPARK LISTENER DEBUGS] job started with properties ${jobStart.properties}")
    debug(s"[SPARK LISTENER DEBUGS] job started with time ${jobStart.time}")
    debug(s"[SPARK LISTENER DEBUGS] job started with job id ${jobStart.jobId}")
    debug(s"[SPARK LISTENER DEBUGS] job started with stage ids ${jobStart.stageIds}")
    debug(s"[SPARK LISTENER DEBUGS] job started with ${jobStart.stageInfos.size} stages : $jobStart")
  }

  override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
    debug(s"[SPARK LISTENER DEBUGS] stage ${stageCompleted.stageInfo.stageId} completed with ${stageCompleted.stageInfo.numTasks} tasks.")
    debug(s"[SPARK LISTENER DEBUGS] stage details : ${stageCompleted.stageInfo.details}")
    debug(s"[SPARK LISTENER DEBUGS] stage completion time : ${stageCompleted.stageInfo.completionTime}")
    debug(s"[SPARK LISTENER DEBUGS] stage rddInfos : ${stageCompleted.stageInfo.rddInfos}")
  }
  override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = {
    debug(s"[SPARK LISTENER DEBUGS] stage properties : ${stageSubmitted.properties}")
    debug(s"[SPARK LISTENER DEBUGS] stage rddInfos : ${stageSubmitted.stageInfo.rddInfos}")
    debug(s"[SPARK LISTENER DEBUGS] stage submission time : ${stageSubmitted.stageInfo.submissionTime}")
    debug(s"[SPARK LISTENER DEBUGS] stage submission details : ${stageSubmitted.stageInfo.details}")
  }
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    debug(s"[SPARK LISTENER DEBUGS] task ended reason : ${taskEnd.reason}")
    debug(s"[SPARK LISTENER DEBUGS] task type : ${taskEnd.taskType}")
    debug(s"[SPARK LISTENER DEBUGS] task metrics : ${taskEnd.taskMetrics}")
    debug(s"[SPARK LISTENER DEBUGS] task info : ${taskEnd.taskInfo}")
    debug(s"[SPARK LISTENER DEBUGS] task stage id : ${taskEnd.stageId}")
    debug(s"[SPARK LISTENER DEBUGS] task stage attempt id : ${taskEnd.stageAttemptId}")
  }
  override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
    debug(s"[SPARK LISTENER DEBUGS] stage attempt id : ${taskStart.stageAttemptId}")
    debug(s"[SPARK LISTENER DEBUGS] stage id : ${taskStart.stageId}")
    debug(s"[SPARK LISTENER DEBUGS] task info : ${taskStart.taskInfo}")
  }
  override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = {
    debug(s"[SPARK LISTENER DEBUGS] the unpersist RDD id : ${unpersistRDD.rddId}")
  }
}

// Define the listener first, then register it on the SparkContext.
val customSparkListener: CustomSparkListener = new CustomSparkListener()
sc.addSparkListener(customSparkListener)

and then usually check the logs. P.S.: I am running it as a jar.
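(For what it's worth, a listener can also be wired in without touching the driver code, via the spark.extraListeners configuration; Spark instantiates the named classes itself, so the listener needs a zero-argument constructor or one taking a SparkConf. The package, class, and jar names below are just placeholders:

spark-submit \
  --conf spark.extraListeners=com.example.CustomSparkListener \
  --class com.example.MyApp myapp.jar
)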

Thanks,


On Thu, Aug 4, 2016 at 6:46 AM, Ted Yu  wrote:

> [quoted text trimmed; see Ted Yu's message below]

Re: how to debug spark app?

2016-08-03 Thread Ted Yu
Have you looked at:
https://spark.apache.org/docs/latest/running-on-yarn.html#debugging-your-application

If you use Mesos:
https://spark.apache.org/docs/latest/running-on-mesos.html#troubleshooting-and-debugging
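On YARN with log aggregation enabled, the first page above also covers pulling all container logs from the command line after the application finishes:

yarn logs -applicationId <app ID>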

On Wed, Aug 3, 2016 at 6:13 PM, glen  wrote:

> Any tool like gdb, which supports breakpoints at some line or some function?


how to debug spark app?

2016-08-03 Thread glen
Any tool like gdb, which supports breakpoints at some line or some function?
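(A common gdb-style approach, sketched here with an arbitrary port and placeholder class/jar names, is to start the driver JVM with the standard JDWP debug agent and attach an IDE debugger, which gives line and function breakpoints. suspend=y makes the driver wait until the debugger connects:

spark-submit \
  --conf "spark.driver.extraJavaOptions=-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005" \
  --class com.example.MyApp myapp.jar

# then attach your IDE's remote/JDWP debugger to port 5005
)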