[jira] [Updated] (KYLIN-3537) Use Spark to build Cube on Yarn faild at Setp8 on HDP3.0
[ https://issues.apache.org/jira/browse/KYLIN-3537?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Alexander updated KYLIN-3537: - Attachment: KYLIN-3537.master.001.patch > Use Spark to build Cube on Yarn faild at Setp8 on HDP3.0 > > > Key: KYLIN-3537 > URL: https://issues.apache.org/jira/browse/KYLIN-3537 > Project: Kylin > Issue Type: Bug > Environment: HDP3.0 >Reporter: Lijun Cao >Assignee: Lijun Cao >Priority: Major > Attachments: KYLIN-3537.master.001.patch > > > *The log on Yarn:* > 18/09/05 03:34:00 INFO scheduler.DAGScheduler: Job 0 failed: > saveAsNewAPIHadoopDataset at SparkCubeHFile.java:227, took 13.734642 s > 18/09/05 03:34:00 ERROR yarn.ApplicationMaster: User class threw exception: > java.lang.RuntimeException: error execute > org.apache.kylin.storage.hbase.steps.SparkCubeHFile > java.lang.RuntimeException: error execute > org.apache.kylin.storage.hbase.steps.SparkCubeHFile > at > org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:42) > at org.apache.kylin.common.util.SparkEntry.main(SparkEntry.java:44) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:636) > Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: > Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in > stage 1.0 (TID 14, ignite02.com, executor 2): java.lang.NoClassDefFoundError: > org/apache/hadoop/hbase/io/hfile/HFileWriterImpl > at > org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.createRecordWriter(HFileOutputFormat2.java:209) > at > org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.getRecordWriter(HFileOutputFormat2.java:181) > at > org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1119) > at > org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1102) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) > at org.apache.spark.scheduler.Task.run(Task.scala:99) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:325) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802) > at scala.Option.foreach(Option.scala:257) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1928) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1941) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1961) > at > org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1158) > at > org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085) > at > org.apache.spark.rdd.PairRDDFunct
[jira] [Updated] (KYLIN-3537) Use Spark to build Cube on Yarn faild at Setp8 on HDP3.0
[ https://issues.apache.org/jira/browse/KYLIN-3537?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Shaofeng SHI updated KYLIN-3537: Fix Version/s: v2.6.1 > Use Spark to build Cube on Yarn faild at Setp8 on HDP3.0 > > > Key: KYLIN-3537 > URL: https://issues.apache.org/jira/browse/KYLIN-3537 > Project: Kylin > Issue Type: Bug > Environment: HDP3.0 >Reporter: Lijun Cao >Assignee: Lijun Cao >Priority: Major > Fix For: v2.6.1 > > Attachments: KYLIN-3537.master.001.patch > > > *The log on Yarn:* > 18/09/05 03:34:00 INFO scheduler.DAGScheduler: Job 0 failed: > saveAsNewAPIHadoopDataset at SparkCubeHFile.java:227, took 13.734642 s > 18/09/05 03:34:00 ERROR yarn.ApplicationMaster: User class threw exception: > java.lang.RuntimeException: error execute > org.apache.kylin.storage.hbase.steps.SparkCubeHFile > java.lang.RuntimeException: error execute > org.apache.kylin.storage.hbase.steps.SparkCubeHFile > at > org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:42) > at org.apache.kylin.common.util.SparkEntry.main(SparkEntry.java:44) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:636) > Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: > Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in > stage 1.0 (TID 14, ignite02.com, executor 2): java.lang.NoClassDefFoundError: > org/apache/hadoop/hbase/io/hfile/HFileWriterImpl > at > org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.createRecordWriter(HFileOutputFormat2.java:209) > at > org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.getRecordWriter(HFileOutputFormat2.java:181) > at > org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1119) > at > org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1102) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) > at org.apache.spark.scheduler.Task.run(Task.scala:99) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:325) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802) > at scala.Option.foreach(Option.scala:257) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1928) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1941) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1961) > at > org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1158) > at > org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085) > at > org.apache.sp
[jira] [Updated] (KYLIN-3537) Use Spark to build Cube on Yarn faild at Setp8 on HDP3.0
[ https://issues.apache.org/jira/browse/KYLIN-3537?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Lijun Cao updated KYLIN-3537: - Description: *The log on Yarn:* 18/09/05 03:34:00 INFO scheduler.DAGScheduler: Job 0 failed: saveAsNewAPIHadoopDataset at SparkCubeHFile.java:227, took 13.734642 s 18/09/05 03:34:00 ERROR yarn.ApplicationMaster: User class threw exception: java.lang.RuntimeException: error execute org.apache.kylin.storage.hbase.steps.SparkCubeHFile java.lang.RuntimeException: error execute org.apache.kylin.storage.hbase.steps.SparkCubeHFile at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:42) at org.apache.kylin.common.util.SparkEntry.main(SparkEntry.java:44) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:636) Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 14, ignite02.com, executor 2): java.lang.NoClassDefFoundError: org/apache/hadoop/hbase/io/hfile/HFileWriterImpl at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.createRecordWriter(HFileOutputFormat2.java:209) at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.getRecordWriter(HFileOutputFormat2.java:181) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1119) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1102) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) at org.apache.spark.scheduler.Task.run(Task.scala:99) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:325) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802) at scala.Option.foreach(Option.scala:257) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1928) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1941) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1961) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1158) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:362) at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1085) at org.apache.spark.api.java.JavaPairRDD.saveAsNewAPIHadoopDataset(JavaPairRDD.scala:831) at org.apache.kylin.storage.hba
[jira] [Updated] (KYLIN-3537) Use Spark to build Cube on Yarn faild at Setp8 on HDP3.0
[ https://issues.apache.org/jira/browse/KYLIN-3537?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Lijun Cao updated KYLIN-3537: - Description: *The log on Yarn:* 18/09/05 03:34:00 INFO scheduler.DAGScheduler: Job 0 failed: saveAsNewAPIHadoopDataset at SparkCubeHFile.java:227, took 13.734642 s 18/09/05 03:34:00 ERROR yarn.ApplicationMaster: User class threw exception: java.lang.RuntimeException: error execute org.apache.kylin.storage.hbase.steps.SparkCubeHFile java.lang.RuntimeException: error execute org.apache.kylin.storage.hbase.steps.SparkCubeHFile at org.apache.kylin.common.util.AbstractApplication.execute(AbstractApplication.java:42) at org.apache.kylin.common.util.SparkEntry.main(SparkEntry.java:44) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:636) Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 14, ignite02.com, executor 2): java.lang.NoClassDefFoundError: org/apache/hadoop/hbase/io/hfile/HFileWriterImpl at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.createRecordWriter(HFileOutputFormat2.java:209) at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.getRecordWriter(HFileOutputFormat2.java:181) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1119) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1102) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) at org.apache.spark.scheduler.Task.run(Task.scala:99) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:325) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802) at scala.Option.foreach(Option.scala:257) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1928) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1941) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1961) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1158) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1085) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:362) at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1085) at org.apache.spark.api.java.JavaPairRDD.saveAsNewAPIHadoopDataset(JavaPairRDD.scala:831) at org.apache.kylin.storage.hba