[jira] [Updated] (HIVE-18030) HCatalog can't be used with Pig on Spark
[ https://issues.apache.org/jira/browse/HIVE-18030?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Adam Szita updated HIVE-18030: -- Description: When using Pig on Spark in cluster mode, all queries containing HCatalog access are failing: {code} 2017-11-03 12:39:19,268 [dispatcher-event-loop-19] INFO org.apache.spark.storage.BlockManagerInfo - Added broadcast_6_piece0 in memory on <>:<> (size: 83.0 KB, free: 408.5 MB) 2017-11-03 12:39:19,277 [task-result-getter-0] WARN org.apache.spark.scheduler.TaskSetManager - Lost task 0.0 in stage 0.0 (TID 0, <>, executor 2): java.lang.NullPointerException at org.apache.hadoop.security.Credentials.addAll(Credentials.java:401) at org.apache.hadoop.security.Credentials.addAll(Credentials.java:388) at org.apache.hive.hcatalog.pig.HCatLoader.setLocation(HCatLoader.java:128) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.mergeSplitSpecificConf(PigInputFormat.java:147) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat$RecordReaderFactory.<init>(PigInputFormat.java:115) at org.apache.pig.backend.hadoop.executionengine.spark.running.PigInputFormatSpark$SparkRecordReaderFactory.<init>(PigInputFormatSpark.java:126) at org.apache.pig.backend.hadoop.executionengine.spark.running.PigInputFormatSpark.createRecordReader(PigInputFormatSpark.java:70) at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:180) at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:179) at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:134) at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:69) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) at org.apache.spark.scheduler.Task.run(Task.scala:108) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) {code} was: When using Pig on Spark in cluster mode, all queries containing HCatalog access are failing: {code} 2017-11-03 12:39:19,268 [dispatcher-event-loop-19] INFO org.apache.spark.storage.BlockManagerInfo - Added broadcast_6_piece0 in memory on <>:<> (size: 83.0 KB, free: 408.5 MB) 2017-11-03 12:39:19,277 [task-result-getter-0] WARN org.apache.spark.scheduler.TaskSetManager - Lost task 0.0 in stage 0.0 (TID 0, vc0918.halxg.cloudera.com, executor 2): java.lang.NullPointerException at org.apache.hadoop.security.Credentials.addAll(Credentials.java:401) at org.apache.hadoop.security.Credentials.addAll(Credentials.java:388) at org.apache.hive.hcatalog.pig.HCatLoader.setLocation(HCatLoader.java:128) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.mergeSplitSpecificConf(PigInputFormat.java:147) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat$RecordReaderFactory.<init>(PigInputFormat.java:115) at org.apache.pig.backend.hadoop.executionengine.spark.running.PigInputFormatSpark$SparkRecordReaderFactory.<init>(PigInputFormatSpark.java:126) at 
org.apache.pig.backend.hadoop.executionengine.spark.running.PigInputFormatSpark.createRecordReader(PigInputFormatSpark.java:70) at org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:180) at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:179) at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:134) at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:69) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at or
[jira] [Updated] (HIVE-18030) HCatalog can't be used with Pig on Spark
[ https://issues.apache.org/jira/browse/HIVE-18030?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Adam Szita updated HIVE-18030: -- Attachment: HIVE-18030.0.patch > HCatalog can't be used with Pig on Spark > > > Key: HIVE-18030 > URL: https://issues.apache.org/jira/browse/HIVE-18030 > Project: Hive > Issue Type: Bug > Components: HCatalog >Reporter: Adam Szita >Assignee: Adam Szita > Attachments: HIVE-18030.0.patch > > > When using Pig on Spark in cluster mode, all queries containing HCatalog > access are failing: > {code} > 2017-11-03 12:39:19,268 [dispatcher-event-loop-19] INFO > org.apache.spark.storage.BlockManagerInfo - Added broadcast_6_piece0 in > memory on <>:<> (size: 83.0 KB, free: 408.5 > MB) > 2017-11-03 12:39:19,277 [task-result-getter-0] WARN > org.apache.spark.scheduler.TaskSetManager - Lost task 0.0 in stage 0.0 (TID > 0, <>, executor 2): java.lang.NullPointerException > at org.apache.hadoop.security.Credentials.addAll(Credentials.java:401) > at org.apache.hadoop.security.Credentials.addAll(Credentials.java:388) > at > org.apache.hive.hcatalog.pig.HCatLoader.setLocation(HCatLoader.java:128) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.mergeSplitSpecificConf(PigInputFormat.java:147) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat$RecordReaderFactory.(PigInputFormat.java:115) > at > org.apache.pig.backend.hadoop.executionengine.spark.running.PigInputFormatSpark$SparkRecordReaderFactory.(PigInputFormatSpark.java:126) > at > org.apache.pig.backend.hadoop.executionengine.spark.running.PigInputFormatSpark.createRecordReader(PigInputFormatSpark.java:70) > at > org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:180) > at > org.apache.spark.rdd.NewHadoopRDD$$anon$1.(NewHadoopRDD.scala:179) > at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:134) > at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:69) > at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > at org.apache.spark.scheduler.Task.run(Task.scala:108) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Updated] (HIVE-18030) HCatalog can't be used with Pig on Spark
[ https://issues.apache.org/jira/browse/HIVE-18030?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Adam Szita updated HIVE-18030: -- Status: Patch Available (was: Open) > HCatalog can't be used with Pig on Spark > > > Key: HIVE-18030 > URL: https://issues.apache.org/jira/browse/HIVE-18030 > Project: Hive > Issue Type: Bug > Components: HCatalog >Reporter: Adam Szita >Assignee: Adam Szita > Attachments: HIVE-18030.0.patch > > > When using Pig on Spark in cluster mode, all queries containing HCatalog > access are failing: > {code} > 2017-11-03 12:39:19,268 [dispatcher-event-loop-19] INFO > org.apache.spark.storage.BlockManagerInfo - Added broadcast_6_piece0 in > memory on <>:<> (size: 83.0 KB, free: 408.5 > MB) > 2017-11-03 12:39:19,277 [task-result-getter-0] WARN > org.apache.spark.scheduler.TaskSetManager - Lost task 0.0 in stage 0.0 (TID > 0, <>, executor 2): java.lang.NullPointerException > at org.apache.hadoop.security.Credentials.addAll(Credentials.java:401) > at org.apache.hadoop.security.Credentials.addAll(Credentials.java:388) > at > org.apache.hive.hcatalog.pig.HCatLoader.setLocation(HCatLoader.java:128) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.mergeSplitSpecificConf(PigInputFormat.java:147) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat$RecordReaderFactory.(PigInputFormat.java:115) > at > org.apache.pig.backend.hadoop.executionengine.spark.running.PigInputFormatSpark$SparkRecordReaderFactory.(PigInputFormatSpark.java:126) > at > org.apache.pig.backend.hadoop.executionengine.spark.running.PigInputFormatSpark.createRecordReader(PigInputFormatSpark.java:70) > at > org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:180) > at > org.apache.spark.rdd.NewHadoopRDD$$anon$1.(NewHadoopRDD.scala:179) > at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:134) > at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:69) > at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > at org.apache.spark.scheduler.Task.run(Task.scala:108) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)
[jira] [Updated] (HIVE-18030) HCatalog can't be used with Pig on Spark
[ https://issues.apache.org/jira/browse/HIVE-18030?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Adam Szita updated HIVE-18030: -- Resolution: Won't Fix Status: Resolved (was: Patch Available) > HCatalog can't be used with Pig on Spark > > > Key: HIVE-18030 > URL: https://issues.apache.org/jira/browse/HIVE-18030 > Project: Hive > Issue Type: Bug > Components: HCatalog >Reporter: Adam Szita >Assignee: Adam Szita > Attachments: HIVE-18030.0.patch > > > When using Pig on Spark in cluster mode, all queries containing HCatalog > access are failing: > {code} > 2017-11-03 12:39:19,268 [dispatcher-event-loop-19] INFO > org.apache.spark.storage.BlockManagerInfo - Added broadcast_6_piece0 in > memory on <>:<> (size: 83.0 KB, free: 408.5 > MB) > 2017-11-03 12:39:19,277 [task-result-getter-0] WARN > org.apache.spark.scheduler.TaskSetManager - Lost task 0.0 in stage 0.0 (TID > 0, <>, executor 2): java.lang.NullPointerException > at org.apache.hadoop.security.Credentials.addAll(Credentials.java:401) > at org.apache.hadoop.security.Credentials.addAll(Credentials.java:388) > at > org.apache.hive.hcatalog.pig.HCatLoader.setLocation(HCatLoader.java:128) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.mergeSplitSpecificConf(PigInputFormat.java:147) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat$RecordReaderFactory.(PigInputFormat.java:115) > at > org.apache.pig.backend.hadoop.executionengine.spark.running.PigInputFormatSpark$SparkRecordReaderFactory.(PigInputFormatSpark.java:126) > at > org.apache.pig.backend.hadoop.executionengine.spark.running.PigInputFormatSpark.createRecordReader(PigInputFormatSpark.java:70) > at > org.apache.spark.rdd.NewHadoopRDD$$anon$1.liftedTree1$1(NewHadoopRDD.scala:180) > at > org.apache.spark.rdd.NewHadoopRDD$$anon$1.(NewHadoopRDD.scala:179) > at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:134) > at 
org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:69) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > at org.apache.spark.scheduler.Task.run(Task.scala:108) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)