[ https://issues.apache.org/jira/browse/HIVE-28441?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Raghav Aggarwal updated HIVE-28441: ----------------------------------- Description: Steps to reproduce (tested on hive4 docker image): {code:java} set hive.orc.splits.include.file.footer=true; set hive.fetch.task.conversion=none; CREATE TABLE tbl (id INT, name STRING) STORED AS ORC; INSERT INTO tbl VALUES (1, 'abc'); SELECT * FROM tbl;{code} Stacktrace: {code:java} at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:348) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:111) at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:58) at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:75) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750) Caused by: java.lang.NullPointerException at org.apache.orc.impl.BufferChunk.<init>(BufferChunk.java:41) at org.apache.orc.impl.OrcTail.<init>(OrcTail.java:56) at org.apache.orc.impl.OrcTail.<init>(OrcTail.java:50) at org.apache.hadoop.hive.ql.io.orc.OrcSplit.readFields(OrcSplit.java:230) at org.apache.hadoop.hive.ql.io.HiveInputFormat$HiveInputSplit.readFields(HiveInputFormat.java:223) at org.apache.hadoop.mapred.split.TezGroupedSplit.readWrappedSplit(TezGroupedSplit.java:161) at org.apache.hadoop.mapred.split.TezGroupedSplit.readFields(TezGroupedSplit.java:132) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:71) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:42) at org.apache.tez.mapreduce.hadoop.MRInputHelpers.createOldFormatSplitFromUserPayload(MRInputHelpers.java:176) at org.apache.tez.mapreduce.lib.MRInputUtils.getOldSplitDetailsFromEvent(MRInputUtils.java:132) at org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:693) at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:664) at org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150) at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:520) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:173) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:292) ... 16 more {code} was: Steps to reproduce (tests on hive4 docker image): {code:java} set hive.orc.splits.include.file.footer=true; set hive.fetch.task.conversion=none; CREATE TABLE tbl (id INT, name STRING) STORED AS ORC; INSERT INTO tbl VALUES (1, 'abc'); SELECT * FROM tbl;{code} Stacktrace: {code:java} at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:348) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:111) at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:58) at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:75) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750) Caused by: java.lang.NullPointerException at org.apache.orc.impl.BufferChunk.<init>(BufferChunk.java:41) at org.apache.orc.impl.OrcTail.<init>(OrcTail.java:56) at org.apache.orc.impl.OrcTail.<init>(OrcTail.java:50) at org.apache.hadoop.hive.ql.io.orc.OrcSplit.readFields(OrcSplit.java:230) at org.apache.hadoop.hive.ql.io.HiveInputFormat$HiveInputSplit.readFields(HiveInputFormat.java:223) at org.apache.hadoop.mapred.split.TezGroupedSplit.readWrappedSplit(TezGroupedSplit.java:161) at org.apache.hadoop.mapred.split.TezGroupedSplit.readFields(TezGroupedSplit.java:132) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:71) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:42) at org.apache.tez.mapreduce.hadoop.MRInputHelpers.createOldFormatSplitFromUserPayload(MRInputHelpers.java:176) at org.apache.tez.mapreduce.lib.MRInputUtils.getOldSplitDetailsFromEvent(MRInputUtils.java:132) at org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:693) at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:664) at org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150) at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:520) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:173) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:292) ... 16 more {code} > NPE in ORC tables when hive.orc.splits.include.file.footer is enabled > --------------------------------------------------------------------- > > Key: HIVE-28441 > URL: https://issues.apache.org/jira/browse/HIVE-28441 > Project: Hive > Issue Type: Bug > Components: ORC > Affects Versions: 4.0.0 > Reporter: Raghav Aggarwal > Assignee: Raghav Aggarwal > Priority: Major > > Steps to reproduce (tested on hive4 docker image): > {code:java} > set hive.orc.splits.include.file.footer=true; > set hive.fetch.task.conversion=none; > CREATE TABLE tbl (id INT, name STRING) STORED AS ORC; > INSERT INTO tbl VALUES (1, 'abc'); > SELECT * FROM tbl;{code} > Stacktrace: > {code:java} > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:348) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at > com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:111) > at > com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:58) > at > com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:75) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:750) > Caused by: java.lang.NullPointerException > at org.apache.orc.impl.BufferChunk.<init>(BufferChunk.java:41) > at org.apache.orc.impl.OrcTail.<init>(OrcTail.java:56) > at org.apache.orc.impl.OrcTail.<init>(OrcTail.java:50) > at org.apache.hadoop.hive.ql.io.orc.OrcSplit.readFields(OrcSplit.java:230) > at > org.apache.hadoop.hive.ql.io.HiveInputFormat$HiveInputSplit.readFields(HiveInputFormat.java:223) > at > org.apache.hadoop.mapred.split.TezGroupedSplit.readWrappedSplit(TezGroupedSplit.java:161) > at > org.apache.hadoop.mapred.split.TezGroupedSplit.readFields(TezGroupedSplit.java:132) > at > org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:71) > at > org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:42) > at > org.apache.tez.mapreduce.hadoop.MRInputHelpers.createOldFormatSplitFromUserPayload(MRInputHelpers.java:176) > at > org.apache.tez.mapreduce.lib.MRInputUtils.getOldSplitDetailsFromEvent(MRInputUtils.java:132) > at > org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:693) > at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:664) > at > org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150) > at > org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:520) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:173) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:292) > ... 16 more {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)