[ https://issues.apache.org/jira/browse/HIVE-26318?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
László Pintér reassigned HIVE-26318: ------------------------------------ > Select on migrated iceberg table fails with NPE > ----------------------------------------------- > > Key: HIVE-26318 > URL: https://issues.apache.org/jira/browse/HIVE-26318 > Project: Hive > Issue Type: Bug > Reporter: László Pintér > Assignee: László Pintér > Priority: Major > > Enable vectorization: > {code:sql} > set hive.vectorized.execution.enabled=true; > {code} > Create a Hive table with the following schema: > {code:sql} > CREATE EXTERNAL TABLE tbl_complex ( > a int, > arrayofprimitives array<string>, > arrayofarrays array<array<string>>, > arrayofmaps array<map<string, string>>, > arrayofstructs array<struct<something:string, someone:string, > somewhere:string>>, > mapofprimitives map<string, string>, > mapofarrays map<string, array<string>>, > mapofmaps map<string, map<string, string>>, > mapofstructs map<string, struct<something:string, someone:string, > somewhere:string>>, > structofprimitives struct<something:string, somewhere:string>, > structofarrays struct<names:array<string>, birthdays:array<string>>, > structofmaps struct<map1:map<string, string>, map2:map<string, string>> > ) STORED AS PARQUET; > {code} > Insert some data: > {code:sql} > INSERT INTO tbl_complex VALUES ( > 1, > array('a','b','c'), > array(array('a'), array('b', 'c')), > array(map('a','b'), map('e','f')), > array(named_struct('something', 'a', 'someone', 'b', 'somewhere', > 'c'), > named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')), > map('a', 'b'), > map('a', array('b','c')), > map('a', map('b','c')), > map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', > 'd')), > named_struct('something', 'a', 'somewhere', 'b'), > named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', > 'e')), > named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')) > ); > {code} > Migrate the table to Iceberg: > {code:sql} > ALTER TABLE tbl_complex SET TBLPROPERTIES > 
('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'); > {code} > Run a simple query: > {code:sql} > SELECT * FROM tbl_complex ORDER BY a; > {code} > It will fail with: > {code:txt} > TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : > attempt_1655110825475_0001_3_00_000000_1:java.lang.RuntimeException: > java.lang.RuntimeException: java.io.IOException: > java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:348) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at > com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108) > at > com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41) > at > com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.lang.RuntimeException: 
java.io.IOException: > java.lang.NullPointerException > at > org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:200) > at > org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:139) > at > org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:105) > at > org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:164) > at > org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83) > at > org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:706) > at > org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:665) > at > org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150) > at > org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:520) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:173) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:292) > ... 16 more > Caused by: java.io.IOException: java.lang.NullPointerException > at > org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97) > at > org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57) > at > org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:458) > at > org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:197) > ... 
27 more > Caused by: java.lang.NullPointerException > at > org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor.primitive(ParquetSchemaFieldNameVisitor.java:101) > at > org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor.primitive(ParquetSchemaFieldNameVisitor.java:38) > at > org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:52) > at > org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitField(TypeWithSchemaVisitor.java:155) > at > org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:83) > at > org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitField(TypeWithSchemaVisitor.java:155) > at > org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitFields(TypeWithSchemaVisitor.java:169) > at > org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:47) > at > org.apache.iceberg.mr.hive.vector.HiveVectorizedReader.parquetRecordReader(HiveVectorizedReader.java:203) > at > org.apache.iceberg.mr.hive.vector.HiveVectorizedReader.reader(HiveVectorizedReader.java:138) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.iceberg.common.DynMethods$UnboundMethod.invokeChecked(DynMethods.java:65) > at > org.apache.iceberg.common.DynMethods$UnboundMethod.invoke(DynMethods.java:77) > at > org.apache.iceberg.common.DynMethods$StaticMethod.invoke(DynMethods.java:196) > at > org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.newParquetIterable(IcebergInputFormat.java:417) > at > org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.openTask(IcebergInputFormat.java:336) > at > org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.open(IcebergInputFormat.java:353) > 
at > org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.nextTask(IcebergInputFormat.java:263) > at > org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.initialize(IcebergInputFormat.java:259) > at > org.apache.iceberg.mr.mapred.AbstractMapredIcebergRecordReader.<init>(AbstractMapredIcebergRecordReader.java:40) > at > org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.<init>(HiveIcebergVectorizedRecordReader.java:41) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:423) > at > org.apache.iceberg.common.DynConstructors$Ctor.newInstanceChecked(DynConstructors.java:60) > at > org.apache.iceberg.common.DynConstructors$Ctor.newInstance(DynConstructors.java:73) > at > org.apache.iceberg.mr.hive.HiveIcebergInputFormat.getRecordReader(HiveIcebergInputFormat.java:163) > at > org.apache.hadoop.hive.ql.io.RecordReaderWrapper.create(RecordReaderWrapper.java:72) > at > org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:449) > ... 28 more > {code} -- This message was sent by Atlassian Jira (v8.20.7#820007)