[ https://issues.apache.org/jira/browse/HIVE-26317?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
László Pintér reassigned HIVE-26317: ------------------------------------ > Select on iceberg table stored as parquet and vectorization enabled fails > with Runtime Exception > ------------------------------------------------------------------------------------------------ > > Key: HIVE-26317 > URL: https://issues.apache.org/jira/browse/HIVE-26317 > Project: Hive > Issue Type: Bug > Reporter: László Pintér > Assignee: László Pintér > Priority: Major > > Create an iceberg table having the following schema: > {code:sql} > CREATE EXTERNAL TABLE tbl_complex (a int, arrayofarrays > array<array<string>>) STORED BY ICEBERG STORED AS PARQUET > {code} > Insert some data into it: > {code:sql} > INSERT INTO tbl_complex VALUES (1, array(array('a'), array('b', 'c'))) > {code} > Turn on vectorization and run a simple query: > {code:sql} > set hive.vectorized.execution.enabled=true; > SELECT * FROM tbl_complex; > {code} > The query will fail with > {code:java} > Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:0, > Vertex vertex_1655109552551_0001_2_00 [Map 1] killed/failed due > to:OWN_TASK_FAILURE]DAG did not succeed due to VERTEX_FAILURE. > failedVertices:1 killedVertices:0 > at > org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:367) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:246) > at > org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:270) > at > org.apache.hive.service.cli.operation.Operation.run(Operation.java:281) > at > org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:545) > at > org.apache.hive.service.cli.session.HiveSessionImpl.executeStatement(HiveSessionImpl.java:513) > at > org.apache.hive.service.cli.CLIService.executeStatement(CLIService.java:271) > at > org.apache.iceberg.mr.hive.TestHiveShell.executeStatement(TestHiveShell.java:142) > ... 
16 more > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Vertex failed, > vertexName=Map 1, vertexId=vertex_1655109552551_0001_2_00, diagnostics=[Task > failed, taskId=task_1655109552551_0001_2_00_000000, diagnostics=[TaskAttempt > 0 failed, info=[Error: Error while running task ( failure ) : > attempt_1655109552551_0001_2_00_000000_0:java.lang.RuntimeException: > org.apache.hadoop.hive.ql.metadata.HiveException: java.io.IOException: > java.lang.RuntimeException: Unsupported type used in list:array<string> > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:348) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at > com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108) > at > com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41) > at > com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: > java.io.IOException: java.lang.RuntimeException: Unsupported type used in > list:array<string> > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:89) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:414) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:293) > ... 16 more > Caused by: java.io.IOException: java.lang.RuntimeException: Unsupported type > used in list:array<string> > at > org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderNextException(HiveIOExceptionHandlerChain.java:121) > at > org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderNextException(HiveIOExceptionHandlerUtil.java:77) > at > org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:381) > at > org.apache.hadoop.hive.ql.io.HiveRecordReader.doNext(HiveRecordReader.java:82) > at > org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:119) > at > org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:59) > at > org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.next(TezGroupedSplitsInputFormat.java:145) > at > org.apache.tez.mapreduce.lib.MRReaderMapred.next(MRReaderMapred.java:116) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:75) > ... 
18 more > Caused by: java.lang.RuntimeException: Unsupported type used in > list:array<string> > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.checkListColumnSupport(VectorizedParquetRecordReader.java:632) > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.buildVectorizedParquetReader(VectorizedParquetRecordReader.java:568) > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.checkEndOfRowGroup(VectorizedParquetRecordReader.java:470) > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:431) > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.next(VectorizedParquetRecordReader.java:379) > at > org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader.next(VectorizedParquetRecordReader.java:100) > at > org.apache.iceberg.mr.hive.vector.VectorizedRowBatchIterator.advance(VectorizedRowBatchIterator.java:62) > at > org.apache.iceberg.mr.hive.vector.VectorizedRowBatchIterator.hasNext(VectorizedRowBatchIterator.java:84) > at > org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.nextKeyValue(IcebergInputFormat.java:273) > at > org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.next(HiveIcebergVectorizedRecordReader.java:48) > at > org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.next(HiveIcebergVectorizedRecordReader.java:34) > at > org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:376) > ... 24 more > {code} -- This message was sent by Atlassian Jira (v8.20.7#820007)