[ https://issues.apache.org/jira/browse/HIVE-23688?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
范宜臻 updated HIVE-23688: ----------------------- Attachment: HIVE-23688.patch Fix Version/s: 3.0.0 4.0.0 Status: Patch Available (was: Open) > Vectorization: ArrayIndexOutOfBoundsException For map type column which > includes null value > ------------------------------------------------------------------------------------------- > > Key: HIVE-23688 > URL: https://issues.apache.org/jira/browse/HIVE-23688 > Project: Hive > Issue Type: Bug > Components: Parquet, storage-api, Vectorization > Affects Versions: All Versions > Reporter: 范宜臻 > Assignee: 范宜臻 > Priority: Critical > Labels: pull-request-available > Fix For: 4.0.0, 3.0.0 > > Attachments: HIVE-23688.patch > > Time Spent: 10m > Remaining Estimate: 0h > > {color:#de350b}start{color} and {color:#de350b}length{color} are empty arrays > in MapColumnVector.values(BytesColumnVector) when values in map contain > {color:#de350b}null{color} > reproduce in master branch: > {code:java} > set hive.vectorized.execution.enabled=true; > CREATE TABLE parquet_map_type (id int,stringMap map<string, string>) > stored as parquet; > insert overwrite table parquet_map_type SELECT 1, MAP('k1', null, 'k2', > 'bar'); > select id, stringMap['k1'] from parquet_map_type group by 1,2; > {code} > query explain: > {code:java} > Stage-0 > Fetch Operator > limit:-1 > Stage-1 > Reducer 2 vectorized > File Output Operator [FS_12] > Group By Operator [GBY_11] (rows=5 width=2) > Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 > <-Map 1 [SIMPLE_EDGE] vectorized > SHUFFLE [RS_10] > PartitionCols:_col0, _col1 > Group By Operator [GBY_9] (rows=10 width=2) > Output:["_col0","_col1"],keys:_col0, _col1 > Select Operator [SEL_8] (rows=10 width=2) > Output:["_col0","_col1"] > TableScan [TS_0] (rows=10 width=2) > > temp@parquet_map_type_fyz,parquet_map_type_fyz,Tbl:COMPLETE,Col:NONE,Output:["id","stringmap"] > {code} > runtime error: > {code:java} > Vertex failed, vertexName=Map 1, vertexId=vertex_1592040015150_0001_3_00, > diagnostics=[Task failed, 
taskId=task_1592040015150_0001_3_00_000000, > diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( > failure ) : > attempt_1592040015150_0001_3_00_000000_0:java.lang.RuntimeException: > java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: > Hive Runtime Error while processing row > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at > com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108) > at > com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41) > at > com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.lang.RuntimeException: > org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while > processing row > at > 
org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:101) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:76) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:403) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:267) > ... 16 more > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime > Error while processing row > at > org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:970) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:92) > ... 19 more > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error evaluating > id > at > org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:149) > at > org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:918) > at > org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:172) > at > org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.deliverVectorizedRowBatch(VectorMapOperator.java:809) > at > org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:842) > ... 20 more > Caused by: java.lang.ArrayIndexOutOfBoundsException: 0 > at > org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setElement(BytesColumnVector.java:506) > at > org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexBaseScalar.evaluate(VectorUDFMapIndexBaseScalar.java:83) > at > org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:146) > ... 24 more > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)