Nemon Lou created HIVE-15638: -------------------------------- Summary: ArrayIndexOutOfBoundsException when output Columns for UDTF are pruned Key: HIVE-15638 URL: https://issues.apache.org/jira/browse/HIVE-15638 Project: Hive Issue Type: Bug Components: Query Planning Affects Versions: 2.1.0, 1.3.0 Reporter: Nemon Lou
{noformat} Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ArrayIndexOutOfBoundsException: 151 at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.readVInt(LazyBinaryUtils.java:314) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:183) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:142) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:202) at org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64) at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:364) at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:200) at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:186) at org.apache.hadoop.hive.ql.exec.MapOperator.toErrorMessage(MapOperator.java:525) at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:494) at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:160) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:180) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1710) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:174) ] at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:499) at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:160) ... 
8 more Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ArrayIndexOutOfBoundsException: 151 at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:416) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:878) at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130) at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:149) at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:489) ... 9 more Caused by: java.lang.ArrayIndexOutOfBoundsException: 151 at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.readVInt(LazyBinaryUtils.java:314) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:183) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:142) at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:202) at org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64) at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:94) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:77) at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:65) at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.populateCachedDistributionKeys(ReduceSinkOperator.java:443) at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:350) ... 
13 more {noformat} Steps to reproduce: DDL: {noformat} create table tb_a(data_dt string,key string,src string,data_id string,tag_id string, entity_src string); create table tb_b(pos_tagging string,src string,data_id string); create table tb_c(key string,start_time string,data_dt string); insert into tb_a values('20160901','CPI','04','data_id','tag_id','entity_src'); insert into tb_b values('pos_tagging','04','data_id'); insert into tb_c values('data_id','start_time_0000','20160901'); create function hwrl as 'HotwordRelationUDTF' using jar 'hdfs:///tmp/nemon/udf/hotword.jar'; {noformat} UDTF source file: {code} import java.util.ArrayList; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; /** */ public class HotwordRelationUDTF extends GenericUDTF { private int argsNumber = 6; @Override public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException{ if (args.length != argsNumber) { String log = ""; { for (int i = 0; i < args.length; i++) log += args[i].toString() + ","; } throw new UDFArgumentLengthException( " OrgIdentifyUDTF (" + log + ") has wrong arguments. 
" + "The function ProductHotWordUDTF(data_dt,data_src,data_id,word_type,primary_word,txt_For_Handle)" + " have and only have " + argsNumber + " arguments."); } ArrayList<String> fieldNames = new ArrayList<String>(); ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(); for (int i = 0; i < argsNumber; i++){ if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but " + args[i].getTypeName() + " is passed"); } } fieldNames.add("data_dt"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); fieldNames.add("data_src"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); fieldNames.add("data_id"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); fieldNames.add("word_type"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); fieldNames.add("primary_word"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); fieldNames.add("primary_nature"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); fieldNames.add("primary_offset"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector); fieldNames.add("related_word"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); fieldNames.add("related_nature"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); fieldNames.add("related_offset"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector); return ObjectInspectorFactory.getStandardStructObjectInspector( fieldNames, fieldOIs); } @Override public void process(Object[] args) throws HiveException { ArrayList<Object> result = new ArrayList<Object>(); result.add("20160901"); result.add("data_src"); result.add("data_id"); result.add("word_type"); result.add("primary_word"); result.add("primary_nature"); result.add(6); result.add("related_word"); result.add("related_nature"); 
result.add(0); Object[] ret = result.toArray(new Object[] {}); forward(ret); } @Override public void close() throws HiveException { } } {code} Query that triggers the exception: {noformat} set hive.auto.convert.join=false; select substring(c.start_time,1,10) create_date, tt.data_id,tt.word_type,tt.primary_word,tt.primary_nature,tt.primary_offset,tt.related_word,tt.related_nature,tt.related_offset from ( select hwrl(data_dt,src,data_id,tag_id,entity_src,pos_tagging) as (data_dt,data_src,data_id,word_type,primary_word,primary_nature,primary_offset,related_word,related_nature,related_offset) from ( select a.data_dt,a.src,a.data_id,a.tag_id,a.entity_src,b.pos_tagging from tb_a a, tb_b b where a.key like 'CP%' and a.data_dt='20160901' and a.data_id=b.data_id and b.src='04' ) t ) tt, (select key,start_time from tb_c where data_dt='20160901') c where tt.data_id=c.key ; {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)