HIVE-11462: Constant fold struct() UDF (Gopal V, reviewed by Hari Sankar Sivarama Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/16546cc4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/16546cc4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/16546cc4 Branch: refs/heads/hbase-metastore Commit: 16546cc4b8f6944f5ea4ad13f480dcc402e6757c Parents: 0140df7 Author: Gopal V <gop...@apache.org> Authored: Wed Aug 12 16:45:58 2015 -0700 Committer: Gopal V <gop...@apache.org> Committed: Wed Aug 12 16:45:58 2015 -0700 ---------------------------------------------------------------------- .../optimizer/ConstantPropagateProcFactory.java | 40 +++++++++++++++----- .../hive/ql/plan/ExprNodeConstantDesc.java | 29 ++++++++++++-- .../hive/ql/udf/generic/GenericUDFIn.java | 3 +- .../hive/ql/udf/generic/GenericUDFStruct.java | 25 +++++++++--- .../test/results/clientpositive/null_cast.q.out | 2 +- .../test/results/clientpositive/structin.q.out | 2 +- .../results/clientpositive/udf_inline.q.out | 2 +- .../results/clientpositive/udf_struct.q.out | 2 +- .../test/results/clientpositive/udf_union.q.out | 2 +- .../objectinspector/ObjectInspectorUtils.java | 3 ++ 10 files changed, 87 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index cf10c52..55ad0ce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -75,10 +75,14 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -758,6 +762,10 @@ public final class ConstantPropagateProcFactory { return null; } } + if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) { + // nested complex types cannot be folded cleanly + return null; + } Object value = constant.getValue(); PrimitiveTypeInfo pti = (PrimitiveTypeInfo) constant.getTypeInfo(); Object writableValue = null == value ? value : @@ -774,6 +782,10 @@ public final class ConstantPropagateProcFactory { return null; } ExprNodeConstantDesc constant = (ExprNodeConstantDesc) evaluatedFn; + if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) { + // nested complex types cannot be folded cleanly + return null; + } Object writableValue = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( (PrimitiveTypeInfo) constant.getTypeInfo()).getPrimitiveWritableObject(constant.getValue()); arguments[i] = new DeferredJavaObject(writableValue); @@ -790,28 +802,38 @@ public final class ConstantPropagateProcFactory { LOG.debug(udf.getClass().getName() + "(" + exprs + ")=" + o); } if (o == null) { - return new ExprNodeConstantDesc(TypeInfoUtils.getTypeInfoFromObjectInspector(oi), o); + return new ExprNodeConstantDesc( + TypeInfoUtils.getTypeInfoFromObjectInspector(oi), o); } Class<?> clz = o.getClass(); if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(clz)) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; TypeInfo typeInfo = poi.getTypeInfo(); o = poi.getPrimitiveJavaObject(o); - if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME) || - typeInfo.getTypeName().contains(serdeConstants.VARCHAR_TYPE_NAME) || - typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) { + if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME) + || typeInfo.getTypeName() + .contains(serdeConstants.VARCHAR_TYPE_NAME) + || typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) { return new ExprNodeConstantDesc(typeInfo, o); } - } else if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) { - - } else { + } else if (udf instanceof GenericUDFStruct + && oi instanceof StandardConstantStructObjectInspector) { + // do not fold named_struct, only struct() + ConstantObjectInspector coi = (ConstantObjectInspector) oi; + TypeInfo structType = TypeInfoUtils.getTypeInfoFromObjectInspector(coi); + return new ExprNodeConstantDesc(structType, + ObjectInspectorUtils.copyToStandardJavaObject(o, coi)); + } else if (!PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) { if (LOG.isErrorEnabled()) { - LOG.error("Unable to evaluate " + udf + ". Return value unrecoginizable."); + LOG.error("Unable to evaluate " + udf + + ". Return value unrecoginizable."); } return null; + } else { + // fall through } String constStr = null; - if(arguments.length == 1 && FunctionRegistry.isOpCast(udf)) { + if (arguments.length == 1 && FunctionRegistry.isOpCast(udf)) { // remember original string representation of constant. constStr = arguments[0].get().toString(); } http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java index 2674fe3..a5221a2 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java @@ -19,12 +19,15 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import java.util.List; import org.apache.commons.lang.builder.HashCodeBuilder; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -73,6 +76,7 @@ public class ExprNodeConstantDesc extends ExprNodeDesc implements Serializable { } public void setValue(Object value) { + // Kryo setter this.value = value; } @@ -92,8 +96,7 @@ public class ExprNodeConstantDesc extends ExprNodeDesc implements Serializable { return "Const " + typeInfo.toString() + " " + value; } - @Override - public String getExprString() { + private static String getFormatted(TypeInfo typeInfo, Object value) { if (value == null) { return "null"; } @@ -109,8 +112,28 @@ public class ExprNodeConstantDesc extends ExprNodeDesc implements Serializable { hexChars[j * 2 + 1] = hexArray[v & 0x0F]; } return new String(hexChars); + } + return value.toString(); + } + + @Override + public String getExprString() { + if (typeInfo.getCategory() == Category.PRIMITIVE) { + return getFormatted(typeInfo, value); + } else if (typeInfo.getCategory() == Category.STRUCT) { + StringBuilder sb = new StringBuilder(); + sb.append("const struct("); + List<?> items = (List<?>) getWritableObjectInspector().getWritableConstantValue(); + List<TypeInfo> structTypes = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos(); + for (int i = 0; i < structTypes.size(); i++) { + final Object o = (i < items.size()) ? items.get(i) : null; + sb.append(getFormatted(structTypes.get(i), o)).append(","); + } + sb.setCharAt(sb.length() - 1, ')'); + return sb.toString(); } else { - return value.toString(); + // unknown type + return toString(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java index 56ac3e1..7660ca4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java @@ -60,7 +60,8 @@ import com.esotericsoftware.minlog.Log; public class GenericUDFIn extends GenericUDF { private transient ObjectInspector[] argumentOIs; - private Set<Object> constantInSet; + // this set is a copy of the arguments objects - avoid serializing + private transient Set<Object> constantInSet; private boolean isInSetConstant = true; //are variables from IN(...) constant private final BooleanWritable bw = new BooleanWritable(); http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java index 7df3f7d..7e286fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStruct.java @@ -21,12 +21,13 @@ package org.apache.hadoop.hive.ql.udf.generic; import java.util.ArrayList; import java.util.Arrays; -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @Description(name = "struct", value = "_FUNC_(col1, col2, col3, ...) - Creates a struct with the given field values") @@ -44,9 +45,23 @@ public class GenericUDFStruct extends GenericUDF { for (int f = 1; f <= numFields; f++) { fname.add("col" + f); } - StructObjectInspector soi = - ObjectInspectorFactory.getStandardStructObjectInspector(fname, Arrays.asList(arguments)); - return soi; + boolean constantStruct = true; + for (int i = 0; i < arguments.length; i++) { + ObjectInspector oi = arguments[i]; + constantStruct &= (oi.getCategory() == Category.PRIMITIVE) + && (oi instanceof ConstantObjectInspector); + if (constantStruct) { + // nested complex types trigger Kryo issue #216 in plan deserialization + ret[i] = ((ConstantObjectInspector) oi).getWritableConstantValue(); + } + } + if (constantStruct) { + return ObjectInspectorFactory.getStandardConstantStructObjectInspector(fname, + Arrays.asList(arguments), Arrays.asList(ret)); + } else { + return ObjectInspectorFactory.getStandardStructObjectInspector(fname, + Arrays.asList(arguments)); + } } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/null_cast.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/null_cast.q.out b/ql/src/test/results/clientpositive/null_cast.q.out index b5af69b..ff37fe7 100644 --- a/ql/src/test/results/clientpositive/null_cast.q.out +++ b/ql/src/test/results/clientpositive/null_cast.q.out @@ -23,7 +23,7 @@ STAGE PLANS: Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: array(null,0) (type: array<int>), array(null,array()) (type: array<array<string>>), array(null,map()) (type: array<map<string,string>>), array(null,struct(0)) (type: array<struct<col1:int>>) + expressions: array(null,0) (type: array<int>), array(null,array()) (type: array<array<string>>), array(null,map()) (type: array<map<string,string>>), array(null,const struct(0)) (type: array<struct<col1:int>>) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 108000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/structin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/structin.q.out b/ql/src/test/results/clientpositive/structin.q.out index e36fceb..81c792a 100644 --- a/ql/src/test/results/clientpositive/structin.q.out +++ b/ql/src/test/results/clientpositive/structin.q.out @@ -44,7 +44,7 @@ STAGE PLANS: alias: t11 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (struct(id,lineid)) IN (struct('1234-1111-0074578664','3'), struct('1234-1111-0074578695','1'), struct('1234-1111-0074580704','1'), struct('1234-1111-0074581619','2'), struct('1234-1111-0074582745','1'), struct('1234-1111-0074586625','1'), struct('1234-1111-0074019112','1'), struct('1234-1111-0074019610','1'), struct('1234-1111-0074022106','1')) (type: boolean) + predicate: (struct(id,lineid)) IN (const struct('1234-1111-0074578664','3'), const struct('1234-1111-0074578695','1'), const struct('1234-1111-0074580704','1'), const struct('1234-1111-0074581619','2'), const struct('1234-1111-0074582745','1'), const struct('1234-1111-0074586625','1'), const struct('1234-1111-0074019112','1'), const struct('1234-1111-0074019610','1'), const struct('1234-1111-0074022106','1')) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: id (type: string), lineid (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/udf_inline.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_inline.q.out b/ql/src/test/results/clientpositive/udf_inline.q.out index 7d372f3..f986abf 100644 --- a/ql/src/test/results/clientpositive/udf_inline.q.out +++ b/ql/src/test/results/clientpositive/udf_inline.q.out @@ -31,7 +31,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>) + expressions: array(const struct(1,'dude!'),const struct(2,'Wheres'),const struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/udf_struct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_struct.q.out b/ql/src/test/results/clientpositive/udf_struct.q.out index d0c56c7..0d2d71d 100644 --- a/ql/src/test/results/clientpositive/udf_struct.q.out +++ b/ql/src/test/results/clientpositive/udf_struct.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Row Limit Per Split: 1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: struct(1) (type: struct<col1:int>), struct(1,'a') (type: struct<col1:int,col2:string>), struct(1,'b',1.5).col1 (type: int), struct(1,struct('a',1.5)).col2.col1 (type: string) + expressions: const struct(1) (type: struct<col1:int>), const struct(1,'a') (type: struct<col1:int,col2:string>), struct(1,'b',1.5).col1 (type: int), struct(1,struct('a',1.5)).col2.col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE ListSink http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/ql/src/test/results/clientpositive/udf_union.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_union.q.out b/ql/src/test/results/clientpositive/udf_union.q.out index 73d4bdd..114040f 100644 --- a/ql/src/test/results/clientpositive/udf_union.q.out +++ b/ql/src/test/results/clientpositive/udf_union.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Row Limit Per Split: 2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: create_union(0,key) (type: uniontype<string>), create_union(if((key < 100), 0, 1),2.0,value) (type: uniontype<double,string>), create_union(1,'a',struct(2,'b')) (type: uniontype<string,struct<col1:int,col2:string>>) + expressions: create_union(0,key) (type: uniontype<string>), create_union(if((key < 100), 0, 1),2.0,value) (type: uniontype<double,string>), create_union(1,'a',const struct(2,'b')) (type: uniontype<string,struct<col1:int,col2:string>>) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE ListSink http://git-wip-us.apache.org/repos/asf/hive/blob/16546cc4/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index 64dd512..00a6384 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -1084,6 +1084,9 @@ public final class ObjectInspectorUtils { fieldObjectInspectors.add(getStandardObjectInspector(f .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE)); } + if (value != null && (writableValue.getClass().isArray())) { + writableValue = java.util.Arrays.asList((Object[])writableValue); + } return ObjectInspectorFactory.getStandardConstantStructObjectInspector( fieldNames, fieldObjectInspectors,