HIVE-11602: Support Struct with different field types in query (Jesus Camacho Rodriguez, reviewed by Hari Sankar Sivarama Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0012864b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0012864b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0012864b Branch: refs/heads/llap Commit: 0012864b8d370f948ac2cb875e4264100390d1d4 Parents: ab03dc9 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Wed Aug 19 12:52:16 2015 +0300 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Thu Aug 20 15:17:40 2015 +0300 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/FunctionRegistry.java | 61 ++++++++++++++++++-- .../ql/udf/generic/GenericUDFBaseNumeric.java | 4 +- ql/src/test/queries/clientpositive/structin.q | 6 ++ .../test/results/clientpositive/structin.q.out | 44 ++++++++++++++ 4 files changed, 109 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 9edcc4d..4c1c53e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -132,6 +132,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -765,10 +766,11 @@ public final class FunctionRegistry { return null; } - public static PrimitiveCategory getCommonCategory(TypeInfo a, TypeInfo b) { + public static PrimitiveCategory getPrimitiveCommonCategory(TypeInfo a, TypeInfo b) { if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) { return null; } + PrimitiveCategory pcA = ((PrimitiveTypeInfo)a).getPrimitiveCategory(); PrimitiveCategory pcB = ((PrimitiveTypeInfo)b).getPrimitiveCategory(); @@ -802,10 +804,61 @@ public final class FunctionRegistry { return a; } - PrimitiveCategory commonCat = getCommonCategory(a, b); - if (commonCat == null) + // We try to infer a common primitive category + PrimitiveCategory commonCat = getPrimitiveCommonCategory(a, b); + if (commonCat != null) { + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, commonCat); + } + // It is not primitive; check if it is a struct and we can infer a common class + if (a.getCategory() == Category.STRUCT && b.getCategory() == Category.STRUCT) { + return getCommonClassForStruct((StructTypeInfo)a, (StructTypeInfo)b); + } + return null; + } + + /** + * Find a common class that objects of both StructTypeInfo a and StructTypeInfo b can + * convert to. This is used for places other than comparison. + * + * @return null if no common class could be found. + */ + public static TypeInfo getCommonClassForStruct(StructTypeInfo a, StructTypeInfo b) { + if (a == b || a.equals(b)) { + return a; + } + + List<String> names = new ArrayList<String>(); + List<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); + + Iterator<String> namesIterator = a.getAllStructFieldNames().iterator(); + Iterator<String> otherNamesIterator = b.getAllStructFieldNames().iterator(); + + // Compare the field names using ignore-case semantics + while (namesIterator.hasNext() && otherNamesIterator.hasNext()) { + String name = namesIterator.next(); + if (!name.equalsIgnoreCase(otherNamesIterator.next())) { + return null; + } + names.add(name); + } + + // Different number of field names + if (namesIterator.hasNext() || otherNamesIterator.hasNext()) { return null; - return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, commonCat); + } + + // Compare the field types + ArrayList<TypeInfo> fromTypes = a.getAllStructFieldTypeInfos(); + ArrayList<TypeInfo> toTypes = b.getAllStructFieldTypeInfos(); + for (int i = 0; i < fromTypes.size(); i++) { + TypeInfo commonType = getCommonClass(fromTypes.get(i), toTypes.get(i)); + if (commonType == null) { + return null; + } + typeInfos.add(commonType); + } + + return TypeInfoFactory.getStructTypeInfo(names, typeInfos); } public static boolean implicitConvertible(PrimitiveCategory from, PrimitiveCategory to) { http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java index ca5c459..ef6ef11 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseNumeric.java @@ -245,7 +245,7 @@ public abstract class GenericUDFBaseNumeric extends GenericUDFBaseBinary { } // Use type promotion - PrimitiveCategory commonCat = FunctionRegistry.getCommonCategory(left, right); + PrimitiveCategory commonCat = FunctionRegistry.getPrimitiveCommonCategory(left, right); if (commonCat == PrimitiveCategory.DECIMAL) { // Hive 0.12 behavior where double * decimal -> decimal is gone. return TypeInfoFactory.doubleTypeInfo; @@ -267,7 +267,7 @@ public abstract class GenericUDFBaseNumeric extends GenericUDFBaseBinary { PrimitiveTypeInfo right = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(rightOI); // Now we are handling exact types. Base implementation handles type promotion. - PrimitiveCategory commonCat = FunctionRegistry.getCommonCategory(left, right); + PrimitiveCategory commonCat = FunctionRegistry.getPrimitiveCommonCategory(left, right); if (commonCat == PrimitiveCategory.DECIMAL) { return deriveResultDecimalTypeInfo(); } else { http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/test/queries/clientpositive/structin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/structin.q b/ql/src/test/queries/clientpositive/structin.q index 48b31f3..35498bf 100644 --- a/ql/src/test/queries/clientpositive/structin.q +++ b/ql/src/test/queries/clientpositive/structin.q @@ -15,3 +15,9 @@ struct('1234-1111-0074019112','1'), struct('1234-1111-0074019610','1'), struct('1234-1111-0074022106','1') ); + +explain select * from t11 where struct(`id`, `lineid`) +IN ( +struct('1234-1111-0074578664','3'), +struct('1234-1111-0074578695',1) +); http://git-wip-us.apache.org/repos/asf/hive/blob/0012864b/ql/src/test/results/clientpositive/structin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/structin.q.out b/ql/src/test/results/clientpositive/structin.q.out index 81c792a..f176e79 100644 --- a/ql/src/test/results/clientpositive/structin.q.out +++ b/ql/src/test/results/clientpositive/structin.q.out @@ -64,3 +64,47 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain select * from t11 where struct(`id`, `lineid`) +IN ( +struct('1234-1111-0074578664','3'), +struct('1234-1111-0074578695',1) +) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t11 where struct(`id`, `lineid`) +IN ( +struct('1234-1111-0074578664','3'), +struct('1234-1111-0074578695',1) +) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t11 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (struct(id,lineid)) IN (const struct('1234-1111-0074578664','3'), const struct('1234-1111-0074578695',1)) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: id (type: string), lineid (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +