Repository: hive Updated Branches: refs/heads/master 0d93438a3 -> bbdba9f44
HIVE-10789: union distinct query with NULL constant on both the sides throws "Unsuported vector output type: void" error (Matt McCline reviewed by Gunther Hagleitner) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bbdba9f4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bbdba9f4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bbdba9f4 Branch: refs/heads/master Commit: bbdba9f442636a6dce2c0aba9b3f133e2e2de496 Parents: 0d93438 Author: Matt McCline <mmccl...@hortonworks.com> Authored: Fri May 22 23:57:19 2015 -0700 Committer: Matt McCline <mmccl...@hortonworks.com> Committed: Fri May 22 23:57:19 2015 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../hive/ql/optimizer/physical/Vectorizer.java | 18 +- .../clientpositive/vector_null_projection.q | 18 ++ .../tez/vector_null_projection.q.out | 164 +++++++++++++++++++ .../clientpositive/vector_null_projection.q.out | 163 ++++++++++++++++++ 5 files changed, 358 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 9e95d1b..b7c7e03 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -234,6 +234,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_multi_insert.q,\ vector_non_string_partition.q,\ vector_nullsafe_join.q,\ + vector_null_projection.q,\ vector_orderby_5.q,\ vector_outer_join0.q,\ vector_outer_join1.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index b429c56..3ed3c7e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1191,7 +1191,7 @@ public class Vectorizer implements PhysicalPlanResolver { return true; } - private boolean validateExprNodeDescRecursive(ExprNodeDesc desc) { + private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) { if (desc instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; // Currently, we do not support vectorized virtual columns (see HIVE-5570). @@ -1201,7 +1201,7 @@ public class Vectorizer implements PhysicalPlanResolver { } } String typeName = desc.getTypeInfo().getTypeName(); - boolean ret = validateDataType(typeName); + boolean ret = validateDataType(typeName, mode); if (!ret) { LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName); return false; @@ -1215,7 +1215,8 @@ public class Vectorizer implements PhysicalPlanResolver { } if (desc.getChildren() != null) { for (ExprNodeDesc d: desc.getChildren()) { - boolean r = validateExprNodeDescRecursive(d); + // Don't restrict child expressions for projection. Always use looser FILTER mode. + boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER); if (!r) { return false; } @@ -1229,7 +1230,7 @@ public class Vectorizer implements PhysicalPlanResolver { } boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) { - if (!validateExprNodeDescRecursive(desc)) { + if (!validateExprNodeDescRecursive(desc, mode)) { return false; } try { @@ -1312,8 +1313,13 @@ public class Vectorizer implements PhysicalPlanResolver { return false; } - private boolean validateDataType(String type) { - return supportedDataTypesPattern.matcher(type.toLowerCase()).matches(); + private boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode) { + type = type.toLowerCase(); + boolean result = supportedDataTypesPattern.matcher(type).matches(); + if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) { + return false; + } + return result; } private VectorizationContext getVectorizationContext(RowSchema rowSchema, String contextName, http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/test/queries/clientpositive/vector_null_projection.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_null_projection.q b/ql/src/test/queries/clientpositive/vector_null_projection.q new file mode 100644 index 0000000..765e45f --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_null_projection.q @@ -0,0 +1,18 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +create table a(s string) stored as orc; +create table b(s string) stored as orc; +insert into table a values('aaa'); +insert into table b values('aaa'); + +-- We expect no vectorization due to NULL (void) projection type. +explain +select NULL from a; + +select NULL from a; + +explain +select NULL as x from a union distinct select NULL as x from b; + +select NULL as x from a union distinct select NULL as x from b; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out b/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out new file mode 100644 index 0000000..9b7b698 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out @@ -0,0 +1,164 @@ +PREHOOK: query: create table a(s string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(s string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b(s string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(s string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table a values('aaa') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values('aaa') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into table b values('aaa') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values('aaa') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type. +explain +select NULL from a +PREHOOK: type: QUERY +POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type. +explain +select NULL from a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select NULL from a +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select NULL from a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +NULL +PREHOOK: query: explain +select NULL as x from a union distinct select NULL as x from b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select NULL as x from a union distinct select NULL as x from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Select Operator + Select Operator + Group By Operator + keys: null (type: void) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: void) + sort order: + + Map-reduce partition columns: _col0 (type: void) + Map 4 + Map Operator Tree: + TableScan + alias: b + Select Operator + Select Operator + Group By Operator + keys: null (type: void) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: void) + sort order: + + Map-reduce partition columns: _col0 (type: void) + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: void) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select NULL as x from a union distinct select NULL as x from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select NULL as x from a union distinct select NULL as x from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +NULL http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/test/results/clientpositive/vector_null_projection.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_null_projection.q.out b/ql/src/test/results/clientpositive/vector_null_projection.q.out new file mode 100644 index 0000000..7c3136f --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_null_projection.q.out @@ -0,0 +1,163 @@ +PREHOOK: query: create table a(s string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a(s string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b(s string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b(s string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: insert into table a values('aaa') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@a +POSTHOOK: query: insert into table a values('aaa') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@a +POSTHOOK: Lineage: a.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into table b values('aaa') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@b +POSTHOOK: query: insert into table b values('aaa') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@b +POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type. +explain +select NULL from a +PREHOOK: type: QUERY +POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type. +explain +select NULL from a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select NULL from a +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select NULL from a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +NULL +PREHOOK: query: explain +select NULL as x from a union distinct select NULL as x from b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select NULL as x from a union distinct select NULL as x from b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + keys: null (type: void) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: void) + sort order: + + Map-reduce partition columns: _col0 (type: void) + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan + alias: b + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + keys: null (type: void) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: void) + sort order: + + Map-reduce partition columns: _col0 (type: void) + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: void) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select NULL as x from a union distinct select NULL as x from b +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@b +#### A masked pattern was here #### +POSTHOOK: query: select NULL as x from a union distinct select NULL as x from b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@b +#### A masked pattern was here #### +NULL