Author: namit
Date: Mon Mar 4 06:01:23 2013
New Revision: 1452189

URL: http://svn.apache.org/r1452189
Log:
HIVE-3490 Implement * or a.* for arguments to UDFs
(Navis via namit)
Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
    hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q
    hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g?rev=1452189&r1=1452188&r2=1452189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g Mon Mar 4 06:01:23 2013
@@ -160,11 +160,11 @@ function
     LPAREN
       (
         (star=STAR)
-        | (dist=KW_DISTINCT)? (expression (COMMA expression)*)?
+        | (dist=KW_DISTINCT)? (selectExpression (COMMA selectExpression)*)?
       )
     RPAREN
            -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName)
-           -> {$dist == null}? ^(TOK_FUNCTION functionName (expression+)?)
-           -> ^(TOK_FUNCTIONDI functionName (expression+)?)
+           -> {$dist == null}? ^(TOK_FUNCTION functionName (selectExpression+)?)
+           -> ^(TOK_FUNCTIONDI functionName (selectExpression+)?)
     ;
 
 functionName

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1452189&r1=1452188&r2=1452189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Mar 4 06:01:23 2013
@@ -125,6 +125,7 @@ import org.apache.hadoop.hive.ql.plan.Cr
 import org.apache.hadoop.hive.ql.plan.DDLWork;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
@@ -8727,6 +8728,9 @@ public class SemanticAnalyzer extends Ba
       }
       throw new SemanticException(errMsg);
     }
+    if (desc instanceof ExprNodeColumnListDesc) {
+      throw new SemanticException("TOK_ALLCOLREF is not supported in current context");
+    }
     if (!unparseTranslator.isEnabled()) {
       // Not creating a view, so no need to track view expansions.
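With the grammar change above, a bare star argument parses to TOK_FUNCTIONSTAR, while an alias-qualified star such as a.* parses as a plain TOK_FUNCTION whose argument is a TOK_ALLCOLREF; the SemanticAnalyzer hunk then rejects any column list that leaks out of a function context. The following is a minimal sketch, assuming the standard ParseDriver/ASTNode API, of how the new AST shapes can be inspected; the class name and query are illustrative only:

    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.ParseDriver;

    public class AllColRefAstSketch {
      public static void main(String[] args) throws Exception {
        ParseDriver pd = new ParseDriver();
        // Per this patch, the two stars take different AST shapes:
        //   concat(*)   -> (TOK_FUNCTIONSTAR concat)
        //   concat(a.*) -> (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME a)))
        ASTNode ast = pd.parse("select concat(*), concat(a.*) from src a");
        System.out.println(ast.dump());
      }
    }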
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1452189&r1=1452188&r2=1452189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Mon Mar 4 06:01:23 2013
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.lib.Nod
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
@@ -518,14 +519,11 @@ public final class TypeCheckProcFactory
           serdeConstants.DECIMAL_TYPE_NAME);
     }
 
-    public static boolean isRedundantConversionFunction(ASTNode expr,
+    private static boolean isRedundantConversionFunction(ASTNode expr,
         boolean isFunction, ArrayList<ExprNodeDesc> children) {
       if (!isFunction) {
         return false;
       }
-      // children is always one less than the expr.getChildCount(), since the
-      // latter contains function name.
-      assert (children.size() == expr.getChildCount() - 1);
       // conversion functions take a single parameter
       if (children.size() != 1) {
         return false;
@@ -862,6 +860,43 @@ public final class TypeCheckProcFactory
 
       ASTNode expr = (ASTNode) nd;
 
+      if (expr.getType() == HiveParser.TOK_TABNAME) {
+        return null;
+      }
+
+      if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
+        RowResolver input = ctx.getInputRR();
+        ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc();
+        assert expr.getChildCount() <= 1;
+        if (expr.getChildCount() == 1) {
+          // table aliased (select a.*, for example)
+          ASTNode child = (ASTNode) expr.getChild(0);
+          assert child.getType() == HiveParser.TOK_TABNAME;
+          assert child.getChildCount() == 1;
+          String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText());
+          HashMap<String, ColumnInfo> columns = input.getFieldMap(tableAlias);
+          if (columns == null) {
+            throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(child));
+          }
+          for (Map.Entry<String, ColumnInfo> colMap : columns.entrySet()) {
+            ColumnInfo colInfo = colMap.getValue();
+            if (!colInfo.getIsVirtualCol()) {
+              columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(),
+                  colInfo.getInternalName(), colInfo.getTabAlias(), false));
+            }
+          }
+        } else {
+          // all columns (select *, for example)
+          for (ColumnInfo colInfo : input.getColumnInfos()) {
+            if (!colInfo.getIsVirtualCol()) {
+              columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(),
+                  colInfo.getInternalName(), colInfo.getTabAlias(), false));
+            }
+          }
+        }
+        return columnList;
+      }
+
       // If the first child is a TOK_TABLE_OR_COL, and nodeOutput[0] is NULL,
       // and the operator is a DOT, then it's a table column reference.
       if (expr.getType() == HiveParser.DOT
@@ -893,7 +928,9 @@ public final class TypeCheckProcFactory
         return null;
       }
 
-      boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION);
+      boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION ||
+          expr.getType() == HiveParser.TOK_FUNCTIONSTAR ||
+          expr.getType() == HiveParser.TOK_FUNCTIONDI);
 
       // Create all children
       int childrenBegin = (isFunction ? 1 : 0);
@@ -901,7 +938,21 @@ public final class TypeCheckProcFactory
           .getChildCount() - childrenBegin);
       for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) {
-        children.add((ExprNodeDesc) nodeOutputs[ci]);
+        if (nodeOutputs[ci] instanceof ExprNodeColumnListDesc) {
+          children.addAll(((ExprNodeColumnListDesc)nodeOutputs[ci]).getChildren());
+        } else {
+          children.add((ExprNodeDesc) nodeOutputs[ci]);
+        }
+      }
+
+      if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) {
+        RowResolver input = ctx.getInputRR();
+        for (ColumnInfo colInfo : input.getColumnInfos()) {
+          if (!colInfo.getIsVirtualCol()) {
+            children.add(new ExprNodeColumnDesc(colInfo.getType(),
+                colInfo.getInternalName(), colInfo.getTabAlias(), false));
+          }
+        }
       }
 
       // If any of the children contains null, then return a null
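The TOK_ALLCOLREF branch above is the core of the change: the star expands into one ExprNodeColumnDesc per non-virtual column of the input row resolver, optionally restricted to a single table alias, in row-schema order. The following self-contained sketch shows the same expansion rule over stand-in types; Col and expand are hypothetical stand-ins for Hive's ColumnInfo/RowResolver machinery, not real classes:

    import java.util.ArrayList;
    import java.util.List;

    public class AllColRefExpansionSketch {

      // Stand-in for Hive's ColumnInfo: name, originating table alias, virtual flag.
      static class Col {
        final String name;
        final String tabAlias;
        final boolean virtual;
        Col(String name, String tabAlias, boolean virtual) {
          this.name = name;
          this.tabAlias = tabAlias;
          this.virtual = virtual;
        }
      }

      // Expand "*" (alias == null) or "alias.*" into a flat argument list,
      // skipping virtual columns, in row-schema order.
      static List<String> expand(List<Col> rowSchema, String alias) {
        List<String> args = new ArrayList<String>();
        for (Col c : rowSchema) {
          if (c.virtual) {
            continue;
          }
          if (alias != null && !alias.equals(c.tabAlias)) {
            continue;
          }
          args.add(c.tabAlias + "." + c.name);
        }
        return args;
      }

      public static void main(String[] args) {
        List<Col> schema = new ArrayList<Col>();
        schema.add(new Col("key", "a", false));
        schema.add(new Col("value", "a", false));
        schema.add(new Col("key", "b", false));
        schema.add(new Col("value", "b", false));
        schema.add(new Col("BLOCK__OFFSET__INSIDE__FILE", "a", true)); // virtual, skipped
        System.out.println(expand(schema, null)); // concat(*)   -> [a.key, a.value, b.key, b.value]
        System.out.println(expand(schema, "b"));  // concat(b.*) -> [b.key, b.value]
      }
    }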
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java?rev=1452189&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java Mon Mar 4 06:01:23 2013
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * Dummy desc only for populating TOK_ALLCOLREF and should not be used
+ * outside of TypeCheckProcFactory
+ */
+public class ExprNodeColumnListDesc extends ExprNodeDesc {
+
+  List<ExprNodeColumnDesc> columns = new ArrayList<ExprNodeColumnDesc>();
+
+  public void addColumn(ExprNodeColumnDesc column) {
+    columns.add(column);
+  }
+
+  @Override
+  public ExprNodeDesc clone() {
+    ExprNodeColumnListDesc clone = new ExprNodeColumnListDesc();
+    clone.columns = new ArrayList<ExprNodeColumnDesc>(columns);
+    return clone;
+  }
+
+  @Override
+  public boolean isSame(Object o) {
+    if (o instanceof ExprNodeColumnListDesc) {
+      return columns.equals(((ExprNodeColumnListDesc)o).columns);
+    }
+    return false;
+  }
+
+  @Override
+  public TypeInfo getTypeInfo() {
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public void setTypeInfo(TypeInfo typeInfo) {
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public ObjectInspector getWritableObjectInspector() {
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public String getTypeString() {
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public List<String> getCols() {
+    List<String> cols = new ArrayList<String>();
+    for (ExprNodeColumnDesc column : columns) {
+      cols.add(column.getColumn());
+    }
+    return cols;
+  }
+
+  @Override
+  public List<ExprNodeDesc> getChildren() {
+    return new ArrayList<ExprNodeDesc>(columns);
+  }
+}
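Note that ExprNodeColumnListDesc is deliberately inert: all type-related methods throw IllegalStateException, so its only intended life cycle is to be built for a TOK_ALLCOLREF and immediately flattened into a function's argument list via getChildren(). A short usage sketch with the real classes from this patch; the column names and types are made up for illustration:

    import java.util.List;

    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class ColumnListFlattenSketch {
      public static void main(String[] args) {
        ExprNodeColumnListDesc list = new ExprNodeColumnListDesc();
        // One entry per non-virtual input column, in row-schema order.
        list.addColumn(new ExprNodeColumnDesc(
            TypeInfoFactory.stringTypeInfo, "key", "a", false));
        list.addColumn(new ExprNodeColumnDesc(
            TypeInfoFactory.stringTypeInfo, "value", "a", false));
        // TypeCheckProcFactory flattens the list into the UDF's children:
        List<ExprNodeDesc> args = list.getChildren();
        System.out.println(args.size());    // 2
        System.out.println(list.getCols()); // [key, value]
      }
    }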
Added: hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q?rev=1452189&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q Mon Mar 4 06:01:23 2013
@@ -0,0 +1,16 @@
+explain
+select concat(*),array(*) from src where key < 100 limit 10;
+
+select concat(*),array(*) from src where key < 100 limit 10;
+
+-- The order of columns is decided by the row schema of the previous operator.
+-- For operators like join, which have two or more aliases, columns run from the left-most alias to the right aliases.
+
+explain
+select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10;
+
+select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10;
Added: hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out?rev=1452189&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out Mon Mar 4 06:01:23 2013
@@ -0,0 +1,188 @@
+PREHOOK: query: explain
+select concat(*),array(*) from src where key < 100 limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select concat(*),array(*) from src where key < 100 limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR concat)) (TOK_SELEXPR (TOK_FUNCTIONSTAR array))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key < 100.0)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: concat(key, value)
+                      type: string
+                      expr: array(key,value)
+                      type: array<string>
+                outputColumnNames: _col0, _col1
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+
+PREHOOK: query: select concat(*),array(*) from src where key < 100 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select concat(*),array(*) from src where key < 100 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86val_86	["86","val_86"]
+27val_27	["27","val_27"]
+98val_98	["98","val_98"]
+66val_66	["66","val_66"]
+37val_37	["37","val_37"]
+15val_15	["15","val_15"]
+82val_82	["82","val_82"]
+17val_17	["17","val_17"]
+0val_0	["0","val_0"]
+57val_57	["57","val_57"]
+PREHOOK: query: -- The order of columns is decided by the row schema of the previous operator.
+-- For operators like join, which have two or more aliases, columns run from the left-most alias to the right aliases.
+
+explain
+select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The order of columns is decided by the row schema of the previous operator.
+-- For operators like join, which have two or more aliases, columns run from the left-most alias to the right aliases.
+
+explain
+select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (+ (. (TOK_TABLE_OR_COL a) key) 1) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR concat)) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME b)))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME a)) (. (TOK_TABLE_OR_COL b) key))) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) key) (TOK_ALLCOLREF (TOK_TABNAME b))))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 100)))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION stack 2 TOK_ALLCOLREF) e1 e2 e3)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        x:a
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (key < 100.0)
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: (key + 1)
+                      type: double
+                sort order: +
+                Map-reduce partition columns:
+                      expr: (key + 1)
+                      type: double
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+        x:b
+          TableScan
+            alias: b
+            Reduce Output Operator
+              key expressions:
+                    expr: UDFToDouble(key)
+                    type: double
+              sort order: +
+              Map-reduce partition columns:
+                    expr: UDFToDouble(key)
+                    type: double
+              tag: 1
+              value expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          Select Operator
+            expressions:
+                  expr: 2
+                  type: int
+                  expr: concat(_col0, _col1, _col4, _col5)
+                  type: string
+                  expr: concat(_col0, _col1)
+                  type: string
+                  expr: concat(_col4, _col5)
+                  type: string
+                  expr: concat(_col0, _col1, _col4)
+                  type: string
+                  expr: concat(_col0, _col4, _col5)
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            UDTF Operator
+              function name: stack
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+
+PREHOOK: query: select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4val_45val_5	4val_4	5val_5
+4val_45	NULL	5val_5
+4val_45val_5	4val_4	5val_5
+4val_45	NULL	5val_5
+4val_45val_5	4val_4	5val_5
+4val_45	NULL	5val_5
+8val_89val_9	8val_8	9val_9
+8val_89	NULL	9val_9
+9val_910val_10	9val_9	10val_10
+9val_910	NULL	10val_10