HIVE-16091 : Support subqueries in project/select (Vineet Garg via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6955cf71 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6955cf71 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6955cf71 Branch: refs/heads/hive-14535 Commit: 6955cf7130cb54d0a5a3d1a33216f09a8321769d Parents: a086d6c Author: Vineet Garg <[email protected]> Authored: Wed Mar 15 11:53:21 2017 -0700 Committer: Ashutosh Chauhan <[email protected]> Committed: Wed Mar 15 11:53:21 2017 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../calcite/reloperators/HiveFilter.java | 6 +- .../calcite/rules/HiveSubQueryRemoveRule.java | 36 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 200 +- .../hadoop/hive/ql/parse/SubQueryUtils.java | 47 + .../hadoop/hive/ql/plan/ExprNodeDescUtils.java | 6 +- .../queries/clientnegative/subquery_in_select.q | 6 - .../clientnegative/subquery_select_aggregate.q | 2 + .../subquery_select_complex_expr.q | 3 + .../clientnegative/subquery_select_distinct.q | 2 + .../clientnegative/subquery_select_distinct2.q | 2 + .../clientnegative/subquery_select_udf.q | 2 + .../queries/clientpositive/subquery_select.q | 133 + .../clientnegative/subquery_in_select.q.out | 2 +- .../subquery_select_aggregate.q.out | 1 + .../subquery_select_distinct.q.out | 1 + .../subquery_select_distinct2.q.out | 1 + .../clientnegative/subquery_select_udf.q.out | 1 + .../clientpositive/llap/subquery_select.q.out | 2942 ++++++++++++++++++ 19 files changed, 3289 insertions(+), 105 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index e445d3b..0c590c8 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -572,6 +572,7 @@ minillaplocal.query.files=acid_globallimit.q,\ stats_based_fetch_decision.q,\ subquery_notin.q,\ subquery_nested_subquery.q, \ + subquery_select.q, \ subquery_shared_alias.q, \ subquery_null_agg.q,\ table_access_keys_stats.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java index b7b16b8..f19241e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java @@ -49,7 +49,7 @@ public class HiveFilter extends Filter implements HiveRelNode { public void implement(Implementor implementor) { } - private void findCorrelatedVar(RexNode node, Set<CorrelationId> allVars) { + private static void findCorrelatedVar(RexNode node, Set<CorrelationId> allVars) { if(node instanceof RexCall) { RexCall nd = (RexCall)node; for (RexNode rn : nd.getOperands()) { @@ -67,7 +67,7 @@ public class HiveFilter extends Filter implements HiveRelNode { //traverse the given node to find all correlated variables // Note that correlated variables are supported in Filter only i.e. Where & Having - private void traverseFilter(RexNode node, Set<CorrelationId> allVars) { + private static void traverseFilter(RexNode node, Set<CorrelationId> allVars) { if(node instanceof RexSubQuery) { //we expect correlated variables in HiveFilter only for now. // Also check for case where operator has 0 inputs .e.g TableScan @@ -103,7 +103,7 @@ public class HiveFilter extends Filter implements HiveRelNode { return allCorrVars; } - public Set<CorrelationId> getVariablesSet(RexSubQuery e) { + public static Set<CorrelationId> getVariablesSet(RexSubQuery e) { Set<CorrelationId> allCorrVars = new HashSet<>(); traverseFilter(e, allCorrVars); return allCorrVars; http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index c1768f4..76e0780 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -26,6 +26,7 @@ import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; import org.apache.calcite.rex.LogicVisitor; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; @@ -44,6 +45,7 @@ import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.util.Pair; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import java.util.ArrayList; import java.util.List; @@ -70,6 +72,32 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; */ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ + public static final HiveSubQueryRemoveRule PROJECT = + new HiveSubQueryRemoveRule( + operand(Project.class, null, RexUtil.SubQueryFinder.PROJECT_PREDICATE, + any()), + HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Project") { + public void onMatch(RelOptRuleCall call) { + final Project project = call.rel(0); + //TODO: replace HiveSubQRemoveRelBuilder with calcite's once calcite 1.11.0 is released + final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null); + final RexSubQuery e = + RexUtil.SubQueryFinder.find(project.getProjects()); + assert e != null; + final RelOptUtil.Logic logic = + LogicVisitor.find(RelOptUtil.Logic.TRUE_FALSE_UNKNOWN, + project.getProjects(), e); + builder.push(project.getInput()); + final int fieldCount = builder.peek().getRowType().getFieldCount(); + final RexNode target = apply(e, HiveFilter.getVariablesSet(e), + logic, builder, 1, fieldCount, false); + final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); + builder.project(shuttle.apply(project.getProjects()), + project.getRowType().getFieldNames()); + call.transformTo(builder.build()); + } + }; + public static final HiveSubQueryRemoveRule FILTER = new HiveSubQueryRemoveRule( operand(Filter.class, null, RexUtil.SubQueryFinder.FILTER_PREDICATE, @@ -94,7 +122,7 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ Set<RelNode> corrScalarQueries = filter.getCluster().getPlanner().getContext().unwrap(Set.class); boolean isCorrScalarQuery = corrScalarQueries.contains(e.rel); - final RexNode target = apply(e, ((HiveFilter)filter).getVariablesSet(e), logic, + final RexNode target = apply(e, HiveFilter.getVariablesSet(e), logic, builder, 1, fieldCount, isCorrScalarQuery); final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); builder.filter(shuttle.apply(filter.getCondition())); @@ -368,16 +396,16 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ // we are creating filter here so should not be returning NULL. Not sure why Calcite return NULL //operands.add(builder.or(keyIsNulls), builder.literal(false)); } - Boolean b = true; + RexNode b = builder.literal(true); switch (logic) { case TRUE_FALSE_UNKNOWN: - b = null; + b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN); // fall through case UNKNOWN_AS_TRUE: operands.add( builder.call(SqlStdOperatorTable.LESS_THAN, builder.field("ct", "ck"), builder.field("ct", "c")), - builder.literal(b)); + b); break; } operands.add(builder.literal(false)); http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index bf0a11b..34a6dd4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1340,7 +1340,7 @@ public class CalcitePlanner extends SemanticAnalyzer { //Remove subquery LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null, - HiveSubQueryRemoveRule.FILTER); + HiveSubQueryRemoveRule.FILTER, HiveSubQueryRemoveRule.PROJECT); LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan); @@ -2363,7 +2363,7 @@ public class CalcitePlanner extends SemanticAnalyzer { } private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel, - boolean forHavingClause, Map<String, RelNode> aliasToRel, + boolean forHavingClause, Set<ASTNode> corrScalarQueries) throws SemanticException { List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); @@ -2407,11 +2407,6 @@ public class CalcitePlanner extends SemanticAnalyzer { String havingInputAlias = null; - if (forHavingClause) { - havingInputAlias = "gby_sq" + sqIdx; - aliasToRel.put(havingInputAlias, srcRel); - } - boolean isCorrScalarWithAgg = subQuery.subqueryRestrictionsCheck(inputRR, forHavingClause, havingInputAlias); if(isCorrScalarWithAgg) { corrScalarQueries.add(originalSubQueryAST); @@ -2419,12 +2414,11 @@ public class CalcitePlanner extends SemanticAnalyzer { } } private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause, - Map<ASTNode, RelNode> subQueryToRelNode, - Map<String, RelNode> aliasToRel) throws SemanticException { + Map<ASTNode, RelNode> subQueryToRelNode) throws SemanticException { Set<ASTNode> corrScalarQueriesWithAgg = new HashSet<ASTNode>(); //disallow subqueries which HIVE doesn't currently support - subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, aliasToRel, corrScalarQueriesWithAgg); + subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, corrScalarQueriesWithAgg); Deque<ASTNode> stack = new ArrayDeque<ASTNode>(); stack.push(node); @@ -2474,7 +2468,7 @@ public class CalcitePlanner extends SemanticAnalyzer { Map<ASTNode, RelNode> subQueryToRelNode = new HashMap<>(); boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause, - subQueryToRelNode, aliasToRel); + subQueryToRelNode); if(isSubQuery) { ExprNodeDesc subQueryExpr = genExprNodeDesc(searchCond, relToHiveRR.get(srcRel), outerRR, subQueryToRelNode, forHavingClause); @@ -2821,11 +2815,12 @@ public class CalcitePlanner extends SemanticAnalyzer { // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it will match // SEL%SEL% rule. ASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName); + SubQueryUtils.checkForTopLevelSubqueries(selExprList); if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) { ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0); if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) { - srcRel = genSelectLogicalPlan(qb, srcRel, srcRel); + srcRel = genSelectLogicalPlan(qb, srcRel, srcRel, null,null); RowResolver rr = this.relToHiveRR.get(srcRel); qbp.setSelExprForClause(detsClauseName, SemanticAnalyzer.genSelectDIAST(rr)); } @@ -3452,7 +3447,8 @@ public class CalcitePlanner extends SemanticAnalyzer { * * @throws SemanticException */ - private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel) + private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel, + ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR) throws SemanticException { // 0. Generate a Select Node for Windowing // Exclude the newly-generated select columns from */etc. resolution. @@ -3467,6 +3463,9 @@ public class CalcitePlanner extends SemanticAnalyzer { String selClauseName = qbp.getClauseNames().iterator().next(); ASTNode selExprList = qbp.getSelForClause(selClauseName); + // make sure if there is subquery it is top level expression + SubQueryUtils.checkForTopLevelSubqueries(selExprList); + final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty() || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty()); @@ -3599,98 +3598,119 @@ public class CalcitePlanner extends SemanticAnalyzer { // 6.3 Get rid of TOK_SELEXPR expr = (ASTNode) child.getChild(0); String[] colRef = SemanticAnalyzer.getColAlias(child, getAutogenColAliasPrfxLbl(), - inputRR, autogenColAliasPrfxIncludeFuncName(), i); + inputRR, autogenColAliasPrfxIncludeFuncName(), i); tabAlias = colRef[0]; colAlias = colRef[1]; if (hasAsClause) { unparseTranslator.addIdentifierTranslation((ASTNode) child - .getChild(1)); + .getChild(1)); } } - // 6.4 Build ExprNode corresponding to colums - if (expr.getType() == HiveParser.TOK_ALLCOLREF) { - pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : SemanticAnalyzer - .getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list, - excludedColumns, inputRR, starRR, pos, out_rwsch, qb.getAliases(), true); - selectStar = true; - } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL - && !hasAsClause - && !inputRR.getIsExprResolver() - && SemanticAnalyzer.isRegex( - SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), conf)) { - // In case the expression is a regex COL. - // This can only happen without AS clause - // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), - null, expr, col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, - qb.getAliases(), true); - } else if (expr.getType() == HiveParser.DOT - && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL - && inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0) - .getChild(0).getText().toLowerCase())) - && !hasAsClause - && !inputRR.getIsExprResolver() - && SemanticAnalyzer.isRegex( - SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), conf)) { - // In case the expression is TABLE.COL (col can be regex). - // This can only happen without AS clause - // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex( - SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), - SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText() - .toLowerCase()), expr, col_list, excludedColumns, inputRR, starRR, pos, - out_rwsch, qb.getAliases(), true); - } else if (ParseUtils.containsTokenOfType(expr, HiveParser.TOK_FUNCTIONDI) - && !(srcRel instanceof HiveAggregate)) { - // Likely a malformed query eg, select hash(distinct c1) from t1; - throw new CalciteSemanticException("Distinct without an aggregation.", - UnsupportedFeature.Distinct_without_an_aggreggation); - } else { - // Case when this is an expression - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); - // We allow stateful functions in the SELECT list (but nowhere else) - tcCtx.setAllowStatefulFunctions(true); - if (!qbp.getDestToGroupBy().isEmpty()) { - // Special handling of grouping function - expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName), expr, - !cubeRollupGrpSetPresent); - } - ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); - String recommended = recommendName(exp, colAlias); - if (recommended != null && out_rwsch.get(null, recommended) == null) { - colAlias = recommended; - } - col_list.add(exp); - - ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos), - exp.getWritableObjectInspector(), tabAlias, false); - colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp) - .isSkewedCol() : false); - if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) { - throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "." - + colAlias + " => " + colInfo + " due to duplication, see previous warnings", - UnsupportedFeature.Duplicates_in_RR); - } + Map<ASTNode, RelNode> subQueryToRelNode = new HashMap<>(); + boolean isSubQuery = genSubQueryRelNode(qb, expr, srcRel, false, + subQueryToRelNode); + if(isSubQuery) { + ExprNodeDesc subQueryExpr = genExprNodeDesc(expr, relToHiveRR.get(srcRel), + outerRR, subQueryToRelNode, false); + col_list.add(subQueryExpr); + + ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos), + subQueryExpr.getWritableObjectInspector(), tabAlias, false); + if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) { + throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "." + + colAlias + " => " + colInfo + " due to duplication, see previous warnings", + UnsupportedFeature.Duplicates_in_RR); + } + } else { - if (exp instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp; - String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn()); - if (altMapping != null) { - // TODO: this can overwrite the mapping. Should this be allowed? - out_rwsch.put(altMapping[0], altMapping[1], colInfo); + // 6.4 Build ExprNode corresponding to colums + if (expr.getType() == HiveParser.TOK_ALLCOLREF) { + pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : SemanticAnalyzer + .getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list, + excludedColumns, inputRR, starRR, pos, out_rwsch, qb.getAliases(), true); + selectStar = true; + } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL + && !hasAsClause + && !inputRR.getIsExprResolver() + && SemanticAnalyzer.isRegex( + SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), conf)) { + // In case the expression is a regex COL. + // This can only happen without AS clause + // We don't allow this for ExprResolver - the Group By case + pos = genColListRegex(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), + null, expr, col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, + qb.getAliases(), true); + } else if (expr.getType() == HiveParser.DOT + && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL + && inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0) + .getChild(0).getText().toLowerCase())) + && !hasAsClause + && !inputRR.getIsExprResolver() + && SemanticAnalyzer.isRegex( + SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), conf)) { + // In case the expression is TABLE.COL (col can be regex). + // This can only happen without AS clause + // We don't allow this for ExprResolver - the Group By case + pos = genColListRegex( + SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), + SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText() + .toLowerCase()), expr, col_list, excludedColumns, inputRR, starRR, pos, + out_rwsch, qb.getAliases(), true); + } else if (ParseUtils.containsTokenOfType(expr, HiveParser.TOK_FUNCTIONDI) + && !(srcRel instanceof HiveAggregate)) { + // Likely a malformed query eg, select hash(distinct c1) from t1; + throw new CalciteSemanticException("Distinct without an aggregation.", + UnsupportedFeature.Distinct_without_an_aggreggation); } - } + else { + // Case when this is an expression + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + // We allow stateful functions in the SELECT list (but nowhere else) + tcCtx.setAllowStatefulFunctions(true); + if (!qbp.getDestToGroupBy().isEmpty()) { + // Special handling of grouping function + expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName), expr, + !cubeRollupGrpSetPresent); + } + ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); + String recommended = recommendName(exp, colAlias); + if (recommended != null && out_rwsch.get(null, recommended) == null) { + colAlias = recommended; + } + col_list.add(exp); + + ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos), + exp.getWritableObjectInspector(), tabAlias, false); + colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp) + .isSkewedCol() : false); + if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) { + throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "." + + colAlias + " => " + colInfo + " due to duplication, see previous warnings", + UnsupportedFeature.Duplicates_in_RR); + } - pos = Integer.valueOf(pos.intValue() + 1); - } + if (exp instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp; + String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn()); + if (altMapping != null) { + // TODO: this can overwrite the mapping. Should this be allowed? + out_rwsch.put(altMapping[0], altMapping[1], colInfo); + } + } + + pos = Integer.valueOf(pos.intValue() + 1); + } + } } selectStar = selectStar && exprList.getChildCount() == posn + 1; // 7. Convert Hive projections to Calcite List<RexNode> calciteColLst = new ArrayList<RexNode>(); + RexNodeConverter rexNodeConv = new RexNodeConverter(cluster, srcRel.getRowType(), - buildHiveColNameToInputPosMap(col_list, inputRR), 0, false); + outerNameToPosMap, buildHiveColNameToInputPosMap(col_list, inputRR), relToHiveRR.get(srcRel), + outerRR, 0, false, subqueryId); for (ExprNodeDesc colExpr : col_list) { calciteColLst.add(rexNodeConv.convert(colExpr)); } @@ -3946,7 +3966,7 @@ public class CalcitePlanner extends SemanticAnalyzer { srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel; // 5. Build Rel for Select Clause - selectRel = genSelectLogicalPlan(qb, srcRel, starSrcRel); + selectRel = genSelectLogicalPlan(qb, srcRel, starSrcRel, outerNameToPosMap, outerRR); srcRel = (selectRel == null) ? srcRel : selectRel; // 6. Build Rel for OB Clause http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java index f0165dd..0ec24b5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType; import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryTypeDef; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; public class SubQueryUtils { @@ -651,6 +652,52 @@ public class SubQueryUtils { return eq; } + + static void checkForSubqueries(ASTNode node) throws SemanticException { + // allow NOT but throw an error for rest + if(node.getType() == HiveParser.TOK_SUBQUERY_EXPR + && node.getParent().getType() != HiveParser.KW_NOT) { + throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + "Invalid subquery. Subquery in SELECT could only be top-level expression")); + } + for(int i=0; i<node.getChildCount(); i++) { + checkForSubqueries((ASTNode)node.getChild(i)); + } + } + /* + * Given a TOK_SELECT this checks IF there is a subquery + * it is top level expression, else it throws an error + */ + public static void checkForTopLevelSubqueries(ASTNode selExprList) throws SemanticException{ + // should be either SELECT or SELECT DISTINCT + assert(selExprList.getType() == HiveParser.TOK_SELECT + || selExprList.getType() == HiveParser.TOK_SELECTDI); + for(int i=0; i<selExprList.getChildCount(); i++) { + ASTNode selExpr = (ASTNode)selExprList.getChild(i); + // could get either query hint or select expr + assert(selExpr.getType() == HiveParser.TOK_SELEXPR + || selExpr.getType() == HiveParser.QUERY_HINT); + + if(selExpr.getType() == HiveParser.QUERY_HINT) { + // skip query hints + continue; + } + + if(selExpr.getChildCount() == 1 + && selExpr.getChild(0).getType() == HiveParser.TOK_SUBQUERY_EXPR) { + if(selExprList.getType() == HiveParser.TOK_SELECTDI) { + throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + "Invalid subquery. Subquery with DISTINCT clause is not supported!")); + + } + continue; //we are good since subquery is top level expression + } + // otherwise we need to make sure that there is no subquery at any level + for(int j=0; j<selExpr.getChildCount(); j++) { + checkForSubqueries((ASTNode) selExpr.getChild(j)); + } + } + } public static interface ISubQueryJoinInfo { public String getAlias(); http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index 6c10704..fac60c1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -504,7 +504,11 @@ public class ExprNodeDescUtils { } else if (exprDesc instanceof ExprNodeFieldDesc) { getExprNodeColumnDesc(((ExprNodeFieldDesc) exprDesc).getDesc(), hashCodeToColumnDescMap); - } + } else if( exprDesc instanceof ExprNodeSubQueryDesc) { + getExprNodeColumnDesc(((ExprNodeSubQueryDesc) exprDesc).getSubQueryLhs(), + hashCodeToColumnDescMap); + } + } public static boolean isConstant(ExprNodeDesc value) { http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_in_select.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/subquery_in_select.q b/ql/src/test/queries/clientnegative/subquery_in_select.q deleted file mode 100644 index 1365389..0000000 --- a/ql/src/test/queries/clientnegative/subquery_in_select.q +++ /dev/null @@ -1,6 +0,0 @@ - - - -select src.key in (select key from src s1 where s1.key > '9') -from src -; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_select_aggregate.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/subquery_select_aggregate.q b/ql/src/test/queries/clientnegative/subquery_select_aggregate.q new file mode 100644 index 0000000..8da8521 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_select_aggregate.q @@ -0,0 +1,2 @@ +-- subqueries in UDFs are not allowed +explain SELECT count((SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type)) from part; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_select_complex_expr.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/subquery_select_complex_expr.q b/ql/src/test/queries/clientnegative/subquery_select_complex_expr.q new file mode 100644 index 0000000..8e6cf66 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_select_complex_expr.q @@ -0,0 +1,3 @@ + +-- since subquery is not top level expression this should throw an error +explain SELECT p_size, 1+(SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type) from part; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_select_distinct.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/subquery_select_distinct.q b/ql/src/test/queries/clientnegative/subquery_select_distinct.q new file mode 100644 index 0000000..5682ae7 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_select_distinct.q @@ -0,0 +1,2 @@ +-- subqueries in UDFs are not allowed +explain SELECT distinct (SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type) from part; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_select_distinct2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/subquery_select_distinct2.q b/ql/src/test/queries/clientnegative/subquery_select_distinct2.q new file mode 100644 index 0000000..90c0588 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_select_distinct2.q @@ -0,0 +1,2 @@ +-- subqueries in UDFs are not allowed +explain SELECT distinct p_size, (SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type) from part; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_select_udf.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/subquery_select_udf.q b/ql/src/test/queries/clientnegative/subquery_select_udf.q new file mode 100644 index 0000000..a52a5a9 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_select_udf.q @@ -0,0 +1,2 @@ +-- subqueries in UDFs are not allowed +explain SELECT p_size, exp((SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type)) from part; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientpositive/subquery_select.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/subquery_select.q b/ql/src/test/queries/clientpositive/subquery_select.q new file mode 100644 index 0000000..cb7d7aa --- /dev/null +++ b/ql/src/test/queries/clientpositive/subquery_select.q @@ -0,0 +1,133 @@ +-- following tests test queries in SELECT +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; + +-- IN, non corr +explain SELECT p_size, p_size IN ( + SELECT MAX(p_size) FROM part) +FROM part; + +SELECT p_size, p_size IN ( + SELECT MAX(p_size) FROM part) +FROM part ; + +-- IN, corr +EXPLAIN SELECT p_size, p_size IN ( + SELECT MAX(p_size) FROM part p where p.p_type = part.p_type) +FROM part; + +SELECT p_size, p_size IN ( + SELECT MAX(p_size) FROM part p where p.p_type = part.p_type) +FROM part; + +-- NOT IN, non corr +explain SELECT p_size, p_size NOT IN ( + SELECT MAX(p_size) FROM part) +FROM part; + +SELECT p_size, p_size NOT IN ( + SELECT MAX(p_size) FROM part) +FROM part ; + +-- NOT IN, corr +EXPLAIN SELECT p_size, p_size NOT IN ( + SELECT MAX(p_size) FROM part p where p.p_type = part.p_type) +FROM part; + +SELECT p_size, p_size NOT IN ( + SELECT MAX(p_size) FROM part p where p.p_type = part.p_type) +FROM part; + +-- EXISTS, non corr +explain SELECT p_size, EXISTS(SELECT p_size FROM part) +FROM part; + +SELECT p_size, EXISTS(SELECT p_size FROM part) +FROM part; + +-- EXISTS, corr +explain SELECT p_size, EXISTS(SELECT p_size FROM part pp where pp.p_type = part.p_type) +FROM part; + +SELECT p_size, EXISTS(SELECT p_size FROM part pp where pp.p_type = part.p_type) +FROM part; + +-- NOT EXISTS, non corr +explain SELECT p_size, NOT EXISTS(SELECT p_size FROM part) +FROM part; + +SELECT p_size, NOT EXISTS(SELECT p_size FROM part) +FROM part; + +-- NOT EXISTS, corr +explain SELECT p_size, NOT EXISTS(SELECT p_size FROM part pp where pp.p_type = part.p_type) +FROM part; + +SELECT p_size, NOT EXISTS(SELECT p_size FROM part pp where pp.p_type = part.p_type) +FROM part; + +-- SCALAR, corr +explain SELECT p_size, (SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type) +FROM part; + +SELECT p_size, (SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type) +FROM part; + +-- SCALAR, non corr +explain SELECT p_size, (SELECT max(p_size) FROM part) + FROM part; + +SELECT p_size, (SELECT max(p_size) FROM part) + FROM part; + +-- IN, corr with scalar +explain +select * +from src b +where b.key in + (select (select max(key) from src) + from src a + where b.value = a.value and a.key > '9' + ); +select * +from src b +where b.key in + (select (select max(key) from src) + from src a + where b.value = a.value and a.key > '9' + ); + +-- corr within corr..correcionnn.. +explain +select * +from src b +where b.key in + (select (select max(key) from src sc where sc.value = a.value) + from src a + where b.value = a.value and a.key > '9' + ); + +select * +from src b +where b.key in + (select (select max(key) from src sc where sc.value = a.value) + from src a + where b.value = a.value and a.key > '9' ); + +CREATE table tnull(i int); +insert into tnull values(null); + +-- IN query returns unknown/NULL instead of true/false +explain select p_size, p_size IN (select i from tnull) from part; +select p_size, p_size IN (select i from tnull) from part; + +CREATE TABLE tempty(i int); + +explain select p_size, (select count(*) from tempty) from part; +select p_size, (select count(*) from tempty) from part; + +explain select p_size, (select max(i) from tempty) from part; +select p_size, (select max(i) from tempty) from part; + +DROP table tempty; +DROP table tnull; http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/results/clientnegative/subquery_in_select.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/subquery_in_select.q.out b/ql/src/test/results/clientnegative/subquery_in_select.q.out index 5c69690..a8e1b99 100644 --- a/ql/src/test/results/clientnegative/subquery_in_select.q.out +++ b/ql/src/test/results/clientnegative/subquery_in_select.q.out @@ -1 +1 @@ -FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Invalid subquery. Subquery in SELECT could only be top-level expression http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/results/clientnegative/subquery_select_aggregate.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/subquery_select_aggregate.q.out b/ql/src/test/results/clientnegative/subquery_select_aggregate.q.out new file mode 100644 index 0000000..a8e1b99 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_select_aggregate.q.out @@ -0,0 +1 @@ +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Invalid subquery. Subquery in SELECT could only be top-level expression http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/results/clientnegative/subquery_select_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/subquery_select_distinct.q.out b/ql/src/test/results/clientnegative/subquery_select_distinct.q.out new file mode 100644 index 0000000..51c1212 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_select_distinct.q.out @@ -0,0 +1 @@ +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Invalid subquery. Subquery with DISTINCT clause is not supported! http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/results/clientnegative/subquery_select_distinct2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/subquery_select_distinct2.q.out b/ql/src/test/results/clientnegative/subquery_select_distinct2.q.out new file mode 100644 index 0000000..51c1212 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_select_distinct2.q.out @@ -0,0 +1 @@ +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Invalid subquery. Subquery with DISTINCT clause is not supported! http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/results/clientnegative/subquery_select_udf.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/subquery_select_udf.q.out b/ql/src/test/results/clientnegative/subquery_select_udf.q.out new file mode 100644 index 0000000..a8e1b99 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_select_udf.q.out @@ -0,0 +1 @@ +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Invalid subquery. Subquery in SELECT could only be top-level expression
