Author: hashutosh Date: Mon Oct 27 22:53:37 2014 New Revision: 1634722 URL: http://svn.apache.org/r1634722 Log: HIVE-8598 : Push constant filters through joins (Ashutosh Chauhan via Harish Butani)
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java hive/branches/branch-0.14/ql/src/test/queries/clientpositive/optimize_nullscan.q hive/branches/branch-0.14/ql/src/test/results/clientpositive/optimize_nullscan.q.out hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java?rev=1634722&r1=1634721&r2=1634722&view=diff ============================================================================== --- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (original) +++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java Mon Oct 27 22:53:37 2014 @@ -149,17 +149,17 @@ public final class OpProcFactory { } } - + public static class PTFPPD extends ScriptPPD { - + /* * For WindowingTableFunction if: - * a. there is a Rank/DenseRank function: if there are unpushedPred of the form + * a. there is a Rank/DenseRank function: if there are unpushedPred of the form * rnkValue < Constant; then use the smallest Constant val as the 'rankLimit' * on the WindowingTablFn. - * b. If there are no Wdw Fns with an End Boundary past the current row, the + * b. If there are no Wdw Fns with an End Boundary past the current row, the * condition can be pushed down as a limit pushdown(mapGroupBy=true) - * + * * (non-Javadoc) * @see org.apache.hadoop.hive.ql.ppd.OpProcFactory.ScriptPPD#process(org.apache.hadoop.hive.ql.lib.Node, java.util.Stack, org.apache.hadoop.hive.ql.lib.NodeProcessorCtx, java.lang.Object[]) */ @@ -170,30 +170,30 @@ public final class OpProcFactory { + ((Operator) nd).getIdentifier() + ")"); OpWalkerInfo owi = (OpWalkerInfo) procCtx; PTFOperator ptfOp = (PTFOperator) nd; - + pushRankLimit(ptfOp, owi); return super.process(nd, stack, procCtx, nodeOutputs); } - + private void pushRankLimit(PTFOperator ptfOp, OpWalkerInfo owi) throws SemanticException { PTFDesc conf = ptfOp.getConf(); - + if ( !conf.forWindowing() ) { return; } - + float threshold = owi.getParseContext().getConf().getFloatVar(HiveConf.ConfVars.HIVELIMITPUSHDOWNMEMORYUSAGE); if (threshold <= 0 || threshold >= 1) { return; } - + WindowTableFunctionDef wTFn = (WindowTableFunctionDef) conf.getFuncDef(); List<Integer> rFnIdxs = rankingFunctions(wTFn); - + if ( rFnIdxs.size() == 0 ) { return; } - + ExprWalkerInfo childInfo = getChildWalkerInfo(ptfOp, owi); if (childInfo == null) { @@ -207,7 +207,7 @@ public final class OpProcFactory { preds = ExprNodeDescUtils.split(pred, preds); } } - + int rLimit = -1; int fnIdx = -1; for(ExprNodeDesc pred : preds) { @@ -219,7 +219,7 @@ public final class OpProcFactory { } } } - + if ( rLimit != -1 ) { wTFn.setRankLimit(rLimit); wTFn.setRankLimitFunction(fnIdx); @@ -228,68 +228,68 @@ public final class OpProcFactory { } } } - + private List<Integer> rankingFunctions(WindowTableFunctionDef wTFn) { List<Integer> rFns = new ArrayList<Integer>(); for(int i=0; i < wTFn.getWindowFunctions().size(); i++ ) { WindowFunctionDef wFnDef = wTFn.getWindowFunctions().get(i); - if ( (wFnDef.getWFnEval() instanceof GenericUDAFRankEvaluator) || + if ( (wFnDef.getWFnEval() instanceof GenericUDAFRankEvaluator) || (wFnDef.getWFnEval() instanceof GenericUDAFDenseRankEvaluator ) ) { rFns.add(i); } } return rFns; } - + /* * For a predicate check if it is a candidate for pushing down as limit optimization. * The expression must be of the form rankFn <|<= constant. */ private int[] getLimit(WindowTableFunctionDef wTFn, List<Integer> rFnIdxs, ExprNodeDesc expr) { - + if ( !(expr instanceof ExprNodeGenericFuncDesc) ) { return null; } - + ExprNodeGenericFuncDesc fExpr = (ExprNodeGenericFuncDesc) expr; - - if ( !(fExpr.getGenericUDF() instanceof GenericUDFOPLessThan) && + + if ( !(fExpr.getGenericUDF() instanceof GenericUDFOPLessThan) && !(fExpr.getGenericUDF() instanceof GenericUDFOPEqualOrLessThan) ) { return null; } - + if ( !(fExpr.getChildren().get(0) instanceof ExprNodeColumnDesc) ) { return null; } - + if ( !(fExpr.getChildren().get(1) instanceof ExprNodeConstantDesc) ) { return null; } - + ExprNodeConstantDesc constantExpr = (ExprNodeConstantDesc) fExpr.getChildren().get(1) ; - + if ( constantExpr.getTypeInfo() != TypeInfoFactory.intTypeInfo ) { return null; } - + int limit = (Integer) constantExpr.getValue(); if ( fExpr.getGenericUDF() instanceof GenericUDFOPEqualOrLessThan ) { limit = limit + 1; } String colName = ((ExprNodeColumnDesc)fExpr.getChildren().get(0)).getColumn(); - + for(int i=0; i < rFnIdxs.size(); i++ ) { String fAlias = wTFn.getWindowFunctions().get(i).getAlias(); if ( fAlias.equals(colName)) { return new int[] {limit,i}; } } - + return null; } - + /* - * Limit can be pushed down to Map-side if all Window Functions need access + * Limit can be pushed down to Map-side if all Window Functions need access * to rows before the current row. This is true for: * 1. Rank, DenseRank and Lead Fns. (the window doesn't matter for lead fn). * 2. If the Window for the function is Row based and the End Boundary doesn't @@ -298,8 +298,8 @@ public final class OpProcFactory { private boolean canPushLimitToReduceSink(WindowTableFunctionDef wTFn) { for(WindowFunctionDef wFnDef : wTFn.getWindowFunctions() ) { - if ( (wFnDef.getWFnEval() instanceof GenericUDAFRankEvaluator) || - (wFnDef.getWFnEval() instanceof GenericUDAFDenseRankEvaluator ) || + if ( (wFnDef.getWFnEval() instanceof GenericUDAFRankEvaluator) || + (wFnDef.getWFnEval() instanceof GenericUDAFDenseRankEvaluator ) || (wFnDef.getWFnEval() instanceof GenericUDAFLeadEvaluator ) ) { continue; } @@ -314,18 +314,18 @@ public final class OpProcFactory { } return true; } - + private void pushRankLimitToRedSink(PTFOperator ptfOp, HiveConf conf, int rLimit) throws SemanticException { - + Operator<? extends OperatorDesc> parent = ptfOp.getParentOperators().get(0); Operator<? extends OperatorDesc> gP = parent == null ? null : parent.getParentOperators().get(0); - + if ( gP == null || !(gP instanceof ReduceSinkOperator )) { return; } - + float threshold = conf.getFloatVar(HiveConf.ConfVars.HIVELIMITPUSHDOWNMEMORYUSAGE); - + ReduceSinkOperator rSink = (ReduceSinkOperator) gP; ReduceSinkDesc rDesc = rSink.getConf(); rDesc.setTopN(rLimit); @@ -543,7 +543,7 @@ public final class OpProcFactory { private void applyFilterTransitivity(JoinOperator nd, OpWalkerInfo owi) throws SemanticException { ExprWalkerInfo prunePreds = - owi.getPrunedPreds((Operator<? extends OperatorDesc>) nd); + owi.getPrunedPreds(nd); if (prunePreds != null) { // We want to use the row resolvers of the parents of the join op // because the rowresolver refers to the output columns of an operator @@ -579,9 +579,6 @@ public final class OpProcFactory { int numColumns = eqExpressions.size(); int numEqualities = eqExpressions.get(0).size(); - // joins[i] is the join between table i and i+1 in the JoinOperator - JoinCondDesc[] joins = (nd).getConf().getConds(); - // oldFilters contains the filters to be pushed down Map<String, List<ExprNodeDesc>> oldFilters = prunePreds.getFinalCandidates(); @@ -632,10 +629,32 @@ public final class OpProcFactory { } } } + // Push where false filter transitively + Map<String,List<ExprNodeDesc>> candidates = prunePreds.getNonFinalCandidates(); + List<ExprNodeDesc> exprs; + // where false is not associated with any alias in candidates + if (null != candidates && candidates.get(null) != null && ((exprs = candidates.get(null)) != null)) { + Iterator<ExprNodeDesc> itr = exprs.iterator(); + while (itr.hasNext()) { + ExprNodeDesc expr = itr.next(); + if (expr instanceof ExprNodeConstantDesc && Boolean.FALSE.equals(((ExprNodeConstantDesc)expr).getValue())) { + // push this 'where false' expr to all aliases + for (String alias : aliasToRR.keySet()) { + List<ExprNodeDesc> pushedFilters = newFilters.get(alias); + if (null == pushedFilters) { + newFilters.put(alias, new ArrayList<ExprNodeDesc>()); + } + newFilters.get(alias).add(expr); + } + // this filter is pushed, we can remove it from non-final candidates. + itr.remove(); + } + } + } for (Entry<String, List<ExprNodeDesc>> aliasToFilters : newFilters.entrySet()){ - owi.getPrunedPreds((Operator<? extends OperatorDesc>) nd) + owi.getPrunedPreds(nd) .addPushDowns(aliasToFilters.getKey(), aliasToFilters.getValue()); } } Modified: hive/branches/branch-0.14/ql/src/test/queries/clientpositive/optimize_nullscan.q URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/queries/clientpositive/optimize_nullscan.q?rev=1634722&r1=1634721&r2=1634722&view=diff ============================================================================== --- hive/branches/branch-0.14/ql/src/test/queries/clientpositive/optimize_nullscan.q (original) +++ hive/branches/branch-0.14/ql/src/test/queries/clientpositive/optimize_nullscan.q Mon Oct 27 22:53:37 2014 @@ -23,3 +23,7 @@ select * from (select key from src where explain extended select * from (select key from src union all select src.key from src left outer join srcpart on src.key = srcpart.key) a where false; select * from (select key from src union all select src.key from src left outer join srcpart on src.key = srcpart.key) a where false; + +explain extended +select * from src s1, src s2 where false and s1.value = s2.value; +select * from src s1, src s2 where false and s1.value = s2.value; Modified: hive/branches/branch-0.14/ql/src/test/results/clientpositive/optimize_nullscan.q.out URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/optimize_nullscan.q.out?rev=1634722&r1=1634721&r2=1634722&view=diff ============================================================================== Files hive/branches/branch-0.14/ql/src/test/results/clientpositive/optimize_nullscan.q.out (original) and hive/branches/branch-0.14/ql/src/test/results/clientpositive/optimize_nullscan.q.out Mon Oct 27 22:53:37 2014 differ Modified: hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out?rev=1634722&r1=1634721&r2=1634722&view=diff ============================================================================== Files hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out (original) and hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out Mon Oct 27 22:53:37 2014 differ