Repository: hive Updated Branches: refs/heads/master c98ee5b75 -> 2cab4ac83
HIVE-16427 : Fix multi-insert query and write qtests (Yongzhi Chen via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2cab4ac8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2cab4ac8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2cab4ac8 Branch: refs/heads/master Commit: 2cab4ac83a387c4b1eec0e1b47c9b1a4c2671ece Parents: c98ee5b Author: Yongzhi Chen <ych...@apache.org> Authored: Tue Apr 18 22:34:34 2017 -0700 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Tue Apr 18 22:34:34 2017 -0700 ---------------------------------------------------------------------- .../optimizer/physical/NullScanOptimizer.java | 58 +++++++++++++++----- .../queries/clientpositive/inputwherefalse.q | 19 +++++++ .../clientpositive/inputwherefalse.q.out | 55 +++++++++++++++++++ 3 files changed, 118 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/2cab4ac8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java index 49b839a..e4dacd1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java @@ -19,7 +19,11 @@ package org.apache.hadoop.hive.ql.optimizer.physical; import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.Stack; @@ -75,6 +79,32 @@ public class NullScanOptimizer implements PhysicalPlanResolver { return pctx; } + //We need to make sure that Null Operator (LIM or FIL) is present in all branches of multi-insert query before + //applying the optimization. This method does full tree traversal starting from TS and will return true only if + //it finds target Null operator on each branch. + static private boolean isNullOpPresentInAllBranches(TableScanOperator ts, Node causeOfNullNode) { + Node curNode = null; + List<? extends Node> curChd = null; + LinkedList<Node> middleNodes = new LinkedList<Node>(); + middleNodes.addLast(ts); + while (!middleNodes.isEmpty()) { + curNode = middleNodes.remove(); + curChd = curNode.getChildren(); + for (Node chd: curChd) { + if (chd.getChildren() == null || chd.getChildren().isEmpty() || chd == causeOfNullNode) { + if (chd != causeOfNullNode) { // If there is an end node that not the limit0/wherefalse.. + return false; + } + } + else { + middleNodes.addLast(chd); + } + } + + } + return true; + } + static private class WhereFalseProcessor implements NodeProcessor { @Override @@ -91,22 +121,13 @@ public class NullScanOptimizer implements PhysicalPlanResolver { return null; } - int numOfndPeers = 0; - if (filter.getParentOperators() != null) { - for (Operator<?> fParent : filter.getParentOperators()) { - if (fParent.getChildOperators() != null) { - numOfndPeers += fParent.getChildOperators().size(); - if (numOfndPeers > 1) - return null; - } - } - } - WalkerCtx ctx = (WalkerCtx) procCtx; for (Node op : stack) { if (op instanceof TableScanOperator) { - ctx.setMayBeMetadataOnly((TableScanOperator)op); - LOG.info("Found where false TableScan. " + op); + if (isNullOpPresentInAllBranches((TableScanOperator)op, filter)) { + ctx.setMayBeMetadataOnly((TableScanOperator)op); + LOG.info("Found where false TableScan. " + op); + } } } ctx.convertMetadataOnly(); @@ -120,9 +141,18 @@ public class NullScanOptimizer implements PhysicalPlanResolver { public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - if(!(((LimitOperator)nd).getConf().getLimit() == 0)) { + LimitOperator limitOp = (LimitOperator)nd; + if(!(limitOp.getConf().getLimit() == 0)) { return null; } + + HashSet<TableScanOperator> tsOps = ((WalkerCtx)procCtx).getMayBeMetadataOnlyTableScans(); + if (tsOps != null) { + for (Iterator<TableScanOperator> tsOp = tsOps.iterator(); tsOp.hasNext();) { + if (!isNullOpPresentInAllBranches(tsOp.next(),limitOp)) + tsOp.remove(); + } + } LOG.info("Found Limit 0 TableScan. " + nd); ((WalkerCtx)procCtx).convertMetadataOnly(); return null; http://git-wip-us.apache.org/repos/asf/hive/blob/2cab4ac8/ql/src/test/queries/clientpositive/inputwherefalse.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/inputwherefalse.q b/ql/src/test/queries/clientpositive/inputwherefalse.q index c9186e6..4605ac6 100644 --- a/ql/src/test/queries/clientpositive/inputwherefalse.q +++ b/ql/src/test/queries/clientpositive/inputwherefalse.q @@ -4,6 +4,22 @@ select key where key=200 limit 1 insert overwrite directory 'target/warehouse/destemp.out/dir2/' select 'header' +limit 0 +insert overwrite directory 'target/warehouse/destemp.out/dir3/' +select key +where key = 100 limit 1; + +dfs -cat ${system:test.warehouse.dir}/destemp.out/dir1/000000_0; +dfs -cat ${system:test.warehouse.dir}/destemp.out/dir2/000000_0; +dfs -cat ${system:test.warehouse.dir}/destemp.out/dir3/000000_0; +dfs -rmr ${system:test.warehouse.dir}/destemp.out; + +From (select * from src) a +insert overwrite directory 'target/warehouse/destemp.out/dir1/' +select key +where key=200 limit 1 +insert overwrite directory 'target/warehouse/destemp.out/dir2/' +select 'header' where 1=2 insert overwrite directory 'target/warehouse/destemp.out/dir3/' select key @@ -13,3 +29,6 @@ dfs -cat ${system:test.warehouse.dir}/destemp.out/dir1/000000_0; dfs -cat ${system:test.warehouse.dir}/destemp.out/dir2/000000_0; dfs -cat ${system:test.warehouse.dir}/destemp.out/dir3/000000_0; dfs -rmr ${system:test.warehouse.dir}/destemp.out; + +explain select a.value from src a join src b on a.key = b.key where a.key=100 limit 0; +select a.value from src a join src b on a.key = b.key where a.key=100 limit 0; http://git-wip-us.apache.org/repos/asf/hive/blob/2cab4ac8/ql/src/test/results/clientpositive/inputwherefalse.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/inputwherefalse.q.out b/ql/src/test/results/clientpositive/inputwherefalse.q.out index ecb7300..898fb93 100644 --- a/ql/src/test/results/clientpositive/inputwherefalse.q.out +++ b/ql/src/test/results/clientpositive/inputwherefalse.q.out @@ -4,6 +4,39 @@ select key where key=200 limit 1 insert overwrite directory 'target/warehouse/destemp.out/dir2/' select 'header' +limit 0 +insert overwrite directory 'target/warehouse/destemp.out/dir3/' +select key +where key = 100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: target/warehouse/destemp.out/dir1 +PREHOOK: Output: target/warehouse/destemp.out/dir2 +PREHOOK: Output: target/warehouse/destemp.out/dir3 +POSTHOOK: query: From (select * from src) a +insert overwrite directory 'target/warehouse/destemp.out/dir1/' +select key +where key=200 limit 1 +insert overwrite directory 'target/warehouse/destemp.out/dir2/' +select 'header' +limit 0 +insert overwrite directory 'target/warehouse/destemp.out/dir3/' +select key +where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: target/warehouse/destemp.out/dir1 +POSTHOOK: Output: target/warehouse/destemp.out/dir2 +POSTHOOK: Output: target/warehouse/destemp.out/dir3 +200 +100 +#### A masked pattern was here #### +PREHOOK: query: From (select * from src) a +insert overwrite directory 'target/warehouse/destemp.out/dir1/' +select key +where key=200 limit 1 +insert overwrite directory 'target/warehouse/destemp.out/dir2/' +select 'header' where 1=2 insert overwrite directory 'target/warehouse/destemp.out/dir3/' select key @@ -31,3 +64,25 @@ POSTHOOK: Output: target/warehouse/destemp.out/dir3 200 100 #### A masked pattern was here #### +PREHOOK: query: explain select a.value from src a join src b on a.key = b.key where a.key=100 limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.value from src a join src b on a.key = b.key where a.key=100 limit 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from src a join src b on a.key = b.key where a.key=100 limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.value from src a join src b on a.key = b.key where a.key=100 limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here ####