HIVE-11652: Avoid expensive call to removeAll in DefaultGraphWalker (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan/Hari Sankar Sivarama Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af91308e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af91308e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af91308e Branch: refs/heads/llap Commit: af91308e5b6573ea6dc793912bcc628a5a40c000 Parents: 22fa921 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Sat Aug 29 11:40:03 2015 +0200 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Sat Aug 29 11:42:59 2015 +0200 ---------------------------------------------------------------------- .../hadoop/hive/ql/lib/DefaultGraphWalker.java | 80 ++++++++++++++------ .../hadoop/hive/ql/lib/ForwardWalker.java | 33 ++++---- .../hadoop/hive/ql/optimizer/ColumnPruner.java | 6 +- .../hive/ql/optimizer/ConstantPropagate.java | 10 +-- 4 files changed, 79 insertions(+), 50 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java index 583c113..07d2734 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java @@ -22,7 +22,9 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.IdentityHashMap; +import java.util.LinkedList; import java.util.List; +import java.util.Queue; import java.util.Set; import java.util.Stack; @@ -36,7 +38,21 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; */ public class DefaultGraphWalker implements GraphWalker { - protected Stack<Node> opStack; + /** + * opStack keeps the nodes that have been visited, but have not been + * dispatched yet + */ + protected final Stack<Node> opStack; + /** + * opQueue keeps the nodes in the order that the were dispatched. + * Then it is used to go through the processed nodes and store + * the results that the dispatcher has produced (if any) + */ + protected final Queue<Node> opQueue; + /** + * toWalk stores the starting nodes for the graph that needs to be + * traversed + */ protected final List<Node> toWalk = new ArrayList<Node>(); protected final IdentityHashMap<Node, Object> retMap = new IdentityHashMap<Node, Object>(); protected final Dispatcher dispatcher; @@ -50,13 +66,7 @@ public class DefaultGraphWalker implements GraphWalker { public DefaultGraphWalker(Dispatcher disp) { dispatcher = disp; opStack = new Stack<Node>(); - } - - /** - * @return the toWalk - */ - public List<Node> getToWalk() { - return toWalk; + opQueue = new LinkedList<Node>(); } /** @@ -108,10 +118,22 @@ public class DefaultGraphWalker implements GraphWalker { while (toWalk.size() > 0) { Node nd = toWalk.remove(0); walk(nd); + // Some walkers extending DefaultGraphWalker e.g. ForwardWalker + // do not use opQueue and rely uniquely in the toWalk structure, + // thus we store the results produced by the dispatcher here + // TODO: rewriting the logic of those walkers to use opQueue if (nodeOutput != null && getDispatchedList().contains(nd)) { nodeOutput.put(nd, retMap.get(nd)); } } + + // Store the results produced by the dispatcher + while (!opQueue.isEmpty()) { + Node node = opQueue.poll(); + if (nodeOutput != null && getDispatchedList().contains(node)) { + nodeOutput.put(node, retMap.get(node)); + } + } } /** @@ -121,23 +143,33 @@ public class DefaultGraphWalker implements GraphWalker { * current operator in the graph * @throws SemanticException */ - public void walk(Node nd) throws SemanticException { - if (opStack.empty() || nd != opStack.peek()) { - opStack.push(nd); - } + public void walk(Node nd) throws SemanticException { + // Push the node in the stack + opStack.push(nd); + + // While there are still nodes to dispatch... + while (!opStack.empty()) { + Node node = opStack.peek(); - if ((nd.getChildren() == null) - || getDispatchedList().containsAll(nd.getChildren())) { - // all children are done or no need to walk the children - if (!getDispatchedList().contains(nd)) { - dispatch(nd, opStack); + if (node.getChildren() == null || + getDispatchedList().containsAll(node.getChildren())) { + // Dispatch current node + if (!getDispatchedList().contains(node)) { + dispatch(node, opStack); + opQueue.add(node); + } + opStack.pop(); + continue; } - opStack.pop(); - return; - } - // add children, self to the front of the queue in that order - getToWalk().add(0, nd); - getToWalk().removeAll(nd.getChildren()); - getToWalk().addAll(0, nd.getChildren()); + + // Add a single child and restart the loop + for (Node childNode : node.getChildren()) { + if (!getDispatchedList().contains(childNode)) { + opStack.push(childNode); + break; + } + } + } // end while } + } http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java index a2db3b5..67b4700 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java @@ -19,20 +19,17 @@ package org.apache.hadoop.hive.ql.lib; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; public class ForwardWalker extends DefaultGraphWalker { /** -* Constructor. -* -* @param disp -* dispatcher to call for each op encountered -*/ + * Constructor. + * + * @param disp + * dispatcher to call for each op encountered + */ public ForwardWalker(Dispatcher disp) { super(disp); } @@ -54,17 +51,17 @@ public class ForwardWalker extends DefaultGraphWalker { @SuppressWarnings("unchecked") protected void addAllParents(Node nd) { Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd; - getToWalk().removeAll(op.getParentOperators()); - getToWalk().addAll(0, op.getParentOperators()); + toWalk.removeAll(op.getParentOperators()); + toWalk.addAll(0, op.getParentOperators()); } /** -* walk the current operator and its descendants. -* -* @param nd -* current operator in the graph -* @throws SemanticException -*/ + * walk the current operator and its descendants. + * + * @param nd + * current operator in the graph + * @throws SemanticException + */ @Override public void walk(Node nd) throws SemanticException { if (opStack.empty() || nd != opStack.peek()) { @@ -73,14 +70,14 @@ public class ForwardWalker extends DefaultGraphWalker { if (allParentsDispatched(nd)) { // all children are done or no need to walk the children if (!getDispatchedList().contains(nd)) { - getToWalk().addAll(nd.getChildren()); + toWalk.addAll(nd.getChildren()); dispatch(nd, opStack); } opStack.pop(); return; } // add children, self to the front of the queue in that order - getToWalk().add(0, nd); + toWalk.add(0, nd); addAllParents(nd); } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java index 9a45458..735b448 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java @@ -174,10 +174,10 @@ public class ColumnPruner implements Transform { return; } // move all the children to the front of queue - getToWalk().removeAll(nd.getChildren()); - getToWalk().addAll(0, nd.getChildren()); + toWalk.removeAll(nd.getChildren()); + toWalk.addAll(0, nd.getChildren()); // add self to the end of the queue - getToWalk().add(nd); + toWalk.add(nd); opStack.pop(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java index dd53ced..b6f1f27 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java @@ -151,17 +151,17 @@ public class ConstantPropagate implements Transform { dispatch(nd, opStack); opStack.pop(); } else { - getToWalk().removeAll(parents); - getToWalk().add(0, nd); - getToWalk().addAll(0, parents); + toWalk.removeAll(parents); + toWalk.add(0, nd); + toWalk.addAll(0, parents); return; } // move all the children to the front of queue List<? extends Node> children = nd.getChildren(); if (children != null) { - getToWalk().removeAll(children); - getToWalk().addAll(children); + toWalk.removeAll(children); + toWalk.addAll(children); } } }