Repository: hive
Updated Branches:
  refs/heads/master 4ad4ceb66 -> a10bd8cae


HIVE-11297: Combine op trees for partition info generating tasks (Liyun Zhang reviewed by Chao Sun)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a10bd8ca
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a10bd8ca
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a10bd8ca

Branch: refs/heads/master
Commit: a10bd8caeb4609854ef579a48e3691cae3f145f9
Parents: 4ad4ceb
Author: Ferdinand Xu <cheng.a...@intel.com>
Authored: Mon Jun 26 10:48:21 2017 +0800
Committer: Ferdinand Xu <cheng.a...@intel.com>
Committed: Mon Jun 26 10:48:21 2017 +0800

----------------------------------------------------------------------
 .../hive/ql/parse/spark/SplitOpTreeForDPP.java  | 120 ++--
 .../spark/spark_dynamic_partition_pruning.q.out | 638 +++++++++----------
 2 files changed, 390 insertions(+), 368 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/a10bd8ca/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SplitOpTreeForDPP.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SplitOpTreeForDPP.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SplitOpTreeForDPP.java
index d4f58be..1348d8b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SplitOpTreeForDPP.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SplitOpTreeForDPP.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.parse.spark;
 
+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
@@ -36,63 +37,72 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 
-import com.google.common.base.Preconditions;
 
 /**
  * This processor triggers on SparkPartitionPruningSinkOperator. For an operator tree like
  * this:
  *
  * Original Tree:
- *     TS    TS
- *      |     |
- *     FIL   FIL
- *      |     | \
- *     RS     RS SEL
- *       \   /    |
- *        JOIN   GBY
- *                |
- *               SPARKPRUNINGSINK
+ *     TS1       TS2
+ *      |          |
+ *      FIL       FIL
+ *      |          |
+ *      RS      /   \   \
+ *      |      |    \    \
+ *      |     RS  SEL  SEL
+ *      \   /      |     |
+ *      JOIN      GBY   GBY
+ *                  |    |
+ *                  |  SPARKPRUNINGSINK
+ *                  |
+ *              SPARKPRUNINGSINK
  *
  * It removes the branch containing SPARKPRUNINGSINK from the original operator tree, and splits it into
  * two separate trees:
- *
- * Tree #1:                 Tree #2:
- *     TS    TS               TS
- *      |     |                |
- *     FIL   FIL              FIL
- *      |     |                |
- *     RS     RS              SEL
- *       \   /                 |
- *       JOIN                 GBY
- *                             |
- *                            SPARKPRUNINGSINK
- *
+ * Tree #1:                       Tree #2:
+ *      TS1    TS2                 TS2
+ *      |      |                    |
+ *      FIL    FIL                 FIL
+ *      |       |                   |_____
+ *      RS     SEL                  |     \
+ *      |       |                   SEL    SEL
+ *      |     RS                    |      |
+ *      \   /                       GBY    GBY
+ *      JOIN                        |      |
+ *                                  |    SPARKPRUNINGSINK
+ *                                 SPARKPRUNINGSINK
+ *
  * For MapJoinOperator, this optimizer will not do anything - it should be executed within
  * the same SparkTask.
  */
 public class SplitOpTreeForDPP implements NodeProcessor {
+
   @Override
   public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
                         Object... nodeOutputs) throws SemanticException {
     SparkPartitionPruningSinkOperator pruningSinkOp = (SparkPartitionPruningSinkOperator) nd;
     GenSparkProcContext context = (GenSparkProcContext) procCtx;
 
+    // Skip pruning sinks that have already been processed.
+    for (Operator<?> op : context.pruningSinkSet) {
+      if (pruningSinkOp.getOperatorId().equals(op.getOperatorId())) {
+        return null;
+      }
+    }
+
     // Locate the op where the branch starts
     // This is guaranteed to succeed since the branch always follows the pattern
     // as shown in the first picture above.
-    Operator<?> filterOp = pruningSinkOp;
-    Operator<?> selOp = null;
-    while (filterOp != null) {
-      if (filterOp.getNumChild() > 1) {
+    Operator<?> branchingOp = pruningSinkOp;
+    while (branchingOp != null) {
+      if (branchingOp.getNumChild() > 1) {
         break;
       } else {
-        selOp = filterOp;
-        filterOp = filterOp.getParentOperators().get(0);
+        branchingOp = branchingOp.getParentOperators().get(0);
       }
     }
 
     // Check if this is a MapJoin. If so, do not split.
-    for (Operator<?> childOp : filterOp.getChildOperators()) {
+    for (Operator<?> childOp : branchingOp.getChildOperators()) {
       if (childOp instanceof ReduceSinkOperator &&
           childOp.getChildOperators().get(0) instanceof MapJoinOperator) {
         context.pruningSinkSet.add(pruningSinkOp);
@@ -103,8 +113,10 @@ public class SplitOpTreeForDPP implements NodeProcessor {
     List<Operator<?>> roots = new LinkedList<Operator<?>>();
     collectRoots(roots, pruningSinkOp);
 
-    List<Operator<?>> savedChildOps = filterOp.getChildOperators();
-    filterOp.setChildOperators(Utilities.makeList(selOp));
+    List<Operator<?>> savedChildOps = branchingOp.getChildOperators();
+    List<Operator<?>> firstNodesOfPruningBranch = findFirstNodesOfPruningBranch(branchingOp);
+    branchingOp.setChildOperators(Utilities.makeList(firstNodesOfPruningBranch.toArray(new
+        Operator<?>[firstNodesOfPruningBranch.size()])));
 
     // Now clone the operator trees that contain the pruning branches
     List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots);
@@ -115,27 +127,49 @@ public class SplitOpTreeForDPP implements NodeProcessor {
     }
     context.clonedPruningTableScanSet.addAll(newRoots);
 
+    // Find all pruning sinks in the old roots
+    List<Operator<?>> oldsinkList = new ArrayList<>();
+    for (Operator<?> root : roots) {
+      SparkUtilities.collectOp(oldsinkList, root, SparkPartitionPruningSinkOperator.class);
+    }
+    }
+
     // Restore broken links between operators, and remove the branch from the original tree
-    filterOp.setChildOperators(savedChildOps);
-    filterOp.removeChild(selOp);
+    branchingOp.setChildOperators(savedChildOps);
+    for (Operator<?> selOp : firstNodesOfPruningBranch) {
+      branchingOp.removeChild(selOp);
+    }
 
-    // Find the cloned PruningSink and add it to pruningSinkSet
-    Set<Operator<?>> sinkSet = new HashSet<Operator<?>>();
+    // Find all pruning sinks in the new roots
+    Set<Operator<?>> sinkSet = new HashSet<>();
     for (Operator<?> root : newRoots) {
       SparkUtilities.collectOp(sinkSet, root, SparkPartitionPruningSinkOperator.class);
     }
-    Preconditions.checkArgument(sinkSet.size() == 1,
-        "AssertionError: expected to only contain one SparkPartitionPruningSinkOperator," +
-            " but found " + sinkSet.size());
-    SparkPartitionPruningSinkOperator clonedPruningSinkOp =
-        (SparkPartitionPruningSinkOperator) sinkSet.iterator().next();
-    clonedPruningSinkOp.getConf().setTableScan(pruningSinkOp.getConf().getTableScan());
-    context.pruningSinkSet.add(clonedPruningSinkOp);
 
+    // Pair each cloned pruning sink with its original so the clone inherits the
+    // original's target table scan.
+    int i = 0;
+    for (Operator<?> clonedPruningSinkOp : sinkSet) {
+      SparkPartitionPruningSinkOperator oldsinkOp = (SparkPartitionPruningSinkOperator) oldsinkList.get(i++);
+      ((SparkPartitionPruningSinkOperator) clonedPruningSinkOp).getConf().setTableScan(oldsinkOp.getConf().getTableScan());
+      context.pruningSinkSet.add(clonedPruningSinkOp);
+    }
     return null;
   }
 
-  /**
+  // Find the children of the given branching operator whose subtrees contain a
+  // SparkPartitionPruningSink.
+  private List<Operator<?>> findFirstNodesOfPruningBranch(Operator<?> branchingOp) {
+    List<Operator<?>> res = new ArrayList<>();
+    for (Operator<?> child : branchingOp.getChildOperators()) {
+      List<Operator<?>> pruningList = new ArrayList<>();
+      SparkUtilities.collectOp(pruningList, child, SparkPartitionPruningSinkOperator.class);
+      if (pruningList.size() > 0) {
+        res.add(child);
+      }
+    }
+    return res;
+  }
+
+  /**
   * Recursively collect all roots (e.g., table scans) that can be reached via this op.
   * @param result contains all roots that can be reached via op
    * @param op the op to examine.

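The two sketches below are illustrations only, using toy stand-in classes (Op, containsSink, firstNodesOfPruningBranches, Sink are all made up here), not Hive's Operator API. The first shows the idea behind the new findFirstNodesOfPruningBranch(): starting from the operator where the branches fork, keep only the children whose subtrees lead to a pruning sink.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class SplitSketch {
  // Minimal stand-in for an operator tree node.
  static class Op {
    final String name;
    final List<Op> children;
    Op(String name, Op... kids) {
      this.name = name;
      this.children = Arrays.asList(kids);
    }
  }

  // True if the subtree rooted at op contains a SPARKPRUNINGSINK node.
  static boolean containsSink(Op op) {
    if (op.name.equals("SPARKPRUNINGSINK")) {
      return true;
    }
    for (Op child : op.children) {
      if (containsSink(child)) {
        return true;
      }
    }
    return false;
  }

  // Analogue of findFirstNodesOfPruningBranch: of the branching operator's
  // children, keep those whose subtrees lead to a pruning sink.
  static List<Op> firstNodesOfPruningBranches(Op branchingOp) {
    List<Op> res = new ArrayList<>();
    for (Op child : branchingOp.children) {
      if (containsSink(child)) {
        res.add(child);
      }
    }
    return res;
  }

  public static void main(String[] args) {
    // FIL with three children, mirroring the "Original Tree" above: one RS that
    // feeds the join, and two SEL->GBY->SPARKPRUNINGSINK branches.
    Op fil = new Op("FIL",
        new Op("RS"),
        new Op("SEL", new Op("GBY", new Op("SPARKPRUNINGSINK"))),
        new Op("SEL", new Op("GBY", new Op("SPARKPRUNINGSINK"))));
    System.out.println(firstNodesOfPruningBranches(fil).size()); // prints 2
  }
}

The second sketches the pairing step, which matches each cloned sink to its original by iteration order. It assumes both collections enumerate sinks in the same order; an order-preserving set is used here to make that assumption explicit.

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class SinkPairingSketch {
  static class Sink {
    final String id;
    String targetScan;
    Sink(String id, String targetScan) {
      this.id = id;
      this.targetScan = targetScan;
    }
  }

  public static void main(String[] args) {
    // Originals, in the order they were collected from the old roots.
    List<Sink> oldSinks = Arrays.asList(
        new Sink("sink1", "TS[srcpart]"), new Sink("sink2", "TS[srcpart]"));
    // LinkedHashSet preserves insertion order, so index i of the clones lines
    // up with index i of the originals.
    Set<Sink> clonedSinks = new LinkedHashSet<>(Arrays.asList(
        new Sink("sink1'", null), new Sink("sink2'", null)));

    int i = 0;
    for (Sink clone : clonedSinks) {
      Sink original = oldSinks.get(i++);
      clone.targetScan = original.targetScan; // clone inherits the target scan
      System.out.println(clone.id + " -> " + clone.targetScan);
    }
  }
}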
http://git-wip-us.apache.org/repos/asf/hive/blob/a10bd8ca/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
index fc6edb4..4448cbf 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
@@ -1,6 +1,4 @@
-PREHOOK: query: -- SORT_QUERY_RESULTS
-
-select distinct ds from srcpart
+PREHOOK: query: select distinct ds from srcpart
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -8,9 +6,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
-POSTHOOK: query: -- SORT_QUERY_RESULTS
-
-select distinct ds from srcpart
+POSTHOOK: query: select distinct ds from srcpart
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -194,11 +190,9 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcpart_double_hour
 POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
-PREHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -452,12 +446,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 1000
-PREHOOK: query: -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -818,11 +810,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 500
-PREHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -861,19 +851,6 @@ STAGE PLANS:
                             Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
                             target column name: ds
                             target work: Map 1
-        Map 6 
-            Map Operator Tree:
-                TableScan
-                  alias: srcpart_date_hour
-                  filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
-                  Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: ds (type: string), hr (type: string)
-                      outputColumnNames: _col0, _col2
-                      Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
                       Select Operator
                         expressions: _col2 (type: string)
                         outputColumnNames: _col0
@@ -1102,11 +1079,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 500
-PREHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -1360,11 +1335,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 0
-PREHOOK: query: -- expressions
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: -- expressions
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -2015,11 +1988,9 @@ POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 1000
 Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
-PREHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -2138,11 +2109,9 @@ POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 1000
 Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
-PREHOOK: query: -- non-equi join
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
+PREHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
 PREHOOK: type: QUERY
-POSTHOOK: query: -- non-equi join
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
+POSTHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -2251,11 +2220,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Input: default@srcpart_date_hour
 #### A masked pattern was here ####
 1500
-PREHOOK: query: -- old style join syntax
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
+PREHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
 PREHOOK: type: QUERY
-POSTHOOK: query: -- old style join syntax
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
+POSTHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -2294,19 +2261,6 @@ STAGE PLANS:
                             Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
                             target column name: ds
                             target work: Map 1
-        Map 6 
-            Map Operator Tree:
-                TableScan
-                  alias: srcpart_date_hour
-                  filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
-                  Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: ds (type: string), hr (type: string)
-                      outputColumnNames: _col0, _col2
-                      Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
                       Select Operator
                         expressions: _col2 (type: string)
                         outputColumnNames: _col0
@@ -2419,11 +2373,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Input: default@srcpart_date_hour
 #### A masked pattern was here ####
 500
-PREHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -2627,7 +2579,7 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Outer Join0 to 1
+                     Left Outer Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
@@ -2662,11 +2614,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -2749,7 +2699,7 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Right Outer Join0 to 1
+                     Right Outer Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
@@ -2784,12 +2734,10 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -2829,6 +2777,33 @@ STAGE PLANS:
                             Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                             target column name: ds
                             target work: Map 1
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_hour
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: hr (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          keys: _col0 (type: string)
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                          Spark Partition Pruning Sink Operator
+                            partition key expr: hr
+                            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                            target column name: hr
+                            target work: Map 1
 
   Stage: Stage-1
     Spark
@@ -3105,11 +3080,9 @@ POSTHOOK: Input: default@srcpart_date
 POSTHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 0
-PREHOOK: query: -- union + subquery
-EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+PREHOOK: query: EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 PREHOOK: type: QUERY
-POSTHOOK: query: -- union + subquery
-EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+POSTHOOK: query: EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -3168,16 +3141,19 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
         Reducer 11 
             Reduce Operator Tree:
               Group By Operator
@@ -3206,16 +3182,19 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-1
     Spark
@@ -3231,6 +3210,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart
+                  filterExpr: ds is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ds (type: string)
@@ -3316,16 +3296,19 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
         Reducer 6 
             Reduce Operator Tree:
               Group By Operator
@@ -3345,16 +3328,19 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -3440,16 +3426,19 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
         Reducer 11 
             Reduce Operator Tree:
               Group By Operator
@@ -3478,16 +3467,19 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-1
     Spark
@@ -3503,6 +3495,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart
+                  filterExpr: ds is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ds (type: string)
@@ -3590,16 +3583,19 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
         Reducer 6 
             Reduce Operator Tree:
               Group By Operator
@@ -3619,16 +3615,19 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -3667,13 +3666,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 12 <- Map 11 (GROUP, 1)
-        Reducer 13 <- Reducer 12 (GROUP, 2), Reducer 15 (GROUP, 2)
-        Reducer 15 <- Map 14 (GROUP, 1)
-        Reducer 18 <- Reducer 12 (GROUP, 2), Reducer 15 (GROUP, 2)
+        Reducer 11 <- Map 10 (GROUP, 1)
+        Reducer 13 <- Map 12 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 11 
+        Map 10 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -3691,7 +3688,7 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string)
-        Map 14 
+        Map 12 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -3709,97 +3706,107 @@ STAGE PLANS:
                         sort order: 
                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string)
-        Reducer 12 
+        Reducer 11 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-        Reducer 13 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     keys: _col0 (type: string)
                     mode: hash
                     outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                    Spark Partition Pruning Sink Operator
-                      partition key expr: ds
-                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                      target column name: ds
-                      target work: Map 1
-        Reducer 15 
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-        Reducer 18 
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                        Spark Partition Pruning Sink Operator
+                          partition key expr: ds
+                          Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                          target column name: ds
+                          target work: Map 4
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                        Spark Partition Pruning Sink Operator
+                          partition key expr: ds
+                          Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                          target column name: ds
+                          target work: Map 1
+        Reducer 13 
             Reduce Operator Tree:
               Group By Operator
-                keys: KEY._col0 (type: string)
+                aggregations: min(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     keys: _col0 (type: string)
                     mode: hash
                     outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                    Spark Partition Pruning Sink Operator
-                      partition key expr: ds
-                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                      target column name: ds
-                      target work: Map 4
+                    Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                        Spark Partition Pruning Sink Operator
+                          partition key expr: ds
+                          Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                          target column name: ds
+                          target work: Map 4
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                        Spark Partition Pruning Sink Operator
+                          partition key expr: ds
+                          Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                          target column name: ds
+                          target work: Map 1
 
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 10 <- Map 9 (GROUP, 1)
         Reducer 2 <- Map 1 (GROUP, 2)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2)
         Reducer 7 <- Map 6 (GROUP, 1)
-        Reducer 8 <- Reducer 10 (GROUP, 2), Reducer 7 (GROUP, 2)
+        Reducer 9 <- Map 8 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
+                  filterExpr: ds is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     keys: ds (type: string)
@@ -3829,7 +3836,7 @@ STAGE PLANS:
                         sort order: 
                       Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string)
-        Map 9 
+        Map 8 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -3847,23 +3854,6 @@ STAGE PLANS:
                         sort order: 
                       Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string)
-        Reducer 10 
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -3880,7 +3870,7 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Inner Join 0 to 1
+                     Left Semi Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
@@ -3900,28 +3890,39 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-        Reducer 8 
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+        Reducer 9 
             Reduce Operator Tree:
               Group By Operator
-                keys: KEY._col0 (type: string)
+                aggregations: min(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -3949,11 +3950,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 2008-04-08
 2008-04-09
 2008-04-09
-PREHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -4084,12 +4083,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 1000
-PREHOOK: query: -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -4267,11 +4264,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 500
-PREHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -4415,11 +4410,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 500
-PREHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -4522,17 +4515,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- Disabled until TEZ-1486 is fixed
--- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
-
--- expressions
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: -- Disabled until TEZ-1486 is fixed
--- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
-
--- expressions
-EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -4788,11 +4773,9 @@ POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 1000
 Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
-PREHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -4918,11 +4901,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 1000
-PREHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -5109,7 +5090,7 @@ STAGE PLANS:
                      Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
-                             Left Outer Join0 to 1
+                             Left Outer Join 0 to 1
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
@@ -5148,11 +5129,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -5234,7 +5213,7 @@ STAGE PLANS:
                      Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
-                             Right Outer Join0 to 1
+                             Right Outer Join 0 to 1
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
@@ -5273,12 +5252,10 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: -- with static pruning
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -5341,6 +5318,20 @@ STAGE PLANS:
                         keys:
                           0 _col1 (type: string)
                           1 _col0 (type: string)
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          keys: _col0 (type: string)
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                          Spark Partition Pruning Sink Operator
+                            partition key expr: hr
+                            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                            target column name: hr
+                            target work: Map 1
             Local Work:
               Map Reduce Local Work
 
@@ -5560,19 +5551,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- Disabled until TEZ-1486 is fixed
--- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
--- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
-
--- union + subquery
-EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+PREHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 PREHOOK: type: QUERY
-POSTHOOK: query: -- Disabled until TEZ-1486 is fixed
--- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
--- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
-
--- union + subquery
-EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
+POSTHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -5631,16 +5612,19 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
         Reducer 5 
             Local Work:
               Map Reduce Local Work
@@ -5675,16 +5659,19 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-1
     Spark
@@ -5696,6 +5683,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart
+                  filterExpr: ds is not null (type: boolean)
                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ds (type: string)
