hive git commit: HIVE-18250: CBO gets turned off with duplicates in RR error (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

jcamacho Wed, 13 Dec 2017 09:29:18 -0800

Repository: hive
Updated Branches:
  refs/heads/master e86c77af5 -> 7ea263cbe



HIVE-18250: CBO gets turned off with duplicates in RR error (Jesus Camacho 
Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7ea263cb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7ea263cb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7ea263cb

Branch: refs/heads/master
Commit: 7ea263cbe20eab04a813d270fc8cddc9ad80e7dc
Parents: e86c77a
Author: Jesus Camacho Rodriguez <jcama...@apache.org>
Authored: Mon Dec 11 18:29:54 2017 -0800
Committer: Jesus Camacho Rodriguez <jcama...@apache.org>
Committed: Wed Dec 13 09:27:22 2017 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java    | 164 +++++++++----------
 .../queries/clientpositive/groupby_multialias.q |   7 +
 .../results/clientpositive/complex_alias.q.out  |  63 +++----
 .../clientpositive/groupby_multialias.q.out     |  66 ++++++++
 ql/src/test/results/clientpositive/order3.q.out |  58 ++++---
 .../clientpositive/perf/spark/query19.q.out     |   8 +-
 .../clientpositive/perf/spark/query55.q.out     |  16 +-
 .../clientpositive/perf/spark/query71.q.out     |   6 +-
 .../clientpositive/perf/tez/query19.q.out       |  28 ++--
 .../clientpositive/perf/tez/query55.q.out       |  88 +++++-----
 .../clientpositive/perf/tez/query71.q.out       |  30 ++--
 11 files changed, 302 insertions(+), 232 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 20386f1..efd5f7a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -3902,102 +3902,92 @@ public class CalcitePlanner extends SemanticAnalyzer {
           }
         }
 
-          Map<ASTNode, RelNode> subQueryToRelNode = new HashMap<>();
-          boolean isSubQuery = genSubQueryRelNode(qb, expr, srcRel, false,
-                  subQueryToRelNode);
-          if(isSubQuery) {
-            ExprNodeDesc subQueryExpr = genExprNodeDesc(expr, 
relToHiveRR.get(srcRel),
-                    outerRR, subQueryToRelNode, true);
-            col_list.add(subQueryExpr);
+        Map<ASTNode, RelNode> subQueryToRelNode = new HashMap<>();
+        boolean isSubQuery = genSubQueryRelNode(qb, expr, srcRel, false,
+                subQueryToRelNode);
+        if(isSubQuery) {
+          ExprNodeDesc subQueryExpr = genExprNodeDesc(expr, 
relToHiveRR.get(srcRel),
+                  outerRR, subQueryToRelNode, true);
+          col_list.add(subQueryExpr);
+
+          ColumnInfo colInfo = new 
ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos),
+                  subQueryExpr.getWritableObjectInspector(), tabAlias, false);
+          if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) {
+            throw new CalciteSemanticException("Cannot add column to RR: " + 
tabAlias + "."
+                    + colAlias + " => " + colInfo + " due to duplication, see 
previous warnings",
+                    UnsupportedFeature.Duplicates_in_RR);
+          }
+          pos = Integer.valueOf(pos.intValue() + 1);
+        } else {
+
+          // 6.4 Build ExprNode corresponding to colums
+          if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
+            pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : 
SemanticAnalyzer
+                            .getUnescapedName((ASTNode) 
expr.getChild(0)).toLowerCase(), expr, col_list,
+                    excludedColumns, inputRR, starRR, pos, out_rwsch, 
qb.getAliases(), true);
+            selectStar = true;
+          } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL
+                  && !hasAsClause
+                  && !inputRR.getIsExprResolver()
+                  && SemanticAnalyzer.isRegex(
+                  
SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), conf)) {
+            // In case the expression is a regex COL.
+            // This can only happen without AS clause
+            // We don't allow this for ExprResolver - the Group By case
+            pos = 
genColListRegex(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()),
+                    null, expr, col_list, excludedColumns, inputRR, starRR, 
pos, out_rwsch,
+                    qb.getAliases(), true);
+          } else if (expr.getType() == HiveParser.DOT
+                  && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
+                  && 
inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0)
+                  .getChild(0).getText().toLowerCase()))
+                  && !hasAsClause
+                  && !inputRR.getIsExprResolver()
+                  && SemanticAnalyzer.isRegex(
+                  
SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), conf)) {
+            // In case the expression is TABLE.COL (col can be regex).
+            // This can only happen without AS clause
+            // We don't allow this for ExprResolver - the Group By case
+            pos = genColListRegex(
+                    
SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()),
+                    
SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText()
+                            .toLowerCase()), expr, col_list, excludedColumns, 
inputRR, starRR, pos,
+                    out_rwsch, qb.getAliases(), true);
+          } else if (ParseUtils.containsTokenOfType(expr, 
HiveParser.TOK_FUNCTIONDI)
+                  && !(srcRel instanceof HiveAggregate)) {
+            // Likely a malformed query eg, select hash(distinct c1) from t1;
+            throw new CalciteSemanticException("Distinct without an 
aggregation.",
+                    UnsupportedFeature.Distinct_without_an_aggreggation);
+          } else {
+            // Case when this is an expression
+            TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
+            // We allow stateful functions in the SELECT list (but nowhere 
else)
+            tcCtx.setAllowStatefulFunctions(true);
+            if (!qbp.getDestToGroupBy().isEmpty()) {
+              // Special handling of grouping function
+              expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, 
selClauseName), expr,
+                      !cubeRollupGrpSetPresent);
+            }
+            ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
+            String recommended = recommendName(exp, colAlias);
+            if (recommended != null && out_rwsch.get(null, recommended) == 
null) {
+              colAlias = recommended;
+            }
+            col_list.add(exp);
 
             ColumnInfo colInfo = new 
ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos),
-                    subQueryExpr.getWritableObjectInspector(), tabAlias, 
false);
+                    exp.getWritableObjectInspector(), tabAlias, false);
+            colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? 
((ExprNodeColumnDesc) exp)
+                    .isSkewedCol() : false);
             if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) {
               throw new CalciteSemanticException("Cannot add column to RR: " + 
tabAlias + "."
                       + colAlias + " => " + colInfo + " due to duplication, 
see previous warnings",
                       UnsupportedFeature.Duplicates_in_RR);
             }
-            pos = Integer.valueOf(pos.intValue() + 1);
-          } else {
 
-            // 6.4 Build ExprNode corresponding to colums
-            if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
-              pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : 
SemanticAnalyzer
-                              .getUnescapedName((ASTNode) 
expr.getChild(0)).toLowerCase(), expr, col_list,
-                      excludedColumns, inputRR, starRR, pos, out_rwsch, 
qb.getAliases(), true);
-              selectStar = true;
-            } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL
-                    && !hasAsClause
-                    && !inputRR.getIsExprResolver()
-                    && SemanticAnalyzer.isRegex(
-                    
SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), conf)) {
-              // In case the expression is a regex COL.
-              // This can only happen without AS clause
-              // We don't allow this for ExprResolver - the Group By case
-              pos = 
genColListRegex(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()),
-                      null, expr, col_list, excludedColumns, inputRR, starRR, 
pos, out_rwsch,
-                      qb.getAliases(), true);
-            } else if (expr.getType() == HiveParser.DOT
-                    && expr.getChild(0).getType() == 
HiveParser.TOK_TABLE_OR_COL
-                    && 
inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0)
-                    .getChild(0).getText().toLowerCase()))
-                    && !hasAsClause
-                    && !inputRR.getIsExprResolver()
-                    && SemanticAnalyzer.isRegex(
-                    
SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), conf)) {
-              // In case the expression is TABLE.COL (col can be regex).
-              // This can only happen without AS clause
-              // We don't allow this for ExprResolver - the Group By case
-              pos = genColListRegex(
-                      
SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()),
-                      
SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText()
-                              .toLowerCase()), expr, col_list, 
excludedColumns, inputRR, starRR, pos,
-                      out_rwsch, qb.getAliases(), true);
-            } else if (ParseUtils.containsTokenOfType(expr, 
HiveParser.TOK_FUNCTIONDI)
-                    && !(srcRel instanceof HiveAggregate)) {
-              // Likely a malformed query eg, select hash(distinct c1) from t1;
-              throw new CalciteSemanticException("Distinct without an 
aggregation.",
-                      UnsupportedFeature.Distinct_without_an_aggreggation);
-            }
-              else {
-              // Case when this is an expression
-              TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
-              // We allow stateful functions in the SELECT list (but nowhere 
else)
-              tcCtx.setAllowStatefulFunctions(true);
-              if (!qbp.getDestToGroupBy().isEmpty()) {
-                // Special handling of grouping function
-                expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, 
selClauseName), expr,
-                        !cubeRollupGrpSetPresent);
-              }
-              ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
-              String recommended = recommendName(exp, colAlias);
-              if (recommended != null && out_rwsch.get(null, recommended) == 
null) {
-                colAlias = recommended;
-              }
-              col_list.add(exp);
-
-              ColumnInfo colInfo = new 
ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos),
-                      exp.getWritableObjectInspector(), tabAlias, false);
-              colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? 
((ExprNodeColumnDesc) exp)
-                      .isSkewedCol() : false);
-              if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) {
-                throw new CalciteSemanticException("Cannot add column to RR: " 
+ tabAlias + "."
-                        + colAlias + " => " + colInfo + " due to duplication, 
see previous warnings",
-                        UnsupportedFeature.Duplicates_in_RR);
-              }
-
-              if (exp instanceof ExprNodeColumnDesc) {
-                ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
-                String[] altMapping = 
inputRR.getAlternateMappings(colExp.getColumn());
-                if (altMapping != null) {
-                  // TODO: this can overwrite the mapping. Should this be 
allowed?
-                  out_rwsch.put(altMapping[0], altMapping[1], colInfo);
-                }
-              }
-
-              pos = Integer.valueOf(pos.intValue() + 1);
-            }
+            pos = Integer.valueOf(pos.intValue() + 1);
           }
+        }
       }
       selectStar = selectStar && exprList.getChildCount() == posn + 1;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/test/queries/clientpositive/groupby_multialias.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_multialias.q 
b/ql/src/test/queries/clientpositive/groupby_multialias.q
new file mode 100644
index 0000000..b0a0171
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_multialias.q
@@ -0,0 +1,7 @@
+create table t1 (a int);
+
+explain
+select t1.a as a1, min(t1.a) as a
+from t1
+group by t1.a;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/test/results/clientpositive/complex_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/complex_alias.q.out 
b/ql/src/test/results/clientpositive/complex_alias.q.out
index 5182153..64e1f37 100644
--- a/ql/src/test/results/clientpositive/complex_alias.q.out
+++ b/ql/src/test/results/clientpositive/complex_alias.q.out
@@ -17,7 +17,7 @@ POSTHOOK: Output: default@agg1
 POSTHOOK: Lineage: agg1.col0 EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: agg1.col1 SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
 POSTHOOK: Lineage: agg1.col2 EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
-Warning: Shuffle Join JOIN[20][tables = [single_use_subq12, 
single_use_subq11]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-2:MAPRED' is a cross product
 PREHOOK: query: EXPLAIN
 SELECT single_use_subq11.a1 AS a1,
        single_use_subq11.a2 AS a2
@@ -76,27 +76,24 @@ STAGE PLANS:
             alias: agg1
             Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column 
stats: NONE
             Select Operator
-              expressions: col0 (type: int), col2 (type: double)
-              outputColumnNames: col0, col2
+              expressions: col0 (type: int)
+              outputColumnNames: col0
               Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: sum(col2)
                 keys: col0 (type: int)
                 mode: hash
-                outputColumnNames: _col0, _col1
+                outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col1 (type: double)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: sum(VALUE._col0)
           keys: KEY._col0 (type: int)
           mode: mergepartial
-          outputColumnNames: _col0, _col1
+          outputColumnNames: _col0
           Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column 
stats: NONE
@@ -151,24 +148,20 @@ STAGE PLANS:
               Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: '42' (type: string), col0 (type: int)
-                outputColumnNames: _col1, _col2
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE 
Column stats: NONE
                 Union
                   Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE 
Column stats: NONE
-                  Select Operator
-                    expressions: _col1 (type: string), _col2 (type: int)
-                    outputColumnNames: _col1, _col2
+                  Group By Operator
+                    keys: _col0 (type: string), _col1 (type: int)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 2 Data size: 34 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col2 (type: int), _col1 (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int)
                       Statistics: Num rows: 2 Data size: 34 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: 
string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: string)
-                        Statistics: Num rows: 2 Data size: 34 Basic stats: 
COMPLETE Column stats: NONE
           TableScan
             alias: agg1
             Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column 
stats: NONE
@@ -177,32 +170,28 @@ STAGE PLANS:
               Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: '41' (type: string), col0 (type: int)
-                outputColumnNames: _col1, _col2
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE 
Column stats: NONE
                 Union
                   Statistics: Num rows: 2 Data size: 34 Basic stats: COMPLETE 
Column stats: NONE
-                  Select Operator
-                    expressions: _col1 (type: string), _col2 (type: int)
-                    outputColumnNames: _col1, _col2
+                  Group By Operator
+                    keys: _col0 (type: string), _col1 (type: int)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 2 Data size: 34 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col2 (type: int), _col1 (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int)
                       Statistics: Num rows: 2 Data size: 34 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: 
string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: string)
-                        Statistics: Num rows: 2 Data size: 34 Basic stats: 
COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: int), KEY._col1 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
-            expressions: _col1 (type: string), _col1 (type: string)
+            expressions: _col0 (type: string), _col0 (type: string)
             outputColumnNames: _col1, _col2
             Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column 
stats: NONE
             File Output Operator
@@ -218,7 +207,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[20][tables = [single_use_subq12, 
single_use_subq11]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-2:MAPRED' is a cross product
 PREHOOK: query: SELECT single_use_subq11.a1 AS a1,
        single_use_subq11.a2 AS a2
 FROM   (SELECT Sum(agg1.col2) AS a1

http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/test/results/clientpositive/groupby_multialias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_multialias.q.out 
b/ql/src/test/results/clientpositive/groupby_multialias.q.out
new file mode 100644
index 0000000..cee790a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/groupby_multialias.q.out
@@ -0,0 +1,66 @@
+PREHOOK: query: create table t1 (a int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: explain
+select t1.a as a1, min(t1.a) as a
+from t1
+group by t1.a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select t1.a as a1, min(t1.a) as a
+from t1
+group by t1.a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+            Select Operator
+              expressions: a (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+              Group By Operator
+                aggregations: min(a)
+                keys: a (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                  value expressions: _col1 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: min(VALUE._col0)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/test/results/clientpositive/order3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/order3.q.out 
b/ql/src/test/results/clientpositive/order3.q.out
index d3db1b9..85ee858 100644
--- a/ql/src/test/results/clientpositive/order3.q.out
+++ b/ql/src/test/results/clientpositive/order3.q.out
@@ -58,19 +58,23 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0
           Statistics: Num rows: 7 Data size: 70 Basic stats: COMPLETE Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Select Operator
+            expressions: _col0 (type: int)
+            outputColumnNames: _col1
+            Statistics: Num rows: 7 Data size: 70 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: int)
+              key expressions: _col1 (type: int)
               sort order: +
               Statistics: Num rows: 7 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
               TopN Hash Memory Usage: 0.1
@@ -92,7 +96,7 @@ STAGE PLANS:
 
   Stage: Stage-0
     Fetch Operator
-      limit: 3
+      limit: -1
       Processor Tree:
         ListSink
 
@@ -147,19 +151,23 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 7 Data size: 70 Basic stats: COMPLETE Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Select Operator
+            expressions: _col1 (type: int), _col0 (type: int)
+            outputColumnNames: _col1, _col2
+            Statistics: Num rows: 7 Data size: 70 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: int)
+              key expressions: _col2 (type: int)
               sort order: +
               Statistics: Num rows: 7 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
               TopN Hash Memory Usage: 0.1
@@ -182,7 +190,7 @@ STAGE PLANS:
 
   Stage: Stage-0
     Fetch Operator
-      limit: 3
+      limit: -1
       Processor Tree:
         ListSink
 
@@ -237,19 +245,23 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 7 Data size: 70 Basic stats: COMPLETE Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Select Operator
+            expressions: _col1 (type: bigint), _col0 (type: int)
+            outputColumnNames: _col1, _col2
+            Statistics: Num rows: 7 Data size: 70 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: int)
+              key expressions: _col2 (type: int)
               sort order: +
               Statistics: Num rows: 7 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
               TopN Hash Memory Usage: 0.1

http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/test/results/clientpositive/perf/spark/query19.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query19.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query19.q.out
index 6a70ddc..281445c 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query19.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query19.q.out
@@ -241,11 +241,11 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 421657640 Data size: 37198759433 Basic 
stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col1 (type: int), _col0 (type: string), _col2 
(type: int), _col3 (type: string), _col4 (type: decimal(17,2))
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  expressions: _col2 (type: int), _col3 (type: string), _col4 
(type: decimal(17,2)), _col0 (type: string), _col1 (type: int)
+                  outputColumnNames: _col2, _col3, _col4, _col5, _col6
                   Statistics: Num rows: 421657640 Data size: 37198759433 Basic 
stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
-                    key expressions: _col4 (type: decimal(17,2)), _col1 (type: 
string), _col0 (type: int), _col2 (type: int), _col3 (type: string)
+                    key expressions: _col4 (type: decimal(17,2)), _col5 (type: 
string), _col6 (type: int), _col2 (type: int), _col3 (type: string)
                     sort order: -++++
                     Statistics: Num rows: 421657640 Data size: 37198759433 
Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
@@ -300,7 +300,7 @@ STAGE PLANS:
 
   Stage: Stage-0
     Fetch Operator
-      limit: 100
+      limit: -1
       Processor Tree:
         ListSink
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/test/results/clientpositive/perf/spark/query55.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query55.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query55.q.out
index c611918..44eb24e 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query55.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query55.q.out
@@ -137,12 +137,16 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 348477374 Data size: 30742775095 Basic 
stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col2 (type: decimal(17,2)), _col0 (type: 
int)
-                  sort order: -+
+                Select Operator
+                  expressions: _col1 (type: string), _col2 (type: 
decimal(17,2)), _col0 (type: int)
+                  outputColumnNames: _col1, _col2, _col3
                   Statistics: Num rows: 348477374 Data size: 30742775095 Basic 
stats: COMPLETE Column stats: NONE
-                  TopN Hash Memory Usage: 0.1
-                  value expressions: _col1 (type: string)
+                  Reduce Output Operator
+                    key expressions: _col2 (type: decimal(17,2)), _col3 (type: 
int)
+                    sort order: -+
+                    Statistics: Num rows: 348477374 Data size: 30742775095 
Basic stats: COMPLETE Column stats: NONE
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col1 (type: string)
         Reducer 5 
             Reduce Operator Tree:
               Select Operator
@@ -162,7 +166,7 @@ STAGE PLANS:
 
   Stage: Stage-0
     Fetch Operator
-      limit: 100
+      limit: -1
       Processor Tree:
         ListSink
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/test/results/clientpositive/perf/spark/query71.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query71.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query71.q.out
index 73af356..92b7915 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query71.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query71.q.out
@@ -296,11 +296,11 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 670816149 Data size: 72801917486 Basic 
stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col3 (type: string), _col1 
(type: int), _col2 (type: int), _col4 (type: decimal(17,2))
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  expressions: _col3 (type: string), _col1 (type: int), _col2 
(type: int), _col4 (type: decimal(17,2)), _col0 (type: int)
+                  outputColumnNames: _col1, _col2, _col3, _col4, _col5
                   Statistics: Num rows: 670816149 Data size: 72801917486 Basic 
stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
-                    key expressions: _col4 (type: decimal(17,2)), _col0 (type: 
int)
+                    key expressions: _col4 (type: decimal(17,2)), _col5 (type: 
int)
                     sort order: -+
                     Statistics: Num rows: 670816149 Data size: 72801917486 
Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int)

http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/test/results/clientpositive/perf/tez/query19.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query19.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query19.q.out
index 363425f..73bb6d9 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query19.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query19.q.out
@@ -59,10 +59,10 @@ Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
-    limit:100
+    limit:-1
     Stage-1
       Reducer 6
-      File Output Operator [FS_44]
+      File Output Operator [FS_45]
         Limit [LIM_43] (rows=100 width=88)
           Number of rows:100
           Select Operator [SEL_42] (rows=421657640 width=88)
@@ -70,7 +70,7 @@ Stage-0
           <-Reducer 5 [SIMPLE_EDGE]
             SHUFFLE [RS_41]
               Select Operator [SEL_39] (rows=421657640 width=88)
-                Output:["_col0","_col1","_col2","_col3","_col4"]
+                Output:["_col2","_col3","_col4","_col5","_col6"]
                 Group By Operator [GBY_38] (rows=421657640 width=88)
                   
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3
                 <-Reducer 4 [SIMPLE_EDGE]
@@ -82,47 +82,47 @@ Stage-0
                         Output:["_col8","_col13","_col14","_col15","_col16"]
                         Filter Operator [FIL_34] (rows=843315281 width=88)
                           predicate:(substr(_col3, 1, 5) <> substr(_col19, 1, 
5))
-                          Merge Join Operator [MERGEJOIN_73] (rows=843315281 
width=88)
+                          Merge Join Operator [MERGEJOIN_74] (rows=843315281 
width=88)
                             
Conds:RS_31._col7=RS_32._col0(Inner),Output:["_col3","_col8","_col13","_col14","_col15","_col16","_col19"]
                           <-Map 13 [SIMPLE_EDGE]
                             SHUFFLE [RS_32]
                               PartitionCols:_col0
                               Select Operator [SEL_24] (rows=1704 width=1910)
                                 Output:["_col0","_col1"]
-                                Filter Operator [FIL_68] (rows=1704 width=1910)
+                                Filter Operator [FIL_69] (rows=1704 width=1910)
                                   predicate:s_store_sk is not null
                                   TableScan [TS_22] (rows=1704 width=1910)
                                     
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_zip"]
                           <-Reducer 3 [SIMPLE_EDGE]
                             SHUFFLE [RS_31]
                               PartitionCols:_col7
-                              Merge Join Operator [MERGEJOIN_72] 
(rows=766650239 width=88)
+                              Merge Join Operator [MERGEJOIN_73] 
(rows=766650239 width=88)
                                 
Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col3","_col7","_col8","_col13","_col14","_col15","_col16"]
                               <-Reducer 10 [SIMPLE_EDGE]
                                 SHUFFLE [RS_29]
                                   PartitionCols:_col2
-                                  Merge Join Operator [MERGEJOIN_71] 
(rows=696954748 width=88)
+                                  Merge Join Operator [MERGEJOIN_72] 
(rows=696954748 width=88)
                                     
Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col2","_col3","_col4","_col9","_col10","_col11","_col12"]
                                   <-Map 12 [SIMPLE_EDGE]
                                     SHUFFLE [RS_19]
                                       PartitionCols:_col0
                                       Select Operator [SEL_14] (rows=231000 
width=1436)
                                         
Output:["_col0","_col1","_col2","_col3","_col4"]
-                                        Filter Operator [FIL_67] (rows=231000 
width=1436)
+                                        Filter Operator [FIL_68] (rows=231000 
width=1436)
                                           predicate:((i_manager_id = 7) and 
i_item_sk is not null)
                                           TableScan [TS_12] (rows=462000 
width=1436)
                                             
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manufact_id","i_manufact","i_manager_id"]
                                   <-Reducer 9 [SIMPLE_EDGE]
                                     SHUFFLE [RS_18]
                                       PartitionCols:_col1
-                                      Merge Join Operator [MERGEJOIN_70] 
(rows=633595212 width=88)
+                                      Merge Join Operator [MERGEJOIN_71] 
(rows=633595212 width=88)
                                         
Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
                                       <-Map 11 [SIMPLE_EDGE]
                                         SHUFFLE [RS_16]
                                           PartitionCols:_col0
                                           Select Operator [SEL_11] (rows=18262 
width=1119)
                                             Output:["_col0"]
-                                            Filter Operator [FIL_66] 
(rows=18262 width=1119)
+                                            Filter Operator [FIL_67] 
(rows=18262 width=1119)
                                               predicate:((d_moy = 11) and 
(d_year = 1999) and d_date_sk is not null)
                                               TableScan [TS_9] (rows=73049 
width=1119)
                                                 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
@@ -131,21 +131,21 @@ Stage-0
                                           PartitionCols:_col0
                                           Select Operator [SEL_8] 
(rows=575995635 width=88)
                                             
Output:["_col0","_col1","_col2","_col3","_col4"]
-                                            Filter Operator [FIL_65] 
(rows=575995635 width=88)
+                                            Filter Operator [FIL_66] 
(rows=575995635 width=88)
                                               predicate:(ss_customer_sk is not 
null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk 
is not null)
                                               TableScan [TS_6] (rows=575995635 
width=88)
                                                 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ext_sales_price"]
                               <-Reducer 2 [SIMPLE_EDGE]
                                 SHUFFLE [RS_28]
                                   PartitionCols:_col0
-                                  Merge Join Operator [MERGEJOIN_69] 
(rows=88000001 width=860)
+                                  Merge Join Operator [MERGEJOIN_70] 
(rows=88000001 width=860)
                                     
Conds:RS_25._col1=RS_26._col0(Inner),Output:["_col0","_col3"]
                                   <-Map 1 [SIMPLE_EDGE]
                                     SHUFFLE [RS_25]
                                       PartitionCols:_col1
                                       Select Operator [SEL_2] (rows=80000000 
width=860)
                                         Output:["_col0","_col1"]
-                                        Filter Operator [FIL_63] 
(rows=80000000 width=860)
+                                        Filter Operator [FIL_64] 
(rows=80000000 width=860)
                                           predicate:(c_current_addr_sk is not 
null and c_customer_sk is not null)
                                           TableScan [TS_0] (rows=80000000 
width=860)
                                             
default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
@@ -154,7 +154,7 @@ Stage-0
                                       PartitionCols:_col0
                                       Select Operator [SEL_5] (rows=40000000 
width=1014)
                                         Output:["_col0","_col1"]
-                                        Filter Operator [FIL_64] 
(rows=40000000 width=1014)
+                                        Filter Operator [FIL_65] 
(rows=40000000 width=1014)
                                           predicate:ca_address_sk is not null
                                           TableScan [TS_3] (rows=40000000 
width=1014)
                                             
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_zip"]

http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/test/results/clientpositive/perf/tez/query55.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query55.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query55.q.out
index 27ea1c3..5a5e33a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query55.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query55.q.out
@@ -34,55 +34,57 @@ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
-    limit:100
+    limit:-1
     Stage-1
       Reducer 5
-      File Output Operator [FS_24]
+      File Output Operator [FS_25]
         Limit [LIM_23] (rows=100 width=88)
           Number of rows:100
           Select Operator [SEL_22] (rows=348477374 width=88)
             Output:["_col0","_col1","_col2"]
           <-Reducer 4 [SIMPLE_EDGE]
             SHUFFLE [RS_21]
-              Group By Operator [GBY_18] (rows=348477374 width=88)
-                
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1
-              <-Reducer 3 [SIMPLE_EDGE]
-                SHUFFLE [RS_17]
-                  PartitionCols:_col0, _col1
-                  Group By Operator [GBY_16] (rows=696954748 width=88)
-                    
Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col7, _col8
-                    Merge Join Operator [MERGEJOIN_34] (rows=696954748 
width=88)
-                      
Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col7","_col8"]
-                    <-Map 7 [SIMPLE_EDGE]
-                      SHUFFLE [RS_13]
-                        PartitionCols:_col0
-                        Select Operator [SEL_8] (rows=231000 width=1436)
-                          Output:["_col0","_col1","_col2"]
-                          Filter Operator [FIL_32] (rows=231000 width=1436)
-                            predicate:((i_manager_id = 36) and i_item_sk is 
not null)
-                            TableScan [TS_6] (rows=462000 width=1436)
-                              
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"]
-                    <-Reducer 2 [SIMPLE_EDGE]
-                      SHUFFLE [RS_12]
-                        PartitionCols:_col1
-                        Merge Join Operator [MERGEJOIN_33] (rows=633595212 
width=88)
-                          
Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"]
-                        <-Map 1 [SIMPLE_EDGE]
-                          SHUFFLE [RS_9]
-                            PartitionCols:_col0
-                            Select Operator [SEL_2] (rows=575995635 width=88)
-                              Output:["_col0","_col1","_col2"]
-                              Filter Operator [FIL_30] (rows=575995635 
width=88)
-                                predicate:(ss_item_sk is not null and 
ss_sold_date_sk is not null)
-                                TableScan [TS_0] (rows=575995635 width=88)
-                                  
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"]
-                        <-Map 6 [SIMPLE_EDGE]
-                          SHUFFLE [RS_10]
-                            PartitionCols:_col0
-                            Select Operator [SEL_5] (rows=18262 width=1119)
-                              Output:["_col0"]
-                              Filter Operator [FIL_31] (rows=18262 width=1119)
-                                predicate:((d_moy = 12) and (d_year = 2001) 
and d_date_sk is not null)
-                                TableScan [TS_3] (rows=73049 width=1119)
-                                  
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+              Select Operator [SEL_19] (rows=348477374 width=88)
+                Output:["_col1","_col2","_col3"]
+                Group By Operator [GBY_18] (rows=348477374 width=88)
+                  
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1
+                <-Reducer 3 [SIMPLE_EDGE]
+                  SHUFFLE [RS_17]
+                    PartitionCols:_col0, _col1
+                    Group By Operator [GBY_16] (rows=696954748 width=88)
+                      
Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col7, _col8
+                      Merge Join Operator [MERGEJOIN_35] (rows=696954748 
width=88)
+                        
Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col2","_col7","_col8"]
+                      <-Map 7 [SIMPLE_EDGE]
+                        SHUFFLE [RS_13]
+                          PartitionCols:_col0
+                          Select Operator [SEL_8] (rows=231000 width=1436)
+                            Output:["_col0","_col1","_col2"]
+                            Filter Operator [FIL_33] (rows=231000 width=1436)
+                              predicate:((i_manager_id = 36) and i_item_sk is 
not null)
+                              TableScan [TS_6] (rows=462000 width=1436)
+                                
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"]
+                      <-Reducer 2 [SIMPLE_EDGE]
+                        SHUFFLE [RS_12]
+                          PartitionCols:_col1
+                          Merge Join Operator [MERGEJOIN_34] (rows=633595212 
width=88)
+                            
Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1","_col2"]
+                          <-Map 1 [SIMPLE_EDGE]
+                            SHUFFLE [RS_9]
+                              PartitionCols:_col0
+                              Select Operator [SEL_2] (rows=575995635 width=88)
+                                Output:["_col0","_col1","_col2"]
+                                Filter Operator [FIL_31] (rows=575995635 
width=88)
+                                  predicate:(ss_item_sk is not null and 
ss_sold_date_sk is not null)
+                                  TableScan [TS_0] (rows=575995635 width=88)
+                                    
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"]
+                          <-Map 6 [SIMPLE_EDGE]
+                            SHUFFLE [RS_10]
+                              PartitionCols:_col0
+                              Select Operator [SEL_5] (rows=18262 width=1119)
+                                Output:["_col0"]
+                                Filter Operator [FIL_32] (rows=18262 
width=1119)
+                                  predicate:((d_moy = 12) and (d_year = 2001) 
and d_date_sk is not null)
+                                  TableScan [TS_3] (rows=73049 width=1119)
+                                    
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7ea263cb/ql/src/test/results/clientpositive/perf/tez/query71.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query71.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query71.q.out
index bd48e56..6635b2e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query71.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query71.q.out
@@ -90,13 +90,13 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 7
-      File Output Operator [FS_52]
+      File Output Operator [FS_53]
         Select Operator [SEL_51] (rows=670816149 width=108)
           Output:["_col0","_col1","_col2","_col3","_col4"]
         <-Reducer 6 [SIMPLE_EDGE]
           SHUFFLE [RS_50]
             Select Operator [SEL_48] (rows=670816149 width=108)
-              Output:["_col0","_col1","_col2","_col3","_col4"]
+              Output:["_col1","_col2","_col3","_col4","_col5"]
               Group By Operator [GBY_47] (rows=670816149 width=108)
                 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3
               <-Reducer 5 [SIMPLE_EDGE]
@@ -104,28 +104,28 @@ Stage-0
                   PartitionCols:_col0, _col1, _col2, _col3
                   Group By Operator [GBY_45] (rows=1341632299 width=108)
                     
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col0)"],keys:_col4,
 _col8, _col9, _col5
-                    Merge Join Operator [MERGEJOIN_86] (rows=1341632299 
width=108)
+                    Merge Join Operator [MERGEJOIN_87] (rows=1341632299 
width=108)
                       
Conds:RS_41._col2=RS_42._col0(Inner),Output:["_col0","_col4","_col5","_col8","_col9"]
                     <-Map 16 [SIMPLE_EDGE]
                       SHUFFLE [RS_42]
                         PartitionCols:_col0
                         Select Operator [SEL_37] (rows=86400 width=471)
                           Output:["_col0","_col1","_col2"]
-                          Filter Operator [FIL_81] (rows=86400 width=471)
+                          Filter Operator [FIL_82] (rows=86400 width=471)
                             predicate:(((t_meal_time = 'breakfast') or 
(t_meal_time = 'dinner')) and t_time_sk is not null)
                             TableScan [TS_35] (rows=86400 width=471)
                               
default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute","t_meal_time"]
                     <-Reducer 4 [SIMPLE_EDGE]
                       SHUFFLE [RS_41]
                         PartitionCols:_col2
-                        Merge Join Operator [MERGEJOIN_85] (rows=1219665700 
width=108)
+                        Merge Join Operator [MERGEJOIN_86] (rows=1219665700 
width=108)
                           Conds:Union 
3._col1=RS_39._col0(Inner),Output:["_col0","_col2","_col4","_col5"]
                         <-Map 15 [SIMPLE_EDGE]
                           SHUFFLE [RS_39]
                             PartitionCols:_col0
                             Select Operator [SEL_34] (rows=231000 width=1436)
                               Output:["_col0","_col1","_col2"]
-                              Filter Operator [FIL_80] (rows=231000 width=1436)
+                              Filter Operator [FIL_81] (rows=231000 width=1436)
                                 predicate:((i_manager_id = 1) and i_item_sk is 
not null)
                                 TableScan [TS_32] (rows=462000 width=1436)
                                   
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"]
@@ -135,14 +135,14 @@ Stage-0
                               PartitionCols:_col1
                               Select Operator [SEL_19] (rows=316788826 
width=135)
                                 Output:["_col0","_col1","_col2"]
-                                Merge Join Operator [MERGEJOIN_83] 
(rows=316788826 width=135)
+                                Merge Join Operator [MERGEJOIN_84] 
(rows=316788826 width=135)
                                   
Conds:RS_16._col0=RS_17._col0(Inner),Output:["_col1","_col2","_col3"]
                                 <-Map 11 [SIMPLE_EDGE]
                                   SHUFFLE [RS_17]
                                     PartitionCols:_col0
                                     Select Operator [SEL_15] (rows=18262 
width=1119)
                                       Output:["_col0"]
-                                      Filter Operator [FIL_77] (rows=18262 
width=1119)
+                                      Filter Operator [FIL_78] (rows=18262 
width=1119)
                                         predicate:((d_moy = 12) and (d_year = 
2001) and d_date_sk is not null)
                                         TableScan [TS_13] (rows=73049 
width=1119)
                                           
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
@@ -151,7 +151,7 @@ Stage-0
                                     PartitionCols:_col0
                                     Select Operator [SEL_12] (rows=287989836 
width=135)
                                       Output:["_col0","_col1","_col2","_col3"]
-                                      Filter Operator [FIL_76] (rows=287989836 
width=135)
+                                      Filter Operator [FIL_77] (rows=287989836 
width=135)
                                         predicate:(cs_item_sk is not null and 
cs_sold_date_sk is not null and cs_sold_time_sk is not null)
                                         TableScan [TS_10] (rows=287989836 
width=135)
                                           
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_item_sk","cs_ext_sales_price"]
@@ -160,14 +160,14 @@ Stage-0
                               PartitionCols:_col1
                               Select Operator [SEL_30] (rows=633595212 
width=88)
                                 Output:["_col0","_col1","_col2"]
-                                Merge Join Operator [MERGEJOIN_84] 
(rows=633595212 width=88)
+                                Merge Join Operator [MERGEJOIN_85] 
(rows=633595212 width=88)
                                   
Conds:RS_27._col0=RS_28._col0(Inner),Output:["_col1","_col2","_col3"]
                                 <-Map 12 [SIMPLE_EDGE]
                                   SHUFFLE [RS_27]
                                     PartitionCols:_col0
                                     Select Operator [SEL_23] (rows=575995635 
width=88)
                                       Output:["_col0","_col1","_col2","_col3"]
-                                      Filter Operator [FIL_78] (rows=575995635 
width=88)
+                                      Filter Operator [FIL_79] (rows=575995635 
width=88)
                                         predicate:(ss_item_sk is not null and 
ss_sold_date_sk is not null and ss_sold_time_sk is not null)
                                         TableScan [TS_21] (rows=575995635 
width=88)
                                           
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_sold_time_sk","ss_item_sk","ss_ext_sales_price"]
@@ -176,7 +176,7 @@ Stage-0
                                     PartitionCols:_col0
                                     Select Operator [SEL_26] (rows=18262 
width=1119)
                                       Output:["_col0"]
-                                      Filter Operator [FIL_79] (rows=18262 
width=1119)
+                                      Filter Operator [FIL_80] (rows=18262 
width=1119)
                                         predicate:((d_moy = 12) and (d_year = 
2001) and d_date_sk is not null)
                                         TableScan [TS_24] (rows=73049 
width=1119)
                                           
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
@@ -185,14 +185,14 @@ Stage-0
                               PartitionCols:_col1
                               Select Operator [SEL_9] (rows=158402938 
width=135)
                                 Output:["_col0","_col1","_col2"]
-                                Merge Join Operator [MERGEJOIN_82] 
(rows=158402938 width=135)
+                                Merge Join Operator [MERGEJOIN_83] 
(rows=158402938 width=135)
                                   
Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col1","_col2","_col3"]
                                 <-Map 1 [SIMPLE_EDGE]
                                   SHUFFLE [RS_6]
                                     PartitionCols:_col0
                                     Select Operator [SEL_2] (rows=144002668 
width=135)
                                       Output:["_col0","_col1","_col2","_col3"]
-                                      Filter Operator [FIL_74] (rows=144002668 
width=135)
+                                      Filter Operator [FIL_75] (rows=144002668 
width=135)
                                         predicate:(ws_item_sk is not null and 
ws_sold_date_sk is not null and ws_sold_time_sk is not null)
                                         TableScan [TS_0] (rows=144002668 
width=135)
                                           
default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_item_sk","ws_ext_sales_price"]
@@ -201,7 +201,7 @@ Stage-0
                                     PartitionCols:_col0
                                     Select Operator [SEL_5] (rows=18262 
width=1119)
                                       Output:["_col0"]
-                                      Filter Operator [FIL_75] (rows=18262 
width=1119)
+                                      Filter Operator [FIL_76] (rows=18262 
width=1119)
                                         predicate:((d_moy = 12) and (d_year = 
2001) and d_date_sk is not null)
                                         TableScan [TS_3] (rows=73049 
width=1119)
                                           
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]

hive git commit: HIVE-18250: CBO gets turned off with duplicates in RR error (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Reply via email to