HIVE-16774: Support position in ORDER BY when using SELECT * (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d7ab32f2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d7ab32f2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d7ab32f2

Branch: refs/heads/hive-14535
Commit: d7ab32f2e657c5f11adf8d0d3143fc1b00223d88
Parents: 9e6c23d
Author: Pengcheng Xiong <[email protected]>
Authored: Wed May 31 17:44:49 2017 -0700
Committer: Pengcheng Xiong <[email protected]>
Committed: Wed May 31 17:44:49 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  85 +++--
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  36 +-
 .../orderby_position_unsupported.q              |   3 +-
 .../test/queries/clientpositive/order_by_pos.q  |  20 ++
 .../test/queries/clientpositive/perf/query47.q  |  53 +++
 .../test/queries/clientpositive/perf/query57.q  |  50 +++
 .../orderby_position_unsupported.q.out          |   4 +-
 .../results/clientpositive/order_by_pos.q.out   | 129 +++++++
 .../results/clientpositive/perf/query47.q.out   | 357 +++++++++++++++++++
 .../results/clientpositive/perf/query57.q.out   | 351 ++++++++++++++++++
 10 files changed, 1020 insertions(+), 68 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d7ab32f2/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 721dac8..7f583ed 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -3104,38 +3104,63 @@ public class CalcitePlanner extends SemanticAnalyzer {
           ASTNode ref = (ASTNode) nullObASTExpr.getChild(0);
           Map<ASTNode, ExprNodeDesc> astToExprNDescMap = null;
           ExprNodeDesc obExprNDesc = null;
-          // first try to get it from select
-          // in case of udtf, selectOutputRR may be null.
-          if (selectOutputRR != null) {
-            try {
-              astToExprNDescMap = genAllExprNodeDesc(ref, selectOutputRR);
-              obExprNDesc = astToExprNDescMap.get(ref);
-            } catch (SemanticException ex) {
-              // we can tolerate this as this is the previous behavior
-              LOG.debug("Can not find column in " + ref.getText() + ". The error msg is "
-                  + ex.getMessage());
+          
+          boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS);
+          boolean isObyByPos = isBothByPos
+              || HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS);
+          // replace each of the position alias in ORDERBY with the actual column
+          if (ref != null && ref.getToken().getType() == HiveParser.Number) {
+            if (isObyByPos) {
+              int pos = Integer.parseInt(ref.getText());
+              if (pos > 0 && pos <= selectOutputRR.getColumnInfos().size()) {
+                // fieldIndex becomes so simple
+                // Note that pos starts from 1 while fieldIndex starts from 0;
+                fieldIndex = pos - 1;
+              } else {
+                throw new SemanticException(
+                    ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg("Position alias: " + pos
+                        + " does not exist\n" + "The Select List is indexed from 1 to "
+                        + selectOutputRR.getColumnInfos().size()));
+              }
+            } else { // if not using position alias and it is a number.
+              LOG.warn("Using constant number "
+                  + ref.getText()
+                  + " in order by. If you try to use position alias when hive.orderby.position.alias is false, the position alias will be ignored.");
             }
-          }
-          // then try to get it from all
-          if (obExprNDesc == null) {
-            astToExprNDescMap = genAllExprNodeDesc(ref, inputRR);
-            obExprNDesc = astToExprNDescMap.get(ref);
-          }
-          if (obExprNDesc == null) {
-            throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
-          }
-          // 2.2 Convert ExprNode to RexNode
-          rnd = converter.convert(obExprNDesc);
-
-          // 2.3 Determine the index of ob expr in child schema
-          // NOTE: Calcite can not take compound exprs in OB without it being
-          // present in the child (& hence we add a child Project Rel)
-          if (rnd instanceof RexInputRef) {
-            fieldIndex = ((RexInputRef) rnd).getIndex();
           } else {
-            fieldIndex = srcRelRecordSz + newVCLst.size();
-            newVCLst.add(rnd);
-            vcASTTypePairs.add(new Pair<ASTNode, TypeInfo>(ref, obExprNDesc.getTypeInfo()));
+            // first try to get it from select
+            // in case of udtf, selectOutputRR may be null.
+            if (selectOutputRR != null) {
+              try {
+                astToExprNDescMap = genAllExprNodeDesc(ref, selectOutputRR);
+                obExprNDesc = astToExprNDescMap.get(ref);
+              } catch (SemanticException ex) {
+                // we can tolerate this as this is the previous behavior
+                LOG.debug("Can not find column in " + ref.getText() + ". The error msg is "
+                    + ex.getMessage());
+              }
+            }
+            // then try to get it from all
+            if (obExprNDesc == null) {
+              astToExprNDescMap = genAllExprNodeDesc(ref, inputRR);
+              obExprNDesc = astToExprNDescMap.get(ref);
+            }
+            if (obExprNDesc == null) {
+              throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
+            }
+            // 2.2 Convert ExprNode to RexNode
+            rnd = converter.convert(obExprNDesc);
+
+            // 2.3 Determine the index of ob expr in child schema
+            // NOTE: Calcite can not take compound exprs in OB without it being
+            // present in the child (& hence we add a child Project Rel)
+            if (rnd instanceof RexInputRef) {
+              fieldIndex = ((RexInputRef) rnd).getIndex();
+            } else {
+              fieldIndex = srcRelRecordSz + newVCLst.size();
+              newVCLst.add(rnd);
+              vcASTTypePairs.add(new Pair<ASTNode, TypeInfo>(ref, obExprNDesc.getTypeInfo()));
+            }
           }
 
           // 2.4 Determine the Direction of order by

http://git-wip-us.apache.org/repos/asf/hive/blob/d7ab32f2/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 7f5051c..d514644 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -12563,41 +12563,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
           }
         }
 
-        // replace each of the position alias in ORDERBY with the actual column name
-        if (orderbyNode != null) {
-          isAllCol = false;
-          for (int child_pos = 0; child_pos < selectNode.getChildCount(); ++child_pos) {
-            ASTNode node = (ASTNode) selectNode.getChild(child_pos).getChild(0);
-            if (node != null && node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
-              isAllCol = true;
-            }
-          }
-          for (int child_pos = 0; child_pos < orderbyNode.getChildCount(); ++child_pos) {
-            ASTNode colNode = (ASTNode) orderbyNode.getChild(child_pos).getChild(0);
-            ASTNode node = (ASTNode) colNode.getChild(0);
-            if (node != null && node.getToken().getType() == HiveParser.Number) {
-              if (isObyByPos) {
-                if (!isAllCol) {
-                  int pos = Integer.parseInt(node.getText());
-                  if (pos > 0 && pos <= selectExpCnt) {
-                    colNode.setChild(0, selectNode.getChild(pos - 1).getChild(0));
-                  } else {
-                    throw new SemanticException(
-                      ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg(
-                      "Position alias: " + pos + " does not exist\n" +
-                      "The Select List is indexed from 1 to " + selectExpCnt));
-                  }
-                } else {
-                  throw new SemanticException(
-                    ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg());
-                }
-              } else { //if not using position alias and it is a number.
-                warn("Using constant number " + node.getText() +
-                  " in order by. If you try to use position alias when hive.orderby.position.alias is false, the position alias will be ignored.");
-              }
-            }
-          }
-        }
+        // orderby position will be processed in genPlan
       }
 
       for (int i = next.getChildren().size() - 1; i >= 0; i--) {

http://git-wip-us.apache.org/repos/asf/hive/blob/d7ab32f2/ql/src/test/queries/clientnegative/orderby_position_unsupported.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/orderby_position_unsupported.q b/ql/src/test/queries/clientnegative/orderby_position_unsupported.q
index a490c23..01e897d 100644
--- a/ql/src/test/queries/clientnegative/orderby_position_unsupported.q
+++ b/ql/src/test/queries/clientnegative/orderby_position_unsupported.q
@@ -1,4 +1,3 @@
 set hive.groupby.orderby.position.alias=true;
 
--- position alias is not supported when SELECT *
-SELECT src.* FROM src ORDER BY 1;
+SELECT src.* FROM src ORDER BY 3;

http://git-wip-us.apache.org/repos/asf/hive/blob/d7ab32f2/ql/src/test/queries/clientpositive/order_by_pos.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/order_by_pos.q b/ql/src/test/queries/clientpositive/order_by_pos.q
new file mode 100644
index 0000000..744abe5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/order_by_pos.q
@@ -0,0 +1,20 @@
+set hive.fetch.task.conversion=none;
+
+create table t(a int, b int);
+
+insert into t values (1,2),(1,2),(1,3),(2,4),(20,-100),(-1000,100),(4,5),(3,7),(8,9);
+
+select * from t order by 2;
+
+select * from t order by 1;
+
+select * from t union select * from t order by 1, 2;
+
+select * from t union select * from t order by 2;
+
+select * from t union select * from t order by 1;
+
+select * from (select a, count(a) from t group by a)subq order by 2, 1;
+
+select * from (select a,b, count(*) from t group by a, b)subq order by 3, 2 desc;
+ 

http://git-wip-us.apache.org/repos/asf/hive/blob/d7ab32f2/ql/src/test/queries/clientpositive/perf/query47.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query47.q b/ql/src/test/queries/clientpositive/perf/query47.q
new file mode 100644
index 0000000..5c26ba5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query47.q
@@ -0,0 +1,53 @@
+set hive.mapred.mode=nonstrict;
+-- start query 1 in stream 0 using template query47.tpl and seed 2031708268
+explain
+with v1 as(
+ select i_category, i_brand,
+        s_store_name, s_company_name,
+        d_year, d_moy,
+        sum(ss_sales_price) sum_sales,
+        avg(sum(ss_sales_price)) over
+          (partition by i_category, i_brand,
+                     s_store_name, s_company_name, d_year)
+          avg_monthly_sales,
+        rank() over
+          (partition by i_category, i_brand,
+                     s_store_name, s_company_name
+           order by d_year, d_moy) rn
+ from item, store_sales, date_dim, store
+ where ss_item_sk = i_item_sk and
+       ss_sold_date_sk = d_date_sk and
+       ss_store_sk = s_store_sk and
+       (
+         d_year = 2000 or
+         ( d_year = 2000-1 and d_moy =12) or
+         ( d_year = 2000+1 and d_moy =1)
+       )
+ group by i_category, i_brand,
+          s_store_name, s_company_name,
+          d_year, d_moy),
+ v2 as(
+ select v1.i_category
+        ,v1.d_year, v1.d_moy
+        ,v1.avg_monthly_sales
+        ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+       v1.i_category = v1_lead.i_category and
+       v1.i_brand = v1_lag.i_brand and
+       v1.i_brand = v1_lead.i_brand and
+       v1.s_store_name = v1_lag.s_store_name and
+       v1.s_store_name = v1_lead.s_store_name and
+       v1.s_company_name = v1_lag.s_company_name and
+       v1.s_company_name = v1_lead.s_company_name and
+       v1.rn = v1_lag.rn + 1 and
+       v1.rn = v1_lead.rn - 1)
+  select  *
+ from v2
+ where  d_year = 2000 and    
+        avg_monthly_sales > 0 and
+        case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100;
+
+-- end query 1 in stream 0 using template query47.tpl

http://git-wip-us.apache.org/repos/asf/hive/blob/d7ab32f2/ql/src/test/queries/clientpositive/perf/query57.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query57.q b/ql/src/test/queries/clientpositive/perf/query57.q
new file mode 100644
index 0000000..4dc6e63
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query57.q
@@ -0,0 +1,50 @@
+set hive.mapred.mode=nonstrict;
+-- start query 1 in stream 0 using template query57.tpl and seed 2031708268
+explain
+with v1 as(
+ select i_category, i_brand,
+        cc_name,
+        d_year, d_moy,
+        sum(cs_sales_price) sum_sales,
+        avg(sum(cs_sales_price)) over
+          (partition by i_category, i_brand,
+                     cc_name, d_year)
+          avg_monthly_sales,
+        rank() over
+          (partition by i_category, i_brand,
+                     cc_name
+           order by d_year, d_moy) rn
+ from item, catalog_sales, date_dim, call_center
+ where cs_item_sk = i_item_sk and
+       cs_sold_date_sk = d_date_sk and
+       cc_call_center_sk= cs_call_center_sk and
+       (
+         d_year = 2000 or
+         ( d_year = 2000-1 and d_moy =12) or
+         ( d_year = 2000+1 and d_moy =1)
+       )
+ group by i_category, i_brand,
+          cc_name , d_year, d_moy),
+ v2 as(
+ select v1.i_category, v1.i_brand
+        ,v1.d_year, v1.d_moy
+        ,v1.avg_monthly_sales
+        ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+       v1.i_category = v1_lead.i_category and
+       v1.i_brand = v1_lag.i_brand and
+       v1.i_brand = v1_lead.i_brand and
+       v1. cc_name = v1_lag. cc_name and
+       v1. cc_name = v1_lead. cc_name and
+       v1.rn = v1_lag.rn + 1 and
+       v1.rn = v1_lead.rn - 1)
+  select  *
+ from v2
+ where  d_year = 2000 and
+        avg_monthly_sales > 0 and
+        case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100;
+
+-- end query 1 in stream 0 using template query57.tpl

http://git-wip-us.apache.org/repos/asf/hive/blob/d7ab32f2/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out b/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out
index 56d3240..a275224 100644
--- a/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out
+++ b/ql/src/test/results/clientnegative/orderby_position_unsupported.q.out
@@ -1 +1,3 @@
-FAILED: SemanticException [Error 10219]: Position in ORDER BY is not supported when using SELECT *
+FAILED: SemanticException [Error 10221]: Invalid position alias in Order By
+ Position alias: 3 does not exist
+The Select List is indexed from 1 to 2

http://git-wip-us.apache.org/repos/asf/hive/blob/d7ab32f2/ql/src/test/results/clientpositive/order_by_pos.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/order_by_pos.q.out b/ql/src/test/results/clientpositive/order_by_pos.q.out
new file mode 100644
index 0000000..b7ea716
--- /dev/null
+++ b/ql/src/test/results/clientpositive/order_by_pos.q.out
@@ -0,0 +1,129 @@
+PREHOOK: query: create table t(a int, b int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t(a int, b int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values 
(1,2),(1,2),(1,3),(2,4),(20,-100),(-1000,100),(4,5),(3,7),(8,9)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values 
(1,2),(1,2),(1,3),(2,4),(20,-100),(-1000,100),(4,5),(3,7),(8,9)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.a EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.b EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: select * from t order by 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t order by 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+20     -100
+1      2
+1      2
+1      3
+2      4
+4      5
+3      7
+8      9
+-1000  100
+PREHOOK: query: select * from t order by 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t order by 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000  100
+1      3
+1      2
+1      2
+2      4
+3      7
+4      5
+8      9
+20     -100
+PREHOOK: query: select * from t union select * from t order by 1, 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t union select * from t order by 1, 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000  100
+1      2
+1      3
+2      4
+3      7
+4      5
+8      9
+20     -100
+PREHOOK: query: select * from t union select * from t order by 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t union select * from t order by 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+20     -100
+1      2
+1      3
+2      4
+4      5
+3      7
+8      9
+-1000  100
+PREHOOK: query: select * from t union select * from t order by 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t union select * from t order by 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000  100
+1      3
+1      2
+2      4
+3      7
+4      5
+8      9
+20     -100
+PREHOOK: query: select * from (select a, count(a) from t group by a)subq order 
by 2, 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (select a, count(a) from t group by a)subq 
order by 2, 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000  1
+2      1
+3      1
+4      1
+8      1
+20     1
+1      3
+PREHOOK: query: select * from (select a,b, count(*) from t group by a, b)subq 
order by 3, 2 desc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (select a,b, count(*) from t group by a, b)subq 
order by 3, 2 desc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+-1000  100     1
+8      9       1
+3      7       1
+4      5       1
+2      4       1
+1      3       1
+20     -100    1
+1      2       2

http://git-wip-us.apache.org/repos/asf/hive/blob/d7ab32f2/ql/src/test/results/clientpositive/perf/query47.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query47.q.out b/ql/src/test/results/clientpositive/perf/query47.q.out
new file mode 100644
index 0000000..d5e1922
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/query47.q.out
@@ -0,0 +1,357 @@
+PREHOOK: query: explain
+with v1 as(
+ select i_category, i_brand,
+        s_store_name, s_company_name,
+        d_year, d_moy,
+        sum(ss_sales_price) sum_sales,
+        avg(sum(ss_sales_price)) over
+          (partition by i_category, i_brand,
+                     s_store_name, s_company_name, d_year)
+          avg_monthly_sales,
+        rank() over
+          (partition by i_category, i_brand,
+                     s_store_name, s_company_name
+           order by d_year, d_moy) rn
+ from item, store_sales, date_dim, store
+ where ss_item_sk = i_item_sk and
+       ss_sold_date_sk = d_date_sk and
+       ss_store_sk = s_store_sk and
+       (
+         d_year = 2000 or
+         ( d_year = 2000-1 and d_moy =12) or
+         ( d_year = 2000+1 and d_moy =1)
+       )
+ group by i_category, i_brand,
+          s_store_name, s_company_name,
+          d_year, d_moy),
+ v2 as(
+ select v1.i_category
+        ,v1.d_year, v1.d_moy
+        ,v1.avg_monthly_sales
+        ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+       v1.i_category = v1_lead.i_category and
+       v1.i_brand = v1_lag.i_brand and
+       v1.i_brand = v1_lead.i_brand and
+       v1.s_store_name = v1_lag.s_store_name and
+       v1.s_store_name = v1_lead.s_store_name and
+       v1.s_company_name = v1_lag.s_company_name and
+       v1.s_company_name = v1_lead.s_company_name and
+       v1.rn = v1_lag.rn + 1 and
+       v1.rn = v1_lead.rn - 1)
+  select  *
+ from v2
+ where  d_year = 2000 and    
+        avg_monthly_sales > 0 and
+        case when avg_monthly_sales > 0 then abs(sum_sales - 
avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with v1 as(
+ select i_category, i_brand,
+        s_store_name, s_company_name,
+        d_year, d_moy,
+        sum(ss_sales_price) sum_sales,
+        avg(sum(ss_sales_price)) over
+          (partition by i_category, i_brand,
+                     s_store_name, s_company_name, d_year)
+          avg_monthly_sales,
+        rank() over
+          (partition by i_category, i_brand,
+                     s_store_name, s_company_name
+           order by d_year, d_moy) rn
+ from item, store_sales, date_dim, store
+ where ss_item_sk = i_item_sk and
+       ss_sold_date_sk = d_date_sk and
+       ss_store_sk = s_store_sk and
+       (
+         d_year = 2000 or
+         ( d_year = 2000-1 and d_moy =12) or
+         ( d_year = 2000+1 and d_moy =1)
+       )
+ group by i_category, i_brand,
+          s_store_name, s_company_name,
+          d_year, d_moy),
+ v2 as(
+ select v1.i_category
+        ,v1.d_year, v1.d_moy
+        ,v1.avg_monthly_sales
+        ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+       v1.i_category = v1_lead.i_category and
+       v1.i_brand = v1_lag.i_brand and
+       v1.i_brand = v1_lead.i_brand and
+       v1.s_store_name = v1_lag.s_store_name and
+       v1.s_store_name = v1_lead.s_store_name and
+       v1.s_company_name = v1_lag.s_company_name and
+       v1.s_company_name = v1_lead.s_company_name and
+       v1.rn = v1_lag.rn + 1 and
+       v1.rn = v1_lead.rn - 1)
+  select  *
+ from v2
+ where  d_year = 2000 and    
+        avg_monthly_sales > 0 and
+        case when avg_monthly_sales > 0 then abs(sum_sales - 
avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 10 <- Map 21 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 11 <- Map 22 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
+Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
+Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
+Reducer 14 <- Reducer 13 (SIMPLE_EDGE)
+Reducer 15 <- Map 1 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+Reducer 16 <- Map 21 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
+Reducer 17 <- Map 22 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
+Reducer 19 <- Reducer 18 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+Reducer 3 <- Map 21 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 22 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE), Reducer 6 
(SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 8
+      File Output Operator [FS_112]
+        Limit [LIM_110] (rows=100 width=88)
+          Number of rows:100
+          Select Operator [SEL_109] (rows=843315280 width=88)
+            Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+          <-Reducer 7 [SIMPLE_EDGE]
+            SHUFFLE [RS_108]
+              Select Operator [SEL_107] (rows=843315280 width=88)
+                
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+                Merge Join Operator [MERGEJOIN_189] (rows=843315280 width=88)
+                  Conds:RS_103._col0, _col1, _col2, _col3, (_col7 + 
1)=RS_104._col0, _col1, _col2, _col3, _col8(Inner),RS_104._col0, _col1, _col2, 
_col3, _col8=RS_105._col0, _col1, _col2, _col3, (_col7 - 
1)(Inner),Output:["_col6","_col8","_col12","_col13","_col14","_col15","_col23"]
+                <-Reducer 14 [SIMPLE_EDGE]
+                  SHUFFLE [RS_104]
+                    PartitionCols:_col0, _col1, _col2, _col3, _col8
+                    Select Operator [SEL_67] (rows=31943759 width=88)
+                      
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+                      Filter Operator [FIL_169] (rows=31943759 width=88)
+                        predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - 
_col0)) / _col0) > 0.1)) ELSE (null) END
+                        Select Operator [SEL_66] (rows=63887519 width=88)
+                          
Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+                          Filter Operator [FIL_170] (rows=63887519 width=88)
+                            predicate:((_col0 > 0) and rank_window_1 is not 
null and (_col5 = 2000))
+                            PTF Operator [PTF_65] (rows=383325119 width=88)
+                              Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS 
FIRST, _col6 ASC NULLS FIRST","partition by:":"_col1, _col2, _col3, _col4"}]
+                              Select Operator [SEL_64] (rows=383325119 
width=88)
+                                
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+                              <-Reducer 13 [SIMPLE_EDGE]
+                                SHUFFLE [RS_63]
+                                  PartitionCols:_col0, _col1, _col2, _col3
+                                  Select Operator [SEL_62] (rows=383325119 
width=88)
+                                    
Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                                    PTF Operator [PTF_61] (rows=383325119 
width=88)
+                                      Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS 
FIRST, _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST, 
_col4 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2, _col3, _col4"}]
+                                      Select Operator [SEL_60] (rows=383325119 
width=88)
+                                        
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                                      <-Reducer 12 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_59]
+                                          PartitionCols:_col0, _col1, _col2, 
_col3, _col4
+                                          Select Operator [SEL_58] 
(rows=383325119 width=88)
+                                            
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                                            Group By Operator [GBY_57] 
(rows=383325119 width=88)
+                                              
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5
+                                            <-Reducer 11 [SIMPLE_EDGE]
+                                              SHUFFLE [RS_56]
+                                                PartitionCols:_col0, _col1, 
_col2, _col3, _col4, _col5
+                                                Group By Operator [GBY_55] 
(rows=766650239 width=88)
+                                                  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5,
 _col6, _col8, _col9, _col11, _col12
+                                                  Merge Join Operator 
[MERGEJOIN_185] (rows=766650239 width=88)
+                                                    
Conds:RS_51._col2=RS_52._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"]
+                                                  <-Map 22 [SIMPLE_EDGE]
+                                                    SHUFFLE [RS_52]
+                                                      PartitionCols:_col0
+                                                      Select Operator [SEL_44] 
(rows=1704 width=1910)
+                                                        
Output:["_col0","_col1","_col2"]
+                                                        Filter Operator 
[FIL_174] (rows=1704 width=1910)
+                                                          
predicate:(s_store_sk is not null and s_store_name is not null and 
s_company_name is not null)
+                                                          TableScan [TS_9] 
(rows=1704 width=1910)
+                                                            
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_company_name"]
+                                                  <-Reducer 10 [SIMPLE_EDGE]
+                                                    SHUFFLE [RS_51]
+                                                      PartitionCols:_col2
+                                                      Merge Join Operator 
[MERGEJOIN_184] (rows=696954748 width=88)
+                                                        
Conds:RS_48._col1=RS_49._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9"]
+                                                      <-Map 21 [SIMPLE_EDGE]
+                                                        SHUFFLE [RS_49]
+                                                          PartitionCols:_col0
+                                                          Select Operator 
[SEL_41] (rows=462000 width=1436)
+                                                            
Output:["_col0","_col1","_col2"]
+                                                            Filter Operator 
[FIL_173] (rows=462000 width=1436)
+                                                              
predicate:(i_item_sk is not null and i_category is not null and i_brand is not 
null)
+                                                              TableScan [TS_6] 
(rows=462000 width=1436)
+                                                                
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_category"]
+                                                      <-Reducer 9 [SIMPLE_EDGE]
+                                                        SHUFFLE [RS_48]
+                                                          PartitionCols:_col1
+                                                          Merge Join Operator 
[MERGEJOIN_183] (rows=633595212 width=88)
+                                                            
Conds:RS_45._col0=RS_46._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"]
+                                                          <-Map 1 [SIMPLE_EDGE]
+                                                            SHUFFLE [RS_45]
+                                                              
PartitionCols:_col0
+                                                              Select Operator 
[SEL_35] (rows=575995635 width=88)
+                                                                
Output:["_col0","_col1","_col2","_col3"]
+                                                                Filter 
Operator [FIL_171] (rows=575995635 width=88)
+                                                                  
predicate:(ss_item_sk is not null and ss_sold_date_sk is not null and 
ss_store_sk is not null)
+                                                                  TableScan 
[TS_0] (rows=575995635 width=88)
+                                                                    
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"]
+                                                          <-Map 20 
[SIMPLE_EDGE]
+                                                            SHUFFLE [RS_46]
+                                                              
PartitionCols:_col0
+                                                              Select Operator 
[SEL_38] (rows=73048 width=1119)
+                                                                
Output:["_col0","_col1","_col2"]
+                                                                Filter 
Operator [FIL_172] (rows=73048 width=1119)
+                                                                  
predicate:(((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 
2001) and (d_moy = 1))) and d_date_sk is not null)
+                                                                  TableScan 
[TS_3] (rows=73049 width=1119)
+                                                                    
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+                <-Reducer 19 [SIMPLE_EDGE]
+                  SHUFFLE [RS_105]
+                    PartitionCols:_col0, _col1, _col2, _col3, (_col7 - 1)
+                    Select Operator [SEL_99] (rows=383325119 width=88)
+                      Output:["_col0","_col1","_col2","_col3","_col6","_col7"]
+                      Filter Operator [FIL_175] (rows=383325119 width=88)
+                        predicate:rank_window_0 is not null
+                        PTF Operator [PTF_98] (rows=383325119 width=88)
+                          Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS 
FIRST, _col5 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2, _col3"}]
+                          Select Operator [SEL_97] (rows=383325119 width=88)
+                            
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                          <-Reducer 18 [SIMPLE_EDGE]
+                            SHUFFLE [RS_96]
+                              PartitionCols:_col0, _col1, _col2, _col3
+                              Select Operator [SEL_95] (rows=383325119 
width=88)
+                                
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                                Group By Operator [GBY_94] (rows=383325119 
width=88)
+                                  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5
+                                <-Reducer 17 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_93]
+                                    PartitionCols:_col0, _col1, _col2, _col3, 
_col4, _col5
+                                    Group By Operator [GBY_92] (rows=766650239 
width=88)
+                                      
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5,
 _col6, _col8, _col9, _col11, _col12
+                                      Merge Join Operator [MERGEJOIN_188] 
(rows=766650239 width=88)
+                                        
Conds:RS_88._col2=RS_89._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"]
+                                      <-Map 22 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_89]
+                                          PartitionCols:_col0
+                                          Select Operator [SEL_81] (rows=1704 
width=1910)
+                                            Output:["_col0","_col1","_col2"]
+                                            Filter Operator [FIL_179] 
(rows=1704 width=1910)
+                                              predicate:(s_store_sk is not 
null and s_store_name is not null and s_company_name is not null)
+                                               Please refer to the previous 
TableScan [TS_9]
+                                      <-Reducer 16 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_88]
+                                          PartitionCols:_col2
+                                          Merge Join Operator [MERGEJOIN_187] 
(rows=696954748 width=88)
+                                            
Conds:RS_85._col1=RS_86._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9"]
+                                          <-Map 21 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_86]
+                                              PartitionCols:_col0
+                                              Select Operator [SEL_78] 
(rows=462000 width=1436)
+                                                
Output:["_col0","_col1","_col2"]
+                                                Filter Operator [FIL_178] 
(rows=462000 width=1436)
+                                                  predicate:(i_item_sk is not 
null and i_category is not null and i_brand is not null)
+                                                   Please refer to the 
previous TableScan [TS_6]
+                                          <-Reducer 15 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_85]
+                                              PartitionCols:_col1
+                                              Merge Join Operator 
[MERGEJOIN_186] (rows=633595212 width=88)
+                                                
Conds:RS_82._col0=RS_83._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"]
+                                              <-Map 1 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_82]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_72] 
(rows=575995635 width=88)
+                                                    
Output:["_col0","_col1","_col2","_col3"]
+                                                    Filter Operator [FIL_176] 
(rows=575995635 width=88)
+                                                      predicate:(ss_item_sk is 
not null and ss_sold_date_sk is not null and ss_store_sk is not null)
+                                                       Please refer to the 
previous TableScan [TS_0]
+                                              <-Map 20 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_83]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_75] 
(rows=73048 width=1119)
+                                                    
Output:["_col0","_col1","_col2"]
+                                                    Filter Operator [FIL_177] 
(rows=73048 width=1119)
+                                                      predicate:(((d_year = 
2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 
1))) and d_date_sk is not null)
+                                                       Please refer to the 
previous TableScan [TS_3]
+                <-Reducer 6 [SIMPLE_EDGE]
+                  SHUFFLE [RS_103]
+                    PartitionCols:_col0, _col1, _col2, _col3, (_col7 + 1)
+                    Select Operator [SEL_29] (rows=383325119 width=88)
+                      Output:["_col0","_col1","_col2","_col3","_col6","_col7"]
+                      Filter Operator [FIL_164] (rows=383325119 width=88)
+                        predicate:rank_window_0 is not null
+                        PTF Operator [PTF_28] (rows=383325119 width=88)
+                          Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS 
FIRST, _col5 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2, _col3"}]
+                          Select Operator [SEL_27] (rows=383325119 width=88)
+                            
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                          <-Reducer 5 [SIMPLE_EDGE]
+                            SHUFFLE [RS_26]
+                              PartitionCols:_col0, _col1, _col2, _col3
+                              Select Operator [SEL_25] (rows=383325119 
width=88)
+                                
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                                Group By Operator [GBY_24] (rows=383325119 
width=88)
+                                  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5
+                                <-Reducer 4 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_23]
+                                    PartitionCols:_col0, _col1, _col2, _col3, 
_col4, _col5
+                                    Group By Operator [GBY_22] (rows=766650239 
width=88)
+                                      
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5,
 _col6, _col8, _col9, _col11, _col12
+                                      Merge Join Operator [MERGEJOIN_182] 
(rows=766650239 width=88)
+                                        
Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"]
+                                      <-Map 22 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_19]
+                                          PartitionCols:_col0
+                                          Select Operator [SEL_11] (rows=1704 
width=1910)
+                                            Output:["_col0","_col1","_col2"]
+                                            Filter Operator [FIL_168] 
(rows=1704 width=1910)
+                                              predicate:(s_store_sk is not 
null and s_store_name is not null and s_company_name is not null)
+                                               Please refer to the previous 
TableScan [TS_9]
+                                      <-Reducer 3 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_18]
+                                          PartitionCols:_col2
+                                          Merge Join Operator [MERGEJOIN_181] 
(rows=696954748 width=88)
+                                            
Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9"]
+                                          <-Map 21 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_16]
+                                              PartitionCols:_col0
+                                              Select Operator [SEL_8] 
(rows=462000 width=1436)
+                                                
Output:["_col0","_col1","_col2"]
+                                                Filter Operator [FIL_167] 
(rows=462000 width=1436)
+                                                  predicate:(i_item_sk is not 
null and i_category is not null and i_brand is not null)
+                                                   Please refer to the 
previous TableScan [TS_6]
+                                          <-Reducer 2 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_15]
+                                              PartitionCols:_col1
+                                              Merge Join Operator 
[MERGEJOIN_180] (rows=633595212 width=88)
+                                                
Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"]
+                                              <-Map 1 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_12]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_2] 
(rows=575995635 width=88)
+                                                    
Output:["_col0","_col1","_col2","_col3"]
+                                                    Filter Operator [FIL_165] 
(rows=575995635 width=88)
+                                                      predicate:(ss_item_sk is 
not null and ss_sold_date_sk is not null and ss_store_sk is not null)
+                                                       Please refer to the 
previous TableScan [TS_0]
+                                              <-Map 20 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_13]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_5] 
(rows=73048 width=1119)
+                                                    
Output:["_col0","_col1","_col2"]
+                                                    Filter Operator [FIL_166] 
(rows=73048 width=1119)
+                                                      predicate:(((d_year = 
2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 
1))) and d_date_sk is not null)
+                                                       Please refer to the 
previous TableScan [TS_3]
+

http://git-wip-us.apache.org/repos/asf/hive/blob/d7ab32f2/ql/src/test/results/clientpositive/perf/query57.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query57.q.out 
b/ql/src/test/results/clientpositive/perf/query57.q.out
new file mode 100644
index 0000000..6c237bf
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/query57.q.out
@@ -0,0 +1,351 @@
+PREHOOK: query: explain
+with v1 as(
+ select i_category, i_brand,
+        cc_name,
+        d_year, d_moy,
+        sum(cs_sales_price) sum_sales,
+        avg(sum(cs_sales_price)) over
+          (partition by i_category, i_brand,
+                     cc_name, d_year)
+          avg_monthly_sales,
+        rank() over
+          (partition by i_category, i_brand,
+                     cc_name
+           order by d_year, d_moy) rn
+ from item, catalog_sales, date_dim, call_center
+ where cs_item_sk = i_item_sk and
+       cs_sold_date_sk = d_date_sk and
+       cc_call_center_sk= cs_call_center_sk and
+       (
+         d_year = 2000 or
+         ( d_year = 2000-1 and d_moy =12) or
+         ( d_year = 2000+1 and d_moy =1)
+       )
+ group by i_category, i_brand,
+          cc_name , d_year, d_moy),
+ v2 as(
+ select v1.i_category, v1.i_brand
+        ,v1.d_year, v1.d_moy
+        ,v1.avg_monthly_sales
+        ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+       v1.i_category = v1_lead.i_category and
+       v1.i_brand = v1_lag.i_brand and
+       v1.i_brand = v1_lead.i_brand and
+       v1. cc_name = v1_lag. cc_name and
+       v1. cc_name = v1_lead. cc_name and
+       v1.rn = v1_lag.rn + 1 and
+       v1.rn = v1_lead.rn - 1)
+  select  *
+ from v2
+ where  d_year = 2000 and
+        avg_monthly_sales > 0 and
+        case when avg_monthly_sales > 0 then abs(sum_sales - 
avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with v1 as(
+ select i_category, i_brand,
+        cc_name,
+        d_year, d_moy,
+        sum(cs_sales_price) sum_sales,
+        avg(sum(cs_sales_price)) over
+          (partition by i_category, i_brand,
+                     cc_name, d_year)
+          avg_monthly_sales,
+        rank() over
+          (partition by i_category, i_brand,
+                     cc_name
+           order by d_year, d_moy) rn
+ from item, catalog_sales, date_dim, call_center
+ where cs_item_sk = i_item_sk and
+       cs_sold_date_sk = d_date_sk and
+       cc_call_center_sk= cs_call_center_sk and
+       (
+         d_year = 2000 or
+         ( d_year = 2000-1 and d_moy =12) or
+         ( d_year = 2000+1 and d_moy =1)
+       )
+ group by i_category, i_brand,
+          cc_name , d_year, d_moy),
+ v2 as(
+ select v1.i_category, v1.i_brand
+        ,v1.d_year, v1.d_moy
+        ,v1.avg_monthly_sales
+        ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum
+ from v1, v1 v1_lag, v1 v1_lead
+ where v1.i_category = v1_lag.i_category and
+       v1.i_category = v1_lead.i_category and
+       v1.i_brand = v1_lag.i_brand and
+       v1.i_brand = v1_lead.i_brand and
+       v1. cc_name = v1_lag. cc_name and
+       v1. cc_name = v1_lead. cc_name and
+       v1.rn = v1_lag.rn + 1 and
+       v1.rn = v1_lead.rn - 1)
+  select  *
+ from v2
+ where  d_year = 2000 and
+        avg_monthly_sales > 0 and
+        case when avg_monthly_sales > 0 then abs(sum_sales - 
avg_monthly_sales) / avg_monthly_sales else null end > 0.1
+ order by sum_sales - avg_monthly_sales, 3
+ limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 10 <- Map 21 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 11 <- Map 22 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
+Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
+Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
+Reducer 14 <- Reducer 13 (SIMPLE_EDGE)
+Reducer 15 <- Map 1 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+Reducer 16 <- Map 21 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
+Reducer 17 <- Map 22 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
+Reducer 19 <- Reducer 18 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+Reducer 3 <- Map 21 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 22 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE), Reducer 6 
(SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 8
+      File Output Operator [FS_112]
+        Limit [LIM_110] (rows=100 width=135)
+          Number of rows:100
+          Select Operator [SEL_109] (rows=421645952 width=135)
+            
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+          <-Reducer 7 [SIMPLE_EDGE]
+            SHUFFLE [RS_108]
+              Select Operator [SEL_107] (rows=421645952 width=135)
+                
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+                Merge Join Operator [MERGEJOIN_189] (rows=421645952 width=135)
+                  Conds:RS_103._col0, _col2, _col1, (_col6 + 1)=RS_104._col0, 
_col2, _col1, _col7(Inner),RS_104._col0, _col2, _col1, _col7=RS_105._col0, 
_col2, _col1, (_col6 - 
1)(Inner),Output:["_col5","_col7","_col8","_col10","_col11","_col12","_col13","_col20"]
+                <-Reducer 14 [SIMPLE_EDGE]
+                  SHUFFLE [RS_104]
+                    PartitionCols:_col0, _col2, _col1, _col7
+                    Select Operator [SEL_67] (rows=15971437 width=135)
+                      
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+                      Filter Operator [FIL_169] (rows=15971437 width=135)
+                        predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - 
_col0)) / _col0) > 0.1)) ELSE (null) END
+                        Select Operator [SEL_66] (rows=31942874 width=135)
+                          
Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                          Filter Operator [FIL_170] (rows=31942874 width=135)
+                            predicate:((_col0 > 0) and rank_window_1 is not 
null and (_col4 = 2000))
+                            PTF Operator [PTF_65] (rows=191657247 width=135)
+                              Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS 
FIRST, _col5 ASC NULLS FIRST","partition by:":"_col1, _col2, _col3"}]
+                              Select Operator [SEL_64] (rows=191657247 
width=135)
+                                
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                              <-Reducer 13 [SIMPLE_EDGE]
+                                SHUFFLE [RS_63]
+                                  PartitionCols:_col0, _col1, _col2
+                                  Select Operator [SEL_62] (rows=191657247 
width=135)
+                                    
Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5"]
+                                    PTF Operator [PTF_61] (rows=191657247 
width=135)
+                                      Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS 
FIRST, _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS 
FIRST","partition by:":"_col0, _col1, _col2, _col3"}]
+                                      Select Operator [SEL_60] (rows=191657247 
width=135)
+                                        
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+                                      <-Reducer 12 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_59]
+                                          PartitionCols:_col0, _col1, _col2, 
_col3
+                                          Select Operator [SEL_58] 
(rows=191657247 width=135)
+                                            
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+                                            Group By Operator [GBY_57] 
(rows=191657247 width=135)
+                                              
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4
+                                            <-Reducer 11 [SIMPLE_EDGE]
+                                              SHUFFLE [RS_56]
+                                                PartitionCols:_col0, _col1, 
_col2, _col3, _col4
+                                                Group By Operator [GBY_55] 
(rows=383314495 width=135)
+                                                  
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col5,
 _col6, _col8, _col10, _col11
+                                                  Merge Join Operator 
[MERGEJOIN_185] (rows=383314495 width=135)
+                                                    
Conds:RS_51._col2=RS_52._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"]
+                                                  <-Map 22 [SIMPLE_EDGE]
+                                                    SHUFFLE [RS_52]
+                                                      PartitionCols:_col0
+                                                      Select Operator [SEL_44] 
(rows=462000 width=1436)
+                                                        
Output:["_col0","_col1","_col2"]
+                                                        Filter Operator 
[FIL_174] (rows=462000 width=1436)
+                                                          predicate:(i_item_sk 
is not null and i_category is not null and i_brand is not null)
+                                                          TableScan [TS_9] 
(rows=462000 width=1436)
+                                                            
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_category"]
+                                                  <-Reducer 10 [SIMPLE_EDGE]
+                                                    SHUFFLE [RS_51]
+                                                      PartitionCols:_col2
+                                                      Merge Join Operator 
[MERGEJOIN_184] (rows=348467716 width=135)
+                                                        
Conds:RS_48._col1=RS_49._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"]
+                                                      <-Map 21 [SIMPLE_EDGE]
+                                                        SHUFFLE [RS_49]
+                                                          PartitionCols:_col0
+                                                          Select Operator 
[SEL_41] (rows=60 width=2045)
+                                                            
Output:["_col0","_col1"]
+                                                            Filter Operator 
[FIL_173] (rows=60 width=2045)
+                                                              
predicate:(cc_call_center_sk is not null and cc_name is not null)
+                                                              TableScan [TS_6] 
(rows=60 width=2045)
+                                                                
default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_name"]
+                                                      <-Reducer 9 [SIMPLE_EDGE]
+                                                        SHUFFLE [RS_48]
+                                                          PartitionCols:_col1
+                                                          Merge Join Operator 
[MERGEJOIN_183] (rows=316788826 width=135)
+                                                            
Conds:RS_45._col0=RS_46._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"]
+                                                          <-Map 1 [SIMPLE_EDGE]
+                                                            SHUFFLE [RS_45]
+                                                              
PartitionCols:_col0
+                                                              Select Operator 
[SEL_35] (rows=287989836 width=135)
+                                                                
Output:["_col0","_col1","_col2","_col3"]
+                                                                Filter 
Operator [FIL_171] (rows=287989836 width=135)
+                                                                  
predicate:(cs_item_sk is not null and cs_sold_date_sk is not null and 
cs_call_center_sk is not null)
+                                                                  TableScan 
[TS_0] (rows=287989836 width=135)
+                                                                    
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_item_sk","cs_sales_price"]
+                                                          <-Map 20 
[SIMPLE_EDGE]
+                                                            SHUFFLE [RS_46]
+                                                              
PartitionCols:_col0
+                                                              Select Operator 
[SEL_38] (rows=73048 width=1119)
+                                                                
Output:["_col0","_col1","_col2"]
+                                                                Filter 
Operator [FIL_172] (rows=73048 width=1119)
+                                                                  
predicate:(((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 
2001) and (d_moy = 1))) and d_date_sk is not null)
+                                                                  TableScan 
[TS_3] (rows=73049 width=1119)
+                                                                    
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+                <-Reducer 19 [SIMPLE_EDGE]
+                  SHUFFLE [RS_105]
+                    PartitionCols:_col0, _col2, _col1, (_col6 - 1)
+                    Select Operator [SEL_99] (rows=191657247 width=135)
+                      Output:["_col0","_col1","_col2","_col5","_col6"]
+                      Filter Operator [FIL_175] (rows=191657247 width=135)
+                        predicate:rank_window_0 is not null
+                        PTF Operator [PTF_98] (rows=191657247 width=135)
+                          Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS 
FIRST, _col4 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2"}]
+                          Select Operator [SEL_97] (rows=191657247 width=135)
+                            
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+                          <-Reducer 18 [SIMPLE_EDGE]
+                            SHUFFLE [RS_96]
+                              PartitionCols:_col0, _col1, _col2
+                              Select Operator [SEL_95] (rows=191657247 
width=135)
+                                
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+                                Group By Operator [GBY_94] (rows=191657247 
width=135)
+                                  
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4
+                                <-Reducer 17 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_93]
+                                    PartitionCols:_col0, _col1, _col2, _col3, 
_col4
+                                    Group By Operator [GBY_92] (rows=383314495 
width=135)
+                                      
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col5,
 _col6, _col8, _col10, _col11
+                                      Merge Join Operator [MERGEJOIN_188] 
(rows=383314495 width=135)
+                                        
Conds:RS_88._col2=RS_89._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"]
+                                      <-Map 22 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_89]
+                                          PartitionCols:_col0
+                                          Select Operator [SEL_81] 
(rows=462000 width=1436)
+                                            Output:["_col0","_col1","_col2"]
+                                            Filter Operator [FIL_179] 
(rows=462000 width=1436)
+                                              predicate:(i_item_sk is not null 
and i_category is not null and i_brand is not null)
+                                               Please refer to the previous 
TableScan [TS_9]
+                                      <-Reducer 16 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_88]
+                                          PartitionCols:_col2
+                                          Merge Join Operator [MERGEJOIN_187] 
(rows=348467716 width=135)
+                                            
Conds:RS_85._col1=RS_86._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"]
+                                          <-Map 21 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_86]
+                                              PartitionCols:_col0
+                                              Select Operator [SEL_78] 
(rows=60 width=2045)
+                                                Output:["_col0","_col1"]
+                                                Filter Operator [FIL_178] 
(rows=60 width=2045)
+                                                  predicate:(cc_call_center_sk 
is not null and cc_name is not null)
+                                                   Please refer to the 
previous TableScan [TS_6]
+                                          <-Reducer 15 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_85]
+                                              PartitionCols:_col1
+                                              Merge Join Operator 
[MERGEJOIN_186] (rows=316788826 width=135)
+                                                
Conds:RS_82._col0=RS_83._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"]
+                                              <-Map 1 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_82]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_72] 
(rows=287989836 width=135)
+                                                    
Output:["_col0","_col1","_col2","_col3"]
+                                                    Filter Operator [FIL_176] 
(rows=287989836 width=135)
+                                                      predicate:(cs_item_sk is 
not null and cs_sold_date_sk is not null and cs_call_center_sk is not null)
+                                                       Please refer to the 
previous TableScan [TS_0]
+                                              <-Map 20 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_83]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_75] 
(rows=73048 width=1119)
+                                                    
Output:["_col0","_col1","_col2"]
+                                                    Filter Operator [FIL_177] 
(rows=73048 width=1119)
+                                                      predicate:(((d_year = 
2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 
1))) and d_date_sk is not null)
+                                                       Please refer to the 
previous TableScan [TS_3]
+                <-Reducer 6 [SIMPLE_EDGE]
+                  SHUFFLE [RS_103]
+                    PartitionCols:_col0, _col2, _col1, (_col6 + 1)
+                    Select Operator [SEL_29] (rows=191657247 width=135)
+                      Output:["_col0","_col1","_col2","_col5","_col6"]
+                      Filter Operator [FIL_164] (rows=191657247 width=135)
+                        predicate:rank_window_0 is not null
+                        PTF Operator [PTF_28] (rows=191657247 width=135)
+                          Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS 
FIRST, _col4 ASC NULLS FIRST","partition by:":"_col0, _col1, _col2"}]
+                          Select Operator [SEL_27] (rows=191657247 width=135)
+                            
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+                          <-Reducer 5 [SIMPLE_EDGE]
+                            SHUFFLE [RS_26]
+                              PartitionCols:_col0, _col1, _col2
+                              Select Operator [SEL_25] (rows=191657247 
width=135)
+                                
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+                                Group By Operator [GBY_24] (rows=191657247 
width=135)
+                                  
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4
+                                <-Reducer 4 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_23]
+                                    PartitionCols:_col0, _col1, _col2, _col3, 
_col4
+                                    Group By Operator [GBY_22] (rows=383314495 
width=135)
+                                      
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col5,
 _col6, _col8, _col10, _col11
+                                      Merge Join Operator [MERGEJOIN_182] 
(rows=383314495 width=135)
+                                        
Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"]
+                                      <-Map 22 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_19]
+                                          PartitionCols:_col0
+                                          Select Operator [SEL_11] 
(rows=462000 width=1436)
+                                            Output:["_col0","_col1","_col2"]
+                                            Filter Operator [FIL_168] 
(rows=462000 width=1436)
+                                              predicate:(i_item_sk is not null 
and i_category is not null and i_brand is not null)
+                                               Please refer to the previous 
TableScan [TS_9]
+                                      <-Reducer 3 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_18]
+                                          PartitionCols:_col2
+                                          Merge Join Operator [MERGEJOIN_181] 
(rows=348467716 width=135)
+                                            
Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"]
+                                          <-Map 21 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_16]
+                                              PartitionCols:_col0
+                                              Select Operator [SEL_8] (rows=60 
width=2045)
+                                                Output:["_col0","_col1"]
+                                                Filter Operator [FIL_167] 
(rows=60 width=2045)
+                                                  predicate:(cc_call_center_sk 
is not null and cc_name is not null)
+                                                   Please refer to the 
previous TableScan [TS_6]
+                                          <-Reducer 2 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_15]
+                                              PartitionCols:_col1
+                                              Merge Join Operator 
[MERGEJOIN_180] (rows=316788826 width=135)
+                                                
Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"]
+                                              <-Map 1 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_12]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_2] 
(rows=287989836 width=135)
+                                                    
Output:["_col0","_col1","_col2","_col3"]
+                                                    Filter Operator [FIL_165] 
(rows=287989836 width=135)
+                                                      predicate:(cs_item_sk is 
not null and cs_sold_date_sk is not null and cs_call_center_sk is not null)
+                                                       Please refer to the 
previous TableScan [TS_0]
+                                              <-Map 20 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_13]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_5] 
(rows=73048 width=1119)
+                                                    
Output:["_col0","_col1","_col2"]
+                                                    Filter Operator [FIL_166] 
(rows=73048 width=1119)
+                                                      predicate:(((d_year = 
2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 
1))) and d_date_sk is not null)
+                                                       Please refer to the 
previous TableScan [TS_3]
+

Reply via email to