[hive] 01/02: Revert "Allow PPD when subject is not a column with grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)"

jcamacho Fri, 17 Jul 2020 08:13:24 -0700

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


commit 86781643c3d90887c04551aecf5b5d54b86fbe48
Author: Jesus Camacho Rodriguez <jcama...@apache.org>
AuthorDate: Fri Jul 17 08:11:39 2020 -0700

    Revert "Allow PPD when subject is not a column with grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)"
    
    This reverts commit 44aa72f096639d7b1a52ef18887016af98bd6999.
---
 .../apache/hadoop/hive/ql/ppd/OpProcFactory.java   |  44 +-
 .../groupby_grouping_sets_pushdown1.q              |  54 +-
 .../llap/groupby_grouping_sets_pushdown1.q.out     | 802 ---------------------
 3 files changed, 23 insertions(+), 877 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index 56d3e90..6c66260 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -792,40 +792,40 @@ public final class OpProcFactory {
       return null;
     }
 
-    private void getGBYKeyPosFromExpr(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
-        List<Integer> gbyKeyPos) {
-      for (int i = 0; i < groupByKeys.size(); i++) {
-        if (groupByKeys.get(i).isSame(expr)) {
-          gbyKeyPos.add(i);
-          return;
-        }
-      }
-      if (expr.getChildren() != null) {
-        for (int i = 0; i < expr.getChildren().size(); i++) {
-          getGBYKeyPosFromExpr(expr.getChildren().get(i), groupByKeys, gbyKeyPos);
-        }
-      }
-    }
-
     private boolean canPredPushdown(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
         FastBitSet[] bitSets, int groupingSetPosition) {
-      List<Integer> gbyKeyPos = new ArrayList<Integer>();
-      getGBYKeyPosFromExpr(expr, groupByKeys, gbyKeyPos);
-      // gbyKeysInExpr can be empty, maybe the expr is a boolean constant, let the expr push down
-      for (Integer pos : gbyKeyPos) {
+      List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
+      extractCols(expr, columns);
+      for (ExprNodeDesc col : columns) {
+        int index = groupByKeys.indexOf(col);
+        assert index >= 0;
         for (FastBitSet bitset : bitSets) {
           int keyPos = bitset.nextClearBit(0);
-          while (keyPos < groupingSetPosition && keyPos != pos) {
+          while (keyPos < groupingSetPosition && keyPos != index) {
             keyPos = bitset.nextClearBit(keyPos + 1);
           }
-          // If the gbyKey has not be found in grouping sets, the expr should not be pushed down
-          if (keyPos != pos) {
+          // If the column has not be found in grouping sets, the expr should not be pushed down
+          if (keyPos != index) {
             return false;
           }
         }
       }
       return true;
     }
+
+    // Extract columns from expression
+    private void extractCols(ExprNodeDesc expr, List<ExprNodeDesc> columns) {
+      if (expr instanceof ExprNodeColumnDesc) {
+        columns.add(expr);
+      }
+
+      if (expr instanceof ExprNodeGenericFuncDesc) {
+        List<ExprNodeDesc> children = expr.getChildren();
+        for (int i = 0; i < children.size(); ++i) {
+          extractCols(children.get(i), columns);
+        }
+      }
+    }
   }
 
   /**
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
index cbfe58c..ce2c68c 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
@@ -39,56 +39,4 @@ SELECT * FROM (
 SELECT a, b, sum(s)
 FROM T1
 GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE b IS NULL;
-
-EXPLAIN EXTENDED SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ("AAA", "BBB");
-
-SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ('AAA', 'BBB');
-
-EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1;
-
-SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1;
-
-EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100;
-
-SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100;
-
-EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100;
-
-SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100;
-
-EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB';
-
-SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB';
+) t WHERE b IS NULL;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
index 81fdd06..2d71757 100644
--- a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
+++ b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
@@ -643,805 +643,3 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
 aaa    NULL    123456
-PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ("AAA", "BBB")
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ("AAA", "BBB")
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: t1
-                  filterExpr: (upper(a)) IN ('AAA', 'BBB') (type: boolean)
-                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: (upper(a)) IN ('AAA', 'BBB') (type: boolean)
-                    Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: sum(s)
-                      keys: a (type: string), b (type: string), 0L (type: 
bigint)
-                      minReductionHashAggr: 0.0
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        bucketingVersion: 2
-                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: bigint)
-                        null sort order: zzz
-                        numBuckets: -1
-                        sort order: +++
-                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: bigint)
-                        Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        tag: -1
-                        value expressions: _col3 (type: bigint)
-                        auto parallelism: true
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: t1
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    bucket_count -1
-                    bucketing_version 2
-                    column.name.delimiter ,
-                    columns a,b,s
-                    columns.types string:string:bigint
-#### A masked pattern was here ####
-                    name default.t1
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucketing_version 2
-                      column.name.delimiter ,
-                      columns a,b,s
-                      columns.comments 
-                      columns.types string:string:bigint
-#### A masked pattern was here ####
-                      name default.t1
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.t1
-                  name: default.t1
-            Truncated Path -> Alias:
-              /t1 [t1]
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: bigint)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE
-                pruneGroupingSetId: true
-                Select Operator
-                  expressions: upper(_col0) (type: string), _col1 (type: 
string), _col3 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  File Output Operator
-                    bucketingVersion: 2
-                    compressed: false
-                    GlobalTableId: 0
-#### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          bucketing_version -1
-                          columns _col0,_col1,_col2
-                          columns.types string:string:bigint
-                          escape.delim \
-                          hive.serialization.extend.additional.nesting.levels 
true
-                          serialization.escape.crlf true
-                          serialization.format 1
-                          serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    TotalFiles: 1
-                    GatherStats: false
-                    MultiFileSpray: false
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ('AAA', 'BBB')
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ('AAA', 'BBB')
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-AAA    bbb     123456
-AAA    NULL    123456
-PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: t1
-                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: false (type: boolean)
-                    Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: sum(s)
-                      keys: a (type: string), b (type: string), 0L (type: 
bigint)
-                      minReductionHashAggr: 0.0
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        bucketingVersion: 2
-                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: bigint)
-                        null sort order: zzz
-                        numBuckets: -1
-                        sort order: +++
-                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: bigint)
-                        Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        tag: -1
-                        value expressions: _col3 (type: bigint)
-                        auto parallelism: true
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-            Path -> Alias:
-              nullscan://null/default.t1/part_ [t1]
-            Path -> Partition:
-              nullscan://null/default.t1/part_ 
-                Partition
-                  input format: 
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    bucket_count -1
-                    bucketing_version 2
-                    column.name.delimiter ,
-                    columns a,b,s
-                    columns.types string:string:bigint
-#### A masked pattern was here ####
-                    name default.t1
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.NullStructSerDe
-                  serde: org.apache.hadoop.hive.serde2.NullStructSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucketing_version 2
-                      column.name.delimiter ,
-                      columns a,b,s
-                      columns.comments 
-                      columns.types string:string:bigint
-#### A masked pattern was here ####
-                      name default.t1
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.t1
-                  name: default.t1
-            Truncated Path -> Alias:
-              nullscan://null/default.t1/part_ [t1]
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: bigint)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE
-                pruneGroupingSetId: true
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  File Output Operator
-                    bucketingVersion: 2
-                    compressed: false
-                    GlobalTableId: 0
-#### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          bucketing_version -1
-                          columns _col0,_col1,_col2
-                          columns.types string:string:bigint
-                          escape.delim \
-                          hive.serialization.extend.additional.nesting.levels 
true
-                          serialization.escape.crlf true
-                          serialization.format 1
-                          serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    TotalFiles: 1
-                    GatherStats: false
-                    MultiFileSpray: false
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: t1
-                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  GatherStats: false
-                  Select Operator
-                    expressions: a (type: string), b (type: string), s (type: 
bigint)
-                    outputColumnNames: a, b, s
-                    Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: sum(s)
-                      keys: a (type: string), b (type: string), 0L (type: 
bigint)
-                      minReductionHashAggr: 0.0
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Filter Operator
-                        isSamplingPred: false
-                        predicate: (upper(_col0) = 'AAA') (type: boolean)
-                        Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          bucketingVersion: 2
-                          key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: bigint)
-                          null sort order: zzz
-                          numBuckets: -1
-                          sort order: +++
-                          Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: bigint)
-                          Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-                          tag: -1
-                          value expressions: _col3 (type: bigint)
-                          auto parallelism: true
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: t1
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    bucket_count -1
-                    bucketing_version 2
-                    column.name.delimiter ,
-                    columns a,b,s
-                    columns.types string:string:bigint
-#### A masked pattern was here ####
-                    name default.t1
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucketing_version 2
-                      column.name.delimiter ,
-                      columns a,b,s
-                      columns.comments 
-                      columns.types string:string:bigint
-#### A masked pattern was here ####
-                      name default.t1
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.t1
-                  name: default.t1
-            Truncated Path -> Alias:
-              /t1 [t1]
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: bigint)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE
-                pruneGroupingSetId: true
-                Filter Operator
-                  isSamplingPred: false
-                  predicate: (_col3 > 100L) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: bigint)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    File Output Operator
-                      bucketingVersion: 2
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            bucketing_version -1
-                            columns _col0,_col1,_col2
-                            columns.types string:string:bigint
-                            escape.delim \
-                            
hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-aaa    bbb     123456
-aaa    NULL    123456
-PREHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: t1
-                  filterExpr: (upper(a) = 'AAA') (type: boolean)
-                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: (upper(a) = 'AAA') (type: boolean)
-                    Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: sum(s)
-                      keys: upper(a) (type: string), b (type: string), 0L 
(type: bigint)
-                      minReductionHashAggr: 0.0
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 1 Data size: 287 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        bucketingVersion: 2
-                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: bigint)
-                        null sort order: zzz
-                        numBuckets: -1
-                        sort order: +++
-                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: bigint)
-                        Statistics: Num rows: 1 Data size: 287 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        tag: -1
-                        value expressions: _col3 (type: bigint)
-                        auto parallelism: true
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: t1
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    bucket_count -1
-                    bucketing_version 2
-                    column.name.delimiter ,
-                    columns a,b,s
-                    columns.types string:string:bigint
-#### A masked pattern was here ####
-                    name default.t1
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucketing_version 2
-                      column.name.delimiter ,
-                      columns a,b,s
-                      columns.comments 
-                      columns.types string:string:bigint
-#### A masked pattern was here ####
-                      name default.t1
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.t1
-                  name: default.t1
-            Truncated Path -> Alias:
-              /t1 [t1]
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: bigint)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE 
Column stats: COMPLETE
-                pruneGroupingSetId: true
-                Select Operator
-                  expressions: _col1 (type: string), _col3 (type: bigint)
-                  outputColumnNames: _col1, _col3
-                  Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: (_col3 > 100L) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 95 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: 'AAA' (type: string), _col1 (type: string), 
_col3 (type: bigint)
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      File Output Operator
-                        bucketingVersion: 2
-                        compressed: false
-                        GlobalTableId: 0
-#### A masked pattern was here ####
-                        NumFilesPerFileSink: 1
-                        Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
-                        table:
-                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            properties:
-                              bucketing_version -1
-                              columns _col0,_col1,_col2
-                              columns.types string:string:bigint
-                              escape.delim \
-                              
hive.serialization.extend.additional.nesting.levels true
-                              serialization.escape.crlf true
-                              serialization.format 1
-                              serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        TotalFiles: 1
-                        GatherStats: false
-                        MultiFileSpray: false
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-AAA    bbb     123456
-AAA    NULL    123456
-PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: t1
-                  filterExpr: (upper(b) = 'BBB') (type: boolean)
-                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: (upper(b) = 'BBB') (type: boolean)
-                    Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: sum(s)
-                      keys: a (type: string), b (type: string), 0L (type: 
bigint)
-                      minReductionHashAggr: 0.0
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Filter Operator
-                        isSamplingPred: false
-                        predicate: _col0 is not null (type: boolean)
-                        Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          bucketingVersion: 2
-                          key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: bigint)
-                          null sort order: zzz
-                          numBuckets: -1
-                          sort order: +++
-                          Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: bigint)
-                          Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: COMPLETE
-                          tag: -1
-                          value expressions: _col3 (type: bigint)
-                          auto parallelism: true
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: t1
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    bucket_count -1
-                    bucketing_version 2
-                    column.name.delimiter ,
-                    columns a,b,s
-                    columns.types string:string:bigint
-#### A masked pattern was here ####
-                    name default.t1
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucketing_version 2
-                      column.name.delimiter ,
-                      columns a,b,s
-                      columns.comments 
-                      columns.types string:string:bigint
-#### A masked pattern was here ####
-                      name default.t1
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.t1
-                  name: default.t1
-            Truncated Path -> Alias:
-              /t1 [t1]
-        Reducer 2 
-            Execution mode: vectorized, llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: bigint)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE
-                pruneGroupingSetId: true
-                Filter Operator
-                  isSamplingPred: false
-                  predicate: (_col3 > 100L) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: bigint)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    File Output Operator
-                      bucketingVersion: 2
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          properties:
-                            bucketing_version -1
-                            columns _col0,_col1,_col2
-                            columns.types string:string:bigint
-                            escape.delim \
-                            
hive.serialization.extend.additional.nesting.levels true
-                            serialization.escape.crlf true
-                            serialization.format 1
-                            serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-aaa    bbb     123456

[hive] 01/02: Revert "Allow PPD when subject is not a column with grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)"

Reply via email to