This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
commit a5dd502f1069a6cd48d56c21a35f93be79362c62
Author: dengzh <dengzhhu...@gmail.com>
AuthorDate: Fri Jul 17 23:04:17 2020 +0800

    HIVE-23850: Allow PPD when subject is not a column with grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)

    Closes apache/hive#1255
---
 .../apache/hadoop/hive/ql/ppd/OpProcFactory.java |  44 +-
 .../groupby_grouping_sets_pushdown1.q            |  54 +-
 .../llap/groupby_grouping_sets_pushdown1.q.out   | 802 +++++++++++++++++++++
 3 files changed, 877 insertions(+), 23 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index 6c66260..56d3e90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -792,40 +792,40 @@ public final class OpProcFactory {
       return null;
     }
 
+    private void getGBYKeyPosFromExpr(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
+        List<Integer> gbyKeyPos) {
+      for (int i = 0; i < groupByKeys.size(); i++) {
+        if (groupByKeys.get(i).isSame(expr)) {
+          gbyKeyPos.add(i);
+          return;
+        }
+      }
+      if (expr.getChildren() != null) {
+        for (int i = 0; i < expr.getChildren().size(); i++) {
+          getGBYKeyPosFromExpr(expr.getChildren().get(i), groupByKeys, gbyKeyPos);
+        }
+      }
+    }
+
     private boolean canPredPushdown(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
         FastBitSet[] bitSets, int groupingSetPosition) {
-      List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
-      extractCols(expr, columns);
-      for (ExprNodeDesc col : columns) {
-        int index = groupByKeys.indexOf(col);
-        assert index >= 0;
+      List<Integer> gbyKeyPos = new ArrayList<Integer>();
+      getGBYKeyPosFromExpr(expr, groupByKeys, gbyKeyPos);
+      // gbyKeyPos can be empty, e.g. when the expr is a boolean constant; in that case let the expr push down
+      for (Integer pos : gbyKeyPos) {
         for (FastBitSet bitset : bitSets) {
           int keyPos = bitset.nextClearBit(0);
-          while (keyPos < groupingSetPosition && keyPos != index) {
+          while (keyPos < groupingSetPosition && keyPos != pos) {
             keyPos = bitset.nextClearBit(keyPos + 1);
           }
-          // If the column has not been found in grouping sets, the expr should not be pushed down
-          if (keyPos != index) {
+          // If the gbyKey has not been found in grouping sets, the expr should not be pushed down
+          if (keyPos != pos) {
            return false;
          }
        }
      }
      return true;
    }
-
-    // Extract columns from expression
-    private void extractCols(ExprNodeDesc expr, List<ExprNodeDesc> columns) {
-      if (expr instanceof ExprNodeColumnDesc) {
-        columns.add(expr);
-      }
-
-      if (expr instanceof ExprNodeGenericFuncDesc) {
-        List<ExprNodeDesc> children = expr.getChildren();
-        for (int i = 0; i < children.size(); ++i) {
-          extractCols(children.get(i), columns);
-        }
-      }
-    }
   }
 
   /**
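For readers following along, here is a minimal, self-contained sketch of the check the patch performs. It is not part of the commit: java.util.BitSet stands in for Hive's FastBitSet, plain strings stand in for ExprNodeDesc, and the predicate is reduced to the flat list of group-by keys it references (the patch instead walks the expression tree with isSame, which also matches whole-expression keys such as upper(a)). The bit convention assumed here is the one implied by the loop above: a clear bit at position i means group-by key i participates in that grouping set.

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

public class GroupingSetPpdSketch {

  // Positions (indexes into groupByKeys) of the group-by keys that the
  // predicate references; mirrors getGBYKeyPosFromExpr over a flat list.
  static List<Integer> keyPositions(List<String> predicateKeys, List<String> groupByKeys) {
    List<Integer> positions = new ArrayList<>();
    for (String key : predicateKeys) {
      int i = groupByKeys.indexOf(key);
      if (i >= 0) {
        positions.add(i);
      }
    }
    return positions;
  }

  // The predicate may be evaluated below the group-by only if every key it
  // references is present in every grouping set; otherwise some grouping set
  // would substitute NULL for that key and the filter would drop rows it
  // should keep. An empty position list (e.g. a constant predicate) is
  // always pushable.
  static boolean canPushDown(List<String> predicateKeys, List<String> groupByKeys,
      BitSet[] groupingSets) {
    for (int pos : keyPositions(predicateKeys, groupByKeys)) {
      for (BitSet groupingSet : groupingSets) {
        if (groupingSet.get(pos)) { // set bit: key is masked out of this set
          return false;
        }
      }
    }
    return true;
  }
}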
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
index ce2c68c..cbfe58c 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
@@ -39,4 +39,56 @@ SELECT * FROM (
 SELECT a, b, sum(s)
 FROM T1
 GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE b IS NULL;
\ No newline at end of file
+) t WHERE b IS NULL;
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ("AAA", "BBB");
+
+SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ('AAA', 'BBB');
+
+EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1;
+
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1;
+
+EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB';
+
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB';
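The new queries above cover four distinct pushdown situations. The hypothetical driver below maps them onto the sketch shown earlier (keys a = 0 and b = 1; a set bit masks a key out of a grouping set) and prints the decision that the EXPLAIN EXTENDED output in the golden file below is expected to reflect; it assumes the GroupingSetPpdSketch class from the previous block is on the classpath.

import java.util.BitSet;
import java.util.List;

public class GroupingSetPpdDemo {
  public static void main(String[] args) {
    List<String> keys = List.of("a", "b");

    BitSet onlyA = new BitSet(); onlyA.set(1);  // (a): b masked out
    BitSet onlyB = new BitSet(); onlyB.set(0);  // (b): a masked out
    BitSet both = new BitSet();                 // (a, b): nothing masked
    BitSet none = new BitSet(); none.set(0, 2); // (): both masked

    // upper(a) IN (...) over SETS ((a), (a, b)): a is in every set -> pushed to the TableScan
    System.out.println(GroupingSetPpdSketch.canPushDown(
        List.of("a"), keys, new BitSet[] {onlyA, both}));       // true

    // upper(a) = 'AAA' over SETS ((), (a), (a, b)): () masks a -> stays above the group-by
    System.out.println(GroupingSetPpdSketch.canPushDown(
        List.of("a"), keys, new BitSet[] {none, onlyA, both})); // false

    // a IS NOT NULL over SETS ((b), (a, b)): (b) masks a -> stays above the group-by
    System.out.println(GroupingSetPpdSketch.canPushDown(
        List.of("a"), keys, new BitSet[] {onlyB, both}));       // false

    // upper(b) = 'BBB' over SETS ((b), (a, b)): b is in every set -> pushed to the TableScan
    System.out.println(GroupingSetPpdSketch.canPushDown(
        List.of("b"), keys, new BitSet[] {onlyB, both}));       // true
  }
}

The 1 != 1 case is not in the driver: constant folding reduces it to a constant false predicate, which the empty-key-position rule lets through, and the second plan below shows the scan collapsing to a null scan (OneNullRowInputFormat) as a result.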
diff --git a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
index 2d71757..81fdd06 100644
--- a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
+++ b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
@@ -643,3 +643,805 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
 aaa	NULL	123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ("AAA", "BBB")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ("AAA", "BBB")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: (upper(a)) IN ('AAA', 'BBB') (type: boolean)
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (upper(a)) IN ('AAA', 'BBB') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(s)
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        bucketingVersion: 2
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        null sort order: zzz
+                        numBuckets: -1
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                        tag: -1
+                        value expressions: _col3 (type: bigint)
+                        auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: t1
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns a,b,s
+                    columns.types string:string:bigint
+#### A masked pattern was here ####
+                    name default.t1
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns a,b,s
+                      columns.comments
+                      columns.types string:string:bigint
+#### A masked pattern was here ####
+                      name default.t1
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.t1
+                  name: default.t1
+            Truncated Path -> Alias:
+              /t1 [t1]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: upper(_col0) (type: string), _col1 (type: string), _col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    bucketingVersion: 2
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          bucketing_version -1
+                          columns _col0,_col1,_col2
+                          columns.types string:string:bigint
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ('AAA', 'BBB')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ('AAA', 'BBB')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+AAA	bbb	123456
+AAA	NULL	123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: false (type: boolean)
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(s)
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        bucketingVersion: 2
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        null sort order: zzz
+                        numBuckets: -1
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                        tag: -1
+                        value expressions: _col3 (type: bigint)
+                        auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+              nullscan://null/default.t1/part_ [t1]
+            Path -> Partition:
+              nullscan://null/default.t1/part_
+                Partition
+                  input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns a,b,s
+                    columns.types string:string:bigint
+#### A masked pattern was here ####
+                    name default.t1
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe
+                  serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns a,b,s
+                      columns.comments
+                      columns.types string:string:bigint
+#### A masked pattern was here ####
+                      name default.t1
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.t1
+                  name: default.t1
+            Truncated Path -> Alias:
+              nullscan://null/default.t1/part_ [t1]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    bucketingVersion: 2
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          bucketing_version -1
+                          columns _col0,_col1,_col2
+                          columns.types string:string:bigint
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Select Operator
+                    expressions: a (type: string), b (type: string), s (type: bigint)
+                    outputColumnNames: a, b, s
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(s)
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                      Filter Operator
+                        isSamplingPred: false
+                        predicate: (upper(_col0) = 'AAA') (type: boolean)
+                        Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          bucketingVersion: 2
+                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                          null sort order: zzz
+                          numBuckets: -1
+                          sort order: +++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                          Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          value expressions: _col3 (type: bigint)
+                          auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: t1
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns a,b,s
+                    columns.types string:string:bigint
+#### A masked pattern was here ####
+                    name default.t1
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns a,b,s
+                      columns.comments
+                      columns.types string:string:bigint
+#### A masked pattern was here ####
+                      name default.t1
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.t1
+                  name: default.t1
+            Truncated Path -> Alias:
+              /t1 [t1]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                pruneGroupingSetId: true
+                Filter Operator
+                  isSamplingPred: false
+                  predicate: (_col3 > 100L) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      bucketingVersion: 2
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+                      Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          properties:
+                            bucketing_version -1
+                            columns _col0,_col1,_col2
+                            columns.types string:string:bigint
+                            escape.delim \
+                            hive.serialization.extend.additional.nesting.levels true
+                            serialization.escape.crlf true
+                            serialization.format 1
+                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+aaa	bbb	123456
+aaa	NULL	123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: (upper(a) = 'AAA') (type: boolean)
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (upper(a) = 'AAA') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(s)
+                      keys: upper(a) (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        bucketingVersion: 2
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        null sort order: zzz
+                        numBuckets: -1
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
+                        tag: -1
+                        value expressions: _col3 (type: bigint)
+                        auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: t1
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns a,b,s
+                    columns.types string:string:bigint
+#### A masked pattern was here ####
+                    name default.t1
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns a,b,s
+                      columns.comments
+                      columns.types string:string:bigint
+#### A masked pattern was here ####
+                      name default.t1
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.t1
+                  name: default.t1
+            Truncated Path -> Alias:
+              /t1 [t1]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col1 (type: string), _col3 (type: bigint)
+                  outputColumnNames: _col1, _col3
+                  Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (_col3 > 100L) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: 'AAA' (type: string), _col1 (type: string), _col3 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        bucketingVersion: 2
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            properties:
+                              bucketing_version -1
+                              columns _col0,_col1,_col2
+                              columns.types string:string:bigint
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+AAA	bbb	123456
+AAA	NULL	123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: (upper(b) = 'BBB') (type: boolean)
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (upper(b) = 'BBB') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(s)
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                      Filter Operator
+                        isSamplingPred: false
+                        predicate: _col0 is not null (type: boolean)
+                        Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          bucketingVersion: 2
+                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                          null sort order: zzz
+                          numBuckets: -1
+                          sort order: +++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                          Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          value expressions: _col3 (type: bigint)
+                          auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: t1
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns a,b,s
+                    columns.types string:string:bigint
+#### A masked pattern was here ####
+                    name default.t1
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns a,b,s
+                      columns.comments
+                      columns.types string:string:bigint
+#### A masked pattern was here ####
+                      name default.t1
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.t1
+                  name: default.t1
+            Truncated Path -> Alias:
+              /t1 [t1]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                pruneGroupingSetId: true
+                Filter Operator
+                  isSamplingPred: false
+                  predicate: (_col3 > 100L) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      bucketingVersion: 2
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+                      Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          properties:
+                            bucketing_version -1
+                            columns _col0,_col1,_col2
+                            columns.types string:string:bigint
+                            escape.delim \
+                            hive.serialization.extend.additional.nesting.levels true
+                            serialization.escape.crlf true
+                            serialization.format 1
+                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+aaa	bbb	123456