This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
commit a5dd502f1069a6cd48d56c21a35f93be79362c62
Author: dengzh <dengzhhu...@gmail.com>
AuthorDate: Fri Jul 17 23:04:17 2020 +0800

    HIVE-23850: Allow PPD when subject is not a column with grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)

    Closes apache/hive#1255
---
 .../apache/hadoop/hive/ql/ppd/OpProcFactory.java |  44 +-
 .../groupby_grouping_sets_pushdown1.q            |  54 +-
 .../llap/groupby_grouping_sets_pushdown1.q.out   | 802 +++++++++++++++++++++
 3 files changed, 877 insertions(+), 23 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index 6c66260..56d3e90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -792,40 +792,40 @@ public final class OpProcFactory {
       return null;
     }
 
+    private void getGBYKeyPosFromExpr(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
+        List<Integer> gbyKeyPos) {
+      for (int i = 0; i < groupByKeys.size(); i++) {
+        if (groupByKeys.get(i).isSame(expr)) {
+          gbyKeyPos.add(i);
+          return;
+        }
+      }
+      if (expr.getChildren() != null) {
+        for (int i = 0; i < expr.getChildren().size(); i++) {
+          getGBYKeyPosFromExpr(expr.getChildren().get(i), groupByKeys, gbyKeyPos);
+        }
+      }
+    }
+
     private boolean canPredPushdown(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
         FastBitSet[] bitSets, int groupingSetPosition) {
-      List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
-      extractCols(expr, columns);
-      for (ExprNodeDesc col : columns) {
-        int index = groupByKeys.indexOf(col);
-        assert index >= 0;
+      List<Integer> gbyKeyPos = new ArrayList<Integer>();
+      getGBYKeyPosFromExpr(expr, groupByKeys, gbyKeyPos);
+      // gbyKeyPos can be empty, e.g. when the expr is a boolean constant; in that case let the expr push down
+      for (Integer pos : gbyKeyPos) {
         for (FastBitSet bitset : bitSets) {
           int keyPos = bitset.nextClearBit(0);
-          while (keyPos < groupingSetPosition && keyPos != index) {
+          while (keyPos < groupingSetPosition && keyPos != pos) {
             keyPos = bitset.nextClearBit(keyPos + 1);
           }
-          // If the column has not been found in grouping sets, the expr should not be pushed down
-          if (keyPos != index) {
+          // If the gbyKey has not been found in grouping sets, the expr should not be pushed down
+          if (keyPos != pos) {
            return false;
          }
        }
      }
      return true;
    }
-
-    // Extract columns from expression
-    private void extractCols(ExprNodeDesc expr, List<ExprNodeDesc> columns) {
-      if (expr instanceof ExprNodeColumnDesc) {
-        columns.add(expr);
-      }
-
-      if (expr instanceof ExprNodeGenericFuncDesc) {
-        List<ExprNodeDesc> children = expr.getChildren();
-        for (int i = 0; i < children.size(); ++i) {
-          extractCols(children.get(i), columns);
-        }
-      }
-    }
   }
 
   /**
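For readers following along, here is a minimal, self-contained sketch of the check the patch performs. It is not part of the commit: java.util.BitSet stands in for Hive's FastBitSet, plain strings stand in for ExprNodeDesc, and the predicate is reduced to the flat list of group-by keys it references (the patch instead walks the expression tree with isSame, which also matches whole-expression keys such as upper(a)). The bit convention assumed here is the one implied by the loop above: a clear bit at position i means group-by key i participates in that grouping set.

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

public class GroupingSetPpdSketch {

  // Positions (indexes into groupByKeys) of the group-by keys that the
  // predicate references; mirrors getGBYKeyPosFromExpr over a flat list.
  static List<Integer> keyPositions(List<String> predicateKeys, List<String> groupByKeys) {
    List<Integer> positions = new ArrayList<>();
    for (String key : predicateKeys) {
      int i = groupByKeys.indexOf(key);
      if (i >= 0) {
        positions.add(i);
      }
    }
    return positions;
  }

  // The predicate may be evaluated below the group-by only if every key it
  // references is present in every grouping set; otherwise some grouping set
  // would substitute NULL for that key and the filter would drop rows it
  // should keep. An empty position list (e.g. a constant predicate) is
  // always pushable.
  static boolean canPushDown(List<String> predicateKeys, List<String> groupByKeys,
      BitSet[] groupingSets) {
    for (int pos : keyPositions(predicateKeys, groupByKeys)) {
      for (BitSet groupingSet : groupingSets) {
        if (groupingSet.get(pos)) { // set bit: key is masked out of this set
          return false;
        }
      }
    }
    return true;
  }
}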
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
index ce2c68c..cbfe58c 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
@@ -39,4 +39,56 @@ SELECT * FROM (
 SELECT a, b, sum(s)
 FROM T1
 GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE b IS NULL;
\ No newline at end of file
+) t WHERE b IS NULL;
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ("AAA", "BBB");
+
+SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ('AAA', 'BBB');
+
+EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1;
+
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1;
+
+EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB';
+
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB';
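The new queries above cover four distinct pushdown situations. The hypothetical driver below maps them onto the sketch shown earlier (keys a = 0 and b = 1; a set bit masks a key out of a grouping set) and prints the decision that the EXPLAIN EXTENDED output in the golden file below is expected to reflect; it assumes the GroupingSetPpdSketch class from the previous block is on the classpath.

import java.util.BitSet;
import java.util.List;

public class GroupingSetPpdDemo {
  public static void main(String[] args) {
    List<String> keys = List.of("a", "b");

    BitSet onlyA = new BitSet(); onlyA.set(1);  // (a): b masked out
    BitSet onlyB = new BitSet(); onlyB.set(0);  // (b): a masked out
    BitSet both = new BitSet();                 // (a, b): nothing masked
    BitSet none = new BitSet(); none.set(0, 2); // (): both masked

    // upper(a) IN (...) over SETS ((a), (a, b)): a is in every set -> pushed to the TableScan
    System.out.println(GroupingSetPpdSketch.canPushDown(
        List.of("a"), keys, new BitSet[] {onlyA, both}));       // true

    // upper(a) = 'AAA' over SETS ((), (a), (a, b)): () masks a -> stays above the group-by
    System.out.println(GroupingSetPpdSketch.canPushDown(
        List.of("a"), keys, new BitSet[] {none, onlyA, both})); // false

    // a IS NOT NULL over SETS ((b), (a, b)): (b) masks a -> stays above the group-by
    System.out.println(GroupingSetPpdSketch.canPushDown(
        List.of("a"), keys, new BitSet[] {onlyB, both}));       // false

    // upper(b) = 'BBB' over SETS ((b), (a, b)): b is in every set -> pushed to the TableScan
    System.out.println(GroupingSetPpdSketch.canPushDown(
        List.of("b"), keys, new BitSet[] {onlyB, both}));       // true
  }
}

The 1 != 1 case is not in the driver: constant folding reduces it to a constant false predicate, which the empty-key-position rule lets through, and the second plan below shows the scan collapsing to a null scan (OneNullRowInputFormat) as a result.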
diff --git a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
index 2d71757..81fdd06 100644
--- a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
+++ b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
@@ -643,3 +643,805 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
 aaa	NULL	123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ("AAA", "BBB")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ("AAA", "BBB")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: (upper(a)) IN ('AAA', 'BBB') (type: boolean)
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (upper(a)) IN ('AAA', 'BBB') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(s)
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        bucketingVersion: 2
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        null sort order: zzz
+                        numBuckets: -1
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                        tag: -1
+                        value expressions: _col3 (type: bigint)
+                        auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: t1
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns a,b,s
+                    columns.types string:string:bigint
+#### A masked pattern was here ####
+                    name default.t1
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns a,b,s
+                      columns.comments
+                      columns.types string:string:bigint
+#### A masked pattern was here ####
+                      name default.t1
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.t1
+                  name: default.t1
+            Truncated Path -> Alias:
+              /t1 [t1]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: upper(_col0) (type: string), _col1 (type: string), _col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    bucketingVersion: 2
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          bucketing_version -1
+                          columns _col0,_col1,_col2
+                          columns.types string:string:bigint
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ('AAA', 'BBB')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ('AAA', 'BBB')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+AAA	bbb	123456
+AAA	NULL	123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: false (type: boolean)
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(s)
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        bucketingVersion: 2
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        null sort order: zzz
+                        numBuckets: -1
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                        tag: -1
+                        value expressions: _col3 (type: bigint)
+                        auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+              nullscan://null/default.t1/part_ [t1]
+            Path -> Partition:
+              nullscan://null/default.t1/part_
+                Partition
+                  input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns a,b,s
+                    columns.types string:string:bigint
+#### A masked pattern was here ####
+                    name default.t1
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe
+                  serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns a,b,s
+                      columns.comments
+                      columns.types string:string:bigint
+#### A masked pattern was here ####
+                      name default.t1
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.t1
+                  name: default.t1
+            Truncated Path -> Alias:
+              nullscan://null/default.t1/part_ [t1]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    bucketingVersion: 2
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          bucketing_version -1
+                          columns _col0,_col1,_col2
+                          columns.types string:string:bigint
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Select Operator
+                    expressions: a (type: string), b (type: string), s (type: bigint)
+                    outputColumnNames: a, b, s
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(s)
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                      Filter Operator
+                        isSamplingPred: false
+                        predicate: (upper(_col0) = 'AAA') (type: boolean)
+                        Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          bucketingVersion: 2
+                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                          null sort order: zzz
+                          numBuckets: -1
+                          sort order: +++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                          Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          value expressions: _col3 (type: bigint)
+                          auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: t1
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns a,b,s
+                    columns.types string:string:bigint
+#### A masked pattern was here ####
+                    name default.t1
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns a,b,s
+                      columns.comments
+                      columns.types string:string:bigint
+#### A masked pattern was here ####
+                      name default.t1
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.t1
+                  name: default.t1
+            Truncated Path -> Alias:
+              /t1 [t1]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                pruneGroupingSetId: true
+                Filter Operator
+                  isSamplingPred: false
+                  predicate: (_col3 > 100L) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      bucketingVersion: 2
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+                      Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          properties:
+                            bucketing_version -1
+                            columns _col0,_col1,_col2
+                            columns.types string:string:bigint
+                            escape.delim \
+                            hive.serialization.extend.additional.nesting.levels true
+                            serialization.escape.crlf true
+                            serialization.format 1
+                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+aaa	bbb	123456
+aaa	NULL	123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: (upper(a) = 'AAA') (type: boolean)
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (upper(a) = 'AAA') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(s)
+                      keys: upper(a) (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        bucketingVersion: 2
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        null sort order: zzz
+                        numBuckets: -1
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
+                        tag: -1
+                        value expressions: _col3 (type: bigint)
+                        auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: t1
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns a,b,s
+                    columns.types string:string:bigint
+#### A masked pattern was here ####
+                    name default.t1
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns a,b,s
+                      columns.comments
+                      columns.types string:string:bigint
+#### A masked pattern was here ####
+                      name default.t1
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.t1
+                  name: default.t1
+            Truncated Path -> Alias:
+              /t1 [t1]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col1 (type: string), _col3 (type: bigint)
+                  outputColumnNames: _col1, _col3
+                  Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (_col3 > 100L) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: 'AAA' (type: string), _col1 (type: string), _col3 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        bucketingVersion: 2
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            properties:
+                              bucketing_version -1
+                              columns _col0,_col1,_col2
+                              columns.types string:string:bigint
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+AAA	bbb	123456
+AAA	NULL	123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: (upper(b) = 'BBB') (type: boolean)
+                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (upper(b) = 'BBB') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(s)
+                      keys: a (type: string), b (type: string), 0L (type: bigint)
+                      minReductionHashAggr: 0.0
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                      Filter Operator
+                        isSamplingPred: false
+                        predicate: _col0 is not null (type: boolean)
+                        Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          bucketingVersion: 2
+                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                          null sort order: zzz
+                          numBuckets: -1
+                          sort order: +++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                          Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          value expressions: _col3 (type: bigint)
+                          auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: t1
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns a,b,s
+                    columns.types string:string:bigint
+#### A masked pattern was here ####
+                    name default.t1
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns a,b,s
+                      columns.comments
+                      columns.types string:string:bigint
+#### A masked pattern was here ####
+                      name default.t1
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.t1
+                  name: default.t1
+            Truncated Path -> Alias:
+              /t1 [t1]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                pruneGroupingSetId: true
+                Filter Operator
+                  isSamplingPred: false
+                  predicate: (_col3 > 100L) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      bucketingVersion: 2
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+                      Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          properties:
+                            bucketing_version -1
+                            columns _col0,_col1,_col2
+                            columns.types string:string:bigint
+                            escape.delim \
+                            hive.serialization.extend.additional.nesting.levels true
+                            serialization.escape.crlf true
+                            serialization.format 1
+                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+aaa	bbb	123456