This is an automated email from the ASF dual-hosted git repository. jcamacho pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
commit 86781643c3d90887c04551aecf5b5d54b86fbe48 Author: Jesus Camacho Rodriguez <jcama...@apache.org> AuthorDate: Fri Jul 17 08:11:39 2020 -0700 Revert "Allow PPD when subject is not a column with grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)" This reverts commit 44aa72f096639d7b1a52ef18887016af98bd6999. --- .../apache/hadoop/hive/ql/ppd/OpProcFactory.java | 44 +- .../groupby_grouping_sets_pushdown1.q | 54 +- .../llap/groupby_grouping_sets_pushdown1.q.out | 802 --------------------- 3 files changed, 23 insertions(+), 877 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java index 56d3e90..6c66260 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java @@ -792,40 +792,40 @@ public final class OpProcFactory { return null; } - private void getGBYKeyPosFromExpr(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys, - List<Integer> gbyKeyPos) { - for (int i = 0; i < groupByKeys.size(); i++) { - if (groupByKeys.get(i).isSame(expr)) { - gbyKeyPos.add(i); - return; - } - } - if (expr.getChildren() != null) { - for (int i = 0; i < expr.getChildren().size(); i++) { - getGBYKeyPosFromExpr(expr.getChildren().get(i), groupByKeys, gbyKeyPos); - } - } - } - private boolean canPredPushdown(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys, FastBitSet[] bitSets, int groupingSetPosition) { - List<Integer> gbyKeyPos = new ArrayList<Integer>(); - getGBYKeyPosFromExpr(expr, groupByKeys, gbyKeyPos); - // gbyKeysInExpr can be empty, maybe the expr is a boolean constant, let the expr push down - for (Integer pos : gbyKeyPos) { + List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>(); + extractCols(expr, columns); + for (ExprNodeDesc col : columns) { + int index = groupByKeys.indexOf(col); + assert index >= 0; for (FastBitSet bitset : bitSets) { int keyPos = bitset.nextClearBit(0); - while (keyPos < groupingSetPosition && keyPos != pos) { + while (keyPos < groupingSetPosition && keyPos != index) { keyPos = bitset.nextClearBit(keyPos + 1); } - // If the gbyKey has not be found in grouping sets, the expr should not be pushed down - if (keyPos != pos) { + // If the column has not be found in grouping sets, the expr should not be pushed down + if (keyPos != index) { return false; } } } return true; } + + // Extract columns from expression + private void extractCols(ExprNodeDesc expr, List<ExprNodeDesc> columns) { + if (expr instanceof ExprNodeColumnDesc) { + columns.add(expr); + } + + if (expr instanceof ExprNodeGenericFuncDesc) { + List<ExprNodeDesc> children = expr.getChildren(); + for (int i = 0; i < children.size(); ++i) { + extractCols(children.get(i), columns); + } + } + } } /** diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q index cbfe58c..ce2c68c 100644 --- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q +++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q @@ -39,56 +39,4 @@ SELECT * FROM ( SELECT a, b, sum(s) FROM T1 GROUP BY a, b GROUPING SETS ((a), (a, b)) -) t WHERE b IS NULL; - -EXPLAIN EXTENDED SELECT * FROM ( -SELECT upper(a) x, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -) t WHERE x in ("AAA", "BBB"); - -SELECT * FROM ( -SELECT upper(a) x, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -) t WHERE x in ('AAA', 'BBB'); - -EXPLAIN EXTENDED SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -HAVING upper(a) = 'AAA' AND 1 != 1; - -SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -HAVING upper(a) = 'AAA' AND 1 != 1; - -EXPLAIN EXTENDED SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((), (a), (a, b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100; - -SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((), (a), (a, b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100; - -EXPLAIN EXTENDED SELECT upper(a), b, sum(s) -FROM T1 -GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100; - -SELECT upper(a), b, sum(s) -FROM T1 -GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100; - -EXPLAIN EXTENDED SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((b), (a, b)) -HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'; - -SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((b), (a, b)) -HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'; +) t WHERE b IS NULL; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out index 81fdd06..2d71757 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out @@ -643,805 +643,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### aaa NULL 123456 -PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM ( -SELECT upper(a) x, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -) t WHERE x in ("AAA", "BBB") -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM ( -SELECT upper(a) x, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -) t WHERE x in ("AAA", "BBB") -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - filterExpr: (upper(a)) IN ('AAA', 'BBB') (type: boolean) - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (upper(a)) IN ('AAA', 'BBB') (type: boolean) - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(s) - keys: a (type: string), b (type: string), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - bucketingVersion: 2 - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zzz - numBuckets: -1 - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns a,b,s - columns.types string:string:bigint -#### A masked pattern was here #### - name default.t1 - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucketing_version 2 - column.name.delimiter , - columns a,b,s - columns.comments - columns.types string:string:bigint -#### A masked pattern was here #### - name default.t1 - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - Reducer 2 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - Select Operator - expressions: upper(_col0) (type: string), _col1 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types string:string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT * FROM ( -SELECT upper(a) x, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -) t WHERE x in ('AAA', 'BBB') -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM ( -SELECT upper(a) x, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -) t WHERE x in ('AAA', 'BBB') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -AAA bbb 123456 -AAA NULL 123456 -PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -HAVING upper(a) = 'AAA' AND 1 != 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -HAVING upper(a) = 'AAA' AND 1 != 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: false (type: boolean) - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(s) - keys: a (type: string), b (type: string), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - bucketingVersion: 2 - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zzz - numBuckets: -1 - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: - nullscan://null/default.t1/part_ [t1] - Path -> Partition: - nullscan://null/default.t1/part_ - Partition - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns a,b,s - columns.types string:string:bigint -#### A masked pattern was here #### - name default.t1 - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucketing_version 2 - column.name.delimiter , - columns a,b,s - columns.comments - columns.types string:string:bigint -#### A masked pattern was here #### - name default.t1 - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - nullscan://null/default.t1/part_ [t1] - Reducer 2 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types string:string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -HAVING upper(a) = 'AAA' AND 1 != 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((a), (a, b)) -HAVING upper(a) = 'AAA' AND 1 != 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((), (a), (a, b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((), (a), (a, b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: a (type: string), b (type: string), s (type: bigint) - outputColumnNames: a, b, s - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(s) - keys: a (type: string), b (type: string), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - isSamplingPred: false - predicate: (upper(_col0) = 'AAA') (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - bucketingVersion: 2 - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zzz - numBuckets: -1 - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns a,b,s - columns.types string:string:bigint -#### A masked pattern was here #### - name default.t1 - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucketing_version 2 - column.name.delimiter , - columns a,b,s - columns.comments - columns.types string:string:bigint -#### A masked pattern was here #### - name default.t1 - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - Reducer 2 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - Filter Operator - isSamplingPred: false - predicate: (_col3 > 100L) (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types string:string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((), (a), (a, b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((), (a), (a, b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -aaa bbb 123456 -aaa NULL 123456 -PREHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s) -FROM T1 -GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s) -FROM T1 -GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - filterExpr: (upper(a) = 'AAA') (type: boolean) - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (upper(a) = 'AAA') (type: boolean) - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(s) - keys: upper(a) (type: string), b (type: string), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - bucketingVersion: 2 - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zzz - numBuckets: -1 - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns a,b,s - columns.types string:string:bigint -#### A masked pattern was here #### - name default.t1 - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucketing_version 2 - column.name.delimiter , - columns a,b,s - columns.comments - columns.types string:string:bigint -#### A masked pattern was here #### - name default.t1 - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - Reducer 2 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - Select Operator - expressions: _col1 (type: string), _col3 (type: bigint) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - isSamplingPred: false - predicate: (_col3 > 100L) (type: boolean) - Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'AAA' (type: string), _col1 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types string:string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT upper(a), b, sum(s) -FROM T1 -GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT upper(a), b, sum(s) -FROM T1 -GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b)) -HAVING upper(a) = 'AAA' AND sum(s) > 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -AAA bbb 123456 -AAA NULL 123456 -PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((b), (a, b)) -HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB' -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((b), (a, b)) -HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - filterExpr: (upper(b) = 'BBB') (type: boolean) - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (upper(b) = 'BBB') (type: boolean) - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(s) - keys: a (type: string), b (type: string), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - bucketingVersion: 2 - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zzz - numBuckets: -1 - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns a,b,s - columns.types string:string:bigint -#### A masked pattern was here #### - name default.t1 - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucketing_version 2 - column.name.delimiter , - columns a,b,s - columns.comments - columns.types string:string:bigint -#### A masked pattern was here #### - name default.t1 - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] - Reducer 2 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - pruneGroupingSetId: true - Filter Operator - isSamplingPred: false - predicate: (_col3 > 100L) (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - bucketingVersion: 2 - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - bucketing_version -1 - columns _col0,_col1,_col2 - columns.types string:string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((b), (a, b)) -HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB' -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT a, b, sum(s) -FROM T1 -GROUP BY a, b GROUPING SETS ((b), (a, b)) -HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -aaa bbb 123456