This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git.
The following commit(s) were added to refs/heads/master by this push:
     new d22fa4a  HIVE-25170: Fix wrong colExprMap generated by SemanticAnalyzer (#2331) (Wei Zhang reviewed by Zoltan Haindrich)
d22fa4a is described below

commit d22fa4af5db01a99611790ed10cfddcab327823f
Author: Wei Zhang <zhangwei...@126.com>
AuthorDate: Fri Aug 6 21:27:34 2021 +0800

    HIVE-25170: Fix wrong colExprMap generated by SemanticAnalyzer (#2331) (Wei Zhang reviewed by Zoltan Haindrich)
---
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |   2 +-
 .../queries/clientpositive/constant_key_column.q   |  29 ++++
 .../clientpositive/llap/constant_key_column.q.out  | 175 +++++++++++++++++++++
 3 files changed, 205 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index a33d0fe..fdb6cc4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9021,7 +9021,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
     List<String> keyColNames = rsdesc.getOutputKeyColumnNames();
     for (int i = 0 ; i < keyColNames.size(); i++) {
-      colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), sortCols.get(i));
+      colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), newSortCols.get(i));
     }
     List<String> valueColNames = rsdesc.getOutputValueColumnNames();
     for (int i = 0 ; i < valueColNames.size(); i++) {
diff --git a/ql/src/test/queries/clientpositive/constant_key_column.q b/ql/src/test/queries/clientpositive/constant_key_column.q
new file mode 100644
index 0000000..28d3a16
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constant_key_column.q
@@ -0,0 +1,29 @@
+--! 
qt:dataset:src +SET hive.remove.orderby.in.subquery=false; + +-- SORT_QUERY_RESULTS + +EXPLAIN +SELECT constant_col, key, max(value) +FROM +( + SELECT 'constant' as constant_col, key, value + FROM src + DISTRIBUTE BY constant_col, key + SORT BY constant_col, key, value +) a +GROUP BY constant_col, key +ORDER BY constant_col, key +LIMIT 10; + +SELECT constant_col, key, max(value) +FROM +( + SELECT 'constant' as constant_col, key, value + FROM src + DISTRIBUTE BY constant_col, key + SORT BY constant_col, key, value +) a +GROUP BY constant_col, key +ORDER BY constant_col, key +LIMIT 10; diff --git a/ql/src/test/results/clientpositive/llap/constant_key_column.q.out b/ql/src/test/results/clientpositive/llap/constant_key_column.q.out new file mode 100644 index 0000000..a1a843d --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/constant_key_column.q.out @@ -0,0 +1,175 @@ +PREHOOK: query: EXPLAIN +SELECT constant_col, key, max(value) +FROM +( + SELECT 'constant' as constant_col, key, value + FROM src + DISTRIBUTE BY constant_col, key + SORT BY constant_col, key, value +) a +GROUP BY constant_col, key +ORDER BY constant_col, key +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT constant_col, key, max(value) +FROM +( + SELECT 'constant' as constant_col, key, value + FROM src + DISTRIBUTE BY constant_col, key + SORT BY constant_col, key, value +) a +GROUP BY constant_col, key +ORDER BY constant_col, key +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: 'constant' (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(_col2) + keys: 'constant' (type: string), _col1 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316 Data size: 114708 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: _col1 (type: string) + null sort order: z + Statistics: Num rows: 316 Data size: 114708 Basic stats: COMPLETE Column stats: COMPLETE + top n: 10 + Reduce Output Operator + key expressions: 'constant' (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: 'constant' (type: string), _col1 (type: string) + Statistics: Num rows: 316 Data size: 114708 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: 'constant' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 316 Data size: 114708 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: _col1 (type: string) + null sort order: z + Statistics: Num rows: 316 Data size: 114708 Basic stats: COMPLETE Column stats: COMPLETE + top n: 10 + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 316 Data size: 85636 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 'constant' (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 316 Data size: 85636 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: 'constant' (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316 Data size: 114708 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 3630 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3630 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT constant_col, key, max(value) +FROM +( + SELECT 'constant' as constant_col, key, value + FROM src + DISTRIBUTE BY constant_col, key + SORT BY constant_col, key, value +) a +GROUP BY constant_col, key +ORDER BY constant_col, key +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT constant_col, key, 
max(value) +FROM +( + SELECT 'constant' as constant_col, key, value + FROM src + DISTRIBUTE BY constant_col, key + SORT BY constant_col, key, value +) a +GROUP BY constant_col, key +ORDER BY constant_col, key +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +constant 0 val_0 +constant 10 val_10 +constant 100 val_100 +constant 103 val_103 +constant 104 val_104 +constant 105 val_105 +constant 11 val_11 +constant 111 val_111 +constant 113 val_113 +constant 114 val_114