This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/branch-3 by this push: new 2e3b7d3a7e7 HIVE-27602: Backport HIVE-21915: Hive with TEZ UNION ALL and UDTF results in data loss (Wei Zhang, reviewed by Vineet Garg) 2e3b7d3a7e7 is described below commit 2e3b7d3a7e73d94457553d2c181dc2c3f970b4bb Author: Aman Raj <104416558+amanraj2...@users.noreply.github.com> AuthorDate: Fri Aug 18 19:46:52 2023 +0530 HIVE-27602: Backport HIVE-21915: Hive with TEZ UNION ALL and UDTF results in data loss (Wei Zhang, reviewed by Vineet Garg) Signed-off-by: Sankar Hariappan <sank...@apache.org> Closes (#4581) --- .../test/resources/testconfiguration.properties | 3 +- .../apache/hadoop/hive/ql/parse/GenTezUtils.java | 6 +- .../test/queries/clientpositive/tez_union_udtf.q | 22 ++++ .../clientpositive/tez/tez_union_udtf.q.out | 131 +++++++++++++++++++++ 4 files changed, 160 insertions(+), 2 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index b602d7b9413..aac8218d079 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -62,7 +62,8 @@ minitez.query.files=acid_vectorization_original_tez.q,\ hybridgrace_hashjoin_2.q,\ multi_count_distinct.q,\ tez-tag.q,\ - tez_union_with_udf.q + tez_union_with_udf.q,\ + tez_union_udtf.q minillap.shared.query.files=insert_into1.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index 7188a0d9754..c1888bc0acb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -298,7 +298,11 @@ public class GenTezUtils { FileSinkOperator fileSink = (FileSinkOperator)current; // remember it for additional processing later - context.fileSinkSet.add(fileSink); + if (context.fileSinkSet.contains(fileSink)) { + continue; + } else { + 
context.fileSinkSet.add(fileSink); + } FileSinkDesc desc = fileSink.getConf(); Path path = desc.getDirName(); diff --git a/ql/src/test/queries/clientpositive/tez_union_udtf.q b/ql/src/test/queries/clientpositive/tez_union_udtf.q new file mode 100644 index 00000000000..ed58cfd5508 --- /dev/null +++ b/ql/src/test/queries/clientpositive/tez_union_udtf.q @@ -0,0 +1,22 @@ +--! qt:dataset:src1 +--! qt:dataset:src +set hive.merge.tezfiles=true; +-- SORT_BEFORE_DIFF + +EXPLAIN +CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238'; + +CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238'; + +SELECT * FROM x; + diff --git a/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out new file mode 100644 index 00000000000..1ec9c3feb4e --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out @@ -0,0 +1,131 @@ +PREHOOK: query: EXPLAIN +CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238' +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: EXPLAIN +CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238' +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +Plan not optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Union 2 (CONTAINS) +Map 3 <- Union 2 (CONTAINS) + +Stage-3 + Stats Work{} + Stage-9 + Create Table Operator: + name:default.x + Stage-2 + Dependency Collection{} + Stage-5(CONDITIONAL) + Move Operator + Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) + Conditional Operator + Stage-1 + Union 2 + <-Map 1 [CONTAINS] vectorized + File Output Operator [FS_38] + table:{"name:":"default.x"} + Select Operator [SEL_37] (rows=6 width=91) + Output:["_col0","_col1"] + Select Operator [SEL_36] (rows=2 width=91) + Output:["_col1"] + Filter Operator [FIL_35] (rows=2 width=87) + predicate:(key = '238') + TableScan [TS_16] (rows=500 width=87) + Output:["key"] + <-Map 3 [CONTAINS] + File Output Operator [FS_32] + table:{"name:":"default.x"} + Select Operator [SEL_31] (rows=6 width=91) + Output:["_col0","_col1"] + Select Operator [SEL_29] (rows=4 width=87) + Output:["_col1"] + Lateral View Join Operator [LVJ_27] (rows=4 width=239) + Output:["_col5"] + Select Operator [SEL_25] (rows=2 width=431) + Lateral View Forward [LVF_24] (rows=2 width=86) + Filter Operator [FIL_23] (rows=2 width=86) + predicate:(key = '238') + TableScan [TS_22] (rows=25 width=86) + Output:["key"] + File Output Operator [FS_32] + table:{"name:":"default.x"} + Select Operator [SEL_31] (rows=6 width=91) + Output:["_col0","_col1"] + Select Operator [SEL_29] (rows=4 width=87) + Output:["_col1"] + Lateral View Join Operator [LVJ_27] (rows=4 width=239) + Output:["_col5"] + UDTF Operator [UDTF_28] (rows=2 width=48) + function name:explode + Select Operator [SEL_26] (rows=2 width=48) + Output:["_col0"] + Please refer to the previous Lateral View Forward [LVF_24] + Stage-4(CONDITIONAL) + File Merge + Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) + Stage-7 + Move Operator + Stage-6(CONDITIONAL) + File Merge + Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) + Stage-0 + Move Operator + 
Please refer to the previous Stage-5(CONDITIONAL) + Please refer to the previous Stage-4(CONDITIONAL) + Please refer to the previous Stage-7 + +PREHOOK: query: CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238' +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238' +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +POSTHOOK: Lineage: x.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: x.tag EXPRESSION [] +PREHOOK: query: SELECT * FROM x +PREHOOK: type: QUERY +PREHOOK: Input: default@x +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@x +POSTHOOK: Output: hdfs://### HDFS PATH ### +238 1 +238 1 +238 2