This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
     new 2e3b7d3a7e7 HIVE-27602: Backport HIVE-21915: Hive with TEZ UNION ALL and UDTF results in data loss (Wei Zhang, reviewed by Vineet Garg)
2e3b7d3a7e7 is described below

commit 2e3b7d3a7e73d94457553d2c181dc2c3f970b4bb
Author: Aman Raj <104416558+amanraj2...@users.noreply.github.com>
AuthorDate: Fri Aug 18 19:46:52 2023 +0530

    HIVE-27602: Backport HIVE-21915: Hive with TEZ UNION ALL and UDTF results in data loss (Wei Zhang, reviewed by Vineet Garg)
    
    Signed-off-by: Sankar Hariappan <sank...@apache.org>
    Closes (#4581)
---
 .../test/resources/testconfiguration.properties    |   3 +-
 .../apache/hadoop/hive/ql/parse/GenTezUtils.java   |   6 +-
 .../test/queries/clientpositive/tez_union_udtf.q   |  22 ++++
 .../clientpositive/tez/tez_union_udtf.q.out        | 131 +++++++++++++++++++++
 4 files changed, 160 insertions(+), 2 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b602d7b9413..aac8218d079 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -62,7 +62,8 @@ minitez.query.files=acid_vectorization_original_tez.q,\
   hybridgrace_hashjoin_2.q,\
   multi_count_distinct.q,\
   tez-tag.q,\
-  tez_union_with_udf.q
+  tez_union_with_udf.q,\
+  tez_union_udtf.q
 
 
 minillap.shared.query.files=insert_into1.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 7188a0d9754..c1888bc0acb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -298,7 +298,11 @@ public class GenTezUtils {
         FileSinkOperator fileSink = (FileSinkOperator)current;
 
         // remember it for additional processing later
-        context.fileSinkSet.add(fileSink);
+        if (context.fileSinkSet.contains(fileSink)) {
+          continue;
+        } else {
+          context.fileSinkSet.add(fileSink);
+        }
 
         FileSinkDesc desc = fileSink.getConf();
         Path path = desc.getDirName();
diff --git a/ql/src/test/queries/clientpositive/tez_union_udtf.q b/ql/src/test/queries/clientpositive/tez_union_udtf.q
new file mode 100644
index 00000000000..ed58cfd5508
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez_union_udtf.q
@@ -0,0 +1,22 @@
+--! qt:dataset:src1
+--! qt:dataset:src
+set hive.merge.tezfiles=true;
+-- SORT_BEFORE_DIFF
+
+EXPLAIN
+CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238';
+
+CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238';
+
+SELECT * FROM x;
+
diff --git a/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out
new file mode 100644
index 00000000000..1ec9c3feb4e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out
@@ -0,0 +1,131 @@
+PREHOOK: query: EXPLAIN
+CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238'
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x
+POSTHOOK: query: EXPLAIN
+CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238'
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x
+Plan not optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Union 2 (CONTAINS)
+Map 3 <- Union 2 (CONTAINS)
+
+Stage-3
+  Stats Work{}
+    Stage-9
+      Create Table Operator:
+        name:default.x
+        Stage-2
+          Dependency Collection{}
+            Stage-5(CONDITIONAL)
+              Move Operator
+                Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6)
+                  Conditional Operator
+                    Stage-1
+                      Union 2
+                      <-Map 1 [CONTAINS] vectorized
+                        File Output Operator [FS_38]
+                          table:{"name:":"default.x"}
+                          Select Operator [SEL_37] (rows=6 width=91)
+                            Output:["_col0","_col1"]
+                            Select Operator [SEL_36] (rows=2 width=91)
+                              Output:["_col1"]
+                              Filter Operator [FIL_35] (rows=2 width=87)
+                                predicate:(key = '238')
+                                TableScan [TS_16] (rows=500 width=87)
+                                  Output:["key"]
+                      <-Map 3 [CONTAINS]
+                        File Output Operator [FS_32]
+                          table:{"name:":"default.x"}
+                          Select Operator [SEL_31] (rows=6 width=91)
+                            Output:["_col0","_col1"]
+                            Select Operator [SEL_29] (rows=4 width=87)
+                              Output:["_col1"]
+                              Lateral View Join Operator [LVJ_27] (rows=4 width=239)
+                                Output:["_col5"]
+                                Select Operator [SEL_25] (rows=2 width=431)
+                                  Lateral View Forward [LVF_24] (rows=2 width=86)
+                                    Filter Operator [FIL_23] (rows=2 width=86)
+                                      predicate:(key = '238')
+                                      TableScan [TS_22] (rows=25 width=86)
+                                        Output:["key"]
+                        File Output Operator [FS_32]
+                          table:{"name:":"default.x"}
+                          Select Operator [SEL_31] (rows=6 width=91)
+                            Output:["_col0","_col1"]
+                            Select Operator [SEL_29] (rows=4 width=87)
+                              Output:["_col1"]
+                              Lateral View Join Operator [LVJ_27] (rows=4 width=239)
+                                Output:["_col5"]
+                                UDTF Operator [UDTF_28] (rows=2 width=48)
+                                  function name:explode
+                                  Select Operator [SEL_26] (rows=2 width=48)
+                                    Output:["_col0"]
+                                     Please refer to the previous Lateral View Forward [LVF_24]
+            Stage-4(CONDITIONAL)
+              File Merge
+                 Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6)
+            Stage-7
+              Move Operator
+                Stage-6(CONDITIONAL)
+                  File Merge
+                     Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6)
+        Stage-0
+          Move Operator
+             Please refer to the previous Stage-5(CONDITIONAL)
+             Please refer to the previous Stage-4(CONDITIONAL)
+             Please refer to the previous Stage-7
+
+PREHOOK: query: CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238'
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x
+POSTHOOK: query: CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238'
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x
+POSTHOOK: Lineage: x.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: x.tag EXPRESSION []
+PREHOOK: query: SELECT * FROM x
+PREHOOK: type: QUERY
+PREHOOK: Input: default@x
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT * FROM x
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@x
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+238    1
+238    1
+238    2

Reply via email to