This is an automated email from the ASF dual-hosted git repository.

veghlaci05 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 4d23badbd9c  HIVE-27303: Set correct output name to ReduceSink when there is a SMB join after Union (Seonggon Namgung, reviewed by Denys Kuzmenko, Laszlo Vegh)
4d23badbd9c is described below

commit 4d23badbd9c0a158f4ee4faee73ef5086dd2993b
Author: seonggon <ln...@postech.ac.kr>
AuthorDate: Mon Aug 21 18:30:25 2023 +0900

    HIVE-27303: Set correct output name to ReduceSink when there is a SMB join after Union (Seonggon Namgung, reviewed by Denys Kuzmenko, Laszlo Vegh)
---
 .../apache/hadoop/hive/ql/parse/GenTezWork.java    |  18 +-
 .../queries/clientpositive/smb_join_after_union.q  |  56 ++++
 .../clientpositive/llap/smb_join_after_union.q.out | 320 +++++++++++++++++++++
 3 files changed, 393 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
index 1385f6514ba..736e562c1af 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
@@ -23,6 +23,7 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Objects;
 import java.util.Stack;
 
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -423,7 +424,7 @@ public class GenTezWork implements SemanticNodeProcessor {
       rWork.getTagToInput().put(tag == -1 ? 0 : tag, work.getName());
 
       // remember the output name of the reduce sink
-      rs.getConf().setOutputName(rWork.getName());
+      rs.getConf().setOutputName(getActualOutputWorkName(context, rWork));
 
       // For dynamic partitioned hash join, run the ReduceSinkMapJoinProc logic for any
       // ReduceSink parents that we missed.
@@ -514,4 +515,19 @@ public class GenTezWork implements SemanticNodeProcessor {
     unionWork.addUnionOperators(context.currentUnionOperators);
     context.workWithUnionOperators.add(work);
   }
+
+  /**
+   * If the given reduceWork is the merged work of a MergeJoinWork, return the name of that MergeJoinWork.
+   * Otherwise, return the name of the given reduceWork.
+   */
+  private String getActualOutputWorkName(GenTezProcContext context, ReduceWork reduceWork) {
+    return context.opMergeJoinWorkMap.values().stream()
+        .filter(mergeJoinWork -> mergeJoinWork.getBaseWorkList().contains(reduceWork))
+        .map(MergeJoinWork::getMainWork)
+        // getMainWork() == null means that we have not visited the leaf Operator of MergeJoinWork.
+        // In this case, GenTezWork will adjust the output name of merged works
+        // by calling MergeJoinWork.addMergedWork() with a non-null argument for parameter work.
+        .filter(Objects::nonNull)
+        .findAny().orElse(reduceWork).getName();
+  }
 }
diff --git a/ql/src/test/queries/clientpositive/smb_join_after_union.q b/ql/src/test/queries/clientpositive/smb_join_after_union.q
new file mode 100644
index 00000000000..62cb72296ad
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/smb_join_after_union.q
@@ -0,0 +1,56 @@
+-- SORT_QUERY_RESULTS
+
+create external table hive1_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string);
+create external table hive2_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string);
+create external table hive3_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string);
+create external table hive4_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string);
+
+insert into table hive1_tbl_data select '00001','john','doe','j...@hotmail.com','2014-01-01 12:01:02','4000-10000';
+insert into table hive1_tbl_data select '00002','john','doe','j...@hotmail.com','2014-01-01 12:01:02','4000-10000';
+insert into table hive2_tbl_data select '00001','john','doe','j...@hotmail.com','2014-01-01 12:01:02','00001';
+insert into table hive2_tbl_data select '00002','john','doe','j...@hotmail.com','2014-01-01 12:01:02','00001';
+
+-- Reference, without SMB join
+set hive.auto.convert.sortmerge.join=false;
+
+select t.COLUMID from
+(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t
+left join
+(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1
+on t.COLUMID = t1.COLUMID where t1.COLUMID is null;
+
+-- HIVE-27303
+-- The following list is the expected OperatorGraph for the query.
+-- (Path1) TS0-SEL1-UNION4
+-- (Path2) TS2-SEL3-UNION4-GBY7-RS8-GBY9-MERGEJOIN40
+-- (Path3) TS11-FIL32-SEL13-UNION17
+-- (Path4) TS14-FIL33-SEL16-UNION17-GBY20-RS21-GBY22-DUMMYSTORE41-MERGEJOIN40-FIL27-SEL28-FS29
+-- With the previous implementation, the HIVE-27303 issue arises through the following steps.
+-- TezCompiler.generateTaskTree() traverses the OperatorGraph in the following order: Path1 -> Path2 -> Path3 -> Path4.
+-- While traversing Path4, TezCompiler changes the output of RS21 to the merged ReduceWork (Root: GBY22, Name: Reduce7)
+-- contained in the MergeJoinWork (Root: GBY9, Name: Reduce3) for MERGEJOIN40.
+-- The output should instead be adjusted to the MergeJoinWork, because a merged work is not a regular Tez vertex.
+-- However, TezCompiler already visited the RS21-GBY22 path during Path3 and cut the parent-child relationship between them.
+-- Therefore, TezCompiler does not visit MERGEJOIN40 during Path4, and the output name of RS21 is set to a non-existent vertex.
+ +-- Use SMB join +set hive.auto.convert.join=false; +set hive.auto.convert.sortmerge.join=true; +set hive.optimize.dynamic.partition.hashjoin=false; +set hive.disable.unsafe.external.table.operations=false; + +-- Prevent using AntiJoin +set hive.auto.convert.anti.join=false; + +explain +select t.COLUMID from +(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t +left join +(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1 +on t.COLUMID = t1.COLUMID where t1.COLUMID is null; + +select t.COLUMID from +(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t +left join +(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1 +on t.COLUMID = t1.COLUMID where t1.COLUMID is null; diff --git a/ql/src/test/results/clientpositive/llap/smb_join_after_union.q.out b/ql/src/test/results/clientpositive/llap/smb_join_after_union.q.out new file mode 100644 index 00000000000..6e1bb43084b --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/smb_join_after_union.q.out @@ -0,0 +1,320 @@ +PREHOOK: query: create external table hive1_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hive1_tbl_data +POSTHOOK: query: create external table hive1_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hive1_tbl_data +PREHOOK: query: create external table hive2_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hive2_tbl_data +POSTHOOK: query: create external table hive2_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hive2_tbl_data +PREHOOK: query: create external table hive3_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hive3_tbl_data +POSTHOOK: query: create external table hive3_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hive3_tbl_data +PREHOOK: query: create external table hive4_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hive4_tbl_data +POSTHOOK: query: create external table hive4_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hive4_tbl_data +PREHOOK: query: insert into table hive1_tbl_data select '00001','john','doe','j...@hotmail.com','2014-01-01 12:01:02','4000-10000' 
+PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hive1_tbl_data +POSTHOOK: query: insert into table hive1_tbl_data select '00001','john','doe','j...@hotmail.com','2014-01-01 12:01:02','4000-10000' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hive1_tbl_data +POSTHOOK: Lineage: hive1_tbl_data.col_updated_date EXPRESSION [] +POSTHOOK: Lineage: hive1_tbl_data.columid SIMPLE [] +POSTHOOK: Lineage: hive1_tbl_data.column_fn SIMPLE [] +POSTHOOK: Lineage: hive1_tbl_data.column_ln SIMPLE [] +POSTHOOK: Lineage: hive1_tbl_data.email SIMPLE [] +POSTHOOK: Lineage: hive1_tbl_data.pk_colum SIMPLE [] +PREHOOK: query: insert into table hive1_tbl_data select '00002','john','doe','j...@hotmail.com','2014-01-01 12:01:02','4000-10000' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hive1_tbl_data +POSTHOOK: query: insert into table hive1_tbl_data select '00002','john','doe','j...@hotmail.com','2014-01-01 12:01:02','4000-10000' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hive1_tbl_data +POSTHOOK: Lineage: hive1_tbl_data.col_updated_date EXPRESSION [] +POSTHOOK: Lineage: hive1_tbl_data.columid SIMPLE [] +POSTHOOK: Lineage: hive1_tbl_data.column_fn SIMPLE [] +POSTHOOK: Lineage: hive1_tbl_data.column_ln SIMPLE [] +POSTHOOK: Lineage: hive1_tbl_data.email SIMPLE [] +POSTHOOK: Lineage: hive1_tbl_data.pk_colum SIMPLE [] +PREHOOK: query: insert into table hive2_tbl_data select '00001','john','doe','j...@hotmail.com','2014-01-01 12:01:02','00001' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hive2_tbl_data +POSTHOOK: query: insert into table hive2_tbl_data select '00001','john','doe','j...@hotmail.com','2014-01-01 12:01:02','00001' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hive2_tbl_data +POSTHOOK: Lineage: hive2_tbl_data.col_updated_date EXPRESSION [] +POSTHOOK: Lineage: hive2_tbl_data.columid SIMPLE [] +POSTHOOK: Lineage: hive2_tbl_data.column_fn SIMPLE [] +POSTHOOK: Lineage: hive2_tbl_data.column_ln SIMPLE [] +POSTHOOK: Lineage: hive2_tbl_data.email SIMPLE [] +POSTHOOK: Lineage: hive2_tbl_data.pk_colum SIMPLE [] +PREHOOK: query: insert into table hive2_tbl_data select '00002','john','doe','j...@hotmail.com','2014-01-01 12:01:02','00001' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@hive2_tbl_data +POSTHOOK: query: insert into table hive2_tbl_data select '00002','john','doe','j...@hotmail.com','2014-01-01 12:01:02','00001' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@hive2_tbl_data +POSTHOOK: Lineage: hive2_tbl_data.col_updated_date EXPRESSION [] +POSTHOOK: Lineage: hive2_tbl_data.columid SIMPLE [] +POSTHOOK: Lineage: hive2_tbl_data.column_fn SIMPLE [] +POSTHOOK: Lineage: hive2_tbl_data.column_ln SIMPLE [] +POSTHOOK: Lineage: hive2_tbl_data.email SIMPLE [] +POSTHOOK: Lineage: hive2_tbl_data.pk_colum SIMPLE [] +PREHOOK: query: select t.COLUMID from +(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t +left join +(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1 +on t.COLUMID = t1.COLUMID where t1.COLUMID is null +PREHOOK: type: QUERY +PREHOOK: Input: default@hive1_tbl_data +PREHOOK: 
Input: default@hive2_tbl_data +PREHOOK: Input: default@hive3_tbl_data +PREHOOK: Input: default@hive4_tbl_data +#### A masked pattern was here #### +POSTHOOK: query: select t.COLUMID from +(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t +left join +(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1 +on t.COLUMID = t1.COLUMID where t1.COLUMID is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive1_tbl_data +POSTHOOK: Input: default@hive2_tbl_data +POSTHOOK: Input: default@hive3_tbl_data +POSTHOOK: Input: default@hive4_tbl_data +#### A masked pattern was here #### +PREHOOK: query: explain +select t.COLUMID from +(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t +left join +(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1 +on t.COLUMID = t1.COLUMID where t1.COLUMID is null +PREHOOK: type: QUERY +PREHOOK: Input: default@hive1_tbl_data +PREHOOK: Input: default@hive2_tbl_data +PREHOOK: Input: default@hive3_tbl_data +PREHOOK: Input: default@hive4_tbl_data +#### A masked pattern was here #### +POSTHOOK: query: explain +select t.COLUMID from +(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t +left join +(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1 +on t.COLUMID = t1.COLUMID where t1.COLUMID is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive1_tbl_data +POSTHOOK: Input: default@hive2_tbl_data +POSTHOOK: Input: default@hive3_tbl_data +POSTHOOK: Input: default@hive4_tbl_data +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) + Map 5 <- Union 6 (CONTAINS) + Map 8 <- Union 6 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hive3_tbl_data + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: columid (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: hive1_tbl_data + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: columid (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + 
Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: hive4_tbl_data + filterExpr: columid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: columid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: columid (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 8 + Map Operator Tree: + TableScan + alias: hive2_tbl_data + filterExpr: columid is not null (type: boolean) + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: columid is not null (type: boolean) + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: columid (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Dummy Store + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is null (type: boolean) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + Union 6 + Vertex: Union 6 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t.COLUMID from +(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t +left join +(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1 +on t.COLUMID = t1.COLUMID where t1.COLUMID is null +PREHOOK: type: QUERY +PREHOOK: Input: default@hive1_tbl_data +PREHOOK: Input: default@hive2_tbl_data +PREHOOK: Input: default@hive3_tbl_data +PREHOOK: Input: default@hive4_tbl_data +#### A masked pattern was here #### +POSTHOOK: query: select t.COLUMID from +(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t +left join +(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1 +on t.COLUMID = t1.COLUMID where t1.COLUMID is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive1_tbl_data +POSTHOOK: Input: default@hive2_tbl_data +POSTHOOK: Input: default@hive3_tbl_data +POSTHOOK: Input: default@hive4_tbl_data +#### A masked pattern was here ####
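
For readers following the fix without the Hive sources at hand, below is a minimal, self-contained sketch (not part of the commit) of the lookup that GenTezWork.getActualOutputWorkName() performs. Work, MergeJoin, OutputNameLookupSketch, and actualOutputName are hypothetical stand-ins for Hive's BaseWork/ReduceWork/MergeJoinWork machinery: if a ReduceWork was merged into a MergeJoinWork, the ReduceSink's output must name the merge join's main work (the real Tez vertex); otherwise it keeps the ReduceWork's own name.

// Illustrative sketch only -- Work and MergeJoin are hypothetical stand-ins, not Hive classes.
import java.util.List;
import java.util.Map;
import java.util.Objects;

public class OutputNameLookupSketch {

  record Work(String name) {}

  // Groups the works merged into one SMB join under the main (vertex-owning) work.
  record MergeJoin(Work mainWork, List<Work> mergedWorks) {}

  // Same shape as GenTezWork.getActualOutputWorkName(): prefer the enclosing merge join's
  // main work; fall back to the ReduceWork itself when it was never merged.
  static String actualOutputName(Map<String, MergeJoin> mergeJoins, Work reduceWork) {
    return mergeJoins.values().stream()
        .filter(mj -> mj.mergedWorks().contains(reduceWork))
        .map(MergeJoin::mainWork)
        .filter(Objects::nonNull)          // the main work may still be unknown at this point
        .findAny()
        .orElse(reduceWork)
        .name();
  }

  public static void main(String[] args) {
    Work reduce7 = new Work("Reduce7");    // merged work holding GBY22 in the example above
    Work reduce3 = new Work("Reduce3");    // main work of MERGEJOIN40
    Map<String, MergeJoin> mergeJoins =
        Map.of("MERGEJOIN40", new MergeJoin(reduce3, List.of(reduce7)));

    // RS21 feeds a merged work, so its output name must be the merge join's main work.
    System.out.println(actualOutputName(mergeJoins, reduce7));             // prints Reduce3
    // A ReduceWork outside any merge join keeps its own name.
    System.out.println(actualOutputName(mergeJoins, new Work("Reduce5"))); // prints Reduce5
  }
}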