[ https://issues.apache.org/jira/browse/HIVE-21054?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
daveKim updated HIVE-21054: --------------------------- Description: i make 3 tables {code:java} // 코드 자리 표시자 CREATE TABLE `testdb`.`tab` ( stn1 STRING COMMENT '' , int2 BIGINT COMMENT '' , float3 FLOAT COMMENT '' , long4 STRING COMMENT '' , double5 DOUBLE COMMENT '' , boolean6 BOOLEAN COMMENT '' , timestamp7 TIMESTAMP COMMENT '' , words8 STRING COMMENT '' , email9 STRING COMMENT '' , time10 STRING COMMENT '' , int11 BIGINT COMMENT '' , float12 FLOAT COMMENT '' , long13 STRING COMMENT '' , double14 DOUBLE COMMENT '' , boolean15 BOOLEAN COMMENT '' , timestamp16 TIMESTAMP COMMENT '' , email17 STRING COMMENT '' , time18 STRING COMMENT '' , list19 STRING COMMENT '' , id20 STRING COMMENT '' , sqltime21 TIMESTAMP COMMENT '' , ctype22 INT COMMENT '' , reg23 STRING COMMENT '' ) CREATE TABLE `testdb`.`space` ( stn1 STRING COMMENT '' , int2 BIGINT COMMENT '' , float3 FLOAT COMMENT '' , long4 STRING COMMENT '' , double5 DOUBLE COMMENT '' , boolean6 BOOLEAN COMMENT '' , timestamp7 TIMESTAMP COMMENT '' , words8 STRING COMMENT '' , email9 STRING COMMENT '' , time10 STRING COMMENT '' , int11 BIGINT COMMENT '' , float12 FLOAT COMMENT '' , long13 STRING COMMENT '' , double14 DOUBLE COMMENT '' , boolean15 BOOLEAN COMMENT '' , timestamp16 TIMESTAMP COMMENT '' , email17 STRING COMMENT '' , time18 STRING COMMENT '' , list19 STRING COMMENT '' , id20 STRING COMMENT '' , sqltime21 TIMESTAMP COMMENT '' , ctype22 INT COMMENT '' , reg23 STRING COMMENT '' ) CREATE TABLE `testdb`.`colon` ( stn1 STRING COMMENT '' , int2 BIGINT COMMENT '' , float3 FLOAT COMMENT '' , long4 STRING COMMENT '' , double5 DOUBLE COMMENT '' , boolean6 BOOLEAN COMMENT '' , timestamp7 TIMESTAMP COMMENT '' , words8 STRING COMMENT '' , email9 STRING COMMENT '' , time10 STRING COMMENT '' , int11 BIGINT COMMENT '' , float12 FLOAT COMMENT '' , long13 STRING COMMENT '' , double14 DOUBLE COMMENT '' , boolean15 BOOLEAN COMMENT '' , timestamp16 TIMESTAMP COMMENT '' , email17 STRING COMMENT '' , time18 STRING COMMENT '' , list19 STRING COMMENT '' , id20 STRING COMMENT '' , sqltime21 TIMESTAMP COMMENT '' , ctype22 INT COMMENT '' , reg23 STRING COMMENT '' ) {code} and execute query at below : {code:java} // set hive.vectorized.execution.enabled=FALSE; SELECT distinct t1.ctype, t1.id_all , t2.list19 FROM ( SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list FROM tab WHERE stn1='20130101010100' AND ctype22 BETWEEN 2 AND 5 --result 45 row UNION ALL SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list FROM space WHERE stn1='20130102010100' AND ctype22 BETWEEN 2 AND 5 --result empty ) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows {code} expected result ||ctype||id_all||list19|| |2|104|ITEM30| |2|683|ITEM30| |1|970|ITEM30| but, actual result empty.. instead make table "union all" query result {code:java} // 코드 자리 표시자 set hive.vectorized.execution.enabled=FALSE; create table unionalltbl as SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list FROM tab WHERE stn1='20130101010100' AND ctype22 BETWEEN 2 AND 5 UNION ALL SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list FROM space WHERE stn1='20130102010100' AND ctype22 BETWEEN 2 AND 5 {code} and retry query {code:java} // 코드 자리 표시자 set hive.vectorized.execution.enabled=FALSE; select DISTINCT t1.close_type, t1.id_all, t2.list19 from unionalltbl t1 JOIN colon t2 ON t1.id_all=t2.id20 and t2.list19='ITEM30' {code} i got expected result and i compare two query explain first query {code:java} // 코드 자리 표시자 rel#18659:HiveProject.HIVE.[](input=rel#18657:HiveAggregate.HIVE.[] ( input=rel#18655:HiveProject.HIVE.[] ( input=rel#18653:HiveJoin.HIVE.[] ( left=rel#18650:HiveProject.HIVE.[] ( input=rel#18648:HiveUnion.HIVE.[] ( input#0=rel#18641:HiveProject.HIVE.[] ( input=rel#18639:HiveFilter.HIVE.[] ( input=rel#18611:HiveTableScan.HIVE.[](table=[testdb.tab],table:alias=tab)[false] ,condition=AND(=($0, _UTF-16LE'20130101010100'), BETWEEN(false, $21, 2, 5)) ) ,close_type=$21,id_all=$19,item_list=$18 ) ,input#1=rel#18646:HiveProject.HIVE.[] ( input=rel#18644:HiveFilter.HIVE.[] ( input=rel#18614:HiveTableScan.HIVE.[](table=[testdb.space],table:alias=space)[false] ,condition=AND(=($0, _UTF-16LE'20130102010100'), BETWEEN(false, $21, 2, 5)) ),close_type=$21,id_all=$19,item_list=$18 ) ,all=true ) ,close_type=$0,id_all=$1,item_list=$2 ), right=rel#18619:HiveTableScan.HIVE.[] ( table=[testdb.colon],table:alias=t2 ) [false],condition=AND ( =($1, $22), =($21, _UTF-16LE'ITEM30') ) ,joinType=inner,algorithm=none,cost=not available ) ,$f0=$0,$f1=$1,$f2=$21 ) ,group={0, 1, 2} ) ,close_type=$0,id_all=$1,list19=$2) {code} second query {code:java} // 코드 자리 표시자 rel#17893:HiveProject.HIVE.[] ( input=rel#17891:HiveAggregate.HIVE.[] ( input=rel#17889:HiveProject.HIVE.[] ( input=rel#17887:HiveJoin.HIVE.[] ( left=rel#17872:HiveTableScan.HIVE.[] (able=[testdb.unionalltbl],table:alias=t1)[false], right=rel#17873:HiveTableScan.HIVE.[](table=[testdb.colon],table:alias=t2)[false], condition=AND(=($1, $25), =($24, _UTF-16LE'ITEM30')), joinType=inner,algorithm=none,cost=not available ), $f0=$0,$f1=$1,$f2=$24 ), group={0, 1, 2} ) ,close_type=$0,id_all=$1,list19=$2 ) {code} why right table 'colon' condition column number different? was: i make 3 tables {code:java} // 코드 자리 표시자 CREATE TABLE `testdb`.`tab` ( stn1 STRING COMMENT '' , int2 BIGINT COMMENT '' , float3 FLOAT COMMENT '' , long4 STRING COMMENT '' , double5 DOUBLE COMMENT '' , boolean6 BOOLEAN COMMENT '' , timestamp7 TIMESTAMP COMMENT '' , words8 STRING COMMENT '' , email9 STRING COMMENT '' , time10 STRING COMMENT '' , int11 BIGINT COMMENT '' , float12 FLOAT COMMENT '' , long13 STRING COMMENT '' , double14 DOUBLE COMMENT '' , boolean15 BOOLEAN COMMENT '' , timestamp16 TIMESTAMP COMMENT '' , email17 STRING COMMENT '' , time18 STRING COMMENT '' , list19 STRING COMMENT '' , id20 STRING COMMENT '' , sqltime21 TIMESTAMP COMMENT '' , ctype22 INT COMMENT '' , reg23 STRING COMMENT '' ) CREATE TABLE `testdb`.`space` ( stn1 STRING COMMENT '' , int2 BIGINT COMMENT '' , float3 FLOAT COMMENT '' , long4 STRING COMMENT '' , double5 DOUBLE COMMENT '' , boolean6 BOOLEAN COMMENT '' , timestamp7 TIMESTAMP COMMENT '' , words8 STRING COMMENT '' , email9 STRING COMMENT '' , time10 STRING COMMENT '' , int11 BIGINT COMMENT '' , float12 FLOAT COMMENT '' , long13 STRING COMMENT '' , double14 DOUBLE COMMENT '' , boolean15 BOOLEAN COMMENT '' , timestamp16 TIMESTAMP COMMENT '' , email17 STRING COMMENT '' , time18 STRING COMMENT '' , list19 STRING COMMENT '' , id20 STRING COMMENT '' , sqltime21 TIMESTAMP COMMENT '' , ctype22 INT COMMENT '' , reg23 STRING COMMENT '' ) CREATE TABLE `testdb`.`colon` ( stn1 STRING COMMENT '' , int2 BIGINT COMMENT '' , float3 FLOAT COMMENT '' , long4 STRING COMMENT '' , double5 DOUBLE COMMENT '' , boolean6 BOOLEAN COMMENT '' , timestamp7 TIMESTAMP COMMENT '' , words8 STRING COMMENT '' , email9 STRING COMMENT '' , time10 STRING COMMENT '' , int11 BIGINT COMMENT '' , float12 FLOAT COMMENT '' , long13 STRING COMMENT '' , double14 DOUBLE COMMENT '' , boolean15 BOOLEAN COMMENT '' , timestamp16 TIMESTAMP COMMENT '' , email17 STRING COMMENT '' , time18 STRING COMMENT '' , list19 STRING COMMENT '' , id20 STRING COMMENT '' , sqltime21 TIMESTAMP COMMENT '' , ctype22 INT COMMENT '' , reg23 STRING COMMENT '' ) {code} and execute query at below : {code:java} // set hive.vectorized.execution.enabled=FALSE; SELECT distinct t1.ctype, t1.id_all , t2.list19 FROM ( SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list FROM tab WHERE stn1='20130101010100' AND ctype22 BETWEEN 2 AND 5 --result 45 row UNION ALL SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list FROM space WHERE stn1='20130102010100' AND ctype22 BETWEEN 2 AND 5 --result empty ) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows {code} expected result ||ctype||id_all||list19|| |2|104|ITEM30| |2|683|ITEM30| |1|970|ITEM30| but, actual result empty.. instead make table "union all" query result {code:java} // 코드 자리 표시자 set hive.vectorized.execution.enabled=FALSE; create table unionalltbl as SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list FROM tab WHERE stn1='20130101010100' AND ctype22 BETWEEN 2 AND 5 UNION ALL SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list FROM space WHERE stn1='20130102010100' AND ctype22 BETWEEN 2 AND 5 {code} and retry query {code:java} // 코드 자리 표시자 set hive.vectorized.execution.enabled=FALSE; select DISTINCT t1.close_type, t1.id_all, t2.list19 from unionalltbl t1 JOIN colon t2 ON t1.id_all=t2.id20 and t2.list19='ITEM30' {code} i got expected result and i compare two query explain first query {code:java} // 코드 자리 표시자 rel#18659:HiveProject.HIVE.[](input=rel#18657:HiveAggregate.HIVE.[] ( input=rel#18655:HiveProject.HIVE.[] ( input=rel#18653:HiveJoin.HIVE.[] ( left=rel#18650:HiveProject.HIVE.[] ( input=rel#18648:HiveUnion.HIVE.[] ( input#0=rel#18641:HiveProject.HIVE.[] ( input=rel#18639:HiveFilter.HIVE.[] ( input=rel#18611:HiveTableScan.HIVE.[](table=[testdb.tab],table:alias=tab)[false] ,condition=AND(=($0, _UTF-16LE'20130101010100'), BETWEEN(false, $21, 2, 5)) ) ,close_type=$21,id_all=$19,item_list=$18 ) ,input#1=rel#18646:HiveProject.HIVE.[] ( input=rel#18644:HiveFilter.HIVE.[] ( input=rel#18614:HiveTableScan.HIVE.[](table=[testdb.space],table:alias=space)[false] ,condition=AND(=($0, _UTF-16LE'20130102010100'), BETWEEN(false, $21, 2, 5)) ),close_type=$21,id_all=$19,item_list=$18 ) ,all=true ) ,close_type=$0,id_all=$1,item_list=$2 ), right=rel#18619:HiveTableScan.HIVE.[] ( table=[testdb.colon],table:alias=t2 ) [false],condition=AND ( =($1, $22), =($21, _UTF-16LE'ITEM30') ) ,joinType=inner,algorithm=none,cost=not available ) ,$f0=$0,$f1=$1,$f2=$21 ) ,group={0, 1, 2} ) ,close_type=$0,id_all=$1,list19=$2) {code} second query {code:java} // 코드 자리 표시자 rel#17893:HiveProject.HIVE.[] ( input=rel#17891:HiveAggregate.HIVE.[] ( input=rel#17889:HiveProject.HIVE.[] ( input=rel#17887:HiveJoin.HIVE.[] ( left=rel#17872:HiveTableScan.HIVE.[] (able=[testdb.unionalltbl],table:alias=t1)[false], right=rel#17873:HiveTableScan.HIVE.[](table=[testdb.colon],table:alias=t2)[false], condition=AND(=($1, $25), =($24, _UTF-16LE'ITEM30')), joinType=inner,algorithm=none,cost=not available ), $f0=$0,$f1=$1,$f2=$24 ), group={0, 1, 2} ) ,close_type=$0,id_all=$1,list19=$2 ) {code} why right table 'colon' condition column number different? > union all and join result wrong > ------------------------------- > > Key: HIVE-21054 > URL: https://issues.apache.org/jira/browse/HIVE-21054 > Project: Hive > Issue Type: Bug > Components: Query Planning > Affects Versions: 3.1.1 > Reporter: daveKim > Priority: Major > > i make 3 tables > > {code:java} > // 코드 자리 표시자 > CREATE TABLE `testdb`.`tab` ( > stn1 STRING COMMENT '' , > int2 BIGINT COMMENT '' , > float3 FLOAT COMMENT '' , > long4 STRING COMMENT '' , > double5 DOUBLE COMMENT '' , > boolean6 BOOLEAN COMMENT '' , > timestamp7 TIMESTAMP COMMENT '' , > words8 STRING COMMENT '' , > email9 STRING COMMENT '' , > time10 STRING COMMENT '' , > int11 BIGINT COMMENT '' , > float12 FLOAT COMMENT '' , > long13 STRING COMMENT '' , > double14 DOUBLE COMMENT '' , > boolean15 BOOLEAN COMMENT '' , > timestamp16 TIMESTAMP COMMENT '' , > email17 STRING COMMENT '' , > time18 STRING COMMENT '' , > list19 STRING COMMENT '' , > id20 STRING COMMENT '' , > sqltime21 TIMESTAMP COMMENT '' , > ctype22 INT COMMENT '' , > reg23 STRING COMMENT '' > ) > CREATE TABLE `testdb`.`space` ( > stn1 STRING COMMENT '' , > int2 BIGINT COMMENT '' , > float3 FLOAT COMMENT '' , > long4 STRING COMMENT '' , > double5 DOUBLE COMMENT '' , > boolean6 BOOLEAN COMMENT '' , > timestamp7 TIMESTAMP COMMENT '' , > words8 STRING COMMENT '' , > email9 STRING COMMENT '' , > time10 STRING COMMENT '' , > int11 BIGINT COMMENT '' , > float12 FLOAT COMMENT '' , > long13 STRING COMMENT '' , > double14 DOUBLE COMMENT '' , > boolean15 BOOLEAN COMMENT '' , > timestamp16 TIMESTAMP COMMENT '' , > email17 STRING COMMENT '' , > time18 STRING COMMENT '' , > list19 STRING COMMENT '' , > id20 STRING COMMENT '' , > sqltime21 TIMESTAMP COMMENT '' , > ctype22 INT COMMENT '' , > reg23 STRING COMMENT '' > ) > CREATE TABLE `testdb`.`colon` ( > stn1 STRING COMMENT '' , > int2 BIGINT COMMENT '' , > float3 FLOAT COMMENT '' , > long4 STRING COMMENT '' , > double5 DOUBLE COMMENT '' , > boolean6 BOOLEAN COMMENT '' , > timestamp7 TIMESTAMP COMMENT '' , > words8 STRING COMMENT '' , > email9 STRING COMMENT '' , > time10 STRING COMMENT '' , > int11 BIGINT COMMENT '' , > float12 FLOAT COMMENT '' , > long13 STRING COMMENT '' , > double14 DOUBLE COMMENT '' , > boolean15 BOOLEAN COMMENT '' , > timestamp16 TIMESTAMP COMMENT '' , > email17 STRING COMMENT '' , > time18 STRING COMMENT '' , > list19 STRING COMMENT '' , > id20 STRING COMMENT '' , > sqltime21 TIMESTAMP COMMENT '' , > ctype22 INT COMMENT '' , > reg23 STRING COMMENT '' > ) > {code} > > > and execute query at below : > {code:java} > // > set hive.vectorized.execution.enabled=FALSE; > SELECT distinct t1.ctype, t1.id_all , t2.list19 > FROM ( > SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list > FROM tab WHERE stn1='20130101010100' > AND ctype22 BETWEEN 2 AND 5 --result 45 row > UNION ALL > SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list > FROM space > WHERE stn1='20130102010100' > AND ctype22 BETWEEN 2 AND 5 --result empty > ) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join > matched 3 rows > {code} > expected result > ||ctype||id_all||list19|| > |2|104|ITEM30| > |2|683|ITEM30| > |1|970|ITEM30| > but, actual result empty.. > > > instead make table "union all" query result > {code:java} > // 코드 자리 표시자 > set hive.vectorized.execution.enabled=FALSE; > create table unionalltbl as > SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list > FROM tab WHERE stn1='20130101010100' > AND ctype22 BETWEEN 2 AND 5 > UNION ALL > SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list > FROM space > WHERE stn1='20130102010100' > AND ctype22 BETWEEN 2 AND 5 > {code} > > and retry query > {code:java} > // 코드 자리 표시자 > set hive.vectorized.execution.enabled=FALSE; > select DISTINCT t1.close_type, t1.id_all, t2.list19 > from unionalltbl t1 JOIN colon t2 ON t1.id_all=t2.id20 and t2.list19='ITEM30' > {code} > i got expected result > > and i compare two query explain > first query > {code:java} > // 코드 자리 표시자 > rel#18659:HiveProject.HIVE.[](input=rel#18657:HiveAggregate.HIVE.[] > ( > input=rel#18655:HiveProject.HIVE.[] > ( > input=rel#18653:HiveJoin.HIVE.[] > ( > left=rel#18650:HiveProject.HIVE.[] > ( > input=rel#18648:HiveUnion.HIVE.[] > ( > input#0=rel#18641:HiveProject.HIVE.[] > ( > input=rel#18639:HiveFilter.HIVE.[] > ( > input=rel#18611:HiveTableScan.HIVE.[](table=[testdb.tab],table:alias=tab)[false] > ,condition=AND(=($0, _UTF-16LE'20130101010100'), BETWEEN(false, $21, 2, 5)) > ) > ,close_type=$21,id_all=$19,item_list=$18 > ) > ,input#1=rel#18646:HiveProject.HIVE.[] > ( > input=rel#18644:HiveFilter.HIVE.[] > ( > input=rel#18614:HiveTableScan.HIVE.[](table=[testdb.space],table:alias=space)[false] > ,condition=AND(=($0, _UTF-16LE'20130102010100'), BETWEEN(false, $21, 2, 5)) > ),close_type=$21,id_all=$19,item_list=$18 > ) > ,all=true > ) > ,close_type=$0,id_all=$1,item_list=$2 > ), > right=rel#18619:HiveTableScan.HIVE.[] > ( > table=[testdb.colon],table:alias=t2 > ) > [false],condition=AND > ( > =($1, $22), =($21, _UTF-16LE'ITEM30') > ) > ,joinType=inner,algorithm=none,cost=not available > ) > ,$f0=$0,$f1=$1,$f2=$21 > ) > ,group={0, 1, 2} > ) > ,close_type=$0,id_all=$1,list19=$2) > {code} > second query > {code:java} > // 코드 자리 표시자 > rel#17893:HiveProject.HIVE.[] > ( > input=rel#17891:HiveAggregate.HIVE.[] > ( > input=rel#17889:HiveProject.HIVE.[] > ( > input=rel#17887:HiveJoin.HIVE.[] > ( > left=rel#17872:HiveTableScan.HIVE.[] > (able=[testdb.unionalltbl],table:alias=t1)[false], > right=rel#17873:HiveTableScan.HIVE.[](table=[testdb.colon],table:alias=t2)[false], > condition=AND(=($1, $25), =($24, _UTF-16LE'ITEM30')), > joinType=inner,algorithm=none,cost=not available > ), > $f0=$0,$f1=$1,$f2=$24 > ), > group={0, 1, 2} > ) > ,close_type=$0,id_all=$1,list19=$2 > ) > {code} > why right table 'colon' condition column number different? > -- This message was sent by Atlassian JIRA (v7.6.3#76005)