[ 
https://issues.apache.org/jira/browse/HIVE-9745?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Xuefu Zhang updated HIVE-9745:
------------------------------
    Description: 
The following query should return 5 rows but Hive returns 3

{code}
select rnum, tchar.cchar from tchar where not (  tchar.cchar = ' ' or ( 
tchar.cchar is null and ' ' is null ))
{code}
Consider the following project of the base table

{code}
select rnum, tchar.cchar, 
case tchar.cchar when ' ' then 'space' else 'not space' end, 
case when tchar.cchar is null then 'is null' else 'not null' end, case when ' ' 
is null then 'is null' else 'not null' end
from tchar
order by rnum
{code}
Row 0 is a NULL
Row 1 was loaded with a zero length string ''
Row 2 was loaded with a single space ' '
{code}
rnum    tchar.cchar     _c2     _c3     _c4
0       <null>  not space       is null not null
1                                               not space       not null        
not null
2                                               not space       not null        
not null
3       BB                                      not space       not null        
not null
4       EE                                      not space       not null        
not null
5       FF                                      not space       not null        
not null
{code}
Explicitly type cast the literal which many  SQL developers would not expect 
need to do.
{code}
select rnum, tchar.cchar, 
case tchar.cchar when cast(' ' as char(1)) then 'space' else 'not space' end, 
case when tchar.cchar is null then 'is null' else 'not null' end, case when 
cast( ' ' as char(1)) is null then 'is null' else 'not null' end
from tchar
order by rnum

rnum    tchar.cchar     _c2     _c3     _c4
0       <null>  not space       is null not null
1                                               space   not null        not null
2                                               space   not null        not null
3       BB                                      not space       not null        
not null
4       EE                                      not space       not null        
not null
5       FF                                      not space       not null        
not null


create table  if not exists T_TCHAR ( RNUM int , CCHAR char(32 )    )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' 
 STORED AS TEXTFILE  ;

0|\N
1|
2| 
3|BB
4|EE
5|FF


create table  if not exists TCHAR ( RNUM int , CCHAR char(32 )    )
 STORED AS orc  ;

insert overwrite table TCHAR select * from  T_TCHAR;
{code}


  was:
The following query should return 5 rows but Hive returns 3


select rnum, tchar.cchar from tchar where not (  tchar.cchar = ' ' or ( 
tchar.cchar is null and ' ' is null ))

Consider the following project of the base table


select rnum, tchar.cchar, 
case tchar.cchar when ' ' then 'space' else 'not space' end, 
case when tchar.cchar is null then 'is null' else 'not null' end, case when ' ' 
is null then 'is null' else 'not null' end
from tchar
order by rnum

Row 0 is a NULL
Row 1 was loaded with a zero length string ''
Row 2 was loaded with a single space ' '

rnum    tchar.cchar     _c2     _c3     _c4
0       <null>  not space       is null not null
1                                               not space       not null        
not null
2                                               not space       not null        
not null
3       BB                                      not space       not null        
not null
4       EE                                      not space       not null        
not null
5       FF                                      not space       not null        
not null

Explicitly type cast the literal which many  SQL developers would not expect 
need to do.

select rnum, tchar.cchar, 
case tchar.cchar when cast(' ' as char(1)) then 'space' else 'not space' end, 
case when tchar.cchar is null then 'is null' else 'not null' end, case when 
cast( ' ' as char(1)) is null then 'is null' else 'not null' end
from tchar
order by rnum

rnum    tchar.cchar     _c2     _c3     _c4
0       <null>  not space       is null not null
1                                               space   not null        not null
2                                               space   not null        not null
3       BB                                      not space       not null        
not null
4       EE                                      not space       not null        
not null
5       FF                                      not space       not null        
not null


create table  if not exists T_TCHAR ( RNUM int , CCHAR char(32 )    )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' 
 STORED AS TEXTFILE  ;

0|\N
1|
2| 
3|BB
4|EE
5|FF


create table  if not exists TCHAR ( RNUM int , CCHAR char(32 )    )
 STORED AS orc  ;

insert overwrite table TCHAR select * from  T_TCHAR;



> predicate evaluation of character fields with spaces and literals with spaces 
> returns unexpected result
> -------------------------------------------------------------------------------------------------------
>
>                 Key: HIVE-9745
>                 URL: https://issues.apache.org/jira/browse/HIVE-9745
>             Project: Hive
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 0.14.0
>            Reporter: N Campbell
>
> The following query should return 5 rows but Hive returns 3
> {code}
> select rnum, tchar.cchar from tchar where not (  tchar.cchar = ' ' or ( 
> tchar.cchar is null and ' ' is null ))
> {code}
> Consider the following project of the base table
> {code}
> select rnum, tchar.cchar, 
> case tchar.cchar when ' ' then 'space' else 'not space' end, 
> case when tchar.cchar is null then 'is null' else 'not null' end, case when ' 
> ' is null then 'is null' else 'not null' end
> from tchar
> order by rnum
> {code}
> Row 0 is a NULL
> Row 1 was loaded with a zero length string ''
> Row 2 was loaded with a single space ' '
> {code}
> rnum  tchar.cchar     _c2     _c3     _c4
> 0     <null>  not space       is null not null
> 1                                             not space       not null        
> not null
> 2                                             not space       not null        
> not null
> 3     BB                                      not space       not null        
> not null
> 4     EE                                      not space       not null        
> not null
> 5     FF                                      not space       not null        
> not null
> {code}
> Explicitly type cast the literal which many  SQL developers would not expect 
> need to do.
> {code}
> select rnum, tchar.cchar, 
> case tchar.cchar when cast(' ' as char(1)) then 'space' else 'not space' end, 
> case when tchar.cchar is null then 'is null' else 'not null' end, case when 
> cast( ' ' as char(1)) is null then 'is null' else 'not null' end
> from tchar
> order by rnum
> rnum  tchar.cchar     _c2     _c3     _c4
> 0     <null>  not space       is null not null
> 1                                             space   not null        not null
> 2                                             space   not null        not null
> 3     BB                                      not space       not null        
> not null
> 4     EE                                      not space       not null        
> not null
> 5     FF                                      not space       not null        
> not null
> create table  if not exists T_TCHAR ( RNUM int , CCHAR char(32 )    )
> ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' 
>  STORED AS TEXTFILE  ;
> 0|\N
> 1|
> 2| 
> 3|BB
> 4|EE
> 5|FF
> create table  if not exists TCHAR ( RNUM int , CCHAR char(32 )    )
>  STORED AS orc  ;
> insert overwrite table TCHAR select * from  T_TCHAR;
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to