Naresh P R created HIVE-27876:
---------------------------------

             Summary: Incorrect query results on tables with ClusterBy & SortBy
                 Key: HIVE-27876
                 URL: https://issues.apache.org/jira/browse/HIVE-27876
             Project: Hive
          Issue Type: Bug
            Reporter: Naresh P R


Repro:

 
{code:java}
create external table test_bucket(age int, name string, dept string) clustered 
by (age, name) sorted by (age asc, name asc) into 2 buckets stored as orc;
insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');
insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');

// Wrong result: the query returns no rows with default CDP configs (both groups have count 2 and should be returned)
select age, name, count(*) from test_bucket group by  age, name having count(*) 
> 1; 
+------+-------+------+
| age  | name  | _c2  |
+------+-------+------+
+------+-------+------+

// Workaround: disable map-side aggregation; the same query then returns the expected rows
set hive.map.aggr=false;
select age, name, count(*) from test_bucket group by  age, name having count(*) 
> 1; 
+------+--------+------+
| age  |  name  | _c2  |
+------+--------+------+
| 1    | user1  | 2    |
| 2    | user2  | 2    |
+------+--------+------+ {code}
 

 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to