http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_1.q b/ql/src/test/queries/clientpositive/groupby_sort_1.q index 2255ede..46ec0be 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_1.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_1.q @@ -6,58 +6,58 @@ set hive.map.groupby.sorted=true; -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n4(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n4; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n4 select key, val from T1_n4; -CREATE TABLE outputTbl1(key int, cnt int); +CREATE TABLE outputTbl1_n2(key int, cnt int); -- The plan should be converted to a map-side group by if the group by key -- matches the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n2 +SELECT key, count(1) FROM T1_n4 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n2 +SELECT key, count(1) FROM T1_n4 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n2; -CREATE TABLE outputTbl2(key1 int, key2 string, cnt int); +CREATE TABLE outputTbl2_n0(key1 int, key2 string, cnt int); -- no map-side group by even if the group by key is a superset of sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl2_n0 +SELECT key, val, count(1) FROM T1_n4 GROUP BY key, val; -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl2_n0 +SELECT key, val, count(1) FROM T1_n4 GROUP BY key, val; -SELECT * FROM outputTbl2; +SELECT * FROM outputTbl2_n0; -- It should work for sub-queries EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n2 +SELECT key, count(1) FROM (SELECT key, val FROM T1_n4) subq1 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n2 +SELECT key, count(1) FROM (SELECT key, val FROM T1_n4) subq1 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n2; -- It should work for sub-queries with column aliases EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k; +INSERT OVERWRITE TABLE outputTbl1_n2 +SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n4) subq1 GROUP BY k; -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k; +INSERT OVERWRITE TABLE outputTbl1_n2 +SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n4) subq1 GROUP BY k; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n2; CREATE TABLE outputTbl3(key1 int, key2 int, cnt int); @@ -65,10 +65,10 @@ CREATE TABLE outputTbl3(key1 int, key2 int, cnt int); -- by a match to the sorted key EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key; +SELECT 1, key, count(1) FROM T1_n4 GROUP BY 1, key; INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key; +SELECT 1, key, count(1) FROM T1_n4 GROUP BY 1, key; SELECT * FROM outputTbl3; @@ -77,20 +77,20 @@ CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int); -- no map-side group by if the group by key contains a constant followed by another column EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val; +SELECT key, 1, val, count(1) FROM T1_n4 GROUP BY key, 1, val; INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val; +SELECT key, 1, val, count(1) FROM T1_n4 GROUP BY key, 1, val; SELECT * FROM outputTbl4; -- no map-side group by if the group by key contains a function EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1; +SELECT key, key + 1, count(1) FROM T1_n4 GROUP BY key, key + 1; INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1; +SELECT key, key + 1, count(1) FROM T1_n4 GROUP BY key, key + 1; SELECT * FROM outputTbl3; @@ -99,104 +99,104 @@ SELECT * FROM outputTbl3; -- group by followed by another group by EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n2 SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n4 GROUP BY key) subq1 group by key + key; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n2 SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n4 GROUP BY key) subq1 group by key + key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n2; -- group by followed by a union EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n2 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n4 GROUP BY key UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n4 GROUP BY key ) subq1; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n2 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n4 GROUP BY key UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n4 GROUP BY key ) subq1; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n2; -- group by followed by a union where one of the sub-queries is map-side group by EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n2 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n4 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT key + key as key, count(1) FROM T1_n4 GROUP BY key + key ) subq1; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n2 SELECT * FROM ( -SELECT key, count(1) as cnt FROM T1 GROUP BY key +SELECT key, count(1) as cnt FROM T1_n4 GROUP BY key UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT key + key as key, count(1) as cnt FROM T1_n4 GROUP BY key + key ) subq1; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n2; -- group by followed by a join EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n2 SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n4 GROUP BY key) subq1 JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 +(SELECT key, count(1) as cnt FROM T1_n4 GROUP BY key) subq2 ON subq1.key = subq2.key; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n2 SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n4 GROUP BY key) subq1 JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 +(SELECT key, count(1) as cnt FROM T1_n4 GROUP BY key) subq2 ON subq1.key = subq2.key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n2; -- group by followed by a join where one of the sub-queries can be performed in the mapper EXPLAIN EXTENDED SELECT * FROM -(SELECT key, count(1) FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) FROM T1_n4 GROUP BY key) subq1 JOIN -(SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 +(SELECT key, val, count(1) FROM T1_n4 GROUP BY key, val) subq2 ON subq1.key = subq2.key; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n3(key STRING, val STRING) CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T2 select key, val from T1; +INSERT OVERWRITE TABLE T2_n3 select key, val from T1_n4; -- no mapside sort group by if the group by is a prefix of the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n2 +SELECT key, count(1) FROM T2_n3 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n2 +SELECT key, count(1) FROM T2_n3 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n2; -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val; +SELECT key, 1, val, count(1) FROM T2_n3 GROUP BY key, 1, val; INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val; +SELECT key, 1, val, count(1) FROM T2_n3 GROUP BY key, 1, val; SELECT * FROM outputTbl4; @@ -206,10 +206,10 @@ CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int); -- sorted keys followed by anything EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2; +SELECT key, 1, val, 2, count(1) FROM T2_n3 GROUP BY key, 1, val, 2; INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2; +SELECT key, 1, val, 2, count(1) FROM T2_n3 GROUP BY key, 1, val, 2; SELECT * FROM outputTbl5; @@ -217,12 +217,12 @@ SELECT * FROM outputTbl5; EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n3)subq group by key, constant, val; INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n3)subq group by key, constant, val; SELECT * FROM outputTbl4; @@ -233,7 +233,7 @@ INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n3)subq )subq2 group by key, constant3, val; @@ -241,7 +241,7 @@ INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n3)subq )subq2 group by key, constant3, val; @@ -251,33 +251,33 @@ set hive.map.aggr=true; set hive.multigroupby.singlereducer=false; set mapred.reduce.tasks=31; -CREATE TABLE DEST1(key INT, cnt INT); -CREATE TABLE DEST2(key INT, val STRING, cnt INT); +CREATE TABLE DEST1_n7(key INT, cnt INT); +CREATE TABLE DEST2_n1(key INT, val STRING, cnt INT); SET hive.exec.compress.intermediate=true; SET hive.exec.compress.output=true; EXPLAIN -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM T2_n3 +INSERT OVERWRITE TABLE DEST1_n7 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n1 SELECT key, val, count(1) GROUP BY key, val; -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM T2_n3 +INSERT OVERWRITE TABLE DEST1_n7 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n1 SELECT key, val, count(1) GROUP BY key, val; -select * from DEST1; -select * from DEST2; +select * from DEST1_n7; +select * from DEST2_n1; -- multi-table insert with a sub-query EXPLAIN -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM (select key, val from T2_n3 where key = 8) x +INSERT OVERWRITE TABLE DEST1_n7 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n1 SELECT key, val, count(1) GROUP BY key, val; -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM (select key, val from T2_n3 where key = 8) x +INSERT OVERWRITE TABLE DEST1_n7 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n1 SELECT key, val, count(1) GROUP BY key, val; -select * from DEST1; -select * from DEST2; +select * from DEST1_n7; +select * from DEST2_n1;
http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_10.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_10.q b/ql/src/test/queries/clientpositive/groupby_sort_10.q index daf2db9..626af88 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_10.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_10.q @@ -5,24 +5,24 @@ set hive.map.groupby.sorted=true; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CREATE TABLE T1_n149(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 PARTITION (ds='1') +INSERT OVERWRITE TABLE T1_n149 PARTITION (ds='1') SELECT * from src where key = 0 or key = 11; -- The plan is converted to a map-side plan -EXPLAIN select distinct key from T1; -select distinct key from T1; +EXPLAIN select distinct key from T1_n149; +select distinct key from T1_n149; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 PARTITION (ds='2') +INSERT OVERWRITE TABLE T1_n149 PARTITION (ds='2') SELECT * from src where key = 0 or key = 11; -- The plan is not converted to a map-side, since although the sorting columns and grouping -- columns match, the user is querying multiple input partitions -EXPLAIN select distinct key from T1; -select distinct key from T1; +EXPLAIN select distinct key from T1_n149; +select distinct key from T1_n149; -DROP TABLE T1; +DROP TABLE T1_n149; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_11.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_11.q b/ql/src/test/queries/clientpositive/groupby_sort_11.q index 015d91b..b8fd66a 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_11.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_11.q @@ -6,38 +6,38 @@ set hive.map.groupby.sorted=true; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CREATE TABLE T1_n18(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 PARTITION (ds='1') +INSERT OVERWRITE TABLE T1_n18 PARTITION (ds='1') SELECT * from src where key < 10; -- The plan is optimized to perform partial aggregation on the mapper -EXPLAIN select count(distinct key) from T1; -select count(distinct key) from T1; +EXPLAIN select count(distinct key) from T1_n18; +select count(distinct key) from T1_n18; -- The plan is optimized to perform partial aggregation on the mapper -EXPLAIN select count(distinct key), count(1), count(key), sum(distinct key) from T1; -select count(distinct key), count(1), count(key), sum(distinct key) from T1; +EXPLAIN select count(distinct key), count(1), count(key), sum(distinct key) from T1_n18; +select count(distinct key), count(1), count(key), sum(distinct key) from T1_n18; -- The plan is not changed in the presence of a grouping key -EXPLAIN select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key; -select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key; +EXPLAIN select count(distinct key), count(1), count(key), sum(distinct key) from T1_n18 group by key; +select count(distinct key), count(1), count(key), sum(distinct key) from T1_n18 group by key; -- The plan is not changed in the presence of a grouping key -EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key; -select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key; +EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) from T1_n18 group by key; +select key, count(distinct key), count(1), count(key), sum(distinct key) from T1_n18 group by key; -- The plan is not changed in the presence of a grouping key expression -EXPLAIN select count(distinct key+key) from T1; -select count(distinct key+key) from T1; +EXPLAIN select count(distinct key+key) from T1_n18; +select count(distinct key+key) from T1_n18; -EXPLAIN select count(distinct 1) from T1; -select count(distinct 1) from T1; +EXPLAIN select count(distinct 1) from T1_n18; +select count(distinct 1) from T1_n18; set hive.map.aggr=false; -- no plan change if map aggr is turned off -EXPLAIN select count(distinct key) from T1; -select count(distinct key) from T1; +EXPLAIN select count(distinct key) from T1_n18; +select count(distinct key) from T1_n18; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_1_23.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_1_23.q b/ql/src/test/queries/clientpositive/groupby_sort_1_23.q index 91b693a..b27aec4 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_1_23.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_1_23.q @@ -5,279 +5,279 @@ set hive.map.groupby.sorted=true; -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n80(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n80; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n80 select key, val from T1_n80; -CREATE TABLE outputTbl1(key int, cnt int); +CREATE TABLE outputTbl1_n18(key int, cnt int); -- The plan should be converted to a map-side group by if the group by key -- matches the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n18 +SELECT key, count(1) FROM T1_n80 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n18 +SELECT key, count(1) FROM T1_n80 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n18; -CREATE TABLE outputTbl2(key1 int, key2 string, cnt int); +CREATE TABLE outputTbl2_n5(key1 int, key2 string, cnt int); -- no map-side group by even if the group by key is a superset of sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl2_n5 +SELECT key, val, count(1) FROM T1_n80 GROUP BY key, val; -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl2_n5 +SELECT key, val, count(1) FROM T1_n80 GROUP BY key, val; -SELECT * FROM outputTbl2; +SELECT * FROM outputTbl2_n5; -- It should work for sub-queries EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n18 +SELECT key, count(1) FROM (SELECT key, val FROM T1_n80) subq1 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n18 +SELECT key, count(1) FROM (SELECT key, val FROM T1_n80) subq1 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n18; -- It should work for sub-queries with column aliases EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k; +INSERT OVERWRITE TABLE outputTbl1_n18 +SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n80) subq1 GROUP BY k; -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k; +INSERT OVERWRITE TABLE outputTbl1_n18 +SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n80) subq1 GROUP BY k; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n18; -CREATE TABLE outputTbl3(key1 int, key2 int, cnt int); +CREATE TABLE outputTbl3_n2(key1 int, key2 int, cnt int); -- The plan should be converted to a map-side group by if the group by key contains a constant followed -- by a match to the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key; +INSERT OVERWRITE TABLE outputTbl3_n2 +SELECT 1, key, count(1) FROM T1_n80 GROUP BY 1, key; -INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key; +INSERT OVERWRITE TABLE outputTbl3_n2 +SELECT 1, key, count(1) FROM T1_n80 GROUP BY 1, key; -SELECT * FROM outputTbl3; +SELECT * FROM outputTbl3_n2; -CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int); +CREATE TABLE outputTbl4_n2(key1 int, key2 int, key3 string, cnt int); -- no map-side group by if the group by key contains a constant followed by another column EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n2 +SELECT key, 1, val, count(1) FROM T1_n80 GROUP BY key, 1, val; -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n2 +SELECT key, 1, val, count(1) FROM T1_n80 GROUP BY key, 1, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n2; -- no map-side group by if the group by key contains a function EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1; +INSERT OVERWRITE TABLE outputTbl3_n2 +SELECT key, key + 1, count(1) FROM T1_n80 GROUP BY key, key + 1; -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1; +INSERT OVERWRITE TABLE outputTbl3_n2 +SELECT key, key + 1, count(1) FROM T1_n80 GROUP BY key, key + 1; -SELECT * FROM outputTbl3; +SELECT * FROM outputTbl3_n2; -- it should not matter what follows the group by -- test various cases -- group by followed by another group by EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n18 SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n80 GROUP BY key) subq1 group by key + key; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n18 SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n80 GROUP BY key) subq1 group by key + key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n18; -- group by followed by a union EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n18 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n80 GROUP BY key UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n80 GROUP BY key ) subq1; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n18 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n80 GROUP BY key UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n80 GROUP BY key ) subq1; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n18; -- group by followed by a union where one of the sub-queries is map-side group by EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n18 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n80 GROUP BY key UNION ALL -SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1_n80 GROUP BY key + key ) subq1; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n18 SELECT * FROM ( -SELECT key, count(1) as cnt FROM T1 GROUP BY key +SELECT key, count(1) as cnt FROM T1_n80 GROUP BY key UNION ALL -SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) as cnt FROM T1_n80 GROUP BY key + key ) subq1; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n18; -- group by followed by a join EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n18 SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n80 GROUP BY key) subq1 JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 +(SELECT key, count(1) as cnt FROM T1_n80 GROUP BY key) subq2 ON subq1.key = subq2.key; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n18 SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n80 GROUP BY key) subq1 JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 +(SELECT key, count(1) as cnt FROM T1_n80 GROUP BY key) subq2 ON subq1.key = subq2.key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n18; -- group by followed by a join where one of the sub-queries can be performed in the mapper EXPLAIN EXTENDED SELECT * FROM -(SELECT key, count(1) FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) FROM T1_n80 GROUP BY key) subq1 JOIN -(SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 +(SELECT key, val, count(1) FROM T1_n80 GROUP BY key, val) subq2 ON subq1.key = subq2.key; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n49(key STRING, val STRING) CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T2 select key, val from T1; +INSERT OVERWRITE TABLE T2_n49 select key, val from T1_n80; -- no mapside sort group by if the group by is a prefix of the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n18 +SELECT key, count(1) FROM T2_n49 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n18 +SELECT key, count(1) FROM T2_n49 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n18; -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n2 +SELECT key, 1, val, count(1) FROM T2_n49 GROUP BY key, 1, val; -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n2 +SELECT key, 1, val, count(1) FROM T2_n49 GROUP BY key, 1, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n2; -CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int); +CREATE TABLE outputTbl5_n2(key1 int, key2 int, key3 string, key4 int, cnt int); -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys followed by anything EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2; +INSERT OVERWRITE TABLE outputTbl5_n2 +SELECT key, 1, val, 2, count(1) FROM T2_n49 GROUP BY key, 1, val, 2; -INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2; +INSERT OVERWRITE TABLE outputTbl5_n2 +SELECT key, 1, val, 2, count(1) FROM T2_n49 GROUP BY key, 1, val, 2; -SELECT * FROM outputTbl5 +SELECT * FROM outputTbl5_n2 ORDER BY key1, key2, key3, key4; -- contants from sub-queries should work fine EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n2 SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n49)subq group by key, constant, val; -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n2 SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n49)subq group by key, constant, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n2; -- multiple levels of contants from sub-queries should work fine EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n2 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n49)subq )subq2 group by key, constant3, val; -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n2 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n49)subq )subq2 group by key, constant3, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n2; set hive.map.aggr=true; set hive.multigroupby.singlereducer=false; set mapred.reduce.tasks=31; -CREATE TABLE DEST1(key INT, cnt INT); -CREATE TABLE DEST2(key INT, val STRING, cnt INT); +CREATE TABLE DEST1_n80(key INT, cnt INT); +CREATE TABLE DEST2_n18(key INT, val STRING, cnt INT); SET hive.exec.compress.intermediate=true; SET hive.exec.compress.output=true; EXPLAIN -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM T2_n49 +INSERT OVERWRITE TABLE DEST1_n80 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n18 SELECT key, val, count(1) GROUP BY key, val; -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM T2_n49 +INSERT OVERWRITE TABLE DEST1_n80 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n18 SELECT key, val, count(1) GROUP BY key, val; -select * from DEST1; -select * from DEST2; +select * from DEST1_n80; +select * from DEST2_n18; -- multi-table insert with a sub-query EXPLAIN -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM (select key, val from T2_n49 where key = 8) x +INSERT OVERWRITE TABLE DEST1_n80 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n18 SELECT key, val, count(1) GROUP BY key, val; -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM (select key, val from T2_n49 where key = 8) x +INSERT OVERWRITE TABLE DEST1_n80 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n18 SELECT key, val, count(1) GROUP BY key, val; -select * from DEST1; -select * from DEST2; +select * from DEST1_n80; +select * from DEST2_n18; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_2.q b/ql/src/test/queries/clientpositive/groupby_sort_2.q index 216a9b9..66905e1 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_2.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_2.q @@ -4,23 +4,23 @@ set hive.map.groupby.sorted=true; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n51(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (val) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n51; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n51 select key, val from T1_n51; -CREATE TABLE outputTbl1(val string, cnt int); +CREATE TABLE outputTbl1_n10(val string, cnt int); -- The plan should not be converted to a map-side group by even though the group by key -- matches the sorted key. EXPLAIN -INSERT OVERWRITE TABLE outputTbl1 -SELECT val, count(1) FROM T1 GROUP BY val; +INSERT OVERWRITE TABLE outputTbl1_n10 +SELECT val, count(1) FROM T1_n51 GROUP BY val; -INSERT OVERWRITE TABLE outputTbl1 -SELECT val, count(1) FROM T1 GROUP BY val; +INSERT OVERWRITE TABLE outputTbl1_n10 +SELECT val, count(1) FROM T1_n51 GROUP BY val; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n10; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_3.q b/ql/src/test/queries/clientpositive/groupby_sort_3.q index 996f44d..e059e75 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_3.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_3.q @@ -4,34 +4,34 @@ set hive.map.groupby.sorted=true; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n89(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n89; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n89 select key, val from T1_n89; -CREATE TABLE outputTbl1(key string, val string, cnt int); +CREATE TABLE outputTbl1_n20(key string, val string, cnt int); -- The plan should be converted to a map-side group by EXPLAIN -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl1_n20 +SELECT key, val, count(1) FROM T1_n89 GROUP BY key, val; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl1_n20 +SELECT key, val, count(1) FROM T1_n89 GROUP BY key, val; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n20; -CREATE TABLE outputTbl2(key string, cnt int); +CREATE TABLE outputTbl2_n7(key string, cnt int); -- The plan should be converted to a map-side group by EXPLAIN -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl2_n7 +SELECT key, count(1) FROM T1_n89 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl2_n7 +SELECT key, count(1) FROM T1_n89 GROUP BY key; -SELECT * FROM outputTbl2; +SELECT * FROM outputTbl2_n7; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_4.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_4.q b/ql/src/test/queries/clientpositive/groupby_sort_4.q index df998d0..6c21138 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_4.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_4.q @@ -4,36 +4,36 @@ set hive.map.groupby.sorted=true; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n133(key STRING, val STRING) CLUSTERED BY (key, val) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n133; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n133 select key, val from T1_n133; -CREATE TABLE outputTbl1(key STRING, cnt INT); +CREATE TABLE outputTbl1_n31(key STRING, cnt INT); -- The plan should not be converted to a map-side group by. -- However, there should no hash-based aggregation on the map-side EXPLAIN -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n31 +SELECT key, count(1) FROM T1_n133 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n31 +SELECT key, count(1) FROM T1_n133 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n31; -CREATE TABLE outputTbl2(key STRING, val STRING, cnt INT); +CREATE TABLE outputTbl2_n8(key STRING, val STRING, cnt INT); -- The plan should not be converted to a map-side group by. -- Hash-based aggregations should be performed on the map-side EXPLAIN -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl2_n8 +SELECT key, val, count(1) FROM T1_n133 GROUP BY key, val; -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl2_n8 +SELECT key, val, count(1) FROM T1_n133 GROUP BY key, val; -SELECT * FROM outputTbl2; +SELECT * FROM outputTbl2_n8; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_5.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_5.q b/ql/src/test/queries/clientpositive/groupby_sort_5.q index 1fdd404..3d85d14 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_5.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_5.q @@ -5,73 +5,73 @@ set hive.map.groupby.sorted=true; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n6(key STRING, val STRING) CLUSTERED BY (val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n6; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n6 select key, val from T1_n6; -CREATE TABLE outputTbl1(key STRING, val STRING, cnt INT); +CREATE TABLE outputTbl1_n5(key STRING, val STRING, cnt INT); -- The plan should be converted to a map-side group by, since the -- sorting columns and grouping columns match, and all the bucketing columns -- are part of sorting columns EXPLAIN -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl1_n5 +SELECT key, val, count(1) FROM T1_n6 GROUP BY key, val; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl1_n5 +SELECT key, val, count(1) FROM T1_n6 GROUP BY key, val; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n5; -DROP TABLE T1; +DROP TABLE T1_n6; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n6(key STRING, val STRING) CLUSTERED BY (val, key) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n6; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n6 select key, val from T1_n6; -- The plan should be converted to a map-side group by, since the -- sorting columns and grouping columns match, and all the bucketing columns -- are part of sorting columns EXPLAIN -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl1_n5 +SELECT key, val, count(1) FROM T1_n6 GROUP BY key, val; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl1_n5 +SELECT key, val, count(1) FROM T1_n6 GROUP BY key, val; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n5; -DROP TABLE T1; +DROP TABLE T1_n6; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n6(key STRING, val STRING) CLUSTERED BY (val) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n6; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n6 select key, val from T1_n6; -CREATE TABLE outputTbl2(key STRING, cnt INT); +CREATE TABLE outputTbl2_n1(key STRING, cnt INT); -- The plan should not be converted to a map-side group by, since although the -- sorting columns and grouping columns match, all the bucketing columns -- are not part of sorting columns. However, no hash map aggregation is required -- on the mapside. EXPLAIN -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl2_n1 +SELECT key, count(1) FROM T1_n6 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl2_n1 +SELECT key, count(1) FROM T1_n6 GROUP BY key; -SELECT * FROM outputTbl2; +SELECT * FROM outputTbl2_n1; -DROP TABLE T1; +DROP TABLE T1_n6; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_6.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_6.q b/ql/src/test/queries/clientpositive/groupby_sort_6.q index 02fec42..3aa0636 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_6.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_6.q @@ -4,39 +4,39 @@ set hive.map.groupby.sorted=true; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string); +CREATE TABLE T1_n61(key STRING, val STRING) PARTITIONED BY (ds string); -CREATE TABLE outputTbl1(key int, cnt int); +CREATE TABLE outputTbl1_n15(key int, cnt int); -- The plan should not be converted to a map-side group since no partition is being accessed EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n15 +SELECT key, count(1) FROM T1_n61 where ds = '1' GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n15 +SELECT key, count(1) FROM T1_n61 where ds = '1' GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n15; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='2'); +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n61 PARTITION (ds='2'); -- The plan should not be converted to a map-side group since no partition is being accessed EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n15 +SELECT key, count(1) FROM T1_n61 where ds = '1' GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n15 +SELECT key, count(1) FROM T1_n61 where ds = '1' GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n15; -- The plan should not be converted to a map-side group since the partition being accessed -- is neither bucketed not sorted EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n15 +SELECT key, count(1) FROM T1_n61 where ds = '2' GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n15 +SELECT key, count(1) FROM T1_n61 where ds = '2' GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n15; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_7.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_7.q b/ql/src/test/queries/clientpositive/groupby_sort_7.q index f0aea35..4dc34eb 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_7.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_7.q @@ -5,26 +5,26 @@ set hive.map.groupby.sorted=true; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CREATE TABLE T1_n104(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='1'); +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n104 PARTITION (ds='1'); -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1'; +INSERT OVERWRITE TABLE T1_n104 PARTITION (ds='1') select key, val from T1_n104 where ds = '1'; -CREATE TABLE outputTbl1(key STRING, val STRING, cnt INT); +CREATE TABLE outputTbl1_n26(key STRING, val STRING, cnt INT); -- The plan should be converted to a map-side group by, since the -- sorting columns and grouping columns match, and all the bucketing columns -- are part of sorting columns EXPLAIN -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl1_n26 +SELECT key, val, count(1) FROM T1_n104 where ds = '1' GROUP BY key, val; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl1_n26 +SELECT key, val, count(1) FROM T1_n104 where ds = '1' GROUP BY key, val; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n26; -DROP TABLE T1; +DROP TABLE T1_n104; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_8.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_8.q b/ql/src/test/queries/clientpositive/groupby_sort_8.q index 2c20b29..11568e9 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_8.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_8.q @@ -5,19 +5,19 @@ set hive.map.groupby.sorted=true; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CREATE TABLE T1_n45(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='1'); +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n45 PARTITION (ds='1'); -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1'; +INSERT OVERWRITE TABLE T1_n45 PARTITION (ds='1') select key, val from T1_n45 where ds = '1'; -- The plan is not converted to a map-side, since although the sorting columns and grouping -- columns match, the user is issueing a distinct. -- However, after HIVE-4310, partial aggregation is performed on the mapper EXPLAIN -select count(distinct key) from T1; -select count(distinct key) from T1; +select count(distinct key) from T1_n45; +select count(distinct key) from T1_n45; -DROP TABLE T1; +DROP TABLE T1_n45; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_9.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_9.q b/ql/src/test/queries/clientpositive/groupby_sort_9.q index abf1a8a..0f9be6a 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_9.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_9.q @@ -2,19 +2,19 @@ set hive.mapred.mode=nonstrict; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; -CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CREATE TABLE T1_n96(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='1'); +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n96 PARTITION (ds='1'); -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1'; -INSERT OVERWRITE TABLE T1 PARTITION (ds='2') select key, val from T1 where ds = '1'; +INSERT OVERWRITE TABLE T1_n96 PARTITION (ds='1') select key, val from T1_n96 where ds = '1'; +INSERT OVERWRITE TABLE T1_n96 PARTITION (ds='2') select key, val from T1_n96 where ds = '1'; -- The plan is not converted to a map-side, since although the sorting columns and grouping -- columns match, the user is querying multiple input partitions EXPLAIN -select key, count(1) from T1 group by key; -select key, count(1) from T1 group by key; +select key, count(1) from T1_n96 group by key; +select key, count(1) from T1_n96 group by key; -DROP TABLE T1; +DROP TABLE T1_n96; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q b/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q index 27cce8d..7836c4d 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q @@ -7,279 +7,279 @@ set hive.groupby.skewindata=true; -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n35(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n35; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n35 select key, val from T1_n35; -CREATE TABLE outputTbl1(key int, cnt int); +CREATE TABLE outputTbl1_n8(key int, cnt int); -- The plan should be converted to a map-side group by if the group by key -- matches the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n8 +SELECT key, count(1) FROM T1_n35 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n8 +SELECT key, count(1) FROM T1_n35 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n8; -CREATE TABLE outputTbl2(key1 int, key2 string, cnt int); +CREATE TABLE outputTbl2_n2(key1 int, key2 string, cnt int); -- no map-side group by even if the group by key is a superset of sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl2_n2 +SELECT key, val, count(1) FROM T1_n35 GROUP BY key, val; -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl2_n2 +SELECT key, val, count(1) FROM T1_n35 GROUP BY key, val; -SELECT * FROM outputTbl2; +SELECT * FROM outputTbl2_n2; -- It should work for sub-queries EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n8 +SELECT key, count(1) FROM (SELECT key, val FROM T1_n35) subq1 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n8 +SELECT key, count(1) FROM (SELECT key, val FROM T1_n35) subq1 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n8; -- It should work for sub-queries with column aliases EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k; +INSERT OVERWRITE TABLE outputTbl1_n8 +SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n35) subq1 GROUP BY k; -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k; +INSERT OVERWRITE TABLE outputTbl1_n8 +SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n35) subq1 GROUP BY k; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n8; -CREATE TABLE outputTbl3(key1 int, key2 int, cnt int); +CREATE TABLE outputTbl3_n0(key1 int, key2 int, cnt int); -- The plan should be converted to a map-side group by if the group by key contains a constant followed -- by a match to the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key; +INSERT OVERWRITE TABLE outputTbl3_n0 +SELECT 1, key, count(1) FROM T1_n35 GROUP BY 1, key; -INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key; +INSERT OVERWRITE TABLE outputTbl3_n0 +SELECT 1, key, count(1) FROM T1_n35 GROUP BY 1, key; -SELECT * FROM outputTbl3; +SELECT * FROM outputTbl3_n0; -CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int); +CREATE TABLE outputTbl4_n0(key1 int, key2 int, key3 string, cnt int); -- no map-side group by if the group by key contains a constant followed by another column EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n0 +SELECT key, 1, val, count(1) FROM T1_n35 GROUP BY key, 1, val; -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n0 +SELECT key, 1, val, count(1) FROM T1_n35 GROUP BY key, 1, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n0; -- no map-side group by if the group by key contains a function EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1; +INSERT OVERWRITE TABLE outputTbl3_n0 +SELECT key, key + 1, count(1) FROM T1_n35 GROUP BY key, key + 1; -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1; +INSERT OVERWRITE TABLE outputTbl3_n0 +SELECT key, key + 1, count(1) FROM T1_n35 GROUP BY key, key + 1; -SELECT * FROM outputTbl3; +SELECT * FROM outputTbl3_n0; -- it should not matter what follows the group by -- test various cases -- group by followed by another group by EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n8 SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n35 GROUP BY key) subq1 group by key + key; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n8 SELECT key + key, sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n35 GROUP BY key) subq1 group by key + key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n8; -- group by followed by a union EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n8 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n35 GROUP BY key UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n35 GROUP BY key ) subq1; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n8 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n35 GROUP BY key UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n35 GROUP BY key ) subq1; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n8; -- group by followed by a union where one of the sub-queries is map-side group by EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n8 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n35 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT key + key as key, count(1) FROM T1_n35 GROUP BY key + key ) subq1; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n8 SELECT * FROM ( -SELECT key, count(1) as cnt FROM T1 GROUP BY key +SELECT key, count(1) as cnt FROM T1_n35 GROUP BY key UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT key + key as key, count(1) as cnt FROM T1_n35 GROUP BY key + key ) subq1; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n8; -- group by followed by a join EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n8 SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n35 GROUP BY key) subq1 JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 +(SELECT key, count(1) as cnt FROM T1_n35 GROUP BY key) subq2 ON subq1.key = subq2.key; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n8 SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n35 GROUP BY key) subq1 JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 +(SELECT key, count(1) as cnt FROM T1_n35 GROUP BY key) subq2 ON subq1.key = subq2.key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n8; -- group by followed by a join where one of the sub-queries can be performed in the mapper EXPLAIN EXTENDED SELECT * FROM -(SELECT key, count(1) FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) FROM T1_n35 GROUP BY key) subq1 JOIN -(SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 +(SELECT key, val, count(1) FROM T1_n35 GROUP BY key, val) subq2 ON subq1.key = subq2.key; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n23(key STRING, val STRING) CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T2 select key, val from T1; +INSERT OVERWRITE TABLE T2_n23 select key, val from T1_n35; -- no mapside sort group by if the group by is a prefix of the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n8 +SELECT key, count(1) FROM T2_n23 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n8 +SELECT key, count(1) FROM T2_n23 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n8; -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n0 +SELECT key, 1, val, count(1) FROM T2_n23 GROUP BY key, 1, val; -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n0 +SELECT key, 1, val, count(1) FROM T2_n23 GROUP BY key, 1, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n0; -CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int); +CREATE TABLE outputTbl5_n0(key1 int, key2 int, key3 string, key4 int, cnt int); -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys followed by anything EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2; +INSERT OVERWRITE TABLE outputTbl5_n0 +SELECT key, 1, val, 2, count(1) FROM T2_n23 GROUP BY key, 1, val, 2; -INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2; +INSERT OVERWRITE TABLE outputTbl5_n0 +SELECT key, 1, val, 2, count(1) FROM T2_n23 GROUP BY key, 1, val, 2; -SELECT * FROM outputTbl5 +SELECT * FROM outputTbl5_n0 ORDER BY key1, key2, key3, key4; -- contants from sub-queries should work fine EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n0 SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n23)subq group by key, constant, val; -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n0 SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n23)subq group by key, constant, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n0; -- multiple levels of contants from sub-queries should work fine EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n0 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n23)subq )subq2 group by key, constant3, val; -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n0 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n23)subq )subq2 group by key, constant3, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n0; set hive.map.aggr=true; set hive.multigroupby.singlereducer=false; set mapred.reduce.tasks=31; -CREATE TABLE DEST1(key INT, cnt INT); -CREATE TABLE DEST2(key INT, val STRING, cnt INT); +CREATE TABLE DEST1_n30(key INT, cnt INT); +CREATE TABLE DEST2_n6(key INT, val STRING, cnt INT); SET hive.exec.compress.intermediate=true; SET hive.exec.compress.output=true; EXPLAIN -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM T2_n23 +INSERT OVERWRITE TABLE DEST1_n30 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n6 SELECT key, val, count(1) GROUP BY key, val; -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM T2_n23 +INSERT OVERWRITE TABLE DEST1_n30 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n6 SELECT key, val, count(1) GROUP BY key, val; -select * from DEST1; -select * from DEST2; +select * from DEST1_n30; +select * from DEST2_n6; -- multi-table insert with a sub-query EXPLAIN -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM (select key, val from T2_n23 where key = 8) x +INSERT OVERWRITE TABLE DEST1_n30 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n6 SELECT key, val, count(1) GROUP BY key, val; -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM (select key, val from T2_n23 where key = 8) x +INSERT OVERWRITE TABLE DEST1_n30 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n6 SELECT key, val, count(1) GROUP BY key, val; -select * from DEST1; -select * from DEST2; +select * from DEST1_n30; +select * from DEST2_n6; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q b/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q index 51e08a6..8919f3b 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q @@ -6,279 +6,279 @@ set hive.groupby.skewindata=true; -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n56(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n56; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n56 select key, val from T1_n56; -CREATE TABLE outputTbl1(key int, cnt int); +CREATE TABLE outputTbl1_n13(key int, cnt int); -- The plan should be converted to a map-side group by if the group by key -- matches the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n13 +SELECT key, count(1) FROM T1_n56 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n13 +SELECT key, count(1) FROM T1_n56 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n13; -CREATE TABLE outputTbl2(key1 int, key2 string, cnt int); +CREATE TABLE outputTbl2_n3(key1 int, key2 string, cnt int); -- no map-side group by even if the group by key is a superset of sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl2_n3 +SELECT key, val, count(1) FROM T1_n56 GROUP BY key, val; -INSERT OVERWRITE TABLE outputTbl2 -SELECT key, val, count(1) FROM T1 GROUP BY key, val; +INSERT OVERWRITE TABLE outputTbl2_n3 +SELECT key, val, count(1) FROM T1_n56 GROUP BY key, val; -SELECT * FROM outputTbl2; +SELECT * FROM outputTbl2_n3; -- It should work for sub-queries EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n13 +SELECT key, count(1) FROM (SELECT key, val FROM T1_n56) subq1 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n13 +SELECT key, count(1) FROM (SELECT key, val FROM T1_n56) subq1 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n13; -- It should work for sub-queries with column aliases EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k; +INSERT OVERWRITE TABLE outputTbl1_n13 +SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n56) subq1 GROUP BY k; -INSERT OVERWRITE TABLE outputTbl1 -SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k; +INSERT OVERWRITE TABLE outputTbl1_n13 +SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1_n56) subq1 GROUP BY k; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n13; -CREATE TABLE outputTbl3(key1 int, key2 int, cnt int); +CREATE TABLE outputTbl3_n1(key1 int, key2 int, cnt int); -- The plan should be converted to a map-side group by if the group by key contains a constant followed -- by a match to the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key; +INSERT OVERWRITE TABLE outputTbl3_n1 +SELECT 1, key, count(1) FROM T1_n56 GROUP BY 1, key; -INSERT OVERWRITE TABLE outputTbl3 -SELECT 1, key, count(1) FROM T1 GROUP BY 1, key; +INSERT OVERWRITE TABLE outputTbl3_n1 +SELECT 1, key, count(1) FROM T1_n56 GROUP BY 1, key; -SELECT * FROM outputTbl3; +SELECT * FROM outputTbl3_n1; -CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int); +CREATE TABLE outputTbl4_n1(key1 int, key2 int, key3 string, cnt int); -- no map-side group by if the group by key contains a constant followed by another column EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n1 +SELECT key, 1, val, count(1) FROM T1_n56 GROUP BY key, 1, val; -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n1 +SELECT key, 1, val, count(1) FROM T1_n56 GROUP BY key, 1, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n1; -- no map-side group by if the group by key contains a function EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1; +INSERT OVERWRITE TABLE outputTbl3_n1 +SELECT key, key + 1, count(1) FROM T1_n56 GROUP BY key, key + 1; -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1; +INSERT OVERWRITE TABLE outputTbl3_n1 +SELECT key, key + 1, count(1) FROM T1_n56 GROUP BY key, key + 1; -SELECT * FROM outputTbl3; +SELECT * FROM outputTbl3_n1; -- it should not matter what follows the group by -- test various cases -- group by followed by another group by EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n13 SELECT cast(key + key as string), sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n56 GROUP BY key) subq1 group by key + key; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n13 SELECT cast(key + key as string), sum(cnt) from -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n56 GROUP BY key) subq1 group by key + key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n13; -- group by followed by a union EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n13 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n56 GROUP BY key UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n56 GROUP BY key ) subq1; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n13 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n56 GROUP BY key UNION ALL -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n56 GROUP BY key ) subq1; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n13; -- group by followed by a union where one of the sub-queries is map-side group by EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n13 SELECT * FROM ( -SELECT key, count(1) FROM T1 GROUP BY key +SELECT key, count(1) FROM T1_n56 GROUP BY key UNION ALL -SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1_n56 GROUP BY key + key ) subq1; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n13 SELECT * FROM ( -SELECT key, count(1) as cnt FROM T1 GROUP BY key +SELECT key, count(1) as cnt FROM T1_n56 GROUP BY key UNION ALL -SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) as cnt FROM T1_n56 GROUP BY key + key ) subq1; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n13; -- group by followed by a join EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n13 SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n56 GROUP BY key) subq1 JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 +(SELECT key, count(1) as cnt FROM T1_n56 GROUP BY key) subq2 ON subq1.key = subq2.key; -INSERT OVERWRITE TABLE outputTbl1 +INSERT OVERWRITE TABLE outputTbl1_n13 SELECT subq1.key, subq1.cnt+subq2.cnt FROM -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) as cnt FROM T1_n56 GROUP BY key) subq1 JOIN -(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 +(SELECT key, count(1) as cnt FROM T1_n56 GROUP BY key) subq2 ON subq1.key = subq2.key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n13; -- group by followed by a join where one of the sub-queries can be performed in the mapper EXPLAIN EXTENDED SELECT * FROM -(SELECT key, count(1) FROM T1 GROUP BY key) subq1 +(SELECT key, count(1) FROM T1_n56 GROUP BY key) subq1 JOIN -(SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 +(SELECT key, val, count(1) FROM T1_n56 GROUP BY key, val) subq2 ON subq1.key = subq2.key; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n34(key STRING, val STRING) CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T2 select key, val from T1; +INSERT OVERWRITE TABLE T2_n34 select key, val from T1_n56; -- no mapside sort group by if the group by is a prefix of the sorted key EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n13 +SELECT key, count(1) FROM T2_n34 GROUP BY key; -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T2 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n13 +SELECT key, count(1) FROM T2_n34 GROUP BY key; -SELECT * FROM outputTbl1; +SELECT * FROM outputTbl1_n13; -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n1 +SELECT key, 1, val, count(1) FROM T2_n34 GROUP BY key, 1, val; -INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val; +INSERT OVERWRITE TABLE outputTbl4_n1 +SELECT key, 1, val, count(1) FROM T2_n34 GROUP BY key, 1, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n1; -CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int); +CREATE TABLE outputTbl5_n1(key1 int, key2 int, key3 string, key4 int, cnt int); -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys followed by anything EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2; +INSERT OVERWRITE TABLE outputTbl5_n1 +SELECT key, 1, val, 2, count(1) FROM T2_n34 GROUP BY key, 1, val, 2; -INSERT OVERWRITE TABLE outputTbl5 -SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2; +INSERT OVERWRITE TABLE outputTbl5_n1 +SELECT key, 1, val, 2, count(1) FROM T2_n34 GROUP BY key, 1, val, 2; -SELECT * FROM outputTbl5 +SELECT * FROM outputTbl5_n1 ORDER BY key1, key2, key3, key4; -- contants from sub-queries should work fine EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n1 SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n34)subq group by key, constant, val; -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n1 SELECT key, constant, val, count(1) from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n34)subq group by key, constant, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n1; -- multiple levels of contants from sub-queries should work fine EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n1 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n34)subq )subq2 group by key, constant3, val; -INSERT OVERWRITE TABLE outputTbl4 +INSERT OVERWRITE TABLE outputTbl4_n1 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from -(SELECT key, 1 as constant, val from T2)subq +(SELECT key, 1 as constant, val from T2_n34)subq )subq2 group by key, constant3, val; -SELECT * FROM outputTbl4; +SELECT * FROM outputTbl4_n1; set hive.map.aggr=true; set hive.multigroupby.singlereducer=false; set mapred.reduce.tasks=31; -CREATE TABLE DEST1(key INT, cnt INT); -CREATE TABLE DEST2(key INT, val STRING, cnt INT); +CREATE TABLE DEST1_n57(key INT, cnt INT); +CREATE TABLE DEST2_n12(key INT, val STRING, cnt INT); SET hive.exec.compress.intermediate=true; SET hive.exec.compress.output=true; EXPLAIN -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM T2_n34 +INSERT OVERWRITE TABLE DEST1_n57 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n12 SELECT key, val, count(1) GROUP BY key, val; -FROM T2 -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM T2_n34 +INSERT OVERWRITE TABLE DEST1_n57 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n12 SELECT key, val, count(1) GROUP BY key, val; -select * from DEST1; -select * from DEST2; +select * from DEST1_n57; +select * from DEST2_n12; -- multi-table insert with a sub-query EXPLAIN -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM (select key, val from T2_n34 where key = 8) x +INSERT OVERWRITE TABLE DEST1_n57 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n12 SELECT key, val, count(1) GROUP BY key, val; -FROM (select key, val from T2 where key = 8) x -INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key -INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val; +FROM (select key, val from T2_n34 where key = 8) x +INSERT OVERWRITE TABLE DEST1_n57 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE DEST2_n12 SELECT key, val, count(1) GROUP BY key, val; -select * from DEST1; -select * from DEST2; +select * from DEST1_n57; +select * from DEST2_n12; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/groupby_sort_test_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_test_1.q b/ql/src/test/queries/clientpositive/groupby_sort_test_1.q index 7e847a2..b1f4f1d 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_test_1.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_test_1.q @@ -2,18 +2,18 @@ set hive.mapred.mode=nonstrict; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n164(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n164; -- perform an insert to make sure there are 2 files -INSERT OVERWRITE TABLE T1 select key, val from T1; +INSERT OVERWRITE TABLE T1_n164 select key, val from T1_n164; -CREATE TABLE outputTbl1(key int, cnt int); +CREATE TABLE outputTbl1_n35(key int, cnt int); -- The plan should be converted to a map-side group by if the group by key -- matches the sorted key. However, in test mode, the group by wont be converted. EXPLAIN -INSERT OVERWRITE TABLE outputTbl1 -SELECT key, count(1) FROM T1 GROUP BY key; +INSERT OVERWRITE TABLE outputTbl1_n35 +SELECT key, count(1) FROM T1_n164 GROUP BY key;