http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucket_map_join_tez_empty.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez_empty.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez_empty.q index cc43b5b..359560a 100644 --- a/ql/src/test/queries/clientpositive/bucket_map_join_tez_empty.q +++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez_empty.q @@ -4,15 +4,15 @@ set hive.auto.convert.join=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=10000; -CREATE TABLE tab1(key1 int, value string) CLUSTERED BY (key1) INTO 10 BUCKETS STORED AS TEXTFILE; -CREATE TABLE tab2 (key1 int, value string) CLUSTERED BY (key1) INTO 10 BUCKETS STORED AS TEXTFILE; +CREATE TABLE tab1_n0(key1 int, value string) CLUSTERED BY (key1) INTO 10 BUCKETS STORED AS TEXTFILE; +CREATE TABLE tab2_n0 (key1 int, value string) CLUSTERED BY (key1) INTO 10 BUCKETS STORED AS TEXTFILE; -- HIVE-18721 : Make sure only certain buckets have data. -insert into tab1 VALUES (1,"abc"),(4,"def"),(8, "ghi"); -insert into tab2 VALUES (1, "abc"), (5, "aa"); +insert into tab1_n0 VALUES (1,"abc"),(4,"def"),(8, "ghi"); +insert into tab2_n0 VALUES (1, "abc"), (5, "aa"); set hive.convert.join.bucket.mapjoin.tez = true; -explain select * from tab1, tab2 where tab1.key1 = tab2.key1; -select * from tab1, tab2 where tab1.key1 = tab2.key1; +explain select * from tab1_n0, tab2_n0 where tab1_n0.key1 = tab2_n0.key1; +select * from tab1_n0, tab2_n0 where tab1_n0.key1 = tab2_n0.key1;
http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucket_num_reducers2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucket_num_reducers2.q b/ql/src/test/queries/clientpositive/bucket_num_reducers2.q index 6428f09..6e5bc39 100644 --- a/ql/src/test/queries/clientpositive/bucket_num_reducers2.q +++ b/ql/src/test/queries/clientpositive/bucket_num_reducers2.q @@ -4,11 +4,11 @@ set hive.exec.reducers.max = 2; -- This test sets the maximum number of reduce tasks to 2 for overwriting a -- table with 3 buckets, and uses a post-hook to confirm that 1 reducer was used -CREATE TABLE test_table(key int, value string) CLUSTERED BY (key) INTO 3 BUCKETS; +CREATE TABLE test_table_n4(key int, value string) CLUSTERED BY (key) INTO 3 BUCKETS; -explain extended insert overwrite table test_table +explain extended insert overwrite table test_table_n4 select * from src; -insert overwrite table test_table +insert overwrite table test_table_n4 select * from src; -drop table test_table; +drop table test_table_n4; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketcontext_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketcontext_1.q b/ql/src/test/queries/clientpositive/bucketcontext_1.q index 876bc0f..4658a65 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_1.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_1.q @@ -2,25 +2,25 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; -- small 1 part, 2 bucket & big 2 part, 4 bucket -CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); +CREATE TABLE bucket_small_n14 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n14 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n14 partition(ds='2008-04-08'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +CREATE TABLE bucket_big_n14 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n14 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n14 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big_n14 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big_n14 partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n14 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n14 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big_n14 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big_n14 partition(ds='2008-04-09'); set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n14 a JOIN bucket_big_n14 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n14 a JOIN bucket_big_n14 b ON a.key = b.key; set hive.optimize.bucketmapjoin.sortedmerge = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n14 a JOIN bucket_big_n14 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n14 a JOIN bucket_big_n14 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketcontext_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketcontext_2.q b/ql/src/test/queries/clientpositive/bucketcontext_2.q index 53518c4..e409dae 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_2.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_2.q @@ -2,23 +2,23 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; -- small 1 part, 4 bucket & big 2 part, 2 bucket -CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08'); +CREATE TABLE bucket_small_n8 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n8 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n8 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_small_n8 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_small_n8 partition(ds='2008-04-08'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +CREATE TABLE bucket_big_n8 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n8 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n8 partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n8 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n8 partition(ds='2008-04-09'); set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n8 a JOIN bucket_big_n8 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n8 a JOIN bucket_big_n8 b ON a.key = b.key; set hive.optimize.bucketmapjoin.sortedmerge = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n8 a JOIN bucket_big_n8 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n8 a JOIN bucket_big_n8 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketcontext_3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketcontext_3.q b/ql/src/test/queries/clientpositive/bucketcontext_3.q index 5eadd1d..aaf212b 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_3.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_3.q @@ -2,23 +2,23 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; -- small 2 part, 2 bucket & big 1 part, 4 bucket -CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); +CREATE TABLE bucket_small_n4 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n4 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n4 partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n4 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n4 partition(ds='2008-04-09'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +CREATE TABLE bucket_big_n4 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n4 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n4 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big_n4 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big_n4 partition(ds='2008-04-08'); set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n4 a JOIN bucket_big_n4 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n4 a JOIN bucket_big_n4 b ON a.key = b.key; set hive.optimize.bucketmapjoin.sortedmerge = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n4 a JOIN bucket_big_n4 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n4 a JOIN bucket_big_n4 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketcontext_5.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketcontext_5.q b/ql/src/test/queries/clientpositive/bucketcontext_5.q index 85acdd9..0687a1d 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_5.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_5.q @@ -1,20 +1,20 @@ set hive.strict.checks.bucketing=false; -- small no part, 4 bucket & big no part, 2 bucket -CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small; -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small; -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_small; -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_small; +CREATE TABLE bucket_small_n13 (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n13; +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n13; +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_small_n13; +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_small_n13; -CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big; -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big; +CREATE TABLE bucket_big_n13 (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n13; +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n13; set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n13 a JOIN bucket_big_n13 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n13 a JOIN bucket_big_n13 b ON a.key = b.key; set hive.optimize.bucketmapjoin.sortedmerge = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n13 a JOIN bucket_big_n13 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n13 a JOIN bucket_big_n13 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketcontext_6.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketcontext_6.q b/ql/src/test/queries/clientpositive/bucketcontext_6.q index d7ab5e3..35fec2f 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_6.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_6.q @@ -2,23 +2,23 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; -- small no part, 4 bucket & big 2 part, 2 bucket -CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small; -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small; -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_small; -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_small; +CREATE TABLE bucket_small_n7 (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n7; +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n7; +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_small_n7; +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_small_n7; -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +CREATE TABLE bucket_big_n7 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n7 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n7 partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n7 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n7 partition(ds='2008-04-09'); set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n7 a JOIN bucket_big_n7 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n7 a JOIN bucket_big_n7 b ON a.key = b.key; set hive.optimize.bucketmapjoin.sortedmerge = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n7 a JOIN bucket_big_n7 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n7 a JOIN bucket_big_n7 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketcontext_7.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketcontext_7.q b/ql/src/test/queries/clientpositive/bucketcontext_7.q index 2682c7d..b17590c 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_7.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_7.q @@ -2,28 +2,28 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; -- small 2 part, 4 bucket & big 2 part, 2 bucket -CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_small partition(ds='2008-04-08'); +CREATE TABLE bucket_small_n2 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n2 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n2 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_small_n2 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_small_n2 partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_small partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_small partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n2 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n2 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_small_n2 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_small_n2 partition(ds='2008-04-09'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +CREATE TABLE bucket_big_n2 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n2 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n2 partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n2 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n2 partition(ds='2008-04-09'); set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n2 a JOIN bucket_big_n2 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n2 a JOIN bucket_big_n2 b ON a.key = b.key; set hive.optimize.bucketmapjoin.sortedmerge = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n2 a JOIN bucket_big_n2 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n2 a JOIN bucket_big_n2 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketcontext_8.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketcontext_8.q b/ql/src/test/queries/clientpositive/bucketcontext_8.q index 60be000..225c215 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_8.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_8.q @@ -2,28 +2,28 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; -- small 2 part, 2 bucket & big 2 part, 4 bucket -CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); +CREATE TABLE bucket_small_n10 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n10 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n10 partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n10 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n10 partition(ds='2008-04-09'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +CREATE TABLE bucket_big_n10 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n10 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n10 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big_n10 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big_n10 partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n10 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n10 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big_n10 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big_n10 partition(ds='2008-04-09'); set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n10 a JOIN bucket_big_n10 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n10 a JOIN bucket_big_n10 b ON a.key = b.key; set hive.optimize.bucketmapjoin.sortedmerge = true; -explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; -select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n10 a JOIN bucket_big_n10 b ON a.key = b.key; +select /*+ MAPJOIN(a) */ count(*) FROM bucket_small_n10 a JOIN bucket_big_n10 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketizedhiveinputformat.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketizedhiveinputformat.q b/ql/src/test/queries/clientpositive/bucketizedhiveinputformat.q index a87fa1a..bdc0090 100644 --- a/ql/src/test/queries/clientpositive/bucketizedhiveinputformat.q +++ b/ql/src/test/queries/clientpositive/bucketizedhiveinputformat.q @@ -2,37 +2,37 @@ set hive.compute.query.using.stats=false; set hive.mapred.mode=nonstrict; set mapred.max.split.size = 32000000; -CREATE TABLE T1(name STRING) STORED AS TEXTFILE; +CREATE TABLE T1_n125(name STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1_n125; -CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE; +CREATE TABLE T2_n74(name STRING) STORED AS SEQUENCEFILE; -INSERT OVERWRITE TABLE T2 SELECT * FROM ( +INSERT OVERWRITE TABLE T2_n74 SELECT * FROM ( SELECT tmp1.name as name FROM ( - SELECT name, 'MMM' AS n FROM T1) tmp1 - JOIN (SELECT 'MMM' AS n FROM T1) tmp2 - JOIN (SELECT 'MMM' AS n FROM T1) tmp3 + SELECT name, 'MMM' AS n FROM T1_n125) tmp1 + JOIN (SELECT 'MMM' AS n FROM T1_n125) tmp2 + JOIN (SELECT 'MMM' AS n FROM T1_n125) tmp3 ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000; -CREATE TABLE T3(name STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T3; -LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T3; +CREATE TABLE T3_n28(name STRING) STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T3_n28; +LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T3_n28; set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org.apache.hadoop.hive.ql.hooks.ShowMapredStatsHook; set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -- 2 split by max.split.size -SELECT COUNT(1) FROM T2; +SELECT COUNT(1) FROM T2_n74; -- 1 split for two file -SELECT COUNT(1) FROM T3; +SELECT COUNT(1) FROM T3_n28; set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; -- 1 split -SELECT COUNT(1) FROM T2; +SELECT COUNT(1) FROM T2_n74; -- 2 split for two file -SELECT COUNT(1) FROM T3; +SELECT COUNT(1) FROM T3_n28; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketizedhiveinputformat_auto.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketizedhiveinputformat_auto.q b/ql/src/test/queries/clientpositive/bucketizedhiveinputformat_auto.q index 5bcce90..871f959 100644 --- a/ql/src/test/queries/clientpositive/bucketizedhiveinputformat_auto.q +++ b/ql/src/test/queries/clientpositive/bucketizedhiveinputformat_auto.q @@ -1,26 +1,26 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; -CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small partition(ds='2008-04-08'); +CREATE TABLE bucket_small_n16 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_small_n16 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_small_n16 partition(ds='2008-04-08'); -CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-08'); +CREATE TABLE bucket_big_n16 (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n16 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n16 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big_n16 partition(ds='2008-04-08'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big_n16 partition(ds='2008-04-08'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big partition(ds='2008-04-09'); -load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000000_0' INTO TABLE bucket_big_n16 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000001_0' INTO TABLE bucket_big_n16 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000002_0' INTO TABLE bucket_big_n16 partition(ds='2008-04-09'); +load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO TABLE bucket_big_n16 partition(ds='2008-04-09'); set hive.optimize.bucketmapjoin = true; -select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +select /* + MAPJOIN(a) */ count(*) FROM bucket_small_n16 a JOIN bucket_big_n16 b ON a.key = b.key; set hive.optimize.bucketmapjoin.sortedmerge = true; -select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +select /* + MAPJOIN(a) */ count(*) FROM bucket_small_n16 a JOIN bucket_big_n16 b ON a.key = b.key; set hive.input.format = org.apache.hadoop.hive.ql.io.HiveInputFormat; -select /* + MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +select /* + MAPJOIN(a) */ count(*) FROM bucket_small_n16 a JOIN bucket_big_n16 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketmapjoin1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin1.q b/ql/src/test/queries/clientpositive/bucketmapjoin1.q index e04c120..39f0b9b 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin1.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin1.q @@ -2,110 +2,110 @@ SET hive.vectorized.execution.enabled=false; set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; -CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE srcbucket_mapjoin_n1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE srcbucket_mapjoin_part_n1 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE srcbucket_mapjoin_part_2_n1 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; set hive.optimize.bucketmapjoin = true; -- empty partitions (HIVE-3205) explain extended select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n1 a join srcbucket_mapjoin_part_2_n1 b on a.key=b.key where b.ds="2008-04-08"; select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n1 a join srcbucket_mapjoin_part_2_n1 b on a.key=b.key where b.ds="2008-04-08"; explain extended select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n1 a join srcbucket_mapjoin_part_2_n1 b on a.key=b.key where b.ds="2008-04-08"; select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n1 a join srcbucket_mapjoin_part_2_n1 b on a.key=b.key where b.ds="2008-04-08"; -load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin; -load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n1; +load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_n1; -load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n1 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n1 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n1 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n1 partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n1 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n1 partition(ds='2008-04-08'); -create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); -create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_1_n0 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_2_n0 (key bigint , value1 bigint, value2 bigint); set hive.optimize.bucketmapjoin = true; -create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); +create table bucketmapjoin_tmp_result_n0 (key string , value1 string, value2 string); explain extended -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n0 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_n1 a join srcbucket_mapjoin_part_n1 b on a.key=b.key where b.ds="2008-04-08"; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n0 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_n1 a join srcbucket_mapjoin_part_n1 b on a.key=b.key where b.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n0; -insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1_n0 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n0; set hive.optimize.bucketmapjoin = false; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n0 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_n1 a join srcbucket_mapjoin_part_n1 b on a.key=b.key where b.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n0; +insert overwrite table bucketmapjoin_hash_result_2_n0 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n0; select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +from bucketmapjoin_hash_result_1_n0 a left outer join bucketmapjoin_hash_result_2_n0 b on a.key = b.key; set hive.optimize.bucketmapjoin = true; explain extended -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n0 select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_n1 a join srcbucket_mapjoin_part_n1 b on a.key=b.key where b.ds="2008-04-08"; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n0 select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_n1 a join srcbucket_mapjoin_part_n1 b on a.key=b.key where b.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n0; -insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +insert overwrite table bucketmapjoin_hash_result_1_n0 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n0; set hive.optimize.bucketmapjoin = false; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n0 select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_n1 a join srcbucket_mapjoin_part_n1 b on a.key=b.key where b.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n0; +insert overwrite table bucketmapjoin_hash_result_2_n0 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n0; select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +from bucketmapjoin_hash_result_1_n0 a left outer join bucketmapjoin_hash_result_2_n0 b on a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketmapjoin10.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin10.q b/ql/src/test/queries/clientpositive/bucketmapjoin10.q index 87f23f3..2191227 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin10.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin10.q @@ -3,27 +3,27 @@ set hive.strict.checks.bucketing=false; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CREATE TABLE srcbucket_mapjoin_part_1_n6 (key INT, value STRING) PARTITIONED BY (part STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1_n6 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1_n6 PARTITION (part='1'); -ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS; -LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); +ALTER TABLE srcbucket_mapjoin_part_1_n6 CLUSTERED BY (key) INTO 3 BUCKETS; +LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1_n6 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1_n6 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_1_n6 PARTITION (part='2'); -CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CREATE TABLE srcbucket_mapjoin_part_2_n13 (key INT, value STRING) PARTITIONED BY (part STRING) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n13 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n13 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_2_n13 PARTITION (part='1'); -ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS; -LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); +ALTER TABLE srcbucket_mapjoin_part_2_n13 CLUSTERED BY (key) INTO 2 BUCKETS; +LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n13 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n13 PARTITION (part='2'); -ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS; +ALTER TABLE srcbucket_mapjoin_part_2_n13 CLUSTERED BY (key) INTO 3 BUCKETS; set hive.cbo.enable=false; set hive.optimize.bucketmapjoin=true; @@ -31,9 +31,9 @@ set hive.optimize.bucketmapjoin=true; EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n6 a JOIN srcbucket_mapjoin_part_2_n13 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL; SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n6 a JOIN srcbucket_mapjoin_part_2_n13 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketmapjoin11.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin11.q b/ql/src/test/queries/clientpositive/bucketmapjoin11.q index daec21c..d1247ea 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin11.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin11.q @@ -3,27 +3,27 @@ set hive.strict.checks.bucketing=false; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CREATE TABLE srcbucket_mapjoin_part_1_n2 (key INT, value STRING) PARTITIONED BY (part STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='1'); -ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 4 BUCKETS; -LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); +ALTER TABLE srcbucket_mapjoin_part_1_n2 CLUSTERED BY (key) INTO 4 BUCKETS; +LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_1_n2 PARTITION (part='2'); -CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CREATE TABLE srcbucket_mapjoin_part_2_n6 (key INT, value STRING) PARTITIONED BY (part STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='1'); -ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS; -LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); +ALTER TABLE srcbucket_mapjoin_part_2_n6 CLUSTERED BY (key) INTO 2 BUCKETS; +LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n6 PARTITION (part='2'); set hive.cbo.enable=false; set hive.optimize.bucketmapjoin=true; @@ -33,18 +33,18 @@ set hive.optimize.bucketmapjoin=true; EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n2 a JOIN srcbucket_mapjoin_part_2_n6 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL; SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n2 a JOIN srcbucket_mapjoin_part_2_n6 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL; EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n2 a JOIN srcbucket_mapjoin_part_2_n6 b ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL; SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n2 a JOIN srcbucket_mapjoin_part_2_n6 b ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketmapjoin12.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin12.q b/ql/src/test/queries/clientpositive/bucketmapjoin12.q index 02d1ebb..f0d9ae7 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin12.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin12.q @@ -8,12 +8,12 @@ CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); -CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CREATE TABLE srcbucket_mapjoin_part_2_n0 (key INT, value STRING) PARTITIONED BY (part STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); -LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n0 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n0 PARTITION (part='1'); -ALTER TABLE srcbucket_mapjoin_part_2 NOT CLUSTERED; +ALTER TABLE srcbucket_mapjoin_part_2_n0 NOT CLUSTERED; CREATE TABLE srcbucket_mapjoin_part_3 (key INT, value STRING) PARTITIONED BY (part STRING) STORED AS TEXTFILE; @@ -28,11 +28,11 @@ set hive.optimize.bucketmapjoin=true; EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2_n0 b ON a.key = b.key AND a.part = '1' and b.part = '1'; SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2_n0 b ON a.key = b.key AND a.part = '1' and b.part = '1'; -- The table bucketing metadata match but one partition is not bucketed, bucket map join should not be used http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketmapjoin13.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin13.q b/ql/src/test/queries/clientpositive/bucketmapjoin13.q index 510a20d..d6e47b2 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin13.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin13.q @@ -2,73 +2,73 @@ SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; set hive.exec.reducers.max=1; -CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CREATE TABLE srcbucket_mapjoin_part_1_n8 (key INT, value STRING) PARTITIONED BY (part STRING) CLUSTERED BY (value) INTO 2 BUCKETS; --- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value' -INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +-- part=1 partition for srcbucket_mapjoin_part_1_n8 is bucketed by 'value' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1_n8 PARTITION (part='1') SELECT * FROM src; -ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 2 BUCKETS; +ALTER TABLE srcbucket_mapjoin_part_1_n8 CLUSTERED BY (key) INTO 2 BUCKETS; --- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' -INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +-- part=2 partition for srcbucket_mapjoin_part_1_n8 is bucketed by 'key' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1_n8 PARTITION (part='2') SELECT * FROM src; -CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CREATE TABLE srcbucket_mapjoin_part_2_n18 (key INT, value STRING) PARTITIONED BY (part STRING) CLUSTERED BY (key) INTO 2 BUCKETS; --- part=1 partition for srcbucket_mapjoin_part_2 is bucketed by 'key' -INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +-- part=1 partition for srcbucket_mapjoin_part_2_n18 is bucketed by 'key' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2_n18 PARTITION (part='1') SELECT * FROM src; set hive.cbo.enable=false; set hive.optimize.bucketmapjoin=true; --- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value' +-- part=1 partition for srcbucket_mapjoin_part_1_n8 is bucketed by 'value' -- and it is also being joined. So, bucketed map-join cannot be performed EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n8 a JOIN srcbucket_mapjoin_part_2_n18 b ON a.key = b.key; SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n8 a JOIN srcbucket_mapjoin_part_2_n18 b ON a.key = b.key; --- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- part=2 partition for srcbucket_mapjoin_part_1_n8 is bucketed by 'key' -- and it is being joined. So, bucketed map-join can be performed EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n8 a JOIN srcbucket_mapjoin_part_2_n18 b ON a.key = b.key and a.part = '2'; SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n8 a JOIN srcbucket_mapjoin_part_2_n18 b ON a.key = b.key and a.part = '2'; -ALTER TABLE srcbucket_mapjoin_part_1 drop partition (part = '1'); +ALTER TABLE srcbucket_mapjoin_part_1_n8 drop partition (part = '1'); --- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- part=2 partition for srcbucket_mapjoin_part_1_n8 is bucketed by 'key' -- and it is being joined. So, bucketed map-join can be performed EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n8 a JOIN srcbucket_mapjoin_part_2_n18 b ON a.key = b.key; SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n8 a JOIN srcbucket_mapjoin_part_2_n18 b ON a.key = b.key; -ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (value) INTO 2 BUCKETS; +ALTER TABLE srcbucket_mapjoin_part_1_n8 CLUSTERED BY (value) INTO 2 BUCKETS; --- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- part=2 partition for srcbucket_mapjoin_part_1_n8 is bucketed by 'key' -- and it is being joined. So, bucketed map-join can be performed -- The fact that the table is being bucketed by 'value' does not matter EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n8 a JOIN srcbucket_mapjoin_part_2_n18 b ON a.key = b.key; SELECT /*+ MAPJOIN(b) */ count(*) -FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +FROM srcbucket_mapjoin_part_1_n8 a JOIN srcbucket_mapjoin_part_2_n18 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketmapjoin2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin2.q b/ql/src/test/queries/clientpositive/bucketmapjoin2.q index cff3a8c..2e48f11 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin2.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin2.q @@ -2,115 +2,115 @@ SET hive.vectorized.execution.enabled=false; set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; -CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +CREATE TABLE srcbucket_mapjoin_part_n6 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n6 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n6 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n6 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n6 partition(ds='2008-04-08'); -CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +CREATE TABLE srcbucket_mapjoin_part_2_n5 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n5 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n5 partition(ds='2008-04-08'); -create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); -create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_1_n2 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_2_n2 (key bigint , value1 bigint, value2 bigint); set hive.optimize.bucketmapjoin = true; -create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); +create table bucketmapjoin_tmp_result_n2 (key string , value1 string, value2 string); explain extended -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n2 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n6 a join srcbucket_mapjoin_part_2_n5 b on a.key=b.key and b.ds="2008-04-08"; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n2 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n6 a join srcbucket_mapjoin_part_2_n5 b on a.key=b.key and b.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n2; +insert overwrite table bucketmapjoin_hash_result_1_n2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n2; set hive.optimize.bucketmapjoin = false; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n2 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n6 a join srcbucket_mapjoin_part_2_n5 b on a.key=b.key and b.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n2; +insert overwrite table bucketmapjoin_hash_result_2_n2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n2; select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +from bucketmapjoin_hash_result_1_n2 a left outer join bucketmapjoin_hash_result_2_n2 b on a.key = b.key; set hive.optimize.bucketmapjoin = true; explain extended -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n2 select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n6 a join srcbucket_mapjoin_part_2_n5 b on a.key=b.key and b.ds="2008-04-08"; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n2 select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n6 a join srcbucket_mapjoin_part_2_n5 b on a.key=b.key and b.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n2; +insert overwrite table bucketmapjoin_hash_result_1_n2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n2; set hive.optimize.bucketmapjoin = false; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n2 select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n6 a join srcbucket_mapjoin_part_2_n5 b on a.key=b.key and b.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n2; +insert overwrite table bucketmapjoin_hash_result_2_n2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n2; select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +from bucketmapjoin_hash_result_1_n2 a left outer join bucketmapjoin_hash_result_2_n2 b on a.key = b.key; -- HIVE-3210 -load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); -load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); +load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n5 partition(ds='2008-04-09'); +load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n5 partition(ds='2008-04-09'); set hive.optimize.bucketmapjoin = true; explain extended -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n2 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n6 a join srcbucket_mapjoin_part_2_n5 b on a.key=b.key; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n2 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n6 a join srcbucket_mapjoin_part_2_n5 b on a.key=b.key; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n2; +insert overwrite table bucketmapjoin_hash_result_1_n2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n2; set hive.optimize.bucketmapjoin = false; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n2 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +from srcbucket_mapjoin_part_n6 a join srcbucket_mapjoin_part_2_n5 b on a.key=b.key; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n2; +insert overwrite table bucketmapjoin_hash_result_2_n2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n2; select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +from bucketmapjoin_hash_result_1_n2 a left outer join bucketmapjoin_hash_result_2_n2 b on a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/9bf28a3c/ql/src/test/queries/clientpositive/bucketmapjoin3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin3.q b/ql/src/test/queries/clientpositive/bucketmapjoin3.q index 5904068..e798e2f 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin3.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin3.q @@ -1,81 +1,81 @@ SET hive.vectorized.execution.enabled=false; set hive.strict.checks.bucketing=false; -CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin; -load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin; +CREATE TABLE srcbucket_mapjoin_n12(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n12; +load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_n12; -CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +CREATE TABLE srcbucket_mapjoin_part_n13 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n13 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n13 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n13 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n13 partition(ds='2008-04-08'); -CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; -load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); -load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +CREATE TABLE srcbucket_mapjoin_part_2_n11 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n11 partition(ds='2008-04-08'); +load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n11 partition(ds='2008-04-08'); -create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); -create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_1_n4 (key bigint , value1 bigint, value2 bigint); +create table bucketmapjoin_hash_result_2_n4 (key bigint , value1 bigint, value2 bigint); set hive.optimize.bucketmapjoin = true; -create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); +create table bucketmapjoin_tmp_result_n6 (key string , value1 string, value2 string); explain extended -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n6 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_part_2_n11 a join srcbucket_mapjoin_part_n13 b on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n6 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_part_2_n11 a join srcbucket_mapjoin_part_n13 b on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_1 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n6; +insert overwrite table bucketmapjoin_hash_result_1_n4 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n6; set hive.optimize.bucketmapjoin = false; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n6 select /*+mapjoin(b)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_part_2_n11 a join srcbucket_mapjoin_part_n13 b on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n6; +insert overwrite table bucketmapjoin_hash_result_2_n4 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n6; select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +from bucketmapjoin_hash_result_1_n4 a left outer join bucketmapjoin_hash_result_2_n4 b on a.key = b.key; set hive.optimize.bucketmapjoin = true; explain extended -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n6 select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_part_2_n11 a join srcbucket_mapjoin_part_n13 b on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n6 select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_part_2_n11 a join srcbucket_mapjoin_part_n13 b on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n6; +insert overwrite table bucketmapjoin_hash_result_2_n4 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n6; set hive.optimize.bucketmapjoin = false; -insert overwrite table bucketmapjoin_tmp_result +insert overwrite table bucketmapjoin_tmp_result_n6 select /*+mapjoin(a)*/ a.key, a.value, b.value -from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +from srcbucket_mapjoin_part_2_n11 a join srcbucket_mapjoin_part_n13 b on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; -select count(1) from bucketmapjoin_tmp_result; -insert overwrite table bucketmapjoin_hash_result_2 -select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result; +select count(1) from bucketmapjoin_tmp_result_n6; +insert overwrite table bucketmapjoin_hash_result_2_n4 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n6; select a.key-b.key, a.value1-b.value1, a.value2-b.value2 -from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +from bucketmapjoin_hash_result_1_n4 a left outer join bucketmapjoin_hash_result_2_n4 b on a.key = b.key;