phy...

namit Mon, 18 Feb 2013 21:18:26 -0800

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q?rev=1447593&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q Tue 
Feb 19 05:17:52 2013
@@ -0,0 +1,57 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) 
INTO 2 BUCKETS;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) 
INTO 2 BUCKETS;
+
+insert overwrite table tbl1
+select * from src where key < 10;
+
+insert overwrite table tbl2
+select * from src where key < 10;
+
+set hive.auto.convert.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+
+set hive.auto.convert.sortmerge.join=true;
+
+-- One of the subqueries contains a union, so it should not be converted to a 
sort-merge join.
+explain
+select count(*) from 
+  (
+  select * from
+  (select a.key as key, a.value as value from tbl1 a where key < 6
+     union all
+   select a.key as key, a.value as value from tbl1 a where key < 6
+  ) usubq1 ) subq1
+    join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+  on subq1.key = subq2.key;
+
+select count(*) from 
+  (
+  select * from
+  (select a.key as key, a.value as value from tbl1 a where key < 6
+     union all
+   select a.key as key, a.value as value from tbl1 a where key < 6
+  ) usubq1 ) subq1
+    join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+  on subq1.key = subq2.key;
+
+-- One of the subqueries contains a groupby, so it should not be converted to 
a sort-merge join.
+explain
+select count(*) from 
+  (select a.key as key, count(*) as value from tbl1 a where key < 6 group by 
a.key) subq1 
+    join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+  on subq1.key = subq2.key;
+
+select count(*) from 
+  (select a.key as key, count(*) as value from tbl1 a where key < 6 group by 
a.key) subq1 
+    join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+  on subq1.key = subq2.key;


Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q?rev=1447593&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q Tue 
Feb 19 05:17:52 2013
@@ -0,0 +1,24 @@
+-- small 1 part, 4 bucket & big 2 part, 2 bucket
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds 
string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket3outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket4outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) 
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-09');
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy = 
org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+
+-- Since the leftmost table is assumed as the big table, arrange the tables in 
the join accordingly
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON 
a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q?rev=1447593&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q Tue 
Feb 19 05:17:52 2013
@@ -0,0 +1,27 @@
+-- small 2 part, 2 bucket & big 1 part, 4 bucket
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds 
string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) 
CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy = 
org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+
+-- Since size is being used to find the big table, the order of the tables in 
the join does not matter
+explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON 
a.key = b.key;
+select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON 
a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q?rev=1447593&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q Tue 
Feb 19 05:17:52 2013
@@ -0,0 +1,29 @@
+-- small 2 part, 4 bucket & big 1 part, 2 bucket
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds 
string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket3outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket4outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+load data local inpath '../data/files/smallsrcsortbucket3outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+load data local inpath '../data/files/smallsrcsortbucket4outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) 
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy = 
org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
+
+-- Since size is being used to find the big table, the order of the tables in 
the join does not matter
+explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON 
a.key = b.key;
+select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON 
a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q?rev=1447593&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q Tue 
Feb 19 05:17:52 2013
@@ -0,0 +1,23 @@
+-- small no part, 4 bucket & big no part, 2 bucket
+CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED 
BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO 
TABLE bucket_small;
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO 
TABLE bucket_small;
+load data local inpath '../data/files/smallsrcsortbucket3outof4.txt' INTO 
TABLE bucket_small;
+load data local inpath '../data/files/smallsrcsortbucket4outof4.txt' INTO 
TABLE bucket_small;
+
+CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED 
BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE 
bucket_big;
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE 
bucket_big;
+
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy = 
org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
+
+-- Since size is being used to find the big table, the order of the tables in 
the join does not matter
+explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON 
a.key = b.key;
+select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON 
a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q?rev=1447593&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q Tue 
Feb 19 05:17:52 2013
@@ -0,0 +1,36 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) 
INTO 2 BUCKETS;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) 
INTO 2 BUCKETS;
+CREATE TABLE tbl3(key int, value string) CLUSTERED BY (key) SORTED BY (key) 
INTO 2 BUCKETS;
+CREATE TABLE tbl4(key int, value string) CLUSTERED BY (value) SORTED BY 
(value) INTO 2 BUCKETS;
+
+insert overwrite table tbl1 select * from src;
+insert overwrite table tbl2 select * from src;
+insert overwrite table tbl3 select * from src;
+insert overwrite table tbl4 select * from src;
+
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=200;
+
+-- A SMB join is being followed by a regular join on a non-bucketed table on a 
different key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on 
c.value = a.value;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value 
= a.value;
+
+-- A SMB join is being followed by a regular join on a non-bucketed table on 
the same key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on 
c.key = a.key;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = 
a.key;
+
+-- A SMB join is being followed by a regular join on a bucketed table on the 
same key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c 
on c.key = a.key;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key 
= a.key;
+
+-- A SMB join is being followed by a regular join on a bucketed table on a 
different key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c 
on c.value = a.value;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on 
c.value = a.value;

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q?rev=1447593&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q Tue 
Feb 19 05:17:52 2013
@@ -0,0 +1,32 @@
+-- small 2 part, 4 bucket & big 2 part, 2 bucket
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds 
string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket3outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket4outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+load data local inpath '../data/files/smallsrcsortbucket3outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+load data local inpath '../data/files/smallsrcsortbucket4outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) 
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-09');
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy = 
org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
+
+-- Since size is being used to find the big table, the order of the tables in 
the join does not matter
+explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON 
a.key = b.key;
+select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON 
a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q?rev=1447593&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q Tue 
Feb 19 05:17:52 2013
@@ -0,0 +1,32 @@
+-- small 2 part, 2 bucket & big 2 part, 4 bucket
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds 
string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-08');
+
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO 
TABLE bucket_small partition(ds='2008-04-09');
+
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) 
CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-08');
+
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE 
bucket_big partition(ds='2008-04-09');
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy = 
org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+
+-- Since size is being used to find the big table, the order of the tables in 
the join does not matter
+explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON 
a.key = b.key;
+select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON 
a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q?rev=1447593&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q 
(added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q Tue 
Feb 19 05:17:52 2013
@@ -0,0 +1,283 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) 
INTO 2 BUCKETS;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) 
INTO 2 BUCKETS;
+
+insert overwrite table tbl1
+select * from src where key < 10;
+
+insert overwrite table tbl2
+select * from src where key < 10;
+
+set hive.auto.convert.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.sortmerge.join=true;
+
+-- The join is being performed as part of sub-query. It should be converted to 
a sort-merge join
+explain
+select count(*) from (
+  select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 
b on a.key = b.key
+) subq1;
+
+select count(*) from (
+  select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 
b on a.key = b.key
+) subq1;
+
+-- The join is being performed as part of sub-query. It should be converted to 
a sort-merge join
+-- Add a order by at the end to make the results deterministic.
+explain
+select key, count(*) from 
+(
+  select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 
b on a.key = b.key
+) subq1
+group by key
+order by key;
+
+select key, count(*) from 
+(
+  select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 
b on a.key = b.key
+) subq1
+group by key
+order by key;
+
+-- The join is being performed as part of more than one sub-query. It should 
be converted to a sort-merge join
+explain
+select count(*) from
+(
+  select key, count(*) from 
+  (
+    select a.key as key, a.value as val1, b.value as val2 from tbl1 a join 
tbl2 b on a.key = b.key
+  ) subq1
+  group by key
+) subq2;
+
+select count(*) from
+(
+  select key, count(*) from 
+  (
+    select a.key as key, a.value as val1, b.value as val2 from tbl1 a join 
tbl2 b on a.key = b.key
+  ) subq1
+  group by key
+) subq2;
+
+-- A join is being performed across different sub-queries, where a join is 
being performed in each of them.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+  select key, count(*) as cnt1 from 
+  (
+    select a.key as key, a.value as val1, b.value as val2 from tbl1 a join 
tbl2 b on a.key = b.key
+  ) subq1 group by key
+) src1
+join
+(
+  select key, count(*) as cnt1 from 
+  (
+    select a.key as key, a.value as val1, b.value as val2 from tbl1 a join 
tbl2 b on a.key = b.key
+  ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1;
+
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+  select key, count(*) as cnt1 from 
+  (
+    select a.key as key, a.value as val1, b.value as val2 from tbl1 a join 
tbl2 b on a.key = b.key
+  ) subq1 group by key
+) src1
+join
+(
+  select key, count(*) as cnt1 from 
+  (
+    select a.key as key, a.value as val1, b.value as val2 from tbl1 a join 
tbl2 b on a.key = b.key
+  ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1;
+
+-- The subquery itself is being joined. Since the sub-query only contains 
selects and filters, it should 
+-- be converted to a sort-merge join.
+explain
+select count(*) from 
+  (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 
+    join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+  on subq1.key = subq2.key;
+
+select count(*) from 
+  (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 
+    join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+  on subq1.key = subq2.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains 
selects and filters, it should 
+-- be converted to a sort-merge join, although there is more than one level of 
sub-query
+explain
+select count(*) from 
+  (
+  select * from
+    (
+      select a.key as key, a.value as value from tbl1 a where key < 8
+    ) subq1 
+  where key < 6
+  ) subq2
+  join tbl2 b
+  on subq2.key = b.key;
+
+select count(*) from 
+  (
+  select * from
+    (
+      select a.key as key, a.value as value from tbl1 a where key < 8
+    ) subq1 
+  where key < 6
+  ) subq2
+  join tbl2 b
+  on subq2.key = b.key;
+
+-- Both the tables are nested sub-queries i.e more then 1 level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from 
+  (
+  select * from
+    (
+      select a.key as key, a.value as value from tbl1 a where key < 8
+    ) subq1 
+  where key < 6
+  ) subq2
+  join
+  (
+  select * from
+    (
+      select a.key as key, a.value as value from tbl1 a where key < 8
+    ) subq3 
+  where key < 6
+  ) subq4
+  on subq2.key = subq4.key;
+
+select count(*) from 
+  (
+  select * from
+    (
+      select a.key as key, a.value as value from tbl1 a where key < 8
+    ) subq1 
+  where key < 6
+  ) subq2
+  join
+  (
+  select * from
+    (
+      select a.key as key, a.value as value from tbl1 a where key < 8
+    ) subq3 
+  where key < 6
+  ) subq4
+  on subq2.key = subq4.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains 
selects and filters and the join key
+-- is not getting modified, it should be converted to a sort-merge join. Note 
that the sub-query modifies one 
+-- item, but that is not part of the join key.
+explain
+select count(*) from 
+  (select a.key as key, concat(a.value, a.value) as value from tbl1 a where 
key < 8) subq1 
+    join
+  (select a.key as key, concat(a.value, a.value) as value from tbl2 a where 
key < 8) subq2
+  on subq1.key = subq2.key;
+
+select count(*) from 
+  (select a.key as key, concat(a.value, a.value) as value from tbl1 a where 
key < 8) subq1 
+    join
+  (select a.key as key, concat(a.value, a.value) as value from tbl2 a where 
key < 8) subq2
+  on subq1.key = subq2.key;
+
+-- Since the join key is modified by the sub-query, neither sort-merge join 
not bucketized mapside
+-- join should be performed
+explain
+select count(*) from 
+  (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) 
subq1 
+    join
+  (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+  on subq1.key = subq2.key;
+
+select count(*) from 
+  (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) 
subq1 
+    join
+  (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2
+  on subq1.key = subq2.key;
+
+-- The left table is a sub-query and the right table is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from 
+  (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 
+    join tbl2 a on subq1.key = a.key;
+
+select count(*) from 
+  (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 
+    join tbl2 a on subq1.key = a.key;
+
+-- The right table is a sub-query and the left table is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from tbl1 a
+  join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 
+  on a.key = subq1.key;
+
+select count(*) from tbl1 a
+  join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 
+  on a.key = subq1.key;
+
+-- There are more than 2 inputs to the join, all of them being sub-queries. 
+-- It should be converted to to a sort-merge join
+explain
+select count(*) from 
+  (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 
+    join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+  on (subq1.key = subq2.key)
+    join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+  on (subq1.key = subq3.key);
+
+select count(*) from 
+  (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 
+    join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+  on subq1.key = subq2.key
+    join
+  (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+  on (subq1.key = subq3.key);
+
+-- The join is being performed on a nested sub-query, and an aggregation is 
performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+  select subq2.key as key, subq2.value as value1, b.value as value2 from
+  (
+    select * from
+    (
+      select a.key as key, a.value as value from tbl1 a where key < 8
+    ) subq1
+    where key < 6
+  ) subq2
+join tbl2 b
+on subq2.key = b.key) a;
+
+select count(*) from (
+  select subq2.key as key, subq2.value as value1, b.value as value2 from
+  (
+    select * from
+    (
+      select a.key as key, a.value as value from tbl1 a where key < 8
+    ) subq1
+    where key < 6
+  ) subq2
+join tbl2 b
+on subq2.key = b.key) a;

svn commit: r1447593 [3/8] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ data/files/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/phy...

Reply via email to