http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/setop_no_distinct.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/setop_no_distinct.q b/ql/src/test/queries/clientpositive/setop_no_distinct.q index 207954a..798f9c7 100644 --- a/ql/src/test/queries/clientpositive/setop_no_distinct.q +++ b/ql/src/test/queries/clientpositive/setop_no_distinct.q @@ -1,51 +1,51 @@ set hive.mapred.mode=nonstrict; set hive.cbo.enable=true; -create table a(key int, value int); +create table a_n1(key int, value int); -insert into table a values (1,2),(1,2),(1,2),(1,3),(2,3); +insert into table a_n1 values (1,2),(1,2),(1,2),(1,3),(2,3); -create table b(key int, value int); +create table b_n1(key int, value int); -insert into table b values (1,2),(1,2),(2,3); +insert into table b_n1 values (1,2),(1,2),(2,3); -select * from a intersect select * from b; +select * from a_n1 intersect select * from b_n1; -(select * from b intersect (select * from a)) intersect select * from b; +(select * from b_n1 intersect (select * from a_n1)) intersect select * from b_n1; -select * from b intersect all select * from a intersect select * from b; +select * from b_n1 intersect all select * from a_n1 intersect select * from b_n1; -(select * from b) intersect all ((select * from a) intersect select * from b); +(select * from b_n1) intersect all ((select * from a_n1) intersect select * from b_n1); -select * from (select a.key, b.value from a join b on a.key=b.key)sub1 +select * from (select a_n1.key, b_n1.value from a_n1 join b_n1 on a_n1.key=b_n1.key)sub1 intersect -select * from (select a.key, b.value from a join b on a.key=b.key)sub2; +select * from (select a_n1.key, b_n1.value from a_n1 join b_n1 on a_n1.key=b_n1.key)sub2; -drop table a; +drop table a_n1; -drop table b; +drop table b_n1; -create table a(key int); +create table a_n1(key int); -insert into table a values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL),(NULL),(NULL),(NULL); +insert into table a_n1 values (0),(1),(2),(2),(2),(2),(3),(NULL),(NULL),(NULL),(NULL),(NULL); -create table b(key bigint); +create table b_n1(key bigint); -insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL); +insert into table b_n1 values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL); -select * from a except select * from b; +select * from a_n1 except select * from b_n1; -(select * from a) minus select * from b union (select * from a) minus select * from b; +(select * from a_n1) minus select * from b_n1 union (select * from a_n1) minus select * from b_n1; -(select * from a) minus select * from b union all ((select * from a) minus select * from b); +(select * from a_n1) minus select * from b_n1 union all ((select * from a_n1) minus select * from b_n1); -(select * from a) minus select * from b union all (select * from a) minus all select * from b; +(select * from a_n1) minus select * from b_n1 union all (select * from a_n1) minus all select * from b_n1; -select * from a minus select * from b minus (select * from a minus select * from b); +select * from a_n1 minus select * from b_n1 minus (select * from a_n1 minus select * from b_n1); -(select * from a) minus (select * from b minus (select * from a minus select * from b)); +(select * from a_n1) minus (select * from b_n1 minus (select * from a_n1 minus select * from b_n1)); -drop table a; +drop table a_n1; -drop table b; +drop table b_n1;
http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/show_columns.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/show_columns.q b/ql/src/test/queries/clientpositive/show_columns.q index fdd1ea8..aa45bae 100644 --- a/ql/src/test/queries/clientpositive/show_columns.q +++ b/ql/src/test/queries/clientpositive/show_columns.q @@ -8,32 +8,32 @@ SHOW COLUMNS from shcol_test; -- SHOW COLUMNS CREATE DATABASE test_db; USE test_db; -CREATE TABLE foo(col1 INT, col2 INT, col3 INT, cola INT, colb INT, colc INT, a INT, b INT, c INT); +CREATE TABLE foo_n7(col1 INT, col2 INT, col3 INT, cola INT, colb INT, colc INT, a INT, b INT, c INT); -- SHOW COLUMNS basic syntax tests USE test_db; -SHOW COLUMNS from foo; -SHOW COLUMNS in foo; -SHOW COLUMNS in foo 'col*'; -SHOW COLUMNS in foo "col*"; -SHOW COLUMNS from foo 'col*'; -SHOW COLUMNS from foo "col*"; -SHOW COLUMNS from foo "col1|cola"; +SHOW COLUMNS from foo_n7; +SHOW COLUMNS in foo_n7; +SHOW COLUMNS in foo_n7 'col*'; +SHOW COLUMNS in foo_n7 "col*"; +SHOW COLUMNS from foo_n7 'col*'; +SHOW COLUMNS from foo_n7 "col*"; +SHOW COLUMNS from foo_n7 "col1|cola"; -- SHOW COLUMNS from a database with a name that requires escaping CREATE DATABASE `database`; USE `database`; -CREATE TABLE foo(col1 INT, col2 INT, col3 INT, cola INT, colb INT, colc INT, a INT, b INT, c INT); -SHOW COLUMNS from foo; -SHOW COLUMNS in foo "col*"; +CREATE TABLE foo_n7(col1 INT, col2 INT, col3 INT, cola INT, colb INT, colc INT, a INT, b INT, c INT); +SHOW COLUMNS from foo_n7; +SHOW COLUMNS in foo_n7 "col*"; -- Non existing column pattern -SHOW COLUMNS in foo "nomatch*"; -SHOW COLUMNS in foo "col+"; -SHOW COLUMNS in foo "nomatch"; +SHOW COLUMNS in foo_n7 "nomatch*"; +SHOW COLUMNS in foo_n7 "col+"; +SHOW COLUMNS in foo_n7 "nomatch"; use default; -SHOW COLUMNS from test_db.foo; -SHOW COLUMNS from foo from test_db; -SHOW COLUMNS from foo from test_db "col*"; -SHOW COLUMNS from foo from test_db like 'col*'; +SHOW COLUMNS from test_db.foo_n7; +SHOW COLUMNS from foo_n7 from test_db; +SHOW COLUMNS from foo_n7 from test_db "col*"; +SHOW COLUMNS from foo_n7 from test_db like 'col*'; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/show_create_table_alter.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/show_create_table_alter.q b/ql/src/test/queries/clientpositive/show_create_table_alter.q index 1158a69..3218ade 100644 --- a/ql/src/test/queries/clientpositive/show_create_table_alter.q +++ b/ql/src/test/queries/clientpositive/show_create_table_alter.q @@ -1,23 +1,23 @@ -- Test SHOW CREATE TABLE on an external, clustered and sorted table. Then test the query again after ALTERs. -CREATE EXTERNAL TABLE tmp_showcrt1 (key smallint, value float) +CREATE EXTERNAL TABLE tmp_showcrt1_n1 (key smallint, value float) CLUSTERED BY (key) SORTED BY (value DESC) INTO 5 BUCKETS; -SHOW CREATE TABLE tmp_showcrt1; +SHOW CREATE TABLE tmp_showcrt1_n1; -- Add a comment to the table, change the EXTERNAL property, and test SHOW CREATE TABLE on the change. -ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('comment'='temporary table', 'EXTERNAL'='FALSE'); -SHOW CREATE TABLE tmp_showcrt1; +ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('comment'='temporary table', 'EXTERNAL'='FALSE'); +SHOW CREATE TABLE tmp_showcrt1_n1; -- Alter the table comment, change the EXTERNAL property back and test SHOW CREATE TABLE on the change. -ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('comment'='changed comment', 'EXTERNAL'='TRUE'); -SHOW CREATE TABLE tmp_showcrt1; +ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('comment'='changed comment', 'EXTERNAL'='TRUE'); +SHOW CREATE TABLE tmp_showcrt1_n1; -- Change the 'SORTBUCKETCOLSPREFIX' property and test SHOW CREATE TABLE. The output should not change. -ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('SORTBUCKETCOLSPREFIX'='FALSE'); -SHOW CREATE TABLE tmp_showcrt1; +ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('SORTBUCKETCOLSPREFIX'='FALSE'); +SHOW CREATE TABLE tmp_showcrt1_n1; -- Alter the storage handler of the table, and test SHOW CREATE TABLE. -ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('storage_handler'='org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler'); -SHOW CREATE TABLE tmp_showcrt1; -DROP TABLE tmp_showcrt1; +ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('storage_handler'='org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler'); +SHOW CREATE TABLE tmp_showcrt1_n1; +DROP TABLE tmp_showcrt1_n1; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/show_create_table_partitioned.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/show_create_table_partitioned.q b/ql/src/test/queries/clientpositive/show_create_table_partitioned.q index 3b69154..34ae2e7 100644 --- a/ql/src/test/queries/clientpositive/show_create_table_partitioned.q +++ b/ql/src/test/queries/clientpositive/show_create_table_partitioned.q @@ -1,8 +1,8 @@ -- Test SHOW CREATE TABLE on a table with partitions and column comments. -CREATE EXTERNAL TABLE tmp_showcrt1 (key string, newvalue boolean COMMENT 'a new value') +CREATE EXTERNAL TABLE tmp_showcrt1_n2 (key string, newvalue boolean COMMENT 'a new value') COMMENT 'temporary table' PARTITIONED BY (value bigint COMMENT 'some value'); -SHOW CREATE TABLE tmp_showcrt1; -DROP TABLE tmp_showcrt1; +SHOW CREATE TABLE tmp_showcrt1_n2; +DROP TABLE tmp_showcrt1_n2; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/show_create_table_serde.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/show_create_table_serde.q b/ql/src/test/queries/clientpositive/show_create_table_serde.q index a94379b..c692b07 100644 --- a/ql/src/test/queries/clientpositive/show_create_table_serde.q +++ b/ql/src/test/queries/clientpositive/show_create_table_serde.q @@ -1,33 +1,33 @@ -- Test SHOW CREATE TABLE on a table with serde. -CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint); -ALTER TABLE tmp_showcrt1 SET SERDEPROPERTIES ('custom.property.key1'='custom.property.value1', 'custom.property.key2'='custom.property.value2'); -SHOW CREATE TABLE tmp_showcrt1; -DROP TABLE tmp_showcrt1; +CREATE TABLE tmp_showcrt1_n0 (key int, value string, newvalue bigint); +ALTER TABLE tmp_showcrt1_n0 SET SERDEPROPERTIES ('custom.property.key1'='custom.property.value1', 'custom.property.key2'='custom.property.value2'); +SHOW CREATE TABLE tmp_showcrt1_n0; +DROP TABLE tmp_showcrt1_n0; -- without a storage handler -CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint) +CREATE TABLE tmp_showcrt1_n0 (key int, value string, newvalue bigint) COMMENT 'temporary table' ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat'; -SHOW CREATE TABLE tmp_showcrt1; -DROP TABLE tmp_showcrt1; +SHOW CREATE TABLE tmp_showcrt1_n0; +DROP TABLE tmp_showcrt1_n0; -- without a storage handler / with custom serde params -CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint) +CREATE TABLE tmp_showcrt1_n0 (key int, value string, newvalue bigint) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' WITH SERDEPROPERTIES ('custom.property.key1'='custom.property.value1', 'custom.property.key2'='custom.property.value2') STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat'; -SHOW CREATE TABLE tmp_showcrt1; -DROP TABLE tmp_showcrt1; +SHOW CREATE TABLE tmp_showcrt1_n0; +DROP TABLE tmp_showcrt1_n0; -- with a storage handler and serde properties -CREATE EXTERNAL TABLE tmp_showcrt1 (key string, value boolean) +CREATE EXTERNAL TABLE tmp_showcrt1_n0 (key string, value boolean) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' STORED BY 'org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler' WITH SERDEPROPERTIES ('field.delim'=',', 'serialization.format'='$'); -SHOW CREATE TABLE tmp_showcrt1; -DROP TABLE tmp_showcrt1; +SHOW CREATE TABLE tmp_showcrt1_n0; +DROP TABLE tmp_showcrt1_n0; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/show_materialized_views.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/show_materialized_views.q b/ql/src/test/queries/clientpositive/show_materialized_views.q index df7485f..50d0315 100644 --- a/ql/src/test/queries/clientpositive/show_materialized_views.q +++ b/ql/src/test/queries/clientpositive/show_materialized_views.q @@ -47,14 +47,14 @@ SHOW MATERIALIZED VIEWS IN test2 LIKE "nomatch"; -- SHOW MATERIALIZED VIEWS from a database with a name that requires escaping CREATE DATABASE `database`; USE `database`; -CREATE TABLE foo(a INT); +CREATE TABLE foo_n0(a INT); CREATE VIEW fooview AS -SELECT * FROM foo; +SELECT * FROM foo_n0; USE default; SHOW MATERIALIZED VIEWS FROM `database` LIKE "fooview"; DROP MATERIALIZED VIEW fooview; -DROP TABLE foo; +DROP TABLE foo_n0; USE test1; DROP MATERIALIZED VIEW shtb_test1_view1; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/show_tables.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/show_tables.q b/ql/src/test/queries/clientpositive/show_tables.q index 6190657..772d63a 100644 --- a/ql/src/test/queries/clientpositive/show_tables.q +++ b/ql/src/test/queries/clientpositive/show_tables.q @@ -1,5 +1,5 @@ -CREATE TABLE shtb_test1(KEY STRING, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE; -CREATE TABLE shtb_test2(KEY STRING, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE; +CREATE TABLE shtb_test1_n0(KEY STRING, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE; +CREATE TABLE shtb_test2_n0(KEY STRING, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE; EXPLAIN SHOW TABLES 'shtb_*'; @@ -7,15 +7,15 @@ SHOW TABLES 'shtb_*'; SHOW TABLES 'shtb_*'; EXPLAIN -SHOW TABLES LIKE 'shtb_test1|shtb_test2'; +SHOW TABLES LIKE 'shtb_test1_n0|shtb_test2_n0'; -SHOW TABLES LIKE 'shtb_test1|shtb_test2'; +SHOW TABLES LIKE 'shtb_test1_n0|shtb_test2_n0'; -- SHOW TABLES FROM/IN database CREATE DATABASE test_db; USE test_db; -CREATE TABLE foo(a INT); -CREATE TABLE bar(a INT); +CREATE TABLE foo_n4(a INT); +CREATE TABLE bar_n0(a INT); CREATE TABLE baz(a INT); -- SHOW TABLES basic syntax tests @@ -26,10 +26,10 @@ SHOW TABLES IN test_db "test*"; SHOW TABLES IN test_db LIKE "nomatch"; -- SHOW TABLE EXTENDED basic syntax tests and wildcard -SHOW TABLE EXTENDED IN test_db LIKE foo; -SHOW TABLE EXTENDED IN test_db LIKE "foo"; -SHOW TABLE EXTENDED IN test_db LIKE 'foo'; -SHOW TABLE EXTENDED IN test_db LIKE `foo`; +SHOW TABLE EXTENDED IN test_db LIKE foo_n4; +SHOW TABLE EXTENDED IN test_db LIKE "foo_n4"; +SHOW TABLE EXTENDED IN test_db LIKE 'foo_n4'; +SHOW TABLE EXTENDED IN test_db LIKE `foo_n4`; SHOW TABLE EXTENDED IN test_db LIKE 'ba*'; SHOW TABLE EXTENDED IN test_db LIKE "ba*"; SHOW TABLE EXTENDED IN test_db LIKE `ba*`; @@ -37,6 +37,6 @@ SHOW TABLE EXTENDED IN test_db LIKE `ba*`; -- SHOW TABLES from a database with a name that requires escaping CREATE DATABASE `database`; USE `database`; -CREATE TABLE foo(a INT); +CREATE TABLE foo_n4(a INT); USE default; -SHOW TABLES FROM `database` LIKE "foo"; +SHOW TABLES FROM `database` LIKE "foo_n4"; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/show_views.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/show_views.q b/ql/src/test/queries/clientpositive/show_views.q index 61aad43..726ad4b 100644 --- a/ql/src/test/queries/clientpositive/show_views.q +++ b/ql/src/test/queries/clientpositive/show_views.q @@ -2,15 +2,15 @@ CREATE DATABASE test1; CREATE DATABASE test2; USE test1; -CREATE TABLE shtb_test1(KEY INT, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE; -CREATE VIEW shtb_test1_view1 AS SELECT * FROM shtb_test1 where KEY > 1000 and KEY < 2000; -CREATE VIEW shtb_test1_view2 AS SELECT * FROM shtb_test1 where KEY > 100 and KEY < 200; -CREATE VIEW shtb_full_view2 AS SELECT * FROM shtb_test1; +CREATE TABLE shtb_test1_n1(KEY INT, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE; +CREATE VIEW shtb_test1_view1_n0 AS SELECT * FROM shtb_test1_n1 where KEY > 1000 and KEY < 2000; +CREATE VIEW shtb_test1_view2_n0 AS SELECT * FROM shtb_test1_n1 where KEY > 100 and KEY < 200; +CREATE VIEW shtb_full_view2_n0 AS SELECT * FROM shtb_test1_n1; USE test2; -CREATE TABLE shtb_test1(KEY INT, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE; -CREATE TABLE shtb_test2(KEY INT, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE; -CREATE VIEW shtb_test1_view1 AS SELECT * FROM shtb_test1 where KEY > 1000 and KEY < 2000; -CREATE VIEW shtb_test2_view2 AS SELECT * FROM shtb_test2 where KEY > 100 and KEY < 200; +CREATE TABLE shtb_test1_n1(KEY INT, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE; +CREATE TABLE shtb_test2_n1(KEY INT, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE; +CREATE VIEW shtb_test1_view1_n0 AS SELECT * FROM shtb_test1_n1 where KEY > 1000 and KEY < 2000; +CREATE VIEW shtb_test2_view2_n0 AS SELECT * FROM shtb_test2_n1 where KEY > 100 and KEY < 200; USE test1; SHOW VIEWS; @@ -33,24 +33,24 @@ SHOW VIEWS IN test2 LIKE "nomatch"; -- SHOW VIEWS from a database with a name that requires escaping CREATE DATABASE `database`; USE `database`; -CREATE TABLE foo(a INT); -CREATE VIEW fooview AS SELECT * FROM foo; +CREATE TABLE foo_n8(a INT); +CREATE VIEW fooview_n0 AS SELECT * FROM foo_n8; USE default; -SHOW VIEWS FROM `database` LIKE "fooview"; +SHOW VIEWS FROM `database` LIKE "fooview_n0"; -DROP VIEW fooview; -DROP TABLE foo; +DROP VIEW fooview_n0; +DROP TABLE foo_n8; USE test1; -DROP VIEW shtb_test1_view1; -DROP VIEW shtb_test1_view2; -DROP VIEW shtb_full_view2; -DROP TABLE shtb_test1; +DROP VIEW shtb_test1_view1_n0; +DROP VIEW shtb_test1_view2_n0; +DROP VIEW shtb_full_view2_n0; +DROP TABLE shtb_test1_n1; DROP DATABASE test1; USE test2; -DROP VIEW shtb_test1_view1; -DROP VIEW shtb_test2_view2; -DROP TABLE shtb_test1; -DROP TABLE shtb_test2; +DROP VIEW shtb_test1_view1_n0; +DROP VIEW shtb_test2_view2_n0; +DROP TABLE shtb_test1_n1; +DROP TABLE shtb_test2_n1; DROP DATABASE test2; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin.q b/ql/src/test/queries/clientpositive/skewjoin.q index 6e35e48..d7c0570 100644 --- a/ql/src/test/queries/clientpositive/skewjoin.q +++ b/ql/src/test/queries/clientpositive/skewjoin.q @@ -8,50 +8,50 @@ set hive.skewjoin.key = 2; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; -CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE; -CREATE TABLE T4(key STRING, val STRING) STORED AS TEXTFILE; -CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE T1_n128(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n76(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T3_n30(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T4_n17(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE dest_j1_n17(key INT, value STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T4; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n128; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n76; +LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3_n30; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T4_n17; EXPLAIN FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value; +INSERT OVERWRITE TABLE dest_j1_n17 SELECT src1.key, src2.value; FROM src src1 JOIN src src2 ON (src1.key = src2.key) -INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value; +INSERT OVERWRITE TABLE dest_j1_n17 SELECT src1.key, src2.value; -SELECT sum(hash(key)), sum(hash(value)) FROM dest_j1; +SELECT sum(hash(key)), sum(hash(value)) FROM dest_j1_n17; set hive.cbo.enable=false; EXPLAIN SELECT /*+ STREAMTABLE(a) */ * -FROM T1 a JOIN T2 b ON a.key = b.key - JOIN T3 c ON b.key = c.key - JOIN T4 d ON c.key = d.key; +FROM T1_n128 a JOIN T2_n76 b ON a.key = b.key + JOIN T3_n30 c ON b.key = c.key + JOIN T4_n17 d ON c.key = d.key; SELECT /*+ STREAMTABLE(a) */ * -FROM T1 a JOIN T2 b ON a.key = b.key - JOIN T3 c ON b.key = c.key - JOIN T4 d ON c.key = d.key; +FROM T1_n128 a JOIN T2_n76 b ON a.key = b.key + JOIN T3_n30 c ON b.key = c.key + JOIN T4_n17 d ON c.key = d.key; EXPLAIN SELECT /*+ STREAMTABLE(a,c) */ * -FROM T1 a JOIN T2 b ON a.key = b.key - JOIN T3 c ON b.key = c.key - JOIN T4 d ON c.key = d.key; +FROM T1_n128 a JOIN T2_n76 b ON a.key = b.key + JOIN T3_n30 c ON b.key = c.key + JOIN T4_n17 d ON c.key = d.key; SELECT /*+ STREAMTABLE(a,c) */ * -FROM T1 a JOIN T2 b ON a.key = b.key - JOIN T3 c ON b.key = c.key - JOIN T4 d ON c.key = d.key; +FROM T1_n128 a JOIN T2_n76 b ON a.key = b.key + JOIN T3_n30 c ON b.key = c.key + JOIN T4_n17 d ON c.key = d.key; -EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); -FROM T1 a JOIN src c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); +EXPLAIN FROM T1_n128 a JOIN src c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); +FROM T1_n128 a JOIN src c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); EXPLAIN FROM (SELECT src.* FROM src) x @@ -103,28 +103,28 @@ JOIN ON src1.c1 = src3.c5 AND src3.c5 < 80; EXPLAIN -SELECT /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) FROM T1 k LEFT OUTER JOIN T1 v ON k.key+1=v.key; -SELECT /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) FROM T1 k LEFT OUTER JOIN T1 v ON k.key+1=v.key; +SELECT /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) FROM T1_n128 k LEFT OUTER JOIN T1_n128 v ON k.key+1=v.key; +SELECT /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) FROM T1_n128 k LEFT OUTER JOIN T1_n128 v ON k.key+1=v.key; -select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.val; +select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1_n128 k join T1_n128 v on k.key=v.val; -select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.key; +select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1_n128 k join T1_n128 v on k.key=v.key; -select sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.key; +select sum(hash(k.key)), sum(hash(v.val)) from T1_n128 k join T1_n128 v on k.key=v.key; -select count(1) from T1 a join T1 b on a.key = b.key; +select count(1) from T1_n128 a join T1_n128 b on a.key = b.key; -FROM T1 a LEFT OUTER JOIN T2 c ON c.key+1=a.key SELECT sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); +FROM T1_n128 a LEFT OUTER JOIN T2_n76 c ON c.key+1=a.key SELECT sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); -FROM T1 a RIGHT OUTER JOIN T2 c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); +FROM T1_n128 a RIGHT OUTER JOIN T2_n76 c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); -FROM T1 a FULL OUTER JOIN T2 c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); +FROM T1_n128 a FULL OUTER JOIN T2_n76 c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); -SELECT sum(hash(src1.key)), sum(hash(src1.val)), sum(hash(src2.key)) FROM T1 src1 LEFT OUTER JOIN T2 src2 ON src1.key+1 = src2.key RIGHT OUTER JOIN T2 src3 ON src2.key = src3.key; +SELECT sum(hash(src1.key)), sum(hash(src1.val)), sum(hash(src2.key)) FROM T1_n128 src1 LEFT OUTER JOIN T2_n76 src2 ON src1.key+1 = src2.key RIGHT OUTER JOIN T2_n76 src3 ON src2.key = src3.key; -SELECT sum(hash(src1.key)), sum(hash(src1.val)), sum(hash(src2.key)) FROM T1 src1 JOIN T2 src2 ON src1.key+1 = src2.key JOIN T2 src3 ON src2.key = src3.key; +SELECT sum(hash(src1.key)), sum(hash(src1.val)), sum(hash(src2.key)) FROM T1_n128 src1 JOIN T2_n76 src2 ON src1.key+1 = src2.key JOIN T2_n76 src3 ON src2.key = src3.key; -select /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k left outer join T1 v on k.key+1=v.key; +select /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) from T1_n128 k left outer join T1_n128 v on k.key+1=v.key; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q index 70feb9d..4e8057b 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q @@ -2,15 +2,15 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n67(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n67; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n40(key STRING, val STRING) SKEWED BY (key) ON ((3)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n40; -- copy from skewjoinopt1 -- test compile time skew join and auto map join @@ -18,27 +18,27 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -- adding an order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n67 a JOIN T2_n40 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n67 a JOIN T2_n40 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n67 a RIGHT OUTER JOIN T2_n40 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n67 a RIGHT OUTER JOIN T2_n40 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- an aggregation at the end should not change anything EXPLAIN -SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n67 a JOIN T2_n40 b ON a.key = b.key; -SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n67 a JOIN T2_n40 b ON a.key = b.key; EXPLAIN -SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n67 a RIGHT OUTER JOIN T2_n40 b ON a.key = b.key; -SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n67 a RIGHT OUTER JOIN T2_n40 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q index abfde79..1d96347 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q @@ -2,21 +2,21 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE tmpT1_n0(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1_n0; -- testing skew on other data types - int -CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)); -INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1; +CREATE TABLE T1_n151(key INT, val STRING) SKEWED BY (key) ON ((2)); +INSERT OVERWRITE TABLE T1_n151 SELECT key, val FROM tmpT1_n0; -CREATE TABLE tmpT2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE tmpT2_n0(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2_n0; -CREATE TABLE T2(key INT, val STRING) SKEWED BY (key) ON ((3)); +CREATE TABLE T2_n88(key INT, val STRING) SKEWED BY (key) ON ((3)); -INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2; +INSERT OVERWRITE TABLE T2_n88 SELECT key, val FROM tmpT2_n0; -- copy from skewjoinopt15 -- test compile time skew join and auto map join @@ -27,27 +27,27 @@ INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n151 a JOIN T2_n88 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n151 a JOIN T2_n88 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n151 a RIGHT OUTER JOIN T2_n88 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n151 a RIGHT OUTER JOIN T2_n88 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- an aggregation at the end should not change anything EXPLAIN -SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n151 a JOIN T2_n88 b ON a.key = b.key; -SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n151 a JOIN T2_n88 b ON a.key = b.key; EXPLAIN -SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n151 a RIGHT OUTER JOIN T2_n88 b ON a.key = b.key; -SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n151 a RIGHT OUTER JOIN T2_n88 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q index f1f826b..77a94e6 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q @@ -2,15 +2,15 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n31(key STRING, val STRING) CLUSTERED BY (key) INTO 4 BUCKETS SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n31; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n21(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n21; -- copy from skewjoinopt19 -- test compile time skew join and auto map join @@ -20,7 +20,7 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n31 a JOIN T2_n21 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n31 a JOIN T2_n21 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q index f4f4489..581b9b3 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q @@ -2,15 +2,15 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n25(key STRING, val STRING) SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n25; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n16(key STRING, val STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n16; -- copy from skewjoinopt3 -- test compile time skew join and auto map join @@ -20,15 +20,15 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n25 a JOIN T2_n16 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n25 a JOIN T2_n16 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN -SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n25 a FULL OUTER JOIN T2_n16 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n25 a FULL OUTER JOIN T2_n16 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q index 280b8a5..1d5d2dd 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q @@ -2,15 +2,15 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n155(key STRING, val STRING) SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n155; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n91(key STRING, val STRING) SKEWED BY (key, val) ON ((3, 13), (8, 18)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n91; -- copy from skewjoinopt6 -- test compile time skew join and auto map join @@ -20,7 +20,7 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n155 a JOIN T2_n91 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n155 a JOIN T2_n91 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q index c9aa9e6..0badb37f 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q @@ -2,19 +2,19 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n124(key STRING, val STRING) SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n124; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n73(key STRING, val STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n73; -CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T3_n27(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; +LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3_n27; -- copy from skewjoinopt7 -- test compile time skew join and auto map join @@ -24,7 +24,7 @@ LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key; +SELECT a.*, b.*, c.* FROM T1_n124 a JOIN T2_n73 b ON a.key = b.key JOIN T3_n27 c on a.key = c.key; -SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +SELECT a.*, b.*, c.* FROM T1_n124 a JOIN T2_n73 b ON a.key = b.key JOIN T3_n27 c on a.key = c.key ORDER BY a.key, b.key, c.key, a.val, b.val, c.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q index 1c49b5f..4ad3006 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q @@ -2,14 +2,14 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n87(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n87; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n54(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n54; -- copy from skewjoinopt9 -- test compile time skew join and auto map join @@ -19,19 +19,19 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; EXPLAIN select * from ( -select key, val from T1 +select key, val from T1_n87 union all -select key, val from T1 +select key, val from T1_n87 ) subq1 -join T2 b on subq1.key = b.key; +join T2_n54 b on subq1.key = b.key; select * from ( -select key, val from T1 +select key, val from T1_n87 union all -select key, val from T1 +select key, val from T1_n87 ) subq1 -join T2 b on subq1.key = b.key +join T2_n54 b on subq1.key = b.key ORDER BY subq1.key, b.key, subq1.val, b.val; -- no skew join compile time optimization would be performed if one of the @@ -39,13 +39,13 @@ ORDER BY subq1.key, b.key, subq1.val, b.val; EXPLAIN select * from ( -select key, count(1) as cnt from T1 group by key +select key, count(1) as cnt from T1_n87 group by key ) subq1 -join T2 b on subq1.key = b.key; +join T2_n54 b on subq1.key = b.key; select * from ( -select key, count(1) as cnt from T1 group by key +select key, count(1) as cnt from T1_n87 group by key ) subq1 -join T2 b on subq1.key = b.key +join T2_n54 b on subq1.key = b.key ORDER BY subq1.key, b.key, subq1.cnt, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q index 2f263bf..9923c80 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q @@ -2,13 +2,13 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE T1(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE T1_n39(key STRING, value STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n39; -drop table array_valued_T1; -create table array_valued_T1 (key string, value array<string>) SKEWED BY (key) ON ((8)); -insert overwrite table array_valued_T1 select key, array(value) from T1; +drop table array_valued_T1_n39; +create table array_valued_T1_n39 (key string, value array<string>) SKEWED BY (key) ON ((8)); +insert overwrite table array_valued_T1_n39 select key, array(value) from T1_n39; -- copy from skewjoinopt10 -- test compile time skew join and auto map join @@ -16,7 +16,7 @@ insert overwrite table array_valued_T1 select key, array(value) from T1; -- adding a order by at the end to make the results deterministic explain -select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val; +select * from (select a.key as key, b.value as array_val from T1_n39 a join array_valued_T1_n39 b on a.key=b.key) i lateral view explode (array_val) c as val; -select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val +select * from (select a.key as key, b.value as array_val from T1_n39 a join array_valued_T1_n39 b on a.key=b.key) i lateral view explode (array_val) c as val ORDER BY key, val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q index 78dcc90..67fa143 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q @@ -2,14 +2,14 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n157(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n157; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n92(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n92; -- copy from skewjoinopt11 -- test compile time skew join and auto map join @@ -21,15 +21,15 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; EXPLAIN select * from ( - select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + select a.key, a.val as val1, b.val as val2 from T1_n157 a join T2_n92 b on a.key = b.key union all - select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + select a.key, a.val as val1, b.val as val2 from T1_n157 a join T2_n92 b on a.key = b.key ) subq1; select * from ( - select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + select a.key, a.val as val1, b.val as val2 from T1_n157 a join T2_n92 b on a.key = b.key union all - select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + select a.key, a.val as val1, b.val as val2 from T1_n157 a join T2_n92 b on a.key = b.key ) subq1 ORDER BY key, val1, val2; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q index 34a8f54..ea0dc86 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q @@ -2,18 +2,18 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T1_n29(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n29; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n20(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n20; -CREATE TABLE T3(key STRING, val STRING) +CREATE TABLE T3_n7(key STRING, val STRING) SKEWED BY (val) ON ((12)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; +LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3_n7; -- copy from skewjoinopt13 -- test compile time skew join and auto map join @@ -27,12 +27,12 @@ LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; EXPLAIN select * from -T1 a join T2 b on a.key = b.key -join T3 c on a.val = c.val; +T1_n29 a join T2_n20 b on a.key = b.key +join T3_n7 c on a.val = c.val; select * from -T1 a join T2 b on a.key = b.key -join T3 c on a.val = c.val +T1_n29 a join T2_n20 b on a.key = b.key +join T3_n7 c on a.val = c.val order by a.key, b.key, c.key, a.val, b.val, c.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q b/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q index c612974..ff164fc 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q +++ b/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q @@ -2,19 +2,19 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; set hive.auto.convert.join=true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n152(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n152; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n89(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n89; -CREATE TABLE T3(key STRING, val STRING) +CREATE TABLE T3_n36(key STRING, val STRING) SKEWED BY (val) ON ((12)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; +LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3_n36; -- copy from skewjoinopt14 -- test compile time skew join and auto map join @@ -29,12 +29,12 @@ LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; EXPLAIN select * from -T1 a join T2 b on a.key = b.key -join T3 c on a.val = c.val; +T1_n152 a join T2_n89 b on a.key = b.key +join T3_n36 c on a.val = c.val; select * from -T1 a join T2 b on a.key = b.key -join T3 c on a.val = c.val +T1_n152 a join T2_n89 b on a.key = b.key +join T3_n36 c on a.val = c.val order by a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q b/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q index 6d6b9cc..099b791 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q +++ b/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q @@ -15,8 +15,8 @@ INSERT INTO TABLE nonskewtable VALUES ("1", "val_1"); INSERT INTO TABLE nonskewtable VALUES ("2", "val_2"); EXPLAIN -CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key; -CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key; +CREATE TABLE result_n1 AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key; +CREATE TABLE result_n1 AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key; -SELECT * FROM result; +SELECT * FROM result_n1; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q b/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q index 5e688d7..9d5571b 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q +++ b/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q @@ -17,52 +17,52 @@ set mapred.input.dir.recursive=true; -- Since this test creates sub-directories for the output, it might be easier to run the test -- only on hadoop 23 -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n57(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n57; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n35(key STRING, val STRING) SKEWED BY (key) ON ((3)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n35; -- a simple join query with skew on both the tables on the join key EXPLAIN -SELECT * FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT * FROM T1_n57 a JOIN T2_n35 b ON a.key = b.key; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -SELECT * FROM T1 a JOIN T2 b ON a.key = b.key +SELECT * FROM T1_n57 a JOIN T2_n35 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n57 a RIGHT OUTER JOIN T2_n35 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n57 a RIGHT OUTER JOIN T2_n35 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -create table DEST1(key1 STRING, val1 STRING, key2 STRING, val2 STRING); +create table DEST1_n58(key1 STRING, val1 STRING, key2 STRING, val2 STRING); EXPLAIN -INSERT OVERWRITE TABLE DEST1 -SELECT * FROM T1 a JOIN T2 b ON a.key = b.key; +INSERT OVERWRITE TABLE DEST1_n58 +SELECT * FROM T1_n57 a JOIN T2_n35 b ON a.key = b.key; -INSERT OVERWRITE TABLE DEST1 -SELECT * FROM T1 a JOIN T2 b ON a.key = b.key; +INSERT OVERWRITE TABLE DEST1_n58 +SELECT * FROM T1_n57 a JOIN T2_n35 b ON a.key = b.key; -SELECT * FROM DEST1 +SELECT * FROM DEST1_n58 ORDER BY key1, key2, val1, val2; EXPLAIN -INSERT OVERWRITE TABLE DEST1 -SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +INSERT OVERWRITE TABLE DEST1_n58 +SELECT * FROM T1_n57 a RIGHT OUTER JOIN T2_n35 b ON a.key = b.key; -INSERT OVERWRITE TABLE DEST1 -SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +INSERT OVERWRITE TABLE DEST1_n58 +SELECT * FROM T1_n57 a RIGHT OUTER JOIN T2_n35 b ON a.key = b.key; -SELECT * FROM DEST1 +SELECT * FROM DEST1_n58 ORDER BY key1, key2, val1, val2; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q b/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q index 3e2610f..06ebfdc 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q +++ b/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q @@ -9,19 +9,19 @@ set hive.merge.mapredfiles=false; set hive.merge.sparkfiles=false; set mapred.input.dir.recursive=true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n8(key STRING, val STRING) SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n8; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n4(key STRING, val STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n4; -CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T3_n2(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; +LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3_n2; -- This is to test the union->selectstar->filesink and skewjoin optimization -- Union of 3 map-reduce subqueries is performed for the skew join @@ -32,9 +32,9 @@ LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; -- to run the test only on hadoop 23 EXPLAIN -SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key; +SELECT a.*, b.*, c.* FROM T1_n8 a JOIN T2_n4 b ON a.key = b.key JOIN T3_n2 c on a.key = c.key; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +SELECT a.*, b.*, c.* FROM T1_n8 a JOIN T2_n4 b ON a.key = b.key JOIN T3_n2 c on a.key = c.key ORDER BY a.key, b.key, c.key, a.val, b.val, c.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt1.q b/ql/src/test/queries/clientpositive/skewjoinopt1.q index e32a583..2679462 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt1.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt1.q @@ -1,41 +1,41 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n101(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n101; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n64(key STRING, val STRING) SKEWED BY (key) ON ((3)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n64; -- a simple join query with skew on both the tables on the join key -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n101 a JOIN T2_n64 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n101 a JOIN T2_n64 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n101 a RIGHT OUTER JOIN T2_n64 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n101 a RIGHT OUTER JOIN T2_n64 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- an aggregation at the end should not change anything EXPLAIN -SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n101 a JOIN T2_n64 b ON a.key = b.key; -SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n101 a JOIN T2_n64 b ON a.key = b.key; EXPLAIN -SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n101 a RIGHT OUTER JOIN T2_n64 b ON a.key = b.key; -SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n101 a RIGHT OUTER JOIN T2_n64 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt10.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt10.q b/ql/src/test/queries/clientpositive/skewjoinopt10.q index 16de44c..3695d17 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt10.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt10.q @@ -1,19 +1,19 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE T1_n88(key STRING, value STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n88; -drop table array_valued_T1; -create table array_valued_T1 (key string, value array<string>) SKEWED BY (key) ON ((8)); -insert overwrite table array_valued_T1 select key, array(value) from T1; +drop table array_valued_T1_n0; +create table array_valued_T1_n0 (key string, value array<string>) SKEWED BY (key) ON ((8)); +insert overwrite table array_valued_T1_n0 select key, array(value) from T1_n88; -- This test is to verify the skew join compile optimization when the join is followed by a lateral view -- adding a order by at the end to make the results deterministic explain -select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val; +select * from (select a.key as key, b.value as array_val from T1_n88 a join array_valued_T1_n0 b on a.key=b.key) i lateral view explode (array_val) c as val; -select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val +select * from (select a.key as key, b.value as array_val from T1_n88 a join array_valued_T1_n0 b on a.key=b.key) i lateral view explode (array_val) c as val ORDER BY key, val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt11.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt11.q b/ql/src/test/queries/clientpositive/skewjoinopt11.q index 880e8f1..074165f 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt11.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt11.q @@ -1,14 +1,14 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n122(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n122; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n72(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n72; -- This test is to verify the skew join compile optimization when the join is followed -- by a union. Both sides of a union consist of a join, which should have used @@ -18,15 +18,15 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; EXPLAIN select * from ( - select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + select a.key, a.val as val1, b.val as val2 from T1_n122 a join T2_n72 b on a.key = b.key union all - select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + select a.key, a.val as val1, b.val as val2 from T1_n122 a join T2_n72 b on a.key = b.key ) subq1; select * from ( - select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + select a.key, a.val as val1, b.val as val2 from T1_n122 a join T2_n72 b on a.key = b.key union all - select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + select a.key, a.val as val1, b.val as val2 from T1_n122 a join T2_n72 b on a.key = b.key ) subq1 ORDER BY key, val1, val2; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt12.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt12.q b/ql/src/test/queries/clientpositive/skewjoinopt12.q index 3d7b170..e1e4689 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt12.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt12.q @@ -1,22 +1,22 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n159(key STRING, val STRING) SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n159; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n93(key STRING, val STRING) SKEWED BY (key, val) ON ((3, 13), (8, 18)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n93; -- Both the join tables are skewed by 2 keys, and one of the skewed values -- is common to both the tables. The join key matches the skewed key set. -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; +SELECT a.*, b.* FROM T1_n159 a JOIN T2_n93 b ON a.key = b.key and a.val = b.val; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +SELECT a.*, b.* FROM T1_n159 a JOIN T2_n93 b ON a.key = b.key and a.val = b.val ORDER BY a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt13.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt13.q b/ql/src/test/queries/clientpositive/skewjoinopt13.q index 36ad3c6..ef6f7a5 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt13.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt13.q @@ -1,18 +1,18 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T1_n38(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n38; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n25(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n25; -CREATE TABLE T3(key STRING, val STRING) +CREATE TABLE T3_n9(key STRING, val STRING) SKEWED BY (val) ON ((12)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; +LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3_n9; -- This test is for skewed join compile time optimization for more than 2 tables. -- The join key for table 3 is different from the join key used for joining @@ -24,12 +24,12 @@ LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; EXPLAIN select * from -T1 a join T2 b on a.key = b.key -join T3 c on a.val = c.val; +T1_n38 a join T2_n25 b on a.key = b.key +join T3_n9 c on a.val = c.val; select * from -T1 a join T2 b on a.key = b.key -join T3 c on a.val = c.val +T1_n38 a join T2_n25 b on a.key = b.key +join T3_n9 c on a.val = c.val order by a.key, b.key, c.key, a.val, b.val, c.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt14.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt14.q b/ql/src/test/queries/clientpositive/skewjoinopt14.q index 94dab19..5fac94e 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt14.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt14.q @@ -1,19 +1,19 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n65(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n65; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n39(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n39; -CREATE TABLE T3(key STRING, val STRING) +CREATE TABLE T3_n14(key STRING, val STRING) SKEWED BY (val) ON ((12)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; +LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3_n14; -- This test is for skewed join compile time optimization for more than 2 tables. -- The join key for table 3 is different from the join key used for joining @@ -26,12 +26,12 @@ LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; EXPLAIN select * from -T1 a join T2 b on a.key = b.key -join T3 c on a.val = c.val; +T1_n65 a join T2_n39 b on a.key = b.key +join T3_n14 c on a.val = c.val; select * from -T1 a join T2 b on a.key = b.key -join T3 c on a.val = c.val +T1_n65 a join T2_n39 b on a.key = b.key +join T3_n14 c on a.val = c.val order by a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt15.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt15.q b/ql/src/test/queries/clientpositive/skewjoinopt15.q index ba86256..69934d8 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt15.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt15.q @@ -1,21 +1,21 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE tmpT1_n109(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1_n109; -- testing skew on other data types - int -CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)); -INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1; +CREATE TABLE T1_n109(key INT, val STRING) SKEWED BY (key) ON ((2)); +INSERT OVERWRITE TABLE T1_n109 SELECT key, val FROM tmpT1_n109; -CREATE TABLE tmpT2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE tmpT2_n66(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2_n66; -CREATE TABLE T2(key INT, val STRING) SKEWED BY (key) ON ((3)); +CREATE TABLE T2_n66(key INT, val STRING) SKEWED BY (key) ON ((3)); -INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2; +INSERT OVERWRITE TABLE T2_n66 SELECT key, val FROM tmpT2_n66; -- The skewed key is a integer column. -- Otherwise this test is similar to skewjoinopt1.q @@ -24,27 +24,27 @@ INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n109 a JOIN T2_n66 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n109 a JOIN T2_n66 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n109 a RIGHT OUTER JOIN T2_n66 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n109 a RIGHT OUTER JOIN T2_n66 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- an aggregation at the end should not change anything EXPLAIN -SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n109 a JOIN T2_n66 b ON a.key = b.key; -SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n109 a JOIN T2_n66 b ON a.key = b.key; EXPLAIN -SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n109 a RIGHT OUTER JOIN T2_n66 b ON a.key = b.key; -SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; +SELECT count(1) FROM T1_n109 a RIGHT OUTER JOIN T2_n66 b ON a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt16.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt16.q b/ql/src/test/queries/clientpositive/skewjoinopt16.q index 1345334..44ac708 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt16.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt16.q @@ -1,22 +1,22 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n154(key STRING, val STRING) SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n154; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n90(key STRING, val STRING) SKEWED BY (key) ON ((3)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n90; -- One of the tables is skewed by 2 columns, and the other table is -- skewed by one column. Ths join is performed on the both the columns -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; +SELECT a.*, b.* FROM T1_n154 a JOIN T2_n90 b ON a.key = b.key and a.val = b.val; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +SELECT a.*, b.* FROM T1_n154 a JOIN T2_n90 b ON a.key = b.key and a.val = b.val ORDER BY a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt17.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt17.q b/ql/src/test/queries/clientpositive/skewjoinopt17.q index 1c866a5..476e40e 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt17.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt17.q @@ -1,15 +1,15 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n27(key STRING, val STRING) SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n27; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n18(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n18; -- One of the tables is skewed by 2 columns, and the other table is -- skewed by one column. Ths join is performed on the first skewed column @@ -18,31 +18,31 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n27 a JOIN T2_n18 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n27 a JOIN T2_n18 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -DROP TABLE T1; -DROP TABLE T2; +DROP TABLE T1_n27; +DROP TABLE T2_n18; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n27(key STRING, val STRING) SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n27; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n18(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n18; -- One of the tables is skewed by 2 columns, and the other table is -- skewed by one column. Ths join is performed on the both the columns -- In this case, the skewed join value is repeated in the filter. EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; +SELECT a.*, b.* FROM T1_n27 a JOIN T2_n18 b ON a.key = b.key and a.val = b.val; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +SELECT a.*, b.* FROM T1_n27 a JOIN T2_n18 b ON a.key = b.key and a.val = b.val ORDER BY a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt18.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt18.q b/ql/src/test/queries/clientpositive/skewjoinopt18.q index a2a2618..cd32393 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt18.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt18.q @@ -1,21 +1,21 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE tmpT1_n1(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1_n1; -- testing skew on other data types - int -CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)); -INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1; +CREATE TABLE T1_n160(key INT, val STRING) SKEWED BY (key) ON ((2)); +INSERT OVERWRITE TABLE T1_n160 SELECT key, val FROM tmpT1_n1; -- Tke skewed column is same in both the tables, however it is -- INT in one of the tables, and STRING in the other table -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n94(key STRING, val STRING) SKEWED BY (key) ON ((3)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n94; -- Once HIVE-3445 is fixed, the compile time skew join optimization would be -- applicable here. Till the above jira is fixed, it would be performed as a @@ -23,7 +23,7 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n160 a JOIN T2_n94 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n160 a JOIN T2_n94 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt19.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt19.q b/ql/src/test/queries/clientpositive/skewjoinopt19.q index 784c4fc..02cadda 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt19.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt19.q @@ -1,15 +1,15 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n34(key STRING, val STRING) CLUSTERED BY (key) INTO 4 BUCKETS SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n34; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n22(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n22; -- add a test where the skewed key is also the bucketized key -- it should not matter, and the compile time skewed join @@ -17,7 +17,7 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n34 a JOIN T2_n22 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n34 a JOIN T2_n22 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt2.q b/ql/src/test/queries/clientpositive/skewjoinopt2.q index 7cca946..c1b59d3 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt2.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt2.q @@ -3,15 +3,15 @@ set hive.optimize.skewjoin.compiletime = true; -- SORT_QUERY_RESULTS -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n139(key STRING, val STRING) SKEWED BY (key) ON ((2), (7)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n139; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n81(key STRING, val STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n81; -- a simple query with skew on both the tables on the join key -- multiple skew values are present for the skewed keys @@ -20,27 +20,27 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val; +SELECT a.*, b.* FROM T1_n139 a JOIN T2_n81 b ON a.key = b.key and a.val = b.val; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +SELECT a.*, b.* FROM T1_n139 a JOIN T2_n81 b ON a.key = b.key and a.val = b.val ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN -SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val; +SELECT a.*, b.* FROM T1_n139 a LEFT OUTER JOIN T2_n81 b ON a.key = b.key and a.val = b.val; -SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val +SELECT a.*, b.* FROM T1_n139 a LEFT OUTER JOIN T2_n81 b ON a.key = b.key and a.val = b.val ORDER BY a.key, b.key, a.val, b.val; -- a group by at the end should not change anything EXPLAIN -SELECT a.key, count(1) FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key; +SELECT a.key, count(1) FROM T1_n139 a JOIN T2_n81 b ON a.key = b.key and a.val = b.val group by a.key; -SELECT a.key, count(1) FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key; +SELECT a.key, count(1) FROM T1_n139 a JOIN T2_n81 b ON a.key = b.key and a.val = b.val group by a.key; EXPLAIN -SELECT a.key, count(1) FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key; +SELECT a.key, count(1) FROM T1_n139 a LEFT OUTER JOIN T2_n81 b ON a.key = b.key and a.val = b.val group by a.key; -SELECT a.key, count(1) FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key; +SELECT a.key, count(1) FROM T1_n139 a LEFT OUTER JOIN T2_n81 b ON a.key = b.key and a.val = b.val group by a.key; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt20.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt20.q b/ql/src/test/queries/clientpositive/skewjoinopt20.q index 190cd98..160e5b8 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt20.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt20.q @@ -1,15 +1,15 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n103(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1_n103; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n65(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n65; -- add a test where the skewed key is also the bucketized/sorted key -- it should not matter, and the compile time skewed join @@ -17,7 +17,7 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n103 a JOIN T2_n65 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n103 a JOIN T2_n65 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt21.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt21.q b/ql/src/test/queries/clientpositive/skewjoinopt21.q index 7ff086d..692c87f 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt21.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt21.q @@ -1,15 +1,15 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n63(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n63; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n38(key STRING, val STRING) SKEWED BY (key) ON ((3)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n38; -- a simple join query with skew on both the tables on the join key -- adding a order by at the end to make the results deterministic @@ -17,15 +17,15 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; EXPLAIN SELECT a.*, b.* FROM - (SELECT key as k, val as v FROM T1) a + (SELECT key as k, val as v FROM T1_n63) a JOIN - (SELECT key as k, val as v FROM T2) b + (SELECT key as k, val as v FROM T2_n38) b ON a.k = b.k; SELECT a.*, b.* FROM - (SELECT key as k, val as v FROM T1) a + (SELECT key as k, val as v FROM T1_n63) a JOIN - (SELECT key as k, val as v FROM T2) b + (SELECT key as k, val as v FROM T2_n38) b ON a.k = b.k ORDER BY a.k, b.k, a.v, b.v; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt3.q b/ql/src/test/queries/clientpositive/skewjoinopt3.q index 35cc41a..cfb2080 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt3.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt3.q @@ -1,15 +1,15 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n12(key STRING, val STRING) SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n12; -CREATE TABLE T2(key STRING, val STRING) +CREATE TABLE T2_n7(key STRING, val STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n7; -- a simple query with skew on both the tables. One of the skewed -- value is common to both the tables. The skewed value should not be @@ -17,15 +17,15 @@ LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n12 a JOIN T2_n7 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n12 a JOIN T2_n7 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- test outer joins also EXPLAIN -SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n12 a FULL OUTER JOIN T2_n7 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n12 a FULL OUTER JOIN T2_n7 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/skewjoinopt4.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skewjoinopt4.q b/ql/src/test/queries/clientpositive/skewjoinopt4.q index b66a02a..ba47d64 100644 --- a/ql/src/test/queries/clientpositive/skewjoinopt4.q +++ b/ql/src/test/queries/clientpositive/skewjoinopt4.q @@ -1,28 +1,28 @@ set hive.mapred.mode=nonstrict; set hive.optimize.skewjoin.compiletime = true; -CREATE TABLE T1(key STRING, val STRING) +CREATE TABLE T1_n52(key STRING, val STRING) SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n52; -CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2_n32(key STRING, val STRING) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2_n32; -- only of the tables of the join (the left table of the join) is skewed -- the skewed filter would still be applied to both the tables -- adding a order by at the end to make the results deterministic EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +SELECT a.*, b.* FROM T1_n52 a JOIN T2_n32 b ON a.key = b.key; -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +SELECT a.*, b.* FROM T1_n52 a JOIN T2_n32 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val; -- the order of the join should not matter, just confirming EXPLAIN -SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key; +SELECT a.*, b.* FROM T2_n32 a JOIN T1_n52 b ON a.key = b.key; -SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key +SELECT a.*, b.* FROM T2_n32 a JOIN T1_n52 b ON a.key = b.key ORDER BY a.key, b.key, a.val, b.val;