http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_alter_table_update_columns.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_alter_table_update_columns.q b/ql/src/test/queries/clientpositive/avro_alter_table_update_columns.q
index 4b03fff..279d05d 100644
--- a/ql/src/test/queries/clientpositive/avro_alter_table_update_columns.q
+++ b/ql/src/test/queries/clientpositive/avro_alter_table_update_columns.q
@@ -1,6 +1,6 @@
-- verify schema changes introduced in avro.schema.literal/url sync with HMS if ALTER TABLE UPDATE COLUMNS is called
-CREATE TABLE avro_extschema_literal
+CREATE TABLE avro_extschema_literal_n1
STORED AS AVRO
TBLPROPERTIES ('avro.schema.literal'='{
"namespace": "org.apache.hive",
@@ -11,9 +11,9 @@ CREATE TABLE avro_extschema_literal
{ "name":"first_name", "type":"string" },
{ "name":"last_name", "type":"string" } ] }');
-DESCRIBE avro_extschema_literal;
+DESCRIBE avro_extschema_literal_n1;
-ALTER TABLE avro_extschema_literal SET
+ALTER TABLE avro_extschema_literal_n1 SET
TBLPROPERTIES ('avro.schema.literal'='{
"namespace": "org.apache.hive",
"name": "ext_schema",
@@ -21,12 +21,12 @@ ALTER TABLE avro_extschema_literal SET
"fields": [
{ "name":"newCol", "type":"int" } ] }');
-DESCRIBE avro_extschema_literal;
+DESCRIBE avro_extschema_literal_n1;
-ALTER TABLE avro_extschema_literal UNSET TBLPROPERTIES ('avro.schema.literal');
-DESCRIBE avro_extschema_literal;
+ALTER TABLE avro_extschema_literal_n1 UNSET TBLPROPERTIES ('avro.schema.literal');
+DESCRIBE avro_extschema_literal_n1;
-ALTER TABLE avro_extschema_literal SET
+ALTER TABLE avro_extschema_literal_n1 SET
TBLPROPERTIES ('avro.schema.literal'='{
"namespace": "org.apache.hive",
"name": "ext_schema",
@@ -34,36 +34,36 @@ ALTER TABLE avro_extschema_literal SET
"fields": [
{ "name":"newCol", "type":"int" } ] }');
-ALTER TABLE avro_extschema_literal UPDATE COLUMNS CASCADE;
-DESCRIBE avro_extschema_literal;
+ALTER TABLE avro_extschema_literal_n1 UPDATE COLUMNS CASCADE;
+DESCRIBE avro_extschema_literal_n1;
-ALTER TABLE avro_extschema_literal UNSET TBLPROPERTIES ('avro.schema.literal');
-DESCRIBE avro_extschema_literal;
+ALTER TABLE avro_extschema_literal_n1 UNSET TBLPROPERTIES ('avro.schema.literal');
+DESCRIBE avro_extschema_literal_n1;
dfs -cp ${system:hive.root}data/files/grad.avsc ${system:test.tmp.dir}/;
dfs -cp ${system:hive.root}data/files/grad2.avsc ${system:test.tmp.dir}/;
-CREATE TABLE avro_extschema_url
+CREATE TABLE avro_extschema_url_n1
STORED AS AVRO
TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/grad.avsc');
-DESCRIBE avro_extschema_url;
+DESCRIBE avro_extschema_url_n1;
-ALTER TABLE avro_extschema_url SET
+ALTER TABLE avro_extschema_url_n1 SET
TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/grad2.avsc');
-DESCRIBE avro_extschema_url;
+DESCRIBE avro_extschema_url_n1;
-ALTER TABLE avro_extschema_url UNSET TBLPROPERTIES ('avro.schema.url');
-DESCRIBE avro_extschema_url;
+ALTER TABLE avro_extschema_url_n1 UNSET TBLPROPERTIES ('avro.schema.url');
+DESCRIBE avro_extschema_url_n1;
-ALTER TABLE avro_extschema_url SET
+ALTER TABLE avro_extschema_url_n1 SET
TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/grad2.avsc');
-ALTER TABLE avro_extschema_url UPDATE COLUMNS CASCADE;
-DESCRIBE avro_extschema_url;
+ALTER TABLE avro_extschema_url_n1 UPDATE COLUMNS CASCADE;
+DESCRIBE avro_extschema_url_n1;
-ALTER TABLE avro_extschema_url UNSET TBLPROPERTIES ('avro.schema.url');
-DESCRIBE avro_extschema_url;
+ALTER TABLE avro_extschema_url_n1 UNSET TBLPROPERTIES ('avro.schema.url');
+DESCRIBE avro_extschema_url_n1;
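What these tests exercise is unchanged by the rename: once avro.schema.literal (or avro.schema.url) carries a new reader schema, ALTER TABLE ... UPDATE COLUMNS re-reads the column list from the AvroSerDe and persists it in the metastore. A minimal sketch of that flow, with a hypothetical table name t (not part of this patch):

CREATE TABLE t STORED AS AVRO
TBLPROPERTIES ('avro.schema.literal'='{
  "namespace": "org.example", "name": "t", "type": "record",
  "fields": [ { "name":"id", "type":"int" } ] }');
ALTER TABLE t SET TBLPROPERTIES ('avro.schema.literal'='{
  "namespace": "org.example", "name": "t", "type": "record",
  "fields": [ { "name":"id", "type":"int" }, { "name":"note", "type":"string" } ] }');
-- DESCRIBE already shows the SerDe-derived columns; UPDATE COLUMNS writes them
-- into HMS, and CASCADE applies the change to partition metadata as well.
ALTER TABLE t UPDATE COLUMNS CASCADE;
DESCRIBE t;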
http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_compression_enabled.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_compression_enabled.q b/ql/src/test/queries/clientpositive/avro_compression_enabled.q
index e0fa2b6..cd8095a 100644
--- a/ql/src/test/queries/clientpositive/avro_compression_enabled.q
+++ b/ql/src/test/queries/clientpositive/avro_compression_enabled.q
@@ -1,7 +1,7 @@
--! qt:dataset:src
-- verify that new joins bring in correct schemas (including evolved schemas)
-CREATE TABLE doctors4
+CREATE TABLE doctors4_n0
ROW FORMAT
SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
@@ -36,7 +36,7 @@ TBLPROPERTIES ('avro.schema.literal'='{
] }');
-LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4;
+LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4_n0;
set hive.exec.compress.output=true;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_compression_enabled_native.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_compression_enabled_native.q b/ql/src/test/queries/clientpositive/avro_compression_enabled_native.q
index b168880f..8c9e47d 100644
--- a/ql/src/test/queries/clientpositive/avro_compression_enabled_native.q
+++ b/ql/src/test/queries/clientpositive/avro_compression_enabled_native.q
@@ -1,14 +1,14 @@
--! qt:dataset:src
-- verify that new joins bring in correct schemas (including evolved schemas)
-CREATE TABLE doctors4 (
+CREATE TABLE doctors4_n1 (
number int,
first_name string,
last_name string,
extra_field string)
STORED AS AVRO;
-LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4;
+LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4_n1;
set hive.exec.compress.output=true;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_decimal.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_decimal.q b/ql/src/test/queries/clientpositive/avro_decimal.q
index 7dce0a6..3b25097 100644
--- a/ql/src/test/queries/clientpositive/avro_decimal.q
+++ b/ql/src/test/queries/clientpositive/avro_decimal.q
@@ -1,15 +1,15 @@
-DROP TABLE IF EXISTS `dec`;
+DROP TABLE IF EXISTS `dec_n0`;
-CREATE TABLE `dec`(name string, value decimal(8,4));
+CREATE TABLE `dec_n0`(name string, value decimal(8,4));
-LOAD DATA LOCAL INPATH '../../data/files/dec.txt' into TABLE `dec`;
+LOAD DATA LOCAL INPATH '../../data/files/dec.txt' into TABLE `dec_n0`;
-ANALYZE TABLE `dec` COMPUTE STATISTICS FOR COLUMNS value;
-DESC FORMATTED `dec` value;
+ANALYZE TABLE `dec_n0` COMPUTE STATISTICS FOR COLUMNS value;
+DESC FORMATTED `dec_n0` value;
-DROP TABLE IF EXISTS avro_dec;
+DROP TABLE IF EXISTS avro_dec_n0;
-CREATE TABLE `avro_dec`(
+CREATE TABLE `avro_dec_n0`(
`name` string COMMENT 'from deserializer',
`value` decimal(5,2) COMMENT 'from deserializer')
COMMENT 'just drop the schema right into the HQL'
@@ -24,15 +24,15 @@ TBLPROPERTIES (
'avro.schema.literal'='{\"namespace\":\"com.howdy\",\"name\":\"some_schema\",\"type\":\"record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":5,\"scale\":2}}]}'
);
-DESC avro_dec;
+DESC avro_dec_n0;
-INSERT OVERWRITE TABLE avro_dec select name, value from `dec`;
+INSERT OVERWRITE TABLE avro_dec_n0 select name, value from `dec_n0`;
-SELECT * FROM avro_dec;
+SELECT * FROM avro_dec_n0;
-DROP TABLE IF EXISTS avro_dec1;
+DROP TABLE IF EXISTS avro_dec1_n0;
-CREATE TABLE `avro_dec1`(
+CREATE TABLE `avro_dec1_n0`(
`name` string COMMENT 'from deserializer',
`value` decimal(4,1) COMMENT 'from deserializer')
COMMENT 'just drop the schema right into the HQL'
@@ -47,12 +47,12 @@ TBLPROPERTIES (
'avro.schema.literal'='{\"namespace\":\"com.howdy\",\"name\":\"some_schema\",\"type\":\"record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":4,\"scale\":1}}]}'
);
-DESC avro_dec1;
+DESC avro_dec1_n0;
-LOAD DATA LOCAL INPATH '../../data/files/dec.avro' into TABLE avro_dec1;
+LOAD DATA LOCAL INPATH '../../data/files/dec.avro' into TABLE avro_dec1_n0;
-select value from avro_dec1;
+select value from avro_dec1_n0;
-DROP TABLE `dec`;
-DROP TABLE avro_dec;
-DROP TABLE avro_dec1;
+DROP TABLE `dec_n0`;
+DROP TABLE avro_dec_n0;
+DROP TABLE avro_dec1_n0;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_joins.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_joins.q b/ql/src/test/queries/clientpositive/avro_joins.q
index 9d06881..8ada7e2 100644
--- a/ql/src/test/queries/clientpositive/avro_joins.q
+++ b/ql/src/test/queries/clientpositive/avro_joins.q
@@ -2,7 +2,7 @@
-- verify that new joins bring in correct schemas (including evolved schemas)
-CREATE TABLE doctors4
+CREATE TABLE doctors4_n2
ROW FORMAT
SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
@@ -37,11 +37,11 @@ TBLPROPERTIES ('avro.schema.literal'='{
] }');
-DESCRIBE doctors4;
+DESCRIBE doctors4_n2;
-LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4;
+LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4_n2;
-CREATE TABLE episodes
+CREATE TABLE episodes_n3
ROW FORMAT
SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
@@ -49,7 +49,7 @@ INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES ('avro.schema.literal'='{
"namespace": "testing.hive.avro.serde",
-  "name": "episodes",
+  "name": "episodes_n3",
"type": "record",
"fields": [
{
@@ -70,11 +70,11 @@ TBLPROPERTIES ('avro.schema.literal'='{
] }');
-DESCRIBE episodes;
+DESCRIBE episodes_n3;
-LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes;
+LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes_n3;
SELECT e.title, e.air_date, d.first_name, d.last_name, d.extra_field, e.air_date
-FROM doctors4 d JOIN episodes e ON (d.number=e.doctor);
+FROM doctors4_n2 d JOIN episodes_n3 e ON (d.number=e.doctor);
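Beyond the rename, avro_decimal.q is worth a note: Hive decimal(p,s) travels through Avro as bytes with the decimal logical type, and the precision and scale in the literal schema must agree with the column type. The pairing, sketched with hypothetical names:

CREATE TABLE dec_sketch (`value` decimal(5,2))
STORED AS AVRO
TBLPROPERTIES ('avro.schema.literal'=
  '{"namespace":"org.example","name":"dec_sketch","type":"record",
    "fields":[{"name":"value","type":{"type":"bytes","logicalType":"decimal","precision":5,"scale":2}}]}');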
http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_joins_native.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_joins_native.q b/ql/src/test/queries/clientpositive/avro_joins_native.q
index ca95c16..4a4be0f 100644
--- a/ql/src/test/queries/clientpositive/avro_joins_native.q
+++ b/ql/src/test/queries/clientpositive/avro_joins_native.q
@@ -12,15 +12,15 @@ DESCRIBE doctors4;
LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4;
-CREATE TABLE episodes (
+CREATE TABLE episodes_n1 (
title string COMMENT "episode title",
air_date string COMMENT "initial date",
doctor int COMMENT "main actor playing the Doctor in episode")
STORED AS AVRO;
-DESCRIBE episodes;
+DESCRIBE episodes_n1;
-LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes;
+LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes_n1;
SELECT e.title, e.air_date, d.first_name, d.last_name, e.air_date
-FROM doctors4 d JOIN episodes e ON (d.number=e.doctor);
\ No newline at end of file
+FROM doctors4 d JOIN episodes_n1 e ON (d.number=e.doctor);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_native.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_native.q b/ql/src/test/queries/clientpositive/avro_native.q
index 61d1bc6..9d25ab6 100644
--- a/ql/src/test/queries/clientpositive/avro_native.q
+++ b/ql/src/test/queries/clientpositive/avro_native.q
@@ -1,14 +1,14 @@
-- SORT_QUERY_RESULTS
-- verify that we can actually read avro files
-CREATE TABLE doctors (
+CREATE TABLE doctors_n4 (
number int,
first_name string,
last_name string)
STORED AS AVRO;
-DESCRIBE doctors;
+DESCRIBE doctors_n4;
-LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors;
+LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors_n4;
-SELECT * FROM doctors;
\ No newline at end of file
+SELECT * FROM doctors_n4;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_partitioned.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_partitioned.q b/ql/src/test/queries/clientpositive/avro_partitioned.q
index d475dff..bcc19e8 100644
--- a/ql/src/test/queries/clientpositive/avro_partitioned.q
+++ b/ql/src/test/queries/clientpositive/avro_partitioned.q
@@ -1,7 +1,7 @@
set hive.mapred.mode=nonstrict;
-- SORT_QUERY_RESULTS
-- Verify that table scans work with partitioned Avro tables
-CREATE TABLE episodes
+CREATE TABLE episodes_n2
ROW FORMAT
SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
@@ -9,7 +9,7 @@ INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES ('avro.schema.literal'='{
"namespace": "testing.hive.avro.serde",
-  "name": "episodes",
+  "name": "episodes_n2",
"type": "record",
"fields": [
{
@@ -30,9 +30,9 @@ TBLPROPERTIES ('avro.schema.literal'='{
] }');
-LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes;
+LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes_n2;
-CREATE TABLE episodes_partitioned
+CREATE TABLE episodes_partitioned_n1
PARTITIONED BY (doctor_pt INT)
ROW FORMAT
SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
@@ -41,7 +41,7 @@ INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES ('avro.schema.literal'='{
"namespace": "testing.hive.avro.serde",
-  "name": "episodes",
+  "name": "episodes_n2",
"type": "record",
"fields": [
{
@@ -63,19 +63,19 @@ TBLPROPERTIES ('avro.schema.literal'='{
}');
SET hive.exec.dynamic.partition.mode=nonstrict;
-INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes;
+INSERT OVERWRITE TABLE episodes_partitioned_n1 PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes_n2;
-SELECT * FROM episodes_partitioned WHERE doctor_pt > 6;
+SELECT * FROM episodes_partitioned_n1 WHERE doctor_pt > 6;
-- Verify that Fetch works in addition to Map
-SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5;
+SELECT * FROM episodes_partitioned_n1 ORDER BY air_date LIMIT 5;
-- Fetch w/filter to specific partition
-SELECT * FROM episodes_partitioned WHERE doctor_pt = 6;
+SELECT * FROM episodes_partitioned_n1 WHERE doctor_pt = 6;
-- Fetch w/non-existent partition
-SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5;
+SELECT * FROM episodes_partitioned_n1 WHERE doctor_pt = 7 LIMIT 5;
-- Alter table add an empty partition
-ALTER TABLE episodes_partitioned ADD PARTITION (doctor_pt=7);
-SELECT COUNT(*) FROM episodes_partitioned;
+ALTER TABLE episodes_partitioned_n1 ADD PARTITION (doctor_pt=7);
+SELECT COUNT(*) FROM episodes_partitioned_n1;
-- Verify that reading from an Avro partition works
-- even if it has an old schema relative to the current table level schema
@@ -87,7 +87,7 @@ ROW FORMAT
SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
WITH SERDEPROPERTIES ('avro.schema.literal'='{
"namespace": "testing.hive.avro.serde",
-  "name": "episodes",
+  "name": "episodes_n2",
"type": "record",
"fields": [
{
@@ -112,14 +112,14 @@ INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';
-- Insert data into a partition
-INSERT INTO TABLE episodes_partitioned_serdeproperties PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes;
+INSERT INTO TABLE episodes_partitioned_serdeproperties PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes_n2;
set hive.metastore.disallow.incompatible.col.type.changes=false;
-- Evolve the table schema by adding new array field "cast_and_crew"
ALTER TABLE episodes_partitioned_serdeproperties SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
WITH SERDEPROPERTIES ('avro.schema.literal'='{
"namespace": "testing.hive.avro.serde",
-  "name": "episodes",
+  "name": "episodes_n2",
"type": "record",
"fields": [
{
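avro_partitioned.q depends on dynamic partitioning: every partition column in its INSERT is dynamic (doctor_pt is computed per row), which is exactly the case hive.exec.dynamic.partition.mode=nonstrict permits. The shape of that insert, as a sketch with hypothetical names:

SET hive.exec.dynamic.partition.mode=nonstrict;
-- no static partition spec: pt is taken from the last column of the SELECT
INSERT OVERWRITE TABLE part_sketch PARTITION (pt)
SELECT col, col AS pt FROM src_sketch;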
http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_sanity_test.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_sanity_test.q b/ql/src/test/queries/clientpositive/avro_sanity_test.q
index 59e914b..4a51e67 100644
--- a/ql/src/test/queries/clientpositive/avro_sanity_test.q
+++ b/ql/src/test/queries/clientpositive/avro_sanity_test.q
@@ -1,7 +1,7 @@
-- SORT_QUERY_RESULTS
-- verify that we can actually read avro files
-CREATE TABLE doctors
+CREATE TABLE doctors_n1
ROW FORMAT
SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
@@ -9,7 +9,7 @@ INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES ('avro.schema.literal'='{
"namespace": "testing.hive.avro.serde",
-  "name": "doctors",
+  "name": "doctors_n1",
"type": "record",
"fields": [
{
@@ -30,9 +30,9 @@ TBLPROPERTIES ('avro.schema.literal'='{
] }');
-DESCRIBE doctors;
+DESCRIBE doctors_n1;
-LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors;
+LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors_n1;
-SELECT * FROM doctors;
+SELECT * FROM doctors_n1;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_schema_evolution_native.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_schema_evolution_native.q b/ql/src/test/queries/clientpositive/avro_schema_evolution_native.q
index efeb167..7ba35b9 100644
--- a/ql/src/test/queries/clientpositive/avro_schema_evolution_native.q
+++ b/ql/src/test/queries/clientpositive/avro_schema_evolution_native.q
@@ -2,15 +2,15 @@ set hive.cli.print.header=true;
set hive.mapred.mode=nonstrict;
-- SORT_QUERY_RESULTS
-- Verify that table scans work with partitioned Avro tables
-CREATE TABLE episodes (
+CREATE TABLE episodes_n0 (
title string COMMENT "episode title",
air_date string COMMENT "initial date",
doctor int COMMENT "main actor playing the Doctor in episode")
STORED AS AVRO;
-LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes;
+LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes_n0;
-CREATE TABLE episodes_partitioned (
+CREATE TABLE episodes_partitioned_n0 (
title string COMMENT "episode title",
air_date string COMMENT "initial date",
doctor int COMMENT "main actor playing the Doctor in episode")
@@ -18,16 +18,16 @@ PARTITIONED BY (doctor_pt INT)
STORED AS AVRO;
SET hive.exec.dynamic.partition.mode=nonstrict;
-INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt)
-SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes;
-DESCRIBE FORMATTED episodes_partitioned;
+INSERT OVERWRITE TABLE episodes_partitioned_n0 PARTITION (doctor_pt)
+SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes_n0;
+DESCRIBE FORMATTED episodes_partitioned_n0;
-ALTER TABLE episodes_partitioned
+ALTER TABLE episodes_partitioned_n0
SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
WITH
SERDEPROPERTIES ('avro.schema.literal'='{
"namespace": "testing.hive.avro.serde",
-  "name": "episodes",
+  "name": "episodes_n0",
"type": "record",
"fields": [
{
@@ -53,29 +53,29 @@ SERDEPROPERTIES ('avro.schema.literal'='{
}
] }');
-DESCRIBE FORMATTED episodes_partitioned;
+DESCRIBE FORMATTED episodes_partitioned_n0;
set hive.fetch.task.conversion=more;
EXPLAIN
-SELECT * FROM episodes_partitioned WHERE doctor_pt > 6;
+SELECT * FROM episodes_partitioned_n0 WHERE doctor_pt > 6;
-SELECT * FROM episodes_partitioned WHERE doctor_pt > 6;
+SELECT * FROM episodes_partitioned_n0 WHERE doctor_pt > 6;
-- Verify that Fetch works in addition to Map
-SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5;
+SELECT * FROM episodes_partitioned_n0 ORDER BY air_date LIMIT 5;
-- Fetch w/filter to specific partition
-SELECT * FROM episodes_partitioned WHERE doctor_pt = 6;
+SELECT * FROM episodes_partitioned_n0 WHERE doctor_pt = 6;
-- Fetch w/non-existent partition
-SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5;
+SELECT * FROM episodes_partitioned_n0 WHERE doctor_pt = 7 LIMIT 5;
set hive.fetch.task.conversion=none;
EXPLAIN
-SELECT * FROM episodes_partitioned WHERE doctor_pt > 6;
+SELECT * FROM episodes_partitioned_n0 WHERE doctor_pt > 6;
-SELECT * FROM episodes_partitioned WHERE doctor_pt > 6;
+SELECT * FROM episodes_partitioned_n0 WHERE doctor_pt > 6;
-SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5;
-SELECT * FROM episodes_partitioned WHERE doctor_pt = 6;
-SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5;
\ No newline at end of file
+SELECT * FROM episodes_partitioned_n0 ORDER BY air_date LIMIT 5;
+SELECT * FROM episodes_partitioned_n0 WHERE doctor_pt = 6;
+SELECT * FROM episodes_partitioned_n0 WHERE doctor_pt = 7 LIMIT 5;
\ No newline at end of file
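avro_schema_evolution_native.q repeats the same partition-filter queries under two settings of hive.fetch.task.conversion, so the evolved schema is read both by the client-side fetch path and by a real map task. The toggle pattern, sketched with a hypothetical table:

set hive.fetch.task.conversion=more;  -- simple SELECTs may run as a direct fetch
SELECT * FROM part_sketch WHERE pt > 6;
set hive.fetch.task.conversion=none;  -- force the same query through a map task
SELECT * FROM part_sketch WHERE pt > 6;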
http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
index e6b75c6..0f57607 100644
--- a/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
+++ b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
@@ -3,7 +3,7 @@ SET hive.optimize.update.table.properties.from.serde=true;
dfs -cp ${system:hive.root}data/files/table1.avsc ${system:test.tmp.dir}/;
-CREATE TABLE avro_extschema_literal
+CREATE TABLE avro_extschema_literal_n0
STORED AS AVRO
TBLPROPERTIES ('avro.schema.literal'='{
"namespace": "org.apache.hive",
@@ -14,18 +14,18 @@ TBLPROPERTIES ('avro.schema.literal'='{
{ "name":"col2", "type":"long" },
{ "name":"col3", "type":"string" } ] }');
-INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2');
+INSERT INTO TABLE avro_extschema_literal_n0 VALUES('s1', 1, 's2');
-DESCRIBE EXTENDED avro_extschema_literal;
-SELECT * FROM avro_extschema_literal;
+DESCRIBE EXTENDED avro_extschema_literal_n0;
+SELECT * FROM avro_extschema_literal_n0;
-CREATE TABLE avro_extschema_url
+CREATE TABLE avro_extschema_url_n0
STORED AS AVRO
TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc');
-INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2');
+INSERT INTO TABLE avro_extschema_url_n0 VALUES('s1', 1, 's2');
-DESCRIBE EXTENDED avro_extschema_url;
-SELECT * FROM avro_extschema_url;
+DESCRIBE EXTENDED avro_extschema_url_n0;
+SELECT * FROM avro_extschema_url_n0;
CREATE TABLE avro_extschema_literal1
ROW FORMAT SERDE

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avrocountemptytbl.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avrocountemptytbl.q b/ql/src/test/queries/clientpositive/avrocountemptytbl.q
index 98c3a45..163272a 100644
--- a/ql/src/test/queries/clientpositive/avrocountemptytbl.q
+++ b/ql/src/test/queries/clientpositive/avrocountemptytbl.q
@@ -1,11 +1,11 @@
--! qt:dataset:src
-- SORT_QUERY_RESULTS
-drop table if exists emptyavro;
-create table emptyavro (a int) stored as avro;
-select count(*) from emptyavro;
-insert into emptyavro select count(*) from emptyavro;
-select count(*) from emptyavro;
-insert into emptyavro select key from src where key = 100 limit 1;
-select * from emptyavro;
+drop table if exists emptyavro_n0;
+create table emptyavro_n0 (a int) stored as avro;
+select count(*) from emptyavro_n0;
+insert into emptyavro_n0 select count(*) from emptyavro_n0;
+select count(*) from emptyavro_n0;
+insert into emptyavro_n0 select key from src where key = 100 limit 1;
+select * from emptyavro_n0;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/avrotblsjoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avrotblsjoin.q b/ql/src/test/queries/clientpositive/avrotblsjoin.q
index 8c1f084..dedd137 100644
--- a/ql/src/test/queries/clientpositive/avrotblsjoin.q
+++ b/ql/src/test/queries/clientpositive/avrotblsjoin.q
@@ -1,10 +1,10 @@
-drop table if exists table1;
+drop table if exists table1_n1;
drop table if exists table1_1;
dfs -cp ${system:hive.root}data/files/table1.avsc ${system:test.tmp.dir}/;
dfs -cp ${system:hive.root}data/files/table1_1.avsc ${system:test.tmp.dir}/;
-create table table1
+create table table1_n1
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT
@@ -20,9 +20,9 @@ create table table1_1
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1_1.avsc');
-insert into table1 values ("1", "2", "3");
+insert into table1_n1 values ("1", "2", "3");
insert into table1_1 values (1, "2");
set hive.auto.convert.join=false;
set hive.strict.checks.type.safety=false;
set hive.mapred.mode=nonstrict;
-select table1.col1, table1_1.* from table1 join table1_1 on table1.col1=table1_1.col1 where table1_1.col1="1";
+select table1_n1.col1, table1_1.* from table1_n1 join table1_1 on table1_n1.col1=table1_1.col1 where table1_1.col1="1";

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/ba_table1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ba_table1.q b/ql/src/test/queries/clientpositive/ba_table1.q
index cce7ccb..158d987 100644
--- a/ql/src/test/queries/clientpositive/ba_table1.q
+++ b/ql/src/test/queries/clientpositive/ba_table1.q
@@ -1,16 +1,16 @@
--! qt:dataset:src
-- SORT_QUERY_RESULTS
-drop table ba_test;
+drop table ba_test_n4;
-- This query tests a) binary type works correctly in grammar b) string can be cast into binary c) binary can be stored in a table d) binary data can be loaded back again and queried d) order-by on a binary key
-create table ba_test (ba_key binary, ba_val binary) ;
+create table ba_test_n4 (ba_key binary, ba_val binary) ;
-describe extended ba_test;
+describe extended ba_test_n4;
-from src insert overwrite table ba_test select cast (src.key as binary), cast (src.value as binary);
+from src insert overwrite table ba_test_n4 select cast (src.key as binary), cast (src.value as binary);
-select * from ba_test tablesample (10 rows);
+select * from ba_test_n4 tablesample (10 rows);
-drop table ba_test;
+drop table ba_test_n4;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/ba_table2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ba_table2.q b/ql/src/test/queries/clientpositive/ba_table2.q
index 71689f0..570f846 100644
--- a/ql/src/test/queries/clientpositive/ba_table2.q
+++ b/ql/src/test/queries/clientpositive/ba_table2.q
@@ -1,19 +1,19 @@
--! qt:dataset:src
-- SORT_QUERY_RESULTS
-drop table ba_test;
+drop table ba_test_n3;
-- All the test in ba_test1.q + using LazyBinarySerde instead of LazySimpleSerde
-create table ba_test (ba_key binary, ba_val binary) ;
-alter table ba_test set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe';
+create table ba_test_n3 (ba_key binary, ba_val binary) ;
+alter table ba_test_n3 set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe';
-describe extended ba_test;
+describe extended ba_test_n3;
-from src insert overwrite table ba_test select cast (src.key as binary), cast (src.value as binary);
+from src insert overwrite table ba_test_n3 select cast (src.key as binary), cast (src.value as binary);
-select * from ba_test tablesample (10 rows);
+select * from ba_test_n3 tablesample (10 rows);
-drop table ba_test;
+drop table ba_test_n3;
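The ba_table tests renamed here all revolve around one round-trip: cast strings to binary, store them, read them back, and sort on the binary key. Condensed into a standalone sketch (hypothetical table name; the real tests also swap in different SerDes):

create table ba_sketch (ba_key binary, ba_val binary);
from src insert overwrite table ba_sketch
select cast (src.key as binary), cast (src.value as binary);
select * from ba_sketch tablesample (10 rows);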
http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/ba_table3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ba_table3.q b/ql/src/test/queries/clientpositive/ba_table3.q
index 0423327..6271eb7 100644
--- a/ql/src/test/queries/clientpositive/ba_table3.q
+++ b/ql/src/test/queries/clientpositive/ba_table3.q
@@ -1,14 +1,14 @@
--! qt:dataset:src
-drop table ba_test;
+drop table ba_test_n2;
-- All the tests of ba_table1.q + test for a group-by and aggregation on a binary key.
-create table ba_test (ba_key binary, ba_val binary) ;
+create table ba_test_n2 (ba_key binary, ba_val binary) ;
-from src insert overwrite table ba_test select cast (src.key as binary), cast (src.value as binary);
+from src insert overwrite table ba_test_n2 select cast (src.key as binary), cast (src.value as binary);
-select ba_test.ba_key, count(ba_test.ba_val) from ba_test group by ba_test.ba_key order by ba_key limit 5;
+select ba_test_n2.ba_key, count(ba_test_n2.ba_val) from ba_test_n2 group by ba_test_n2.ba_key order by ba_key limit 5;
-drop table ba_test;
+drop table ba_test_n2;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/ba_table_udfs.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ba_table_udfs.q b/ql/src/test/queries/clientpositive/ba_table_udfs.q
index 676e06a..73c01aa 100644
--- a/ql/src/test/queries/clientpositive/ba_table_udfs.q
+++ b/ql/src/test/queries/clientpositive/ba_table_udfs.q
@@ -3,11 +3,11 @@
USE default;
-CREATE TABLE dest1(bytes1 BINARY,
+CREATE TABLE dest1_n146(bytes1 BINARY,
bytes2 BINARY,
string STRING);
-FROM src INSERT OVERWRITE TABLE dest1
+FROM src INSERT OVERWRITE TABLE dest1_n146
SELECT
CAST(key AS BINARY),
CAST(value AS BINARY),
@@ -16,7 +16,7 @@ ORDER BY value
LIMIT 100;
--Add in a null row for good measure
-INSERT INTO TABLE dest1 SELECT NULL, NULL, NULL FROM dest1 LIMIT 1;
+INSERT INTO TABLE dest1_n146 SELECT NULL, NULL, NULL FROM dest1_n146 LIMIT 1;
-- this query tests all the udfs provided to work with binary types
@@ -35,4 +35,4 @@ SELECT
UNBASE64(BASE64(bytes1)),
HEX(ENCODE(string, 'US-ASCII')),
DECODE(ENCODE(string, 'US-ASCII'), 'US-ASCII')
-FROM dest1;
+FROM dest1_n146;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/binary_output_format.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/binary_output_format.q b/ql/src/test/queries/clientpositive/binary_output_format.q
index 9ead0cc..3c067ac 100644
--- a/ql/src/test/queries/clientpositive/binary_output_format.q
+++ b/ql/src/test/queries/clientpositive/binary_output_format.q
@@ -1,6 +1,6 @@
--! qt:dataset:src
-- Create a table with binary output format
-CREATE TABLE dest1(mydata STRING)
+CREATE TABLE dest1_n109(mydata STRING)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
@@ -12,7 +12,7 @@ STORED AS
-- Insert into that table using transform
EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE dest1
+INSERT OVERWRITE TABLE dest1_n109
SELECT TRANSFORM(*)
USING 'cat'
AS mydata STRING
@@ -24,7 +24,7 @@ SELECT TRANSFORM(*)
RECORDREADER 'org.apache.hadoop.hive.ql.exec.BinaryRecordReader'
FROM src;
-INSERT OVERWRITE TABLE dest1
+INSERT OVERWRITE TABLE dest1_n109
SELECT TRANSFORM(*)
USING 'cat'
AS mydata STRING
@@ -37,4 +37,4 @@ SELECT TRANSFORM(*)
FROM src;
-- Test the result
-SELECT * FROM dest1;
+SELECT * FROM dest1_n109;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/binary_table_bincolserde.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/binary_table_bincolserde.q b/ql/src/test/queries/clientpositive/binary_table_bincolserde.q
index 7b99e40..f842a5d 100644
--- a/ql/src/test/queries/clientpositive/binary_table_bincolserde.q
+++ b/ql/src/test/queries/clientpositive/binary_table_bincolserde.q
@@ -1,17 +1,17 @@
--! qt:dataset:src
-drop table ba_test;
+drop table ba_test_n1;
-- Tests everything in binary_table_colserde.q + uses LazyBinaryColumnarSerde
-create table ba_test (ba_key binary, ba_val binary) stored as rcfile;
-alter table ba_test set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
+create table ba_test_n1 (ba_key binary, ba_val binary) stored as rcfile;
+alter table ba_test_n1 set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
-describe extended ba_test;
+describe extended ba_test_n1;
-from src insert overwrite table ba_test select cast (src.key as binary), cast (src.value as binary);
+from src insert overwrite table ba_test_n1 select cast (src.key as binary), cast (src.value as binary);
-select ba_key, ba_val from ba_test order by ba_key limit 10;
+select ba_key, ba_val from ba_test_n1 order by ba_key limit 10;
-drop table ba_test;
+drop table ba_test_n1;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/binary_table_colserde.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/binary_table_colserde.q b/ql/src/test/queries/clientpositive/binary_table_colserde.q
index 71a4b22..ecd259a 100644
--- a/ql/src/test/queries/clientpositive/binary_table_colserde.q
+++ b/ql/src/test/queries/clientpositive/binary_table_colserde.q
@@ -1,17 +1,17 @@
--! qt:dataset:src
-drop table ba_test;
+drop table ba_test_n0;
-- Everything in ba_table1.q + columnar serde in RCFILE.
-create table ba_test (ba_key binary, ba_val binary) stored as rcfile;
-alter table ba_test set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
+create table ba_test_n0 (ba_key binary, ba_val binary) stored as rcfile;
+alter table ba_test_n0 set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
-describe extended ba_test;
+describe extended ba_test_n0;
-from src insert overwrite table ba_test select cast (src.key as binary), cast (src.value as binary);
+from src insert overwrite table ba_test_n0 select cast (src.key as binary), cast (src.value as binary);
-select ba_key, ba_val from ba_test order by ba_key limit 10;
+select ba_key, ba_val from ba_test_n0 order by ba_key limit 10;
-drop table ba_test;
+drop table ba_test_n0;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/binarysortable_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/binarysortable_1.q b/ql/src/test/queries/clientpositive/binarysortable_1.q
index 39c1d25..63d7424 100644
--- a/ql/src/test/queries/clientpositive/binarysortable_1.q
+++ b/ql/src/test/queries/clientpositive/binarysortable_1.q
@@ -1,21 +1,21 @@
-CREATE TABLE mytable(key STRING, value STRING)
+CREATE TABLE mytable_n0(key STRING, value STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '9'
STORED AS TEXTFILE;
-LOAD DATA LOCAL INPATH '../../data/files/string.txt' INTO TABLE mytable;
+LOAD DATA LOCAL INPATH '../../data/files/string.txt' INTO TABLE mytable_n0;
EXPLAIN
SELECT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(key, '\001', '^A'), '\0', '^@'), '\002', '^B'), value
FROM (
SELECT key, sum(value) as value
-  FROM mytable
+  FROM mytable_n0
GROUP BY key
) a;
SELECT REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(key, '\001', '^A'), '\0', '^@'), '\002', '^B'), value
FROM (
SELECT key, sum(value) as value
-  FROM mytable
+  FROM mytable_n0
GROUP BY key
) a;
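ba_table_udfs.q above leans on the fact that the binary UDF pairs are inverses, which is what lets the golden file assert on the original values. A small standalone sketch of those round-trips (hypothetical query, using the src qtest dataset):

SELECT
UNBASE64(BASE64(CAST('abc' AS BINARY))),       -- base64 round-trip returns the input bytes
DECODE(ENCODE('abc', 'US-ASCII'), 'US-ASCII'), -- encode/decode round-trip returns the input string
HEX(ENCODE('abc', 'US-ASCII'))                 -- hex of the encoded bytes: 616263
FROM src LIMIT 1;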
http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q b/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q
index 956a61f..cc4fc47 100644
--- a/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q
+++ b/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q
@@ -1,13 +1,13 @@
dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/bmjpathfilter;
-create table t1 (dt string) location '${system:test.tmp.dir}/bmjpathfilter/t1';
-Create table t2 (dt string) stored as orc;
+create table t1_n99 (dt string) location '${system:test.tmp.dir}/bmjpathfilter/t1';
+Create table t2_n62 (dt string) stored as orc;
dfs -touchz ${system:test.tmp.dir}/bmjpathfilter/t1/_SUCCESS;
SET hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
SET hive.optimize.bucketmapjoin=true;
-SELECT /*+ MAPJOIN(b) */ a.dt FROM t1 a JOIN t2 b ON (a.dt = b.dt);
+SELECT /*+ MAPJOIN(b) */ a.dt FROM t1_n99 a JOIN t2_n62 b ON (a.dt = b.dt);
SET hive.optimize.bucketmapjoin=false;
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/bucket_map_join_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_1.q b/ql/src/test/queries/clientpositive/bucket_map_join_1.q
index f170a71..58aa265 100644
--- a/ql/src/test/queries/clientpositive/bucket_map_join_1.q
+++ b/ql/src/test/queries/clientpositive/bucket_map_join_1.q
@@ -1,18 +1,18 @@
set hive.strict.checks.bucketing=false;
-drop table table1;
-drop table table2;
+drop table table1_n9;
+drop table table2_n5;
;
-create table table1(key string, value string) clustered by (key, value)
+create table table1_n9(key string, value string) clustered by (key, value)
sorted by (key, value) into 1 BUCKETS stored as textfile;
-create table table2(key string, value string) clustered by (value, key)
+create table table2_n5(key string, value string) clustered by (value, key)
sorted by (value, key) into 1 BUCKETS stored as textfile;
-load data local inpath '../../data/files/SortCol1Col2/000000_0' overwrite into table table1;
-load data local inpath '../../data/files/SortCol2Col1/000000_0' overwrite into table table2;
+load data local inpath '../../data/files/SortCol1Col2/000000_0' overwrite into table table1_n9;
+load data local inpath '../../data/files/SortCol2Col1/000000_0' overwrite into table table2_n5;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
@@ -22,7 +22,7 @@ set hive.cbo.enable=false;
-- Neither bucketed map-join, nor sort-merge join should be performed
explain extended
-select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value;
+select /*+ mapjoin(b) */ count(*) from table1_n9 a join table2_n5 b on a.key=b.key and a.value=b.value;
-select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value;
+select /*+ mapjoin(b) */ count(*) from table1_n9 a join table2_n5 b on a.key=b.key and a.value=b.value;
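bucket_map_join_1.q is the negative case: both flags below are on, yet its tables are bucketed/sorted as (key, value) versus (value, key), so neither a bucketed map join nor a sort-merge join is legal and the explain plan must show a plain join. The relevant knobs, sketched with hypothetical tables t1 and t2:

set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-- conversion also needs compatible bucketing: the join keys must match the bucket
-- columns in the same order, and one bucket count must be a multiple of the other
explain extended
select /*+ mapjoin(b) */ count(*) from t1 a join t2 b on a.key=b.key and a.value=b.value;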
http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/bucket_map_join_spark1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_spark1.q b/ql/src/test/queries/clientpositive/bucket_map_join_spark1.q
index 16b555d..778a468 100644
--- a/ql/src/test/queries/clientpositive/bucket_map_join_spark1.q
+++ b/ql/src/test/queries/clientpositive/bucket_map_join_spark1.q
@@ -2,56 +2,56 @@ SET hive.vectorized.execution.enabled=false;
set hive.strict.checks.bucketing=false;
set hive.mapred.mode=nonstrict;
-CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
-load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+CREATE TABLE srcbucket_mapjoin_part_n19 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n19 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n19 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n19 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n19 partition(ds='2008-04-08');
-CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
-load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
+CREATE TABLE srcbucket_mapjoin_part_2_n16 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n16 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n16 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_2_n16 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_2_n16 partition(ds='2008-04-08');
-create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint);
-create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint);
+create table bucketmapjoin_hash_result_1_n7 (key bigint , value1 bigint, value2 bigint);
+create table bucketmapjoin_hash_result_2_n7 (key bigint , value1 bigint, value2 bigint);
set hive.auto.convert.join = true;
set hive.optimize.bucketmapjoin = true;
-create table bucketmapjoin_tmp_result (key string , value1 string, value2 string);
+create table bucketmapjoin_tmp_result_n9 (key string , value1 string, value2 string);
explain extended
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n9
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n19 a join srcbucket_mapjoin_part_2_n16 b
on a.key=b.key and b.ds="2008-04-08";
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n9
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n19 a join srcbucket_mapjoin_part_2_n16 b
on a.key=b.key and b.ds="2008-04-08";
-select count(1) from bucketmapjoin_tmp_result;
-insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result;
+select count(1) from bucketmapjoin_tmp_result_n9;
+insert overwrite table bucketmapjoin_hash_result_1_n7
+select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n9;
set hive.optimize.bucketmapjoin = false;
explain extended
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n9
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n19 a join srcbucket_mapjoin_part_2_n16 b
on a.key=b.key and b.ds="2008-04-08";
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n9
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n19 a join srcbucket_mapjoin_part_2_n16 b
on a.key=b.key and b.ds="2008-04-08";
-select count(1) from bucketmapjoin_tmp_result;
-insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result;
+select count(1) from bucketmapjoin_tmp_result_n9;
+insert overwrite table bucketmapjoin_hash_result_1_n7
+select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n9;
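The bucket_map_join_spark*.q tests that follow all use the same verification recipe as spark1 above: materialize the join output, reduce it to an order-independent fingerprint with sum(hash(...)), then rerun with the optimization flipped and compare. The recipe, with hypothetical names:

create table result_sketch (key string, value1 string, value2 string);
set hive.optimize.bucketmapjoin = true;
insert overwrite table result_sketch
select a.key, a.value, b.value from big_sketch a join small_sketch b on a.key = b.key;
-- sum(hash(col)) is insensitive to row order, so the two runs can be diffed directly
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from result_sketch;
-- rerun with hive.optimize.bucketmapjoin = false; the fingerprints must match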
http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/bucket_map_join_spark2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_spark2.q b/ql/src/test/queries/clientpositive/bucket_map_join_spark2.q
index d8b6561..9e3222d 100644
--- a/ql/src/test/queries/clientpositive/bucket_map_join_spark2.q
+++ b/ql/src/test/queries/clientpositive/bucket_map_join_spark2.q
@@ -2,54 +2,54 @@ SET hive.vectorized.execution.enabled=false;
set hive.strict.checks.bucketing=false;
set hive.mapred.mode=nonstrict;
-CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
-load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+CREATE TABLE srcbucket_mapjoin_part_n12 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n12 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n12 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n12 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n12 partition(ds='2008-04-08');
-CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
+CREATE TABLE srcbucket_mapjoin_part_2_n10 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n10 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n10 partition(ds='2008-04-08');
-create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint);
-create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint);
+create table bucketmapjoin_hash_result_1_n3 (key bigint , value1 bigint, value2 bigint);
+create table bucketmapjoin_hash_result_2_n3 (key bigint , value1 bigint, value2 bigint);
set hive.auto.convert.join = true;
set hive.optimize.bucketmapjoin = true;
-create table bucketmapjoin_tmp_result (key string , value1 string, value2 string);
+create table bucketmapjoin_tmp_result_n5 (key string , value1 string, value2 string);
explain extended
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n5
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n12 a join srcbucket_mapjoin_part_2_n10 b
on a.key=b.key and b.ds="2008-04-08";
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n5
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n12 a join srcbucket_mapjoin_part_2_n10 b
on a.key=b.key and b.ds="2008-04-08";
-select count(1) from bucketmapjoin_tmp_result;
-insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result;
+select count(1) from bucketmapjoin_tmp_result_n5;
+insert overwrite table bucketmapjoin_hash_result_1_n3
+select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n5;
set hive.optimize.bucketmapjoin = true;
explain extended
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n5
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n12 a join srcbucket_mapjoin_part_2_n10 b
on a.key=b.key and b.ds="2008-04-08";
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n5
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n12 a join srcbucket_mapjoin_part_2_n10 b
on a.key=b.key and b.ds="2008-04-08";
-select count(1) from bucketmapjoin_tmp_result;
-insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result;
+select count(1) from bucketmapjoin_tmp_result_n5;
+insert overwrite table bucketmapjoin_hash_result_1_n3
+select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n5;

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/bucket_map_join_spark3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_spark3.q b/ql/src/test/queries/clientpositive/bucket_map_join_spark3.q
index 2adb777..d68a4a8 100644
--- a/ql/src/test/queries/clientpositive/bucket_map_join_spark3.q
+++ b/ql/src/test/queries/clientpositive/bucket_map_join_spark3.q
@@ -2,54 +2,54 @@ SET hive.vectorized.execution.enabled=false;
set hive.strict.checks.bucketing=false;
set hive.mapred.mode=nonstrict;
-CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+CREATE TABLE srcbucket_mapjoin_part_n4 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_n4 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_n4 partition(ds='2008-04-08');
-CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
-load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
+CREATE TABLE srcbucket_mapjoin_part_2_n3 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_2_n3 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_2_n3 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_2_n3 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_2_n3 partition(ds='2008-04-08');
-create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint);
-create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint);
+create table bucketmapjoin_hash_result_1_n1 (key bigint , value1 bigint, value2 bigint);
+create table bucketmapjoin_hash_result_2_n1 (key bigint , value1 bigint, value2 bigint);
set hive.auto.convert.join = true;
set hive.optimize.bucketmapjoin = true;
-create table bucketmapjoin_tmp_result (key string , value1 string, value2 string);
+create table bucketmapjoin_tmp_result_n1 (key string , value1 string, value2 string);
explain extended
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n1
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n4 a join srcbucket_mapjoin_part_2_n3 b
on a.key=b.key and b.ds="2008-04-08";
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n1
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n4 a join srcbucket_mapjoin_part_2_n3 b
on a.key=b.key and b.ds="2008-04-08";
-select count(1) from bucketmapjoin_tmp_result;
-insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result;
+select count(1) from bucketmapjoin_tmp_result_n1;
+insert overwrite table bucketmapjoin_hash_result_1_n1
+select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n1;
set hive.optimize.bucketmapjoin = false;
explain extended
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n1
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n4 a join srcbucket_mapjoin_part_2_n3 b
on a.key=b.key and b.ds="2008-04-08";
-insert overwrite table bucketmapjoin_tmp_result
+insert overwrite table bucketmapjoin_tmp_result_n1
select a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+from srcbucket_mapjoin_part_n4 a join srcbucket_mapjoin_part_2_n3 b
on a.key=b.key and b.ds="2008-04-08";
-select count(1) from bucketmapjoin_tmp_result;
-insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result;
+select count(1) from bucketmapjoin_tmp_result_n1;
+insert overwrite table bucketmapjoin_hash_result_1_n1
+select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result_n1;
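spark2 above and spark3 deliberately pair a 4-bucket table with a 2-bucket one, in both directions; as I read the conversion rule, the bucket map join stays legal because one bucket count is a multiple of the other, so each mapper of the big table loads only the matching bucket of the small one. Sketched with hypothetical tables:

CREATE TABLE big_sketch (key int, value string) CLUSTERED BY (key) INTO 4 BUCKETS;
CREATE TABLE small_sketch (key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS;
set hive.optimize.bucketmapjoin = true;
-- the mapper for big_sketch bucket i needs only small_sketch bucket (i mod 2)
explain extended
select a.key from big_sketch a join small_sketch b on a.key = b.key;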

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/bucket_map_join_spark4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_spark4.q b/ql/src/test/queries/clientpositive/bucket_map_join_spark4.q
index f3dc097..ffc64e3 100644
--- a/ql/src/test/queries/clientpositive/bucket_map_join_spark4.q
+++ b/ql/src/test/queries/clientpositive/bucket_map_join_spark4.q
@@ -7,14 +7,14 @@ set hive.exec.reducers.max = 1;
-- SORT_QUERY_RESULTS
-CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
-CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE tbl1_n0(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE tbl2_n0(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE tbl3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
-insert overwrite table tbl1
+insert overwrite table tbl1_n0
select * from src where key < 10;
-insert overwrite table tbl2
+insert overwrite table tbl2_n0
select * from src where key < 10;
insert overwrite table tbl3
@@ -30,16 +30,16 @@ set hive.optimize.bucketmapjoin = true;
explain extended
select a.key as key, a.value as val1, b.value as val2, c.value as val3
-from tbl1 a join tbl2 b on a.key = b.key join tbl3 c on a.value = c.value;
+from tbl1_n0 a join tbl2_n0 b on a.key = b.key join tbl3 c on a.value = c.value;
select a.key as key, a.value as val1, b.value as val2, c.value as val3
-from tbl1 a join tbl2 b on a.key = b.key join tbl3 c on a.value = c.value;
+from tbl1_n0 a join tbl2_n0 b on a.key = b.key join tbl3 c on a.value = c.value;
set hive.optimize.bucketmapjoin = false;
explain extended
select a.key as key, a.value as val1, b.value as val2, c.value as val3
-from tbl1 a join tbl2 b on a.key = b.key join tbl3 c on a.value = c.value;
+from tbl1_n0 a join tbl2_n0 b on a.key = b.key join tbl3 c on a.value = c.value;
select a.key as key, a.value as val1, b.value as val2, c.value as val3
-from tbl1 a join tbl2 b on a.key = b.key join tbl3 c on a.value = c.value;
+from tbl1_n0 a join tbl2_n0 b on a.key = b.key join tbl3 c on a.value = c.value;
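For reference, bucket map join conversion under hive.optimize.bucketmapjoin is only considered when the join keys are the tables' bucketing columns and, as far as the optimizer's rules go, the bucket counts of the joined tables are multiples of one another; the SORTED BY clause in the DDL above additionally makes the tables eligible for the sort-merge variants. A hedged sketch of an eligible pair, with illustrative names not taken from this patch:

  CREATE TABLE facts (key int, value string)
    CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS;
  CREATE TABLE dims (key int, value string)
    CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;  -- 4 is a multiple of 2
  set hive.optimize.bucketmapjoin = true;
  explain extended
  select f.key, d.value from facts f join dims d on f.key = d.key;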

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
index 5622ce2..8248035 100644
--- a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
+++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
@@ -6,46 +6,46 @@ set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000;
-CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
-CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE srcbucket_mapjoin_n14(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE tab_part_n9 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE srcbucket_mapjoin_part_n15 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
-load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n14 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n14 partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08');
set hive.optimize.bucketingsorting=false;
-insert overwrite table tab_part partition (ds='2008-04-08')
-select key,value from srcbucket_mapjoin_part;
+insert overwrite table tab_part_n9 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part_n15;
-CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-insert overwrite table tab partition (ds='2008-04-08')
-select key,value from srcbucket_mapjoin;
+CREATE TABLE tab_n8(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab_n8 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_n14;
-analyze table srcbucket_mapjoin compute statistics for columns;
-analyze table srcbucket_mapjoin_part compute statistics for columns;
-analyze table tab compute statistics for columns;
-analyze table tab_part compute statistics for columns;
+analyze table srcbucket_mapjoin_n14 compute statistics for columns;
+analyze table srcbucket_mapjoin_part_n15 compute statistics for columns;
+analyze table tab_n8 compute statistics for columns;
+analyze table tab_part_n9 compute statistics for columns;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, b.value
-from tab a join tab_part b on a.key = b.key order by a.key, a.value, b.value;
+from tab_n8 a join tab_part_n9 b on a.key = b.key order by a.key, a.value, b.value;
select a.key, a.value, b.value
-from tab a join tab_part b on a.key = b.key order by a.key, a.value, b.value;
+from tab_n8 a join tab_part_n9 b on a.key = b.key order by a.key, a.value, b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
-from tab a join tab_part b on a.key = b.key order by a.key, a.value, b.value;
+from tab_n8 a join tab_part_n9 b on a.key = b.key order by a.key, a.value, b.value;
select a.key, a.value, b.value
-from tab a join tab_part b on a.key = b.key order by a.key, a.value, b.value;
+from tab_n8 a join tab_part_n9 b on a.key = b.key order by a.key, a.value, b.value;
set hive.auto.convert.join.noconditionaltask.size=900;
@@ -53,228 +53,228 @@ set hive.convert.join.bucket.mapjoin.tez = false;
explain
select count(*)
from
-(select distinct key from tab_part) a join tab b on a.key = b.key;
+(select distinct key from tab_part_n9) a join tab_n8 b on a.key = b.key;
select count(*)
from
-(select distinct key from tab_part) a join tab b on a.key = b.key;
+(select distinct key from tab_part_n9) a join tab_n8 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select count(*)
from
-(select distinct key from tab_part) a join tab b on a.key = b.key;
+(select distinct key from tab_part_n9) a join tab_n8 b on a.key = b.key;
select count(*)
from
-(select distinct key from tab_part) a join tab b on a.key = b.key;
+(select distinct key from tab_part_n9) a join tab_n8 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select count(*)
from
-(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c
join
-tab_part d on c.key = d.key;
+tab_part_n9 d on c.key = d.key;
select count(*)
from
-(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c
join
-tab_part d on c.key = d.key;
+tab_part_n9 d on c.key = d.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select count(*)
from
-(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c
join
-tab_part d on c.key = d.key;
+tab_part_n9 d on c.key = d.key;
select count(*)
from
-(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c
join
-tab_part d on c.key = d.key;
+tab_part_n9 d on c.key = d.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select count(*)
from
-tab_part d
+tab_part_n9 d
join
-(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key;
+(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c on c.key = d.key;
select count(*)
from
-tab_part d
+tab_part_n9 d
join
-(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key;
+(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c on c.key = d.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select count(*)
from
-tab_part d
+tab_part_n9 d
join
-(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key;
+(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c on c.key = d.key;
select count(*)
from
-tab_part d
+tab_part_n9 d
join
-(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key;
+(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c on c.key = d.key;
--- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
+-- one side is really bucketed. srcbucket_mapjoin_n14 is not really a bucketed table.
-- In this case the sub-query is chosen as the big table.
set hive.convert.join.bucket.mapjoin.tez = false;
set hive.auto.convert.join.noconditionaltask.size=1000;
explain
select a.k1, a.v1, b.value
-from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a
-join tab b on a.k1 = b.key;
+from (select sum(substr(srcbucket_mapjoin_n14.value,5)) as v1, key as k1 from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
+join tab_n8 b on a.k1 = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.k1, a.v1, b.value
-from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a
- join tab b on a.k1 = b.key;
+from (select sum(substr(srcbucket_mapjoin_n14.value,5)) as v1, key as k1 from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
+ join tab_n8 b on a.k1 = b.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.k1, a.v1, b.value
-from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a
-join tab b on a.k1 = b.key;
+from (select sum(substr(tab_n8.value,5)) as v1, key as k1 from tab_part_n9 join tab_n8 on tab_part_n9.key = tab_n8.key GROUP BY tab_n8.key) a
+join tab_n8 b on a.k1 = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.k1, a.v1, b.value
-from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a
- join tab b on a.k1 = b.key;
+from (select sum(substr(tab_n8.value,5)) as v1, key as k1 from tab_part_n9 join tab_n8 on tab_part_n9.key = tab_n8.key GROUP BY tab_n8.key) a
+ join tab_n8 b on a.k1 = b.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.k1, a.v1, b.value
-from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a
-join tab_part b on a.k1 = b.key;
+from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab_n8 x join tab_n8 y on x.key = y.key GROUP BY x.key) a
+join tab_part_n9 b on a.k1 = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.k1, a.v1, b.value
-from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a
- join tab_part b on a.k1 = b.key;
+from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab_n8 x join tab_n8 y on x.key = y.key GROUP BY x.key) a
+ join tab_part_n9 b on a.k1 = b.key;
-- multi-way join
set hive.convert.join.bucket.mapjoin.tez = false;
set hive.auto.convert.join.noconditionaltask.size=20000;
explain
select a.key, a.value, b.value
-from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key;
+from tab_part_n9 a join tab_n8 b on a.key = b.key join tab_n8 c on a.key = c.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
-from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key;
+from tab_part_n9 a join tab_n8 b on a.key = b.key join tab_n8 c on a.key = c.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, c.value
-from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key;
+from (select x.key, x.value from tab_part_n9 x join tab_n8 y on x.key = y.key) a join tab_n8 c on a.key = c.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, c.value
-from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key;
+from (select x.key, x.value from tab_part_n9 x join tab_n8 y on x.key = y.key) a join tab_n8 c on a.key = c.key;
-- in this case sub-query is the small table
set hive.convert.join.bucket.mapjoin.tez = false;
set hive.auto.convert.join.noconditionaltask.size=900;
explain
select a.key, a.value, b.value
-from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a
-join tab_part b on a.key = b.key;
+from (select key, sum(substr(srcbucket_mapjoin_n14.value,5)) as value from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
+join tab_part_n9 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
-from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a
- join tab_part b on a.key = b.key;
+from (select key, sum(substr(srcbucket_mapjoin_n14.value,5)) as value from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
+ join tab_part_n9 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = false;
set hive.map.aggr=false;
explain
select a.key, a.value, b.value
-from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a
-join tab_part b on a.key = b.key;
+from (select key, sum(substr(srcbucket_mapjoin_n14.value,5)) as value from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
+join tab_part_n9 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
-from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a
- join tab_part b on a.key = b.key;
+from (select key, sum(substr(srcbucket_mapjoin_n14.value,5)) as value from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
+ join tab_part_n9 b on a.key = b.key;
-- join on non-bucketed column results in shuffle join.
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, b.value
-from tab a join tab_part b on a.value = b.value;
+from tab_n8 a join tab_part_n9 b on a.value = b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
-from tab a join tab_part b on a.value = b.value;
+from tab_n8 a join tab_part_n9 b on a.value = b.value;
-CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-insert overwrite table tab1
-select key,value from srcbucket_mapjoin;
+CREATE TABLE tab1_n4(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab1_n4
+select key,value from srcbucket_mapjoin_n14;
set hive.auto.convert.join.noconditionaltask.size=20000;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, b.value
-from tab1 a join tab_part b on a.key = b.key;
+from tab1_n4 a join tab_part_n9 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
-from tab1 a join tab_part b on a.key = b.key;
+from tab1_n4 a join tab_part_n9 b on a.key = b.key;
-- No map joins should be created.
set hive.convert.join.bucket.mapjoin.tez = false;
set hive.auto.convert.join.noconditionaltask.size=15000;
-explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value;
+explain select a.key, b.key from tab_part_n9 a join tab_part_n9 c on a.key = c.key join tab_part_n9 b on a.value = b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
-explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value;
+explain select a.key, b.key from tab_part_n9 a join tab_part_n9 c on a.key = c.key join tab_part_n9 b on a.value = b.value;
set hive.convert.join.bucket.mapjoin.tez = false;
-- This wont have any effect as the column ds is partition column which is not bucketed.
explain
select a.key, a.value, b.value
-from tab a join tab_part b on a.key = b.key and a.ds = b.ds;
+from tab_n8 a join tab_part_n9 b on a.key = b.key and a.ds = b.ds;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
-from tab a join tab_part b on a.key = b.key and a.ds = b.ds;
+from tab_n8 a join tab_part_n9 b on a.key = b.key and a.ds = b.ds;
-- HIVE-17792 : Enable Bucket Map Join when there are extra keys other than bucketed columns
set hive.auto.convert.join.noconditionaltask.size=20000;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, b.value
- from tab a join tab_part b on a.key = b.key and a.value = b.value;
+ from tab_n8 a join tab_part_n9 b on a.key = b.key and a.value = b.value;
select a.key, a.value, b.value
-from tab a join tab_part b on a.key = b.key and a.value = b.value
+from tab_n8 a join tab_part_n9 b on a.key = b.key and a.value = b.value
order by a.key, a.value, b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
- from tab a join tab_part b on a.key = b.key and a.value = b.value;
+ from tab_n8 a join tab_part_n9 b on a.key = b.key and a.value = b.value;
select a.key, a.value, b.value
-from tab a join tab_part b on a.key = b.key and a.value = b.value
+from tab_n8 a join tab_part_n9 b on a.key = b.key and a.value = b.value
order by a.key, a.value, b.value;
-- With non-bucketed small table
-CREATE TABLE tab2(key int, value string) PARTITIONED BY(ds STRING) STORED AS TEXTFILE;
-insert overwrite table tab2 partition (ds='2008-04-08')
-select key,value from srcbucket_mapjoin;
-analyze table tab2 compute statistics for columns;
+CREATE TABLE tab2_n4(key int, value string) PARTITIONED BY(ds STRING) STORED AS TEXTFILE;
+insert overwrite table tab2_n4 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_n14;
+analyze table tab2_n4 compute statistics for columns;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, b.value
- from tab2 a join tab_part b on a.key = b.key and a.value = b.value;
+ from tab2_n4 a join tab_part_n9 b on a.key = b.key and a.value = b.value;
select a.key, a.value, b.value
-from tab2 a join tab_part b on a.key = b.key and a.value = b.value
+from tab2_n4 a join tab_part_n9 b on a.key = b.key and a.value = b.value
order by a.key, a.value, b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
- from tab2 a join tab_part b on a.key = b.key and a.value = b.value;
+ from tab2_n4 a join tab_part_n9 b on a.key = b.key and a.value = b.value;
select a.key, a.value, b.value
-from tab2 a join tab_part b on a.key = b.key and a.value = b.value
+from tab2_n4 a join tab_part_n9 b on a.key = b.key and a.value = b.value
order by a.key, a.value, b.value;
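The tez1 test above repeatedly flips hive.convert.join.bucket.mapjoin.tez around otherwise identical EXPLAIN statements, so the .q.out captures both the baseline plan and the bucket-map-join plan for each query shape (sub-query as big side, sub-query as small side, multi-way join, join on a non-bucketed column, and the HIVE-17792 extra-key case). The skeleton of that pattern, with placeholder table names rather than the ones from this patch:

  set hive.convert.join.bucket.mapjoin.tez = false;
  explain select a.key, a.value, b.value
  from bucketed_a a join bucketed_b b on a.key = b.key;  -- baseline plan

  set hive.convert.join.bucket.mapjoin.tez = true;
  explain select a.key, a.value, b.value
  from bucketed_a a join bucketed_b b on a.key = b.key;  -- should show a bucket map join in the Tez plan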

http://git-wip-us.apache.org/repos/asf/hive/blob/38d3b8e1/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q
index 0ee49fc..7af8854 100644
--- a/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q
+++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q
@@ -8,94 +8,94 @@ set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000;
-CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
-CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE srcbucket_mapjoin_n18(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE tab_part_n11 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE srcbucket_mapjoin_part_n20 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
-load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n18 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n18 partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n20 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n20 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n20 partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n20 partition(ds='2008-04-08');
set hive.optimize.bucketingsorting=false;
-insert overwrite table tab_part partition (ds='2008-04-08')
-select key,value from srcbucket_mapjoin_part;
+insert overwrite table tab_part_n11 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part_n20;
-CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-insert overwrite table tab partition (ds='2008-04-08')
-select key,value from srcbucket_mapjoin;
+CREATE TABLE tab_n10(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab_n10 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_n18;
-analyze table srcbucket_mapjoin compute statistics for columns;
-analyze table srcbucket_mapjoin_part compute statistics for columns;
-analyze table tab compute statistics for columns;
-analyze table tab_part compute statistics for columns;
+analyze table srcbucket_mapjoin_n18 compute statistics for columns;
+analyze table srcbucket_mapjoin_part_n20 compute statistics for columns;
+analyze table tab_n10 compute statistics for columns;
+analyze table tab_part_n11 compute statistics for columns;
set hive.auto.convert.join.noconditionaltask.size=1500;
set hive.convert.join.bucket.mapjoin.tez = false;
-explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value;
+explain select a.key, b.key from tab_part_n11 a join tab_part_n11 c on a.key = c.key join tab_part_n11 b on a.value = b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
-explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value;
+explain select a.key, b.key from tab_part_n11 a join tab_part_n11 c on a.key = c.key join tab_part_n11 b on a.value = b.value;
-CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-insert overwrite table tab1
-select key,value from srcbucket_mapjoin;
-analyze table tab1 compute statistics for columns;
+CREATE TABLE tab1_n5(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab1_n5
+select key,value from srcbucket_mapjoin_n18;
+analyze table tab1_n5 compute statistics for columns;
-- A negative test as src is not bucketed.
set hive.auto.convert.join.noconditionaltask.size=20000;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, b.value
-from tab1 a join src b on a.key = b.key;
+from tab1_n5 a join src b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
-from tab1 a join src b on a.key = b.key;
+from tab1_n5 a join src b on a.key = b.key;
set hive.auto.convert.join.noconditionaltask.size=500;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
-select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key;
+select a.key, b.key from (select key from tab_part_n11 where key > 1) a join (select key from tab_part_n11 where key > 2) b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
-select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key;
+select a.key, b.key from (select key from tab_part_n11 where key > 1) a join (select key from tab_part_n11 where key > 2) b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
-select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key;
+select a.key, b.key from (select key from tab_part_n11 where key > 1) a left outer join (select key from tab_part_n11 where key > 2) b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
-select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key;
+select a.key, b.key from (select key from tab_part_n11 where key > 1) a left outer join (select key from tab_part_n11 where key > 2) b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
-select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key;
+select a.key, b.key from (select key from tab_part_n11 where key > 1) a right outer join (select key from tab_part_n11 where key > 2) b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
-select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key;
+select a.key, b.key from (select key from tab_part_n11 where key > 1) a right outer join (select key from tab_part_n11 where key > 2) b on a.key = b.key;
set hive.auto.convert.join.noconditionaltask.size=300;
set hive.convert.join.bucket.mapjoin.tez = false;
-explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key;
+explain select a.key, b.key from (select distinct key from tab_n10) a join tab_n10 b on b.key = a.key;
set hive.convert.join.bucket.mapjoin.tez = true;
-explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key;
+explain select a.key, b.key from (select distinct key from tab_n10) a join tab_n10 b on b.key = a.key;
set hive.convert.join.bucket.mapjoin.tez = false;
-explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value;
+explain select a.value, b.value from (select distinct value from tab_n10) a join tab_n10 b on b.key = a.value;
set hive.convert.join.bucket.mapjoin.tez = true;
-explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value;
+explain select a.value, b.value from (select distinct value from tab_n10) a join tab_n10 b on b.key = a.value;
--multi key
CREATE TABLE tab_part1 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key, value) INTO 4 BUCKETS STORED AS TEXTFILE;
insert overwrite table tab_part1 partition (ds='2008-04-08')
-select key,value from srcbucket_mapjoin_part;
+select key,value from srcbucket_mapjoin_part_n20;
analyze table tab_part1 compute statistics for columns;
set hive.auto.convert.join.noconditionaltask.size=20000;
@@ -103,12 +103,12 @@ set hive.convert.join.bucket.mapjoin.tez = false;
explain
select count(*)
from
-(select distinct key,value from tab_part) a join tab b on a.key = b.key and a.value = b.value;
+(select distinct key,value from tab_part_n11) a join tab_n10 b on a.key = b.key and a.value = b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select count(*)
from
-(select distinct key,value from tab_part) a join tab b on a.key = b.key and a.value = b.value;
+(select distinct key,value from tab_part_n11) a join tab_n10 b on a.key = b.key and a.value = b.value;
--HIVE-17939
@@ -122,10 +122,10 @@ explain select small.i, big.i from small,big where small.i=big.i;
select small.i, big.i from small,big where small.i=big.i order by small.i, big.i;
-- Bucket map join disabled for external tables
--- Create external table equivalent of tab_part
+-- Create external table equivalent of tab_part_n11
CREATE EXTERNAL TABLE tab_part_ext (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
insert overwrite table tab_part_ext partition (ds='2008-04-08')
-select key,value from srcbucket_mapjoin_part;
+select key,value from srcbucket_mapjoin_part_n20;
analyze table tab_part_ext compute statistics for columns;
set hive.auto.convert.join.noconditionaltask.size=1500;
@@ -133,7 +133,7 @@ set hive.convert.join.bucket.mapjoin.tez = true;
set hive.disable.unsafe.external.table.operations=true;
set test.comment=Bucket map join should work here;
set test.comment;
-explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value;
+explain select a.key, b.key from tab_part_n11 a join tab_part_n11 c on a.key = c.key join tab_part_n11 b on a.value = b.value;
set test.comment=External tables, bucket map join should be disabled;
set test.comment;
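The closing hunk pins down the external-table behavior: with hive.disable.unsafe.external.table.operations=true, the planner should refuse the bucket map join that the managed-table twin gets, presumably because files under an external location cannot be trusted to honor the declared bucketing. A sketch of the negative case, with an illustrative table name:

  set hive.disable.unsafe.external.table.operations=true;
  CREATE EXTERNAL TABLE ext_bucketed (key int, value string)
    CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
  explain select a.key, b.key
  from ext_bucketed a join ext_bucketed b on a.key = b.key;
  -- expected: a regular join, not a bucket map join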