HIVE-17216: Additional qtests for HoS DPP (Sahil Takiar, reviewed by Sergio Pena)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/642acdf7 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/642acdf7 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/642acdf7 Branch: refs/heads/hive-14535 Commit: 642acdf76a46d7bb5bd036510d5bcdaa85e9f613 Parents: 3573549 Author: Sahil Takiar <takiar.sa...@gmail.com> Authored: Thu Aug 31 09:05:16 2017 -0700 Committer: Sahil Takiar <stak...@cloudera.com> Committed: Thu Aug 31 09:06:10 2017 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../clientpositive/spark_constprog_dpp.q | 17 + .../spark_dynamic_partition_pruning.q | 40 +- .../spark_dynamic_partition_pruning_3.q | 236 ++- .../spark/spark_constprog_dpp.q.out | 138 ++ .../spark/spark_dynamic_partition_pruning.q.out | 351 ++-- .../spark_dynamic_partition_pruning_3.q.out | 1781 ++++++++++++++++-- 7 files changed, 2272 insertions(+), 292 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/642acdf7/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index fa6a2aa..6f2efa7 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1403,6 +1403,7 @@ miniSparkOnYarn.only.query.files=spark_combine_equivalent_work.q,\ spark_dynamic_partition_pruning_2.q,\ spark_dynamic_partition_pruning_3.q,\ spark_dynamic_partition_pruning_mapjoin_only.q,\ + spark_constprog_dpp.q,\ dynamic_rdd_cache.q, \ spark_multi_insert_parallel_orderby.q,\ spark_explainuser_1.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/642acdf7/ql/src/test/queries/clientpositive/spark_constprog_dpp.q 
---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/spark_constprog_dpp.q b/ql/src/test/queries/clientpositive/spark_constprog_dpp.q new file mode 100644 index 0000000..cbe6aa2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/spark_constprog_dpp.q @@ -0,0 +1,17 @@ +set hive.mapred.mode=nonstrict; +set hive.optimize.constant.propagation=true; +set hive.spark.dynamic.partition.pruning=true; + +drop table if exists tb1; +create table tb1 (id int); + +drop table if exists tb2; +create table tb2 (id smallint); + +explain +select a.id from tb1 a +left outer join +(select id from tb2 +union all +select 2 as id from tb2 limit 1) b +on a.id=b.id; http://git-wip-us.apache.org/repos/asf/hive/blob/642acdf7/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q index 78e7515..71a7399 100644 --- a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q +++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q @@ -16,7 +16,7 @@ create table srcpart_hour as select hr as hr, hr as hour from srcpart group by h create table srcpart_date_hour as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr; create table srcpart_double_hour as select (hr*2) as hr, hr as hour from srcpart group by hr; --- single column, single key +-- single column, single key -- join a partitioned table to a non-partitioned table, static filter on the non-partitioned table EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; set 
hive.spark.dynamic.partition.pruning=false; @@ -41,8 +41,8 @@ select count(*) from srcpart join srcpart_date on abs(negative(cast(concat(cast( EXPLAIN select count(*) from srcpart join srcpart_date on cast(day(srcpart.ds) as smallint) = cast(day(srcpart_date.ds) as decimal) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on cast(day(srcpart.ds) as smallint) = cast(day(srcpart_date.ds) as decimal) where srcpart_date.`date` = '2008-04-08'; --- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +-- multiple sources, single key -- filter partitioned table on both partitioned columns via join with non-partitioned table +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; @@ -54,7 +54,7 @@ where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; set hive.spark.dynamic.partition.pruning=true; select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; --- multiple columns single source +-- multiple columns single source -- filter partitioned table on both partitioned columns via join with non-partitioned table, filter non-partitioned table EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and 
srcpart_date_hour.hour = 11; set hive.spark.dynamic.partition.pruning=false; @@ -63,7 +63,7 @@ select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_dat set hive.spark.dynamic.partition.pruning=true; select count(*) from srcpart where ds = '2008-04-08' and hr = 11; --- empty set +-- empty set -- join a partitioned table to a non-partitioned table, static filter on the non-partitioned table that doesn't match anything EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; set hive.spark.dynamic.partition.pruning=false; @@ -72,7 +72,7 @@ select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) set hive.spark.dynamic.partition.pruning=true; select count(*) from srcpart where ds = 'I DONT EXIST'; --- expressions +-- expressions -- triggers DPP with various expressions - e.g. 
cast, multiplication, division EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; @@ -89,8 +89,7 @@ select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as s set hive.spark.dynamic.partition.pruning=true; select count(*) from srcpart where cast(hr as string) = 11; - --- parent is reduce tasks +-- parent is reduce tasks -- join a partitioned table to a non-partitioned table, where the non-partitioned table is a subquery, static filter on the subquery EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; @@ -155,8 +154,7 @@ select count(*) from srcpart where ds = '2008-04-08' and hr = 11; -- empty set EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; --- Disabled until TEZ-1486 is fixed --- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; -- expressions EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; @@ -170,10 +168,6 @@ EXPLAIN select count(*) from srcpart join (select ds as 
ds, ds as `date` from sr select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; --- single column, single key, udf with typechange -EXPLAIN select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08'; -select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08'; - -- left join EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; @@ -188,16 +182,26 @@ select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; --- Disabled until TEZ-1486 is fixed --- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) --- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; -- union + subquery EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +-- different file 
format +create table srcpart_parquet (key int, value string) partitioned by (ds string, hr int) stored as parquet; + +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=false; +set hive.exec.max.dynamic.partitions=1000; + +insert into table srcpart_parquet partition (ds, hr) select key, value, ds, hr from srcpart; +EXPLAIN select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11; -drop table srcpart_orc; +drop table srcpart_parquet; drop table srcpart_date; drop table srcpart_hour; drop table srcpart_date_hour; http://git-wip-us.apache.org/repos/asf/hive/blob/642acdf7/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q index ac4ff93..8863cf4 100644 --- a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q +++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q @@ -1,16 +1,228 @@ -set hive.spark.dynamic.partition.pruning=true; -set hive.auto.convert.join=true; +CREATE TABLE partitioned_table1 (col int) PARTITIONED BY (part_col int); +CREATE TABLE partitioned_table2 (col int) PARTITIONED BY (part_col int); +CREATE TABLE partitioned_table3 (col int) PARTITIONED BY (part_col 
int); +CREATE TABLE partitioned_table4 (col int) PARTITIONED BY (part_col1 int, part_col2 int); +CREATE TABLE partitioned_table5 (col int) PARTITIONED BY (part_col1 int, part_col2 int); -create table partitioned_table1 (col int) partitioned by (part_col int); -create table partitioned_table2 (col int) partitioned by (part_col int); -create table regular_table (col int); -insert into table regular_table values (1); +CREATE TABLE regular_table1 (col1 int, col2 int); +CREATE TABLE regular_table2 (col1 int, col2 int); -alter table partitioned_table1 add partition (part_col = 1); -insert into table partitioned_table1 partition (part_col = 1) values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 1); +ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 2); +ALTER TABLE partitioned_table1 ADD PARTITION (part_col = 3); -alter table partitioned_table2 add partition (part_col = 1); -insert into table partitioned_table2 partition (part_col = 1) values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 1); +ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 2); +ALTER TABLE partitioned_table2 ADD PARTITION (part_col = 3); -explain select * from partitioned_table1, partitioned_table2 where partitioned_table1.part_col = partitioned_table2.part_col; -explain select * from partitioned_table1 where partitioned_table1.part_col in (select regular_table.col from regular_table join partitioned_table2 on regular_table.col = partitioned_table2.part_col); \ No newline at end of file +ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 1); +ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 2); +ALTER TABLE partitioned_table3 ADD PARTITION (part_col = 3); + +ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 1, part_col2 = 1); +ALTER TABLE partitioned_table4 ADD PARTITION (part_col1 = 2, part_col2 = 2); +ALTER TABLE partitioned_table4 ADD 
PARTITION (part_col1 = 3, part_col2 = 3); + +ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 1, part_col2 = 1); +ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 2, part_col2 = 2); +ALTER TABLE partitioned_table5 ADD PARTITION (part_col1 = 3, part_col2 = 3); + +INSERT INTO TABLE regular_table1 VALUES (0, 0), (1, 1), (2, 2); +INSERT INTO TABLE regular_table2 VALUES (0, 0), (1, 1), (2, 2); + +INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table1 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6); + +INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table2 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6); + +INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 1) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 2) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table3 PARTITION (part_col = 3) VALUES (1), (2), (3), (4), (5), (6); + +INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 1, part_col2 = 1) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 2, part_col2 = 2) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table4 PARTITION (part_col1 = 3, part_col2 = 3) VALUES (1), (2), (3), (4), (5), (6); + +INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 1, part_col2 = 1) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 2, part_col2 = 2) VALUES (1), (2), (3), (4), (5), (6); +INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 3, part_col2 = 3) VALUES (1), (2), 
(3), (4), (5), (6); + +SET hive.spark.dynamic.partition.pruning.map.join.only=true; +SET hive.strict.checks.cartesian.product=false; +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask.size=15; -- ensure the partitioned tables are treated as big tables + +-- left semi join where the subquery is a join between a partitioned and a non-partitioned table +EXPLAIN SELECT count(*) FROM partitioned_table1 WHERE partitioned_table1.part_col IN ( +SELECT regular_table1.col1 FROM regular_table1 JOIN partitioned_table2 ON +regular_table1.col1 = partitioned_table2.part_col AND partitioned_table2.col > 3 AND regular_table1.col1 > 1); + +SELECT count(*) +FROM partitioned_table1 +WHERE partitioned_table1.part_col IN (SELECT regular_table1.col1 + FROM regular_table1 + JOIN partitioned_table2 + ON + regular_table1.col1 = partitioned_table2.part_col AND partitioned_table2.col > 3 AND regular_table1.col1 > 1); + +-- three-table join with dpp against one partitioned table +EXPLAIN SELECT count(*) FROM partitioned_table1, regular_table1 rt1, +regular_table1 rt2 WHERE rt1.col1 = partitioned_table1.part_col AND rt2.col1 = +partitioned_table1.part_col AND rt1.col2 > 0 AND rt2.col2 > 1; + +SELECT count(*) +FROM partitioned_table1, + regular_table1 rt1, + regular_table1 rt2 +WHERE rt1.col1 = partitioned_table1.part_col + AND rt2.col1 = partitioned_table1.part_col + AND rt1.col2 > 0 + AND rt2.col2 > 1; + +-- four-table join with dpp against two separate partitioned tables +EXPLAIN SELECT count(*) FROM partitioned_table1, partitioned_table2, regular_table1 rt1 +, regular_table1 rt2 WHERE rt1.col1 = partitioned_table1.part_col AND rt2.col1 = +partitioned_table2.part_col AND rt1.col2 > 0 AND rt2.col2 > 1; + +SELECT count(*) +FROM partitioned_table1, + partitioned_table2, + regular_table1 rt1, + regular_table1 rt2 +WHERE rt1.col1 = partitioned_table1.part_col + AND rt2.col1 = partitioned_table2.part_col + AND rt1.col2 > 0 + AND rt2.col2 > 1; + +-- dpp with between 
filter +EXPLAIN SELECT count(*) FROM regular_table1, partitioned_table1 WHERE regular_table1.col1 += partitioned_table1.part_col AND regular_table1.col2 BETWEEN 1 AND 3; + +SELECT count(*) +FROM regular_table1, + partitioned_table1 +WHERE regular_table1.col1 = partitioned_table1.part_col + AND regular_table1.col2 BETWEEN 1 AND 3; + +-- dpp with cte +EXPLAIN WITH q1 AS (SELECT regular_table1.col1 AS col FROM regular_table1 WHERE +regular_table1.col2 > 1), q2 AS (SELECT partitioned_table1.part_col AS col FROM +partitioned_table1 WHERE partitioned_table1.col > 1) SELECT count(*) FROM q1 JOIN q2 ON +q1.col = q2.col; + +WITH q1 + AS (SELECT regular_table1.col1 AS col + FROM regular_table1 + WHERE regular_table1.col2 > 1), + q2 + AS (SELECT partitioned_table1.part_col AS col + FROM partitioned_table1 + WHERE partitioned_table1.col > 1) +SELECT count(*) +FROM q1 + JOIN q2 + ON q1.col = q2.col; + +-- join two partitioned tables with a filter +EXPLAIN SELECT count(*) FROM partitioned_table1, partitioned_table2 WHERE +partitioned_table1.part_col = partitioned_table2.part_col AND partitioned_table2.col > 1; + +SELECT count(*) +FROM partitioned_table1, + partitioned_table2 +WHERE partitioned_table1.part_col = partitioned_table2.part_col + AND partitioned_table2.col > 1; + +-- dpp between two partitioned tables, both with multiple partition columns +SET hive.auto.convert.join.noconditionaltask.size=150; -- set auto convert size to a higher value so map-joins are triggered for the partitioned tables + +EXPLAIN SELECT count(*) FROM partitioned_table4, partitioned_table5 WHERE +partitioned_table4.part_col1 = partitioned_table5.part_col1 AND +partitioned_table4.part_col2 = partitioned_table5.part_col2; + +SELECT count(*) +FROM partitioned_table4, + partitioned_table5 +WHERE partitioned_table4.part_col1 = partitioned_table5.part_col1 + AND partitioned_table4.part_col2 = partitioned_table5.part_col2; + +-- dpp is pushed through multiple levels of joins +EXPLAIN SELECT count(*) FROM 
partitioned_table1 JOIN regular_table1 ON +partitioned_table1.part_col = regular_table1.col1 JOIN regular_table2 ON +regular_table1.col1 = regular_table2.col1; + +SELECT count(*) +FROM partitioned_table1 + JOIN regular_table1 + ON partitioned_table1.part_col = regular_table1.col1 + JOIN regular_table2 + ON regular_table1.col1 = regular_table2.col1; + +SET hive.auto.convert.join.noconditionaltask.size=15; -- reset auto convert size to previous value + +-- three-way join where the partitioned table is the smallest table +-- disabled until HIVE-17225 is fixed +-- EXPLAIN SELECT * FROM partitioned_table1, regular_table1 rt1, regular_table2 rt2 +-- WHERE rt1.col1 = partitioned_table1.part_col AND rt2.col1 = +-- partitioned_table1.part_col AND partitioned_table1.col > 3; + +-- SELECT * +-- FROM partitioned_table1, +-- regular_table1 rt1, +-- regular_table2 rt2 +-- WHERE rt1.col1 = partitioned_table1.part_col +-- AND rt2.col1 = partitioned_table1.part_col AND partitioned_table1.col > 3; + +-- dpp is pushed to partition columns that are added to each other +-- disabled until HIVE-17244 is fixed +-- EXPLAIN SELECT count(*) FROM partitioned_table4 pt4 JOIN regular_table1 rt1 +-- ON pt4.part_col1 + pt4.part_col2 = rt1.col1 + 1; + +-- SELECT count(*) +-- FROM partitioned_table4 pt4 +-- JOIN regular_table1 rt1 +-- ON pt4.part_col1 + pt4.part_col2 = rt1.col1 + 1; + +-- dpp pushed to all union operands +-- disabled until HIVE-17239 is fixed +-- EXPLAIN SELECT count(*) FROM (SELECT part_col FROM partitioned_table1 UNION ALL SELECT +-- part_col FROM partitioned_table2) q1 JOIN regular_table1 JOIN regular_table2 +-- WHERE q1.part_col = regular_table1.col1 AND q1.part_col = regular_table2.col1; + +-- SELECT count(*) +-- FROM ( +-- SELECT part_col +-- FROM partitioned_table1 +-- UNION ALL +-- SELECT part_col +-- FROM partitioned_table2) q1 +-- JOIN regular_table1 +-- JOIN regular_table2 +-- where q1.part_col = regular_table1.col1 +-- AND q1.part_col = regular_table2.col1; + +-- left 
semi join where the subquery is a join between a partitioned and a partitioned table +-- disabled until HIVE-17238 is fixed +-- EXPLAIN SELECT count(*) FROM partitioned_table1 WHERE partitioned_table1.part_col IN ( +-- SELECT partitioned_table2.col FROM partitioned_table2 JOIN partitioned_table3 ON +-- partitioned_table3.col = partitioned_table2.part_col AND partitioned_table2.col > 1); + +-- SELECT count(*) +-- FROM partitioned_table1 +-- WHERE partitioned_table1.part_col IN (SELECT partitioned_table2.col +-- FROM partitioned_table2 +-- JOIN partitioned_table3 +-- ON +-- partitioned_table3.col = partitioned_table2.part_col +-- AND partitioned_table2.col > 1); + +DROP TABLE partitioned_table1; +DROP TABLE partitioned_table2; +DROP TABLE partitioned_table3; +DROP TABLE partitioned_table4; +DROP TABLE partitioned_table5; +DROP TABLE regular_table1; +DROP TABLE regular_table2; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/642acdf7/ql/src/test/results/clientpositive/spark/spark_constprog_dpp.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/spark_constprog_dpp.q.out b/ql/src/test/results/clientpositive/spark/spark_constprog_dpp.q.out new file mode 100644 index 0000000..79bbbdf --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/spark_constprog_dpp.q.out @@ -0,0 +1,138 @@ +PREHOOK: query: drop table if exists tb1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists tb1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tb1 (id int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tb1 +POSTHOOK: query: create table tb1 (id int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tb1 +PREHOOK: query: drop table if exists tb2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists tb2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tb2 (id 
smallint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tb2 +POSTHOOK: query: create table tb2 (id smallint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tb2 +PREHOOK: query: explain +select a.id from tb1 a +left outer join +(select id from tb2 +union all +select 2 as id from tb2 limit 1) b +on a.id=b.id +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.id from tb1 a +left outer join +(select id from tb2 +union all +select 2 as id from tb2 limit 1) b +on a.id=b.id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 4 (PARTITION-LEVEL SORT, 4) + Reducer 4 <- Map 3 (GROUP, 1), Map 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: tb2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: UDFToInteger(id) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: 
int) + Map 5 + Map Operator Tree: + TableScan + alias: tb2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/642acdf7/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out ---------------------------------------------------------------------- 
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out index 63a9548..6c21f1a 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out @@ -976,10 +976,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -5107,6 +5107,25 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Input: default@srcpart_date +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Input: default@srcpart_date +#### A masked pattern was here #### +0 PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 @@ -5493,130 +5512,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: srcpart_date - filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column 
stats: NONE - Spark HashTable Sink Operator - keys: - 0 day(_col0) (type: int) - 1 day(_col0) (type: int) - Select Operator - expressions: day(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - Target column: ds (string) - partition key expr: day(ds) - Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE - target work: Map 1 - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 day(_col0) (type: int) - 1 day(_col0) (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - 
compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Input: default@srcpart_date -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart join srcpart_date on (day(srcpart.ds) = day(srcpart_date.ds)) where srcpart_date.`date` = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Input: default@srcpart_date -#### A masked pattern was here #### -1000 PREHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' @@ -6201,6 +6096,21 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 +PREHOOK: type: 
QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_hour +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_hour +#### A masked pattern was here #### +0 PREHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) @@ -6472,10 +6382,195 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 2008-04-08 2008-04-09 -PREHOOK: query: drop table srcpart_orc +PREHOOK: query: create table srcpart_parquet (key int, value string) partitioned by (ds string, hr int) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_parquet +POSTHOOK: query: create table srcpart_parquet (key int, value string) partitioned by (ds string, hr int) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_parquet +PREHOOK: query: insert into table srcpart_parquet partition (ds, hr) select key, value, ds, hr from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_parquet +POSTHOOK: query: insert into table srcpart_parquet partition 
(ds, hr) select key, value, ds, hr from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_parquet@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_parquet@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_parquet@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_parquet@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-08,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-09,hr=11).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-09,hr=12).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_parquet PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: srcpart_date_hour + filterExpr: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string), hr (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string), UDFToDouble(_col1) (type: double) + 1 _col0 (type: string), UDFToDouble(_col2) (type: double) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + 
Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Select Operator + expressions: UDFToDouble(_col2) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: hr (int) + partition key expr: UDFToDouble(hr) + Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_parquet + Statistics: Num rows: 2000 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string), hr (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), UDFToDouble(_col1) (type: double) + 1 _col0 (type: string), UDFToDouble(_col2) (type: double) + input vertices: + 1 Map 3 + Statistics: Num rows: 2200 Data size: 4400 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date_hour +PREHOOK: Input: default@srcpart_parquet +PREHOOK: Input: default@srcpart_parquet@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_parquet@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_parquet@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_parquet@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_parquet join srcpart_date_hour on (srcpart_parquet.ds = srcpart_date_hour.ds and srcpart_parquet.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date_hour +POSTHOOK: Input: default@srcpart_parquet +POSTHOOK: Input: default@srcpart_parquet@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_parquet@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_parquet@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_parquet@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +1000 +PREHOOK: query: select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +#### A masked pattern was 
here #### +POSTHOOK: query: select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +#### A masked pattern was here #### +1000 +PREHOOK: query: drop table srcpart_parquet PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table srcpart_orc +PREHOOK: Input: default@srcpart_parquet +PREHOOK: Output: default@srcpart_parquet +POSTHOOK: query: drop table srcpart_parquet POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_parquet +POSTHOOK: Output: default@srcpart_parquet PREHOOK: query: drop table srcpart_date PREHOOK: type: DROPTABLE PREHOOK: Input: default@srcpart_date