HIVE-18823: Vectorization: introduce qtest for SUM (IF/WHEN) with vectorization for ORC (Laszlo Bodor via Matt McCline)
Signed-off-by: Zoltan Haindrich <k...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ec0636c0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ec0636c0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ec0636c0
Branch: refs/heads/master
Commit: ec0636c066d0348077fa872ef98e542e5e5b1b2c
Parents: 4750e41
Author: Laszlo Bodor <bodorlaszlo0...@gmail.com>
Authored: Thu Jun 7 10:17:10 2018 +0200
Committer: Zoltan Haindrich <k...@rxd.hu>
Committed: Thu Jun 7 10:17:10 2018 +0200
----------------------------------------------------------------------
 .../clientpositive/vectorization_sum_if_when.q | 19 +++
 .../vectorization_sum_if_when.q.out | 126 +++++++++++++++++++
 2 files changed, 145 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ec0636c0/ql/src/test/queries/clientpositive/vectorization_sum_if_when.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorization_sum_if_when.q b/ql/src/test/queries/clientpositive/vectorization_sum_if_when.q
new file mode 100644
index 0000000..b70df7d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorization_sum_if_when.q
@@ -0,0 +1,19 @@
+set hive.vectorized.execution.enabled=false;
+drop table if exists vectorization_sum_if_when_a;
+drop table if exists vectorization_sum_if_when_b;
+create table vectorization_sum_if_when_a (x int) stored as orc;
+insert into table vectorization_sum_if_when_a values (0), (1), (0), (NULL), (NULL), (NULL), (NULL), (NULL), (NULL), (NULL);
+create table vectorization_sum_if_when_b (x int) stored as orc;
+insert into table vectorization_sum_if_when_b select least(t1.x + t2.x + t3.x + t4.x, 1) from vectorization_sum_if_when_a t1, vectorization_sum_if_when_a t2, vectorization_sum_if_when_a t3, vectorization_sum_if_when_a t4;
+select count(*), x from
vectorization_sum_if_when_b group by x;
+
+select sum(IF(x is null, 1, 0)), count(1) from vectorization_sum_if_when_b;
+select sum(IF(x=1, 1, 0)), count(1) from vectorization_sum_if_when_b;
+select sum((case WHEN x = 1 THEN 1 else 0 end)) from vectorization_sum_if_when_b;
+select sum((case WHEN x = 1 THEN 1 else 0 end)), sum((case WHEN x = 1 THEN 1 when x is null then 0 else 0 end)) from vectorization_sum_if_when_b;
+
+set hive.vectorized.execution.enabled=true;
+select sum(IF(x is null, 1, 0)), count(1) from vectorization_sum_if_when_b;
+select sum(IF(x=1, 1, 0)), count(1) from vectorization_sum_if_when_b;
+select sum((case WHEN x = 1 THEN 1 else 0 end)) from vectorization_sum_if_when_b;
+select sum((case WHEN x = 1 THEN 1 else 0 end)), sum((case WHEN x = 1 THEN 1 when x is null then 0 else 0 end)) from vectorization_sum_if_when_b;
http://git-wip-us.apache.org/repos/asf/hive/blob/ec0636c0/ql/src/test/results/clientpositive/vectorization_sum_if_when.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_sum_if_when.q.out b/ql/src/test/results/clientpositive/vectorization_sum_if_when.q.out
new file mode 100644
index 0000000..61090dc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vectorization_sum_if_when.q.out
@@ -0,0 +1,126 @@
+PREHOOK: query: drop table if exists vectorization_sum_if_when_a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists vectorization_sum_if_when_a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists vectorization_sum_if_when_b
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists vectorization_sum_if_when_b
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table vectorization_sum_if_when_a (x int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectorization_sum_if_when_a
+POSTHOOK: query: create table vectorization_sum_if_when_a (x int) stored as orc
+POSTHOOK:
type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectorization_sum_if_when_a
+PREHOOK: query: insert into table vectorization_sum_if_when_a values (0), (1), (0), (NULL), (NULL), (NULL), (NULL), (NULL), (NULL), (NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@vectorization_sum_if_when_a
+POSTHOOK: query: insert into table vectorization_sum_if_when_a values (0), (1), (0), (NULL), (NULL), (NULL), (NULL), (NULL), (NULL), (NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@vectorization_sum_if_when_a
+POSTHOOK: Lineage: vectorization_sum_if_when_a.x SCRIPT []
+PREHOOK: query: create table vectorization_sum_if_when_b (x int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectorization_sum_if_when_b
+POSTHOOK: query: create table vectorization_sum_if_when_b (x int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectorization_sum_if_when_b
+Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: insert into table vectorization_sum_if_when_b select least(t1.x + t2.x + t3.x + t4.x, 1) from vectorization_sum_if_when_a t1, vectorization_sum_if_when_a t2, vectorization_sum_if_when_a t3, vectorization_sum_if_when_a t4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectorization_sum_if_when_a
+PREHOOK: Output: default@vectorization_sum_if_when_b
+POSTHOOK: query: insert into table vectorization_sum_if_when_b select least(t1.x + t2.x + t3.x + t4.x, 1) from vectorization_sum_if_when_a t1, vectorization_sum_if_when_a t2, vectorization_sum_if_when_a t3, vectorization_sum_if_when_a t4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectorization_sum_if_when_a
+POSTHOOK: Output: default@vectorization_sum_if_when_b
+POSTHOOK: Lineage:
vectorization_sum_if_when_b.x EXPRESSION [(vectorization_sum_if_when_a)t1.FieldSchema(name:x, type:int, comment:null), (vectorization_sum_if_when_a)t2.FieldSchema(name:x, type:int, comment:null), (vectorization_sum_if_when_a)t3.FieldSchema(name:x, type:int, comment:null), (vectorization_sum_if_when_a)t4.FieldSchema(name:x, type:int, comment:null), ]
+PREHOOK: query: select count(*), x from vectorization_sum_if_when_b group by x
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*), x from vectorization_sum_if_when_b group by x
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+9919 NULL
+16 0
+65 1
+PREHOOK: query: select sum(IF(x is null, 1, 0)), count(1) from vectorization_sum_if_when_b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(IF(x is null, 1, 0)), count(1) from vectorization_sum_if_when_b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+9919 10000
+PREHOOK: query: select sum(IF(x=1, 1, 0)), count(1) from vectorization_sum_if_when_b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(IF(x=1, 1, 0)), count(1) from vectorization_sum_if_when_b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+65 10000
+PREHOOK: query: select sum((case WHEN x = 1 THEN 1 else 0 end)) from vectorization_sum_if_when_b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+POSTHOOK: query: select sum((case WHEN x = 1 THEN 1 else 0 end)) from vectorization_sum_if_when_b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked
pattern was here ####
+65
+PREHOOK: query: select sum((case WHEN x = 1 THEN 1 else 0 end)), sum((case WHEN x = 1 THEN 1 when x is null then 0 else 0 end)) from vectorization_sum_if_when_b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+POSTHOOK: query: select sum((case WHEN x = 1 THEN 1 else 0 end)), sum((case WHEN x = 1 THEN 1 when x is null then 0 else 0 end)) from vectorization_sum_if_when_b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+65 65
+PREHOOK: query: select sum(IF(x is null, 1, 0)), count(1) from vectorization_sum_if_when_b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(IF(x is null, 1, 0)), count(1) from vectorization_sum_if_when_b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+9919 10000
+PREHOOK: query: select sum(IF(x=1, 1, 0)), count(1) from vectorization_sum_if_when_b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(IF(x=1, 1, 0)), count(1) from vectorization_sum_if_when_b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+65 10000
+PREHOOK: query: select sum((case WHEN x = 1 THEN 1 else 0 end)) from vectorization_sum_if_when_b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+POSTHOOK: query: select sum((case WHEN x = 1 THEN 1 else 0 end)) from vectorization_sum_if_when_b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+65
+PREHOOK: query: select sum((case WHEN x = 1 THEN 1 else 0 end)), sum((case WHEN x = 1 THEN 1 when x is null then 0 else 0 end)) from
vectorization_sum_if_when_b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+POSTHOOK: query: select sum((case WHEN x = 1 THEN 1 else 0 end)), sum((case WHEN x = 1 THEN 1 when x is null then 0 else 0 end)) from vectorization_sum_if_when_b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectorization_sum_if_when_b
+#### A masked pattern was here ####
+65 65