Repository: hive
Updated Branches:
  refs/heads/branch-1 c8a30f676 -> 684d0e5e1
HIVE-11172 : Vectorization wrong results for aggregate query with where clause without group by (Hari Subramaniyan, reviewed by Matt McCline) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/684d0e5e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/684d0e5e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/684d0e5e Branch: refs/heads/branch-1 Commit: 684d0e5e18de2c7b7e4fd1c65fed1e17dba3277e Parents: c8a30f6 Author: Hari Subramaniyan <harisan...@apache.org> Authored: Tue Jul 21 03:03:45 2015 -0700 Committer: Hari Subramaniyan <harisan...@apache.org> Committed: Tue Jul 21 03:04:47 2015 -0700 ---------------------------------------------------------------------- .../UDAFTemplates/VectorUDAFMinMaxString.txt | 3 +- .../vector_aggregate_without_gby.q | 14 +++ .../vector_aggregate_without_gby.q.out | 96 ++++++++++++++++++++ 3 files changed, 112 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/684d0e5e/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt index 7e0dda6..cdce457 100644 --- a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt +++ b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt @@ -319,7 +319,8 @@ public class <ClassName> extends VectorAggregateExpression { int batchSize, int[] selected) { - for (int i=0; i< batchSize; ++i) { + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; myagg.checkValue(inputColumn.vector[i], inputColumn.start[i], inputColumn.length[i]); http://git-wip-us.apache.org/repos/asf/hive/blob/684d0e5e/ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q 
---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q b/ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q new file mode 100644 index 0000000..9a026ed --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_aggregate_without_gby.q @@ -0,0 +1,14 @@ +create table testvec(id int, dt int, greg_dt string) stored as orc; +insert into table testvec +values +(1,20150330, '2015-03-30'), +(2,20150301, '2015-03-01'), +(3,20150502, '2015-05-02'), +(4,20150401, '2015-04-01'), +(5,20150313, '2015-03-13'), +(6,20150314, '2015-03-14'), +(7,20150404, '2015-04-04'); +set hive.vectorized.execution.enabled=true; +set hive.map.aggr=true; +explain select max(dt), max(greg_dt) from testvec where id=5; +select max(dt), max(greg_dt) from testvec where id=5; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/684d0e5e/ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out b/ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out new file mode 100644 index 0000000..1175cb8 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out @@ -0,0 +1,96 @@ +PREHOOK: query: create table testvec(id int, dt int, greg_dt string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testvec +POSTHOOK: query: create table testvec(id int, dt int, greg_dt string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testvec +PREHOOK: query: insert into table testvec +values +(1,20150330, '2015-03-30'), +(2,20150301, '2015-03-01'), +(3,20150502, '2015-05-02'), +(4,20150401, '2015-04-01'), +(5,20150313, '2015-03-13'), +(6,20150314, '2015-03-14'), +(7,20150404, 
'2015-04-04') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@testvec +POSTHOOK: query: insert into table testvec +values +(1,20150330, '2015-03-30'), +(2,20150301, '2015-03-01'), +(3,20150502, '2015-05-02'), +(4,20150401, '2015-04-01'), +(5,20150313, '2015-03-13'), +(6,20150314, '2015-03-14'), +(7,20150404, '2015-04-04') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@testvec +POSTHOOK: Lineage: testvec.dt EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: testvec.greg_dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: testvec.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(dt), max(greg_dt) from testvec where id=5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: testvec + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id = 5) (type: boolean) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: int), greg_dt (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col0), max(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), max(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(dt), max(greg_dt) from testvec where id=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@testvec +#### A masked pattern was here #### +POSTHOOK: query: select max(dt), max(greg_dt) from testvec where id=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testvec +#### A masked pattern was here #### +20150313 2015-03-13