Repository: hive Updated Branches: refs/heads/master 9493dcfd4 -> a6091c32b
HIVE-20955: Fix Calcite Rule HiveExpandDistinctAggregatesRule throwing IndexOutOfBoundsException HIVE-21026: Druid Vectorize Reader is not using the correct input size (Vineet G via Slim B) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a6091c32 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a6091c32 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a6091c32 Branch: refs/heads/master Commit: a6091c32b11c2537f1c13841329dffa7a3f85057 Parents: 9493dcf Author: Slim Bouguerra <bs...@apache.org> Authored: Mon Dec 10 14:00:40 2018 -0800 Committer: Slim Bouguerra <bs...@apache.org> Committed: Mon Dec 10 14:00:40 2018 -0800 ---------------------------------------------------------------------- .../hive/druid/io/DruidVectorizedWrapper.java | 2 +- .../rules/HiveExpandDistinctAggregatesRule.java | 2 +- .../clientpositive/druidmini_expressions.q | 6 +- .../druid/druidmini_expressions.q.out | 76 ++++++++++++++++++++ 4 files changed, 83 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/a6091c32/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidVectorizedWrapper.java ---------------------------------------------------------------------- diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidVectorizedWrapper.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidVectorizedWrapper.java index 5db6be8..586631d 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidVectorizedWrapper.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidVectorizedWrapper.java @@ -77,7 +77,7 @@ public class DruidVectorizedWrapper<T extends Comparable<T>> implements RecordRe } druidWritable = baseReader.createValue(); - rowBoat = new Object[projectedColumns.length]; + rowBoat = new Object[rbCtx.getDataColumnCount()]; } @Override public boolean 
next(NullWritable nullWritable, VectorizedRowBatch vectorizedRowBatch) throws IOException { http://git-wip-us.apache.org/repos/asf/hive/blob/a6091c32/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java index b33c4c5..c7bb23d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveExpandDistinctAggregatesRule.java @@ -165,7 +165,7 @@ public final class HiveExpandDistinctAggregatesRule extends RelOptRule { final RelMetadataQuery mq = call.getMetadataQuery(); if ((nonDistinctCount == 0) && (argListSets.size() == 1)) { for (Integer arg : argListSets.iterator().next()) { - Set<RelColumnOrigin> colOrigs = mq.getColumnOrigins(aggregate, arg); + Set<RelColumnOrigin> colOrigs = mq.getColumnOrigins(aggregate.getInput(), arg); if (null != colOrigs) { for (RelColumnOrigin colOrig : colOrigs) { RelOptHiveTable hiveTbl = (RelOptHiveTable)colOrig.getOriginTable(); http://git-wip-us.apache.org/repos/asf/hive/blob/a6091c32/ql/src/test/queries/clientpositive/druidmini_expressions.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/druidmini_expressions.q b/ql/src/test/queries/clientpositive/druidmini_expressions.q index 36aad79..e491986 100644 --- a/ql/src/test/queries/clientpositive/druidmini_expressions.q +++ b/ql/src/test/queries/clientpositive/druidmini_expressions.q @@ -209,4 +209,8 @@ explain select count(cstring1) from (select `cfloat`, `cstring1`, `cint` from dr select max(cint * cdouble) from (select `cfloat`, `cstring1`, `cint`, `cdouble` from 
druid_table_alltypesorc limit 90000) as src; -explain select max(cint * cfloat) from (select `cfloat`, `cstring1`, `cint`, `cdouble` from druid_table_alltypesorc limit 90000) as src; \ No newline at end of file +explain select max(cint * cfloat) from (select `cfloat`, `cstring1`, `cint`, `cdouble` from druid_table_alltypesorc limit 90000) as src; + +explain select count(distinct `__time`, cint) from (select * from druid_table_alltypesorc) as src; + +select count(distinct `__time`, cint) from (select * from druid_table_alltypesorc) as src; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/a6091c32/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out index 973cade..56065ff 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out @@ -2429,3 +2429,79 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain select count(distinct `__time`, cint) from (select * from druid_table_alltypesorc) as src +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain select count(distinct `__time`, cint) from (select * from druid_table_alltypesorc) as src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table_alltypesorc + properties: + druid.fieldNames 
extract,cint + druid.fieldTypes timestamp with local time zone,int + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"extraction","dimension":"__time","outputName":"extract","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"default","dimension":"cint","outputName":"cint","outputType":"LONG"}],"limitSpec":{"type":"default"},"aggregations":[],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.type groupBy + Statistics: Num rows: 9173 Data size: 34864 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Statistics: Num rows: 9173 Data size: 34864 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct `__time`, cint) from (select * from druid_table_alltypesorc) as src +PREHOOK: type: 
QUERY +PREHOOK: Input: default@druid_table_alltypesorc +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(distinct `__time`, cint) from (select * from druid_table_alltypesorc) as src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_alltypesorc +POSTHOOK: Output: hdfs://### HDFS PATH ### +3027