HIVE-20013: Add an Implicit cast to date type for to_date function (Nishant Bangarwa via Ashutosh Chauhan)
Signed-off-by: Zoltan Haindrich <k...@rxd.hu> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/826e5532 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/826e5532 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/826e5532 Branch: refs/heads/master Commit: 826e5532aae16e57cfc82eb4f39a96122a88486b Parents: fb7a676 Author: Nishant Bangarwa <nishant.mon...@gmail.com> Authored: Mon Aug 27 11:06:09 2018 +0200 Committer: Zoltan Haindrich <k...@rxd.hu> Committed: Mon Aug 27 11:53:49 2018 +0200 ---------------------------------------------------------------------- .../calcite/translator/RexNodeConverter.java | 19 +++++ .../clientpositive/druidmini_expressions.q | 22 ++++++ .../druid/druidmini_expressions.q.out | 79 ++++++++++++++++++++ .../clientpositive/fold_eq_with_case_when.q.out | 4 +- .../test/results/clientpositive/llap/kryo.q.out | 2 +- 5 files changed, 123 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/826e5532/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index bc47969..f60091b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.Unsu import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -353,6 +354,8 @@ public class RexNodeConverter { childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst); calciteOp = SqlStdOperatorTable.OR; } + } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { + childRexNodeLst = rewriteToDateChildren(childRexNodeLst); } expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst); } else { @@ -534,6 +537,22 @@ public class RexNodeConverter { return newChildRexNodeLst; } + + private List<RexNode> rewriteToDateChildren(List<RexNode> childRexNodeLst) { + List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>(); + assert childRexNodeLst.size() == 1; + RexNode child = childRexNodeLst.get(0); + if (SqlTypeUtil.isDatetime(child.getType()) || SqlTypeUtil.isInterval( + child.getType())) { + newChildRexNodeLst.add(child); + } else { + newChildRexNodeLst.add( + cluster.getRexBuilder().makeCast(cluster.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP), + child)); + } + return newChildRexNodeLst; + } + private List<RexNode> rewriteInClauseChildren(SqlOperator op, List<RexNode> childRexNodeLst) throws SemanticException { assert op.getKind() == SqlKind.IN; http://git-wip-us.apache.org/repos/asf/hive/blob/826e5532/ql/src/test/queries/clientpositive/druidmini_expressions.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/druidmini_expressions.q b/ql/src/test/queries/clientpositive/druidmini_expressions.q index 273c803..9723585 100644 --- a/ql/src/test/queries/clientpositive/druidmini_expressions.q +++ b/ql/src/test/queries/clientpositive/druidmini_expressions.q @@ -118,6 +118,7 @@ EXPLAIN SELECT 
SUM((`druid_table_alias`.`cdouble` * `druid_table_alias`.`cdouble`)) AS `sum_calculation_4998925219892510720_ok`, CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DATE) AS `tmn___time_ok` FROM `default`.`druid_table_n0` `druid_table_alias` GROUP BY CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DATE); + SELECT SUM((`druid_table_alias`.`cdouble` * `druid_table_alias`.`cdouble`)) AS `sum_calculation_4998925219892510720_ok`, CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DATE) AS `tmn___time_ok` FROM `default`.`druid_table_n0` `druid_table_alias` @@ -139,3 +140,24 @@ SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_1 EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2; DROP TABLE druid_table_n0; + +-- Tests for testing handling of date/time functions on druid dimensions stored as strings +CREATE TABLE druid_table_n1 +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`, +cast(datetime1 as string) as datetime1, +cast(date1 as string) as date1, +cast(time1 as string) as time1 +FROM TABLE ( +VALUES +('2004-04-09 22:20:14', '2004-04-09','22:20:14'), +('2004-04-04 22:50:16', '2004-04-04', '22:50:16'), +('2004-04-12 04:40:49', '2004-04-12', '04:40:49'), +('2004-04-11 00:00:00', '2004-04-11', null), +('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1); + +EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1; + +SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1; http://git-wip-us.apache.org/repos/asf/hive/blob/826e5532/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out index 9ffcdd8..45f2f4d 100644 ---
a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out @@ -1404,3 +1404,82 @@ POSTHOOK: query: DROP TABLE druid_table_n0 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@druid_table_n0 POSTHOOK: Output: default@druid_table_n0 +PREHOOK: query: CREATE TABLE druid_table_n1 +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`, +cast(datetime1 as string) as datetime1, +cast(date1 as string) as date1, +cast(time1 as string) as time1 +FROM TABLE ( +VALUES +('2004-04-09 22:20:14', '2004-04-09','22:20:14'), +('2004-04-04 22:50:16', '2004-04-04', '22:50:16'), +('2004-04-12 04:40:49', '2004-04-12', '04:40:49'), +('2004-04-11 00:00:00', '2004-04-11', null), +('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@druid_table_n1 +POSTHOOK: query: CREATE TABLE druid_table_n1 +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`, +cast(datetime1 as string) as datetime1, +cast(date1 as string) as date1, +cast(time1 as string) as time1 +FROM TABLE ( +VALUES +('2004-04-09 22:20:14', '2004-04-09','22:20:14'), +('2004-04-04 22:50:16', '2004-04-04', '22:50:16'), +('2004-04-12 04:40:49', '2004-04-12', '04:40:49'), +('2004-04-11 00:00:00', '2004-04-11', null), +('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@druid_table_n1 +POSTHOOK: Lineage: druid_table_n1.__time SIMPLE [] +POSTHOOK: Lineage: druid_table_n1.date1 SCRIPT [] +POSTHOOK: Lineage: druid_table_n1.datetime1 SCRIPT [] +POSTHOOK: Lineage: druid_table_n1.time1 SCRIPT [] +PREHOOK: query: EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_n1 + properties: + druid.fieldNames vc,vc0 + druid.fieldTypes date,date + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(\"date1\",'','UTC'),'P1D','','UTC')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_floor(timestamp_parse(\"datetime1\",'','UTC'),'P1D','','UTC')","outputType":"LONG"}],"columns":["vc","vc0"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: date), vc0 (type: date) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1970-01-01 1970-01-01 +2004-04-04 2004-04-04 +2004-04-09 2004-04-09 +2004-04-11 2004-04-11 +2004-04-12 2004-04-12 http://git-wip-us.apache.org/repos/asf/hive/blob/826e5532/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out ---------------------------------------------------------------------- 
diff --git a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out index cb4d65c..0e1f2c6 100644 --- a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out +++ b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out @@ -40,10 +40,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: lineitem - filterExpr: ((DATE'1996-03-30' = to_date(l_shipdate)) and (l_shipmode = 'RAIL')) (type: boolean) + filterExpr: ((DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP))) and (l_shipmode = 'RAIL')) (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((DATE'1996-03-30' = to_date(l_shipdate)) and (l_shipmode = 'RAIL')) (type: boolean) + predicate: ((DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP))) and (l_shipmode = 'RAIL')) (type: boolean) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), (UDFToDouble(l_partkey) / 1000000.0D) (type: double) http://git-wip-us.apache.org/repos/asf/hive/blob/826e5532/ql/src/test/results/clientpositive/llap/kryo.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/kryo.q.out b/ql/src/test/results/clientpositive/llap/kryo.q.out index 764a914..81da38a 100644 --- a/ql/src/test/results/clientpositive/llap/kryo.q.out +++ b/ql/src/test/results/clientpositive/llap/kryo.q.out @@ -50,7 +50,7 @@ STAGE PLANS: predicate: (id) IN (1, 2, 3, 4, 5, 6) (type: boolean) Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: if(((id = 1) or (id = 2)), if((id = 1), date_, date_), if((id = 3), CASE WHEN ((date_ is null or to_date(datetime) is null)) THEN (null) WHEN ((CAST( date_ AS DATE) > to_date(datetime))) THEN (date_) ELSE (to_date(datetime)) END, null)) (type: 
string), id (type: int), CASE WHEN ((id = 6)) THEN (CASE WHEN ((concat(date_, ' 00:00:00') is null or datetime is null)) THEN (null) WHEN ((concat(date_, ' 00:00:00') > datetime)) THEN (concat(date_, ' 00:00:00')) ELSE (datetime) END) WHEN ((id = 5)) THEN (CASE WHEN ((date_ is null or datetime is null)) THEN (null) WHEN ((date_ > datetime)) THEN (date_) ELSE (datetime) END) WHEN ((id = 3)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 4)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 1)) THEN (date_) WHEN ((id = 2)) THEN (date_) ELSE (null) END (type: string) + expressions: if(((id = 1) or (id = 2)), if((id = 1), date_, date_), if((id = 3), CASE WHEN ((date_ is null or to_date(CAST( datetime AS TIMESTAMP)) is null)) THEN (null) WHEN ((CAST( date_ AS DATE) > to_date(CAST( datetime AS TIMESTAMP)))) THEN (date_) ELSE (to_date(CAST( datetime AS TIMESTAMP))) END, null)) (type: string), id (type: int), CASE WHEN ((id = 6)) THEN (CASE WHEN ((concat(date_, ' 00:00:00') is null or datetime is null)) THEN (null) WHEN ((concat(date_, ' 00:00:00') > datetime)) THEN (concat(date_, ' 00:00:00')) ELSE (datetime) END) WHEN ((id = 5)) THEN (CASE WHEN ((date_ is null or datetime is null)) THEN (null) WHEN ((date_ > datetime)) THEN (date_) ELSE (datetime) END) WHEN ((id = 3)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 4)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 1)) THEN (date_) WHEN ((id = 2)) THEN (date_) ELSE (null) END (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE Group By Operator