Repository: hive Updated Branches: refs/heads/branch-3 9397c6589 -> 095c93e2c
HIVE-19615: Proper handling of is null and not is null predicate when pushed to Druid (Slim Bouguerra, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7657a6e0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7657a6e0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7657a6e0 Branch: refs/heads/branch-3 Commit: 7657a6e0eb0f703e0d3c3db7594045b6d3e99271 Parents: 9397c65 Author: Slim Bouguerra <slim.bougue...@gmail.com> Authored: Sat May 26 07:53:20 2018 -0700 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Sat May 26 07:57:21 2018 -0700 ---------------------------------------------------------------------- .../ql/parse/DruidSqlOperatorConverter.java | 51 +++++++++++--------- .../queries/clientpositive/druidmini_test1.q | 5 ++ .../clientpositive/druid/druidmini_test1.q.out | 24 +++++++++ 3 files changed, 56 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/7657a6e0/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java index 2438a86..ce04eec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java @@ -7,7 +7,7 @@ * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -25,6 +25,7 @@ import org.apache.calcite.adapter.druid.DruidExpressions; import org.apache.calcite.adapter.druid.DruidQuery; import org.apache.calcite.adapter.druid.ExtractOperatorConversion; import org.apache.calcite.adapter.druid.FloorOperatorConversion; +import org.apache.calcite.adapter.druid.UnarySuffixOperatorConversion; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexLiteral; @@ -49,39 +50,46 @@ import java.util.Map; public class DruidSqlOperatorConverter { private DruidSqlOperatorConverter() { } + private static Map druidOperatorMap = null; public static final Map<SqlOperator, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter> getDefaultMap() { if (druidOperatorMap == null) { - druidOperatorMap = - new HashMap<SqlOperator, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>(); - DruidQuery.DEFAULT_OPERATORS_LIST.stream() - .forEach(op -> druidOperatorMap.put(op.calciteOperator(), op)); + druidOperatorMap = new HashMap<SqlOperator, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>(); + DruidQuery.DEFAULT_OPERATORS_LIST.stream().forEach(op -> druidOperatorMap.put(op.calciteOperator(), op)); //Override Hive specific operators druidOperatorMap.putAll(Maps.asMap(HiveFloorDate.ALL_FUNCTIONS, - (Function<SqlFunction, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>) input -> new FloorOperatorConversion() + (Function<SqlFunction, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>) input -> new + FloorOperatorConversion() )); druidOperatorMap.putAll(Maps.asMap(HiveExtractDate.ALL_FUNCTIONS, - (Function<SqlFunction, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>) 
input -> new ExtractOperatorConversion() + (Function<SqlFunction, org.apache.calcite.adapter.druid.DruidSqlOperatorConverter>) input -> new + ExtractOperatorConversion() )); + druidOperatorMap.put(HiveConcat.INSTANCE, new DirectOperatorConversion(HiveConcat.INSTANCE, "concat")); + druidOperatorMap + .put(SqlStdOperatorTable.SUBSTRING, new DruidSqlOperatorConverter.DruidSubstringOperatorConversion()); druidOperatorMap - .put(HiveConcat.INSTANCE, new DirectOperatorConversion(HiveConcat.INSTANCE, "concat")); - druidOperatorMap.put(SqlStdOperatorTable.SUBSTRING, - new DruidSqlOperatorConverter.DruidSubstringOperatorConversion() + .put(SqlStdOperatorTable.IS_NULL, new UnarySuffixOperatorConversion(SqlStdOperatorTable.IS_NULL, "isnull")); + druidOperatorMap.put(SqlStdOperatorTable.IS_NOT_NULL, + new UnarySuffixOperatorConversion(SqlStdOperatorTable.IS_NOT_NULL, "notnull") ); } return druidOperatorMap; } - //@TODO remove this when it is fixed in calcite https://issues.apache.org/jira/browse/HIVE-18996 - public static class DruidSubstringOperatorConversion extends org.apache.calcite.adapter.druid.SubstringOperatorConversion { - @Nullable @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType, - DruidQuery query + /** + * Druid operator converter from Hive Substring to Druid SubString. + * This is a temporary fix that can be removed once we move to a Calcite version including the following. 
+ * https://issues.apache.org/jira/browse/CALCITE-2226 + */ + public static class DruidSubstringOperatorConversion + extends org.apache.calcite.adapter.druid.SubstringOperatorConversion { + @Nullable @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query ) { final RexCall call = (RexCall) rexNode; - final String arg = DruidExpressions.toDruidExpression( - call.getOperands().get(0), rowType, query); + final String arg = DruidExpressions.toDruidExpression(call.getOperands().get(0), rowType, query); if (arg == null) { return null; } @@ -90,8 +98,7 @@ public class DruidSqlOperatorConverter { final String length; // SQL is 1-indexed, Druid is 0-indexed. if (!call.getOperands().get(1).isA(SqlKind.LITERAL)) { - final String indexExp = DruidExpressions.toDruidExpression( - call.getOperands().get(1), rowType, query); + final String indexExp = DruidExpressions.toDruidExpression(call.getOperands().get(1), rowType, query); if (indexExp == null) { return null; } @@ -104,8 +111,7 @@ public class DruidSqlOperatorConverter { if (call.getOperands().size() > 2) { //case substring from index with length if (!call.getOperands().get(2).isA(SqlKind.LITERAL)) { - length = DruidExpressions.toDruidExpression( - call.getOperands().get(2), rowType, query); + length = DruidExpressions.toDruidExpression(call.getOperands().get(2), rowType, query); if (length == null) { return null; } @@ -117,10 +123,7 @@ public class DruidSqlOperatorConverter { //case substring from index to the end length = DruidExpressions.numberLiteral(-1); } - return DruidQuery.format("substring(%s, %s, %s)", - arg, - indexStart, - length); + return DruidQuery.format("substring(%s, %s, %s)", arg, indexStart, length); } } } http://git-wip-us.apache.org/repos/asf/hive/blob/7657a6e0/ql/src/test/queries/clientpositive/druidmini_test1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/druidmini_test1.q 
b/ql/src/test/queries/clientpositive/druidmini_test1.q index 4a1bdc5..f93665e 100644 --- a/ql/src/test/queries/clientpositive/druidmini_test1.q +++ b/ql/src/test/queries/clientpositive/druidmini_test1.q @@ -120,3 +120,8 @@ SELECT `__time` FROM druid_table_n3 WHERE (`__time` BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00') OR (`__time` BETWEEN '1968-02-01 00:00:00' AND '1970-04-01 00:00:00') ORDER BY `__time` ASC LIMIT 10; + +-- Running this against Druid will fail if Druid version does not include +-- this patch https://github.com/druid-io/druid/commit/219e77aeac9b07dc20dd9ab2dd537f3f17498346 + +explain select (cstring1 is null ) AS is_null, (cint is not null ) as isnotnull FROM druid_table_n3; http://git-wip-us.apache.org/repos/asf/hive/blob/7657a6e0/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out b/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out index a878443..59672a0 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_test1.q.out @@ -797,3 +797,27 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific +PREHOOK: query: explain select (cstring1 is null ) AS is_null, (cint is not null ) as isnotnull FROM druid_table_n3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select (cstring1 is null ) AS is_null, (cint is not null ) as isnotnull FROM druid_table_n3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_n3 + properties: + druid.fieldNames vc,vc0 + druid.fieldTypes boolean,boolean + druid.query.json 
{"queryType":"scan","dataSource":"default.druid_table_n3","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"(\"cstring1\" isnull)","outputType":"FLOAT"},{"type":"expression","name":"vc0","expression":"(\"cint\" notnull)","outputType":"FLOAT"}],"columns":["vc","vc0"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: boolean), vc0 (type: boolean) + outputColumnNames: _col0, _col1 + ListSink +