Add extract functions
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/dcc102a2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/dcc102a2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/dcc102a2 Branch: refs/heads/master Commit: dcc102a2a4303d64e919b24d8990c2e115492402 Parents: 3efba1e Author: vkorukanti <[email protected]> Authored: Wed Apr 2 18:48:27 2014 -0700 Committer: Jacques Nadeau <[email protected]> Committed: Sat Apr 19 18:07:11 2014 -0700 ---------------------------------------------------------------------- exec/java-exec/src/main/codegen/config.fmpp | 1 + .../src/main/codegen/data/ExtractTypes.tdd | 20 +++ .../src/main/codegen/templates/Extract.java | 118 ++++++++++++++++ .../drill/exec/expr/EvaluationVisitor.java | 16 +-- .../exec/expr/ExpressionTreeMaterializer.java | 113 ++++++++------- .../physical/impl/TestExtractFunctions.java | 136 +++++++++++++++++++ .../test/resources/functions/extractFrom.json | 46 +++++++ .../src/test/resources/test_simple_time.json | 3 + 8 files changed, 381 insertions(+), 72 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/dcc102a2/exec/java-exec/src/main/codegen/config.fmpp ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/config.fmpp b/exec/java-exec/src/main/codegen/config.fmpp index 3f19de0..ef82bf2 100644 --- a/exec/java-exec/src/main/codegen/config.fmpp +++ b/exec/java-exec/src/main/codegen/config.fmpp @@ -24,6 +24,7 @@ data: { aggrtypes1: tdd(../data/AggrTypes1.tdd), aggrtypes2: tdd(../data/AggrTypes2.tdd), date: tdd(../data/DateTypes.tdd) + extract: tdd(../data/ExtractTypes.tdd) } freemarkerLinks: { includes: includes/ http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/dcc102a2/exec/java-exec/src/main/codegen/data/ExtractTypes.tdd ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/data/ExtractTypes.tdd b/exec/java-exec/src/main/codegen/data/ExtractTypes.tdd new file mode 100644 index 0000000..70d7861 --- /dev/null +++ b/exec/java-exec/src/main/codegen/data/ExtractTypes.tdd @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http:# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{ + toTypes: [Second, Minute, Hour, Day, Month, Year], + fromTypes: [Date, Time, TimeStamp, Interval, IntervalDay, IntervalYear] +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/dcc102a2/exec/java-exec/src/main/codegen/templates/Extract.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/templates/Extract.java b/exec/java-exec/src/main/codegen/templates/Extract.java new file mode 100644 index 0000000..d0e0afe --- /dev/null +++ b/exec/java-exec/src/main/codegen/templates/Extract.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +<@pp.dropOutputFile /> +<#assign className="GExtract" /> + +<@pp.changeOutputFile name="/org/apache/drill/exec/expr/fn/impl/${className}.java" /> + +<#include "/@includes/license.ftl" /> + +package org.apache.drill.exec.expr.fn.impl; + +import org.apache.drill.exec.expr.DrillSimpleFunc; +import org.apache.drill.exec.expr.annotations.*; +import org.apache.drill.exec.expr.holders.*; +import org.apache.drill.exec.record.RecordBatch; + +public class ${className} { + +<#list extract.toTypes as toUnit> +<#list extract.fromTypes as fromUnit> +<#if fromUnit == "Date" || fromUnit == "Time" || fromUnit == "TimeStamp"> + @FunctionTemplate(name = "extract${toUnit}", scope = FunctionTemplate.FunctionScope.SIMPLE, + nulls = FunctionTemplate.NullHandling.NULL_IF_NULL) + public static class ${toUnit}From${fromUnit} implements DrillSimpleFunc { + + @Param ${fromUnit}Holder in; + @Output BigIntHolder out; + + public void setup(RecordBatch incoming) { } + + public void eval() { + org.joda.time.MutableDateTime temp = new org.joda.time.MutableDateTime(in.value, org.joda.time.DateTimeZone.UTC); + <#if toUnit == "Second"> + out.value = temp.getSecondOfMinute(); + <#elseif toUnit = "Minute"> + out.value = temp.getMinuteOfHour(); + <#elseif toUnit = "Hour"> + out.value = temp.getHourOfDay(); + <#elseif toUnit = "Day"> + out.value = temp.getDayOfMonth(); + <#elseif toUnit = "Month"> + out.value = temp.getMonthOfYear(); + <#elseif toUnit = "Year"> + out.value = temp.getYear(); + </#if> + } + } +<#else> + @FunctionTemplate(name = "extract${toUnit}", scope = FunctionTemplate.FunctionScope.SIMPLE, + nulls = FunctionTemplate.NullHandling.NULL_IF_NULL) + public static class ${toUnit}From${fromUnit} implements DrillSimpleFunc { + + @Param ${fromUnit}Holder in; + @Output BigIntHolder out; + + public void setup(RecordBatch incoming) { } + + public void eval() { + <#if fromUnit == "Interval"> + + int years = (in.months / org.apache.drill.exec.expr.fn.impl.DateUtility.yearsToMonths); + int months = (in.months % org.apache.drill.exec.expr.fn.impl.DateUtility.yearsToMonths); + + int millis = in.milliSeconds; + + int hours = millis / (org.apache.drill.exec.expr.fn.impl.DateUtility.hoursToMillis); + millis = millis % (org.apache.drill.exec.expr.fn.impl.DateUtility.hoursToMillis); + + int minutes = millis / (org.apache.drill.exec.expr.fn.impl.DateUtility.minutesToMillis); + millis = millis % (org.apache.drill.exec.expr.fn.impl.DateUtility.minutesToMillis); + + int seconds = millis / (org.apache.drill.exec.expr.fn.impl.DateUtility.secondsToMillis); + millis = millis % (org.apache.drill.exec.expr.fn.impl.DateUtility.secondsToMillis); + org.joda.time.Period temp = new org.joda.time.Period(years, months, 0, in.days, hours, minutes, seconds, millis); + + <#elseif fromUnit == "IntervalDay"> + + int millis = in.milliSeconds; + + int hours = millis / (org.apache.drill.exec.expr.fn.impl.DateUtility.hoursToMillis); + millis = millis % (org.apache.drill.exec.expr.fn.impl.DateUtility.hoursToMillis); + + int minutes = millis / (org.apache.drill.exec.expr.fn.impl.DateUtility.minutesToMillis); + millis = millis % (org.apache.drill.exec.expr.fn.impl.DateUtility.minutesToMillis); + + int seconds = millis / (org.apache.drill.exec.expr.fn.impl.DateUtility.secondsToMillis); + millis = millis % (org.apache.drill.exec.expr.fn.impl.DateUtility.secondsToMillis); + org.joda.time.Period temp = new org.joda.time.Period(0, 0, 0, in.days, hours, minutes, seconds, millis); + + <#else> + + int years = (in.value / org.apache.drill.exec.expr.fn.impl.DateUtility.yearsToMonths); + int months = (in.value % org.apache.drill.exec.expr.fn.impl.DateUtility.yearsToMonths); + org.joda.time.Period temp = new org.joda.time.Period(years, months, 0, 0, 0, 0, 0, 0); + + </#if> + out.value = temp.get${toUnit}s(); + } + } + </#if> +</#list> +</#list> +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/dcc102a2/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java index b7670ee..aff47db 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java @@ -377,19 +377,9 @@ public class EvaluationVisitor { @Override public HoldingContainer visitCastExpression(CastExpression e, ClassGenerator<?> value) throws RuntimeException { - // we create - MajorType type = e.getMajorType(); - String castFuncWithType = "cast" + type.getMinorType().name(); - - List<LogicalExpression> newArgs = Lists.newArrayList(); - newArgs.add(e.getInput()); //input_expr - - //VarLen type - if (!Types.isFixedWidthType(type)) { - newArgs.add(new ValueExpressions.LongExpression(type.getWidth(), null)); - } - FunctionCall fc = new FunctionCall(castFuncWithType, newArgs, e.getPosition()); - return fc.accept(this, value); } + throw new UnsupportedOperationException("CastExpression is not expected here. "+ + "It should have been converted to FunctionHolderExpression in materialization"); + } } private class ConstantFilter extends EvalVisitor { http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/dcc102a2/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java index d65ff78..1d8070c 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java @@ -289,70 +289,65 @@ public class ExpressionTreeMaterializer { if(castEqual(e.getPosition(), newMajor, input.getMajorType())) return input; // don't do pointless cast. - if(newMinor == MinorType.LATE || newMinor == MinorType.NULL){ - // if the type still isn't fully bound, leave as cast expression. - return new CastExpression(input, e.getMajorType(), e.getPosition()); - }else{ - // if the type is fully bound, convert to functioncall and materialze the function. - MajorType type = e.getMajorType(); - String castFuncWithType = "cast" + type.getMinorType().name(); - - List<LogicalExpression> newArgs = Lists.newArrayList(); - newArgs.add(e.getInput()); //input_expr - - //VarLen type - if (!Types.isFixedWidthType(type)) { - newArgs.add(new ValueExpressions.LongExpression(type.getWidth(), null)); - } - FunctionCall fc = new FunctionCall(castFuncWithType, newArgs, e.getPosition()); - return fc.accept(this, value); + if(newMinor == MinorType.LATE){ + throw new UnsupportedOperationException("LATE binding is not supported"); + } else if (newMinor == MinorType.NULL){ + // convert it into null expression + return NullExpression.INSTANCE; } - - - + + // if the type is fully bound, convert to functioncall and materialze the function. + MajorType type = e.getMajorType(); + String castFuncWithType = "cast" + type.getMinorType().name(); + + List<LogicalExpression> newArgs = Lists.newArrayList(); + newArgs.add(e.getInput()); //input_expr + + //VarLen type + if (!Types.isFixedWidthType(type)) { + newArgs.add(new ValueExpressions.LongExpression(type.getWidth(), null)); + } + FunctionCall fc = new FunctionCall(castFuncWithType, newArgs, e.getPosition()); + return fc.accept(this, value); } - private boolean castEqual(ExpressionPosition pos, MajorType from, MajorType to){ - if(!from.getMinorType().equals(to.getMinorType())) return false; - switch(from.getMinorType()){ - case FLOAT4: - case FLOAT8: - case INT: - case BIGINT: - case BIT: - case TINYINT: - case UINT1: - case UINT2: - case UINT4: - case UINT8: - // nothing else matters. - return true; - - case FIXED16CHAR: - case FIXEDBINARY: - case FIXEDCHAR: - // width always matters - this.errorCollector.addGeneralError(pos, "Casting fixed width types are not yet supported.."); - return false; - - case VAR16CHAR: - case VARBINARY: - case VARCHAR: - if(to.getWidth() < from.getWidth() && to.getWidth() > 0){ - this.errorCollector.addGeneralError(pos, "Casting from a longer variable length type to a shorter variable length type is not currently supported."); - return false; - }else{ + private boolean castEqual(ExpressionPosition pos, MajorType from, MajorType to){ + if(!from.getMinorType().equals(to.getMinorType())) return false; + switch(from.getMinorType()){ + case FLOAT4: + case FLOAT8: + case INT: + case BIGINT: + case BIT: + case TINYINT: + case UINT1: + case UINT2: + case UINT4: + case UINT8: + // nothing else matters. return true; - } - default: - errorCollector.addGeneralError(pos, String.format("Casting rules are unknown for type %s.", from)); - return false; - - } + case FIXED16CHAR: + case FIXEDBINARY: + case FIXEDCHAR: + // width always matters + this.errorCollector.addGeneralError(pos, "Casting fixed width types are not yet supported.."); + return false; - } - - } + case VAR16CHAR: + case VARBINARY: + case VARCHAR: + if(to.getWidth() < from.getWidth() && to.getWidth() > 0){ + this.errorCollector.addGeneralError(pos, "Casting from a longer variable length type to a shorter variable length type is not currently supported."); + return false; + }else{ + return true; + } + default: + errorCollector.addGeneralError(pos, String.format("Casting rules are unknown for type %s.", from)); + return false; + } + } + } } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/dcc102a2/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestExtractFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestExtractFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestExtractFunctions.java new file mode 100644 index 0000000..3e1f7b4 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TestExtractFunctions.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl; + +import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; + +import org.apache.drill.common.util.FileUtils; +import org.apache.drill.exec.client.DrillClient; +import org.apache.drill.exec.pop.PopUnitTestBase; +import org.apache.drill.exec.proto.UserProtos; +import org.apache.drill.exec.record.RecordBatchLoader; +import org.apache.drill.exec.rpc.user.QueryResultBatch; +import org.apache.drill.exec.server.Drillbit; +import org.apache.drill.exec.server.RemoteServiceSet; +import org.apache.drill.exec.vector.NullableBigIntVector; + +import org.junit.Ignore; +import org.junit.Test; + +import com.google.common.base.Charsets; +import com.google.common.io.Files; + +import java.util.List; + +/* This class tests the existing date types. Simply using date types + * by casting from VarChar, performing basic functions and converting + * back to VarChar. + */ +public class TestExtractFunctions extends PopUnitTestBase { + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestExtractFunctions.class); + + @Test + public void testFromDate() throws Exception { + long expectedValues[][] = { {00, 00, 00, 02, 01, 1970}, {00, 00, 00, 28, 12, 2008}, {00, 00, 00, 27, 02, 2000} }; + testFrom("date", "/test_simple_date.json", "stringdate", expectedValues); + } + + @Test + @Ignore // failing due to some issue in castTime(varchar) + public void testFromTime() throws Exception { + long expectedValues[][] = { {33, 20, 10, 00, 00, 0000}, {00, 34, 11, 00, 00, 0000}, {00, 24, 14, 00, 00, 0000} }; + testFrom("time", "/test_simple_time.json", "stringtime", expectedValues); + } + + @Test + public void testFromTimeStamp() throws Exception { + long expectedValues[][] = { {33, 20, 10, 02, 01, 1970}, {00, 34, 11, 28, 12, 2008}, {00, 24, 14, 27, 02, 2000} }; + testFrom("timestamp", "/test_simple_date.json", "stringdate", expectedValues); + } + + @Test + public void testFromInterval() throws Exception { + long expectedValues[][] = { + { 35, 20, 01, 01, 02, 02}, + { 00, 00, 00, 00, 02, 02}, + { 35, 20, 01, 00, 00, 00}, + { 35, 20, 01, 01, 02, 02}, + { 35, 00, 00, 00, 00, 00}, + {-25,-39, 00, 01, 10, 01} + }; + testFrom("interval", "/test_simple_interval.json", "stringinterval", expectedValues); + } + + @Test + public void testFromIntervalDay() throws Exception { + long expectedValues[][] = { + { 35, 20, 01, 01, 00, 00}, + { 00, 00, 00, 00, 00, 00}, + { 35, 20, 01, 00, 00, 00}, + { 35, 20, 01, 01, 00, 00}, + { 35, 00, 00, 00, 00, 00}, + {-25,-39, 00, 01, 00, 00} + }; + testFrom("intervalday", "/test_simple_interval.json", "stringinterval", expectedValues); + } + + @Test + public void testFromIntervalYear() throws Exception { + long expectedValues[][] = { + { 00, 00, 00, 00, 02, 02}, + { 00, 00, 00, 00, 02, 02}, + { 00, 00, 00, 00, 00, 00}, + { 00, 00, 00, 00, 02, 02}, + { 00, 00, 00, 00, 00, 00}, + { 00, 00, 00, 00, 10, 01} + }; + testFrom("intervalyear", "/test_simple_interval.json", "stringinterval", expectedValues); + } + + private void testFrom(String fromType, String testDataFile, String columnName, + long expectedValues[][]) throws Exception { + try (RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet(); + Drillbit bit = new Drillbit(CONFIG, serviceSet); + DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) { + + // run query. + bit.run(); + client.connect(); + List<QueryResultBatch> results = client.runQuery(UserProtos.QueryType.PHYSICAL, + Files.toString(FileUtils.getResourceAsFile("/functions/extractFrom.json"), Charsets.UTF_8) + .replace("#{TEST_TYPE}", fromType) + .replace("#{TEST_FILE}", testDataFile) + .replace("#{COLUMN_NAME}", columnName)); + + RecordBatchLoader batchLoader = new RecordBatchLoader(bit.getContext().getAllocator()); + + QueryResultBatch batch = results.get(0); + assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData())); + + for(int i=0; i<expectedValues.length; i++) { + for(int j=0; j<expectedValues[i].length; j++) { + NullableBigIntVector vv = + (NullableBigIntVector) batchLoader.getValueAccessorById(j, NullableBigIntVector.class).getValueVector(); + System.out.println("["+i+"]["+j+"]: Expected: " + expectedValues[i][j] + ", Actual: " + vv.getAccessor().get(i)); + assertEquals(expectedValues[i][j], vv.getAccessor().get(i)); + } + } + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/dcc102a2/exec/java-exec/src/test/resources/functions/extractFrom.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/functions/extractFrom.json b/exec/java-exec/src/test/resources/functions/extractFrom.json new file mode 100644 index 0000000..779581e --- /dev/null +++ b/exec/java-exec/src/test/resources/functions/extractFrom.json @@ -0,0 +1,46 @@ +{ + "head" : { + "version" : 1, + "generator" : { + "type" : "org.apache.drill.exec.planner.logical.DrillImplementor", + "info" : "" + }, + "type" : "APACHE_DRILL_PHYSICAL", + "resultMode" : "EXEC" + }, + graph:[ + { + @id:1, + pop:"fs-scan", + format: {type: "json"}, + storage:{type: "file", connection: "classpath:///"}, + files:["#{TEST_FILE}"] + }, + { + pop:"project", + @id:2, + child: 1, + exprs: [ { + ref: "castExp", expr: "cast(#{COLUMN_NAME} as #{TEST_TYPE})" + } ] + }, + { + pop:"project", + @id:3, + child: 2, + exprs: [ + { ref: "extractSecond", expr: "extractSecond(castExp)" }, + { ref: "extractMinute", expr: "extractMinute(castExp)" }, + { ref: "extractHour", expr: "extractHour(castExp)" }, + { ref: "extractDay", expr: "extractDay(castExp)" }, + { ref: "extractMonth", expr: "extractMonth(castExp)" }, + { ref: "extractYear", expr: "extractYear(castExp)" } + ] + }, + { + @id: 4, + child: 3, + pop: "screen" + } + ] +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/dcc102a2/exec/java-exec/src/test/resources/test_simple_time.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/test_simple_time.json b/exec/java-exec/src/test/resources/test_simple_time.json new file mode 100644 index 0000000..1f8b34f --- /dev/null +++ b/exec/java-exec/src/test/resources/test_simple_time.json @@ -0,0 +1,3 @@ +{ "stringtime" : "10:20:33"} +{ "stringtime" : "11:34:00.129"} +{ "stringtime" : "14:24:00"}
