DRILL-784: Implement min, max, and count for VarChar and VarBinary data types. Re-enable aggregate function tests.
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/68f4bdb4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/68f4bdb4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/68f4bdb4 Branch: refs/heads/master Commit: 68f4bdb403225d58a5d9d68b4de361d18d3a8bc3 Parents: 37c1d06 Author: Mehant Baid <[email protected]> Authored: Mon May 19 17:23:51 2014 -0700 Committer: Mehant Baid <[email protected]> Committed: Tue May 20 13:43:03 2014 -0700 ---------------------------------------------------------------------- .../src/main/codegen/data/AggrTypes1.tdd | 18 ++- .../templates/VarCharAggrFunctions1.java | 158 +++++++++++++++++++ .../jdbc/test/TestAggregateFunctionsQuery.java | 32 +++- 3 files changed, 198 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/68f4bdb4/exec/java-exec/src/main/codegen/data/AggrTypes1.tdd ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/data/AggrTypes1.tdd b/exec/java-exec/src/main/codegen/data/AggrTypes1.tdd index 71931df..d76d913 100644 --- a/exec/java-exec/src/main/codegen/data/AggrTypes1.tdd +++ b/exec/java-exec/src/main/codegen/data/AggrTypes1.tdd @@ -40,7 +40,11 @@ {inputType: "IntervalYear", outputType: "IntervalYear", runningType: "IntervalYear", major: "Date", initialValue: "Integer.MAX_VALUE"}, {inputType: "NullableIntervalYear", outputType: "IntervalYear", runningType: "IntervalYear", major: "Date", initialValue: "Integer.MAX_VALUE"}, {inputType: "Interval", outputType: "Interval", runningType: "BigInt", major: "Date", initialValue: "Long.MAX_VALUE"}, - {inputType: "NullableInterval", outputType: "Interval", runningType: "BigInt", major: "Date", initialValue: "Long.MAX_VALUE"} + {inputType: "NullableInterval", outputType: "Interval", runningType: 
"BigInt", major: "Date", initialValue: "Long.MAX_VALUE"}, + {inputType: "VarChar", outputType: "VarChar", runningType: "VarChar", major: "VarBytes", initialValue: "0xFF", bufferEnd: "65536"}, + {inputType: "NullableVarChar", outputType: "VarChar", runningType: "VarChar", major: "VarBytes", initialValue: "0xFF",bufferEnd: "65536"}, + {inputType: "VarBinary", outputType: "VarBinary", runningType: "VarBinary", major: "VarBytes", initialValue: "0xFF", bufferEnd: "65536"}, + {inputType: "NullableVarBinary", outputType: "VarBinary", runningType: "VarBinary", major: "VarBytes", initialValue: "0xFF", bufferEnd: "65536"} ] }, {className: "Max", funcName: "max", types: [ @@ -67,7 +71,11 @@ {inputType: "IntervalYear", outputType: "IntervalYear", runningType: "IntervalYear", major: "Date", initialValue: "Integer.MIN_VALUE"}, {inputType: "NullableIntervalYear", outputType: "IntervalYear", runningType: "IntervalYear", major: "Date", initialValue: "Integer.MIN_VALUE"}, {inputType: "Interval", outputType: "Interval", runningType: "BigInt", major: "Date", initialValue: "Long.MIN_VALUE"}, - {inputType: "NullableInterval", outputType: "Interval", runningType: "BigInt", major: "Date", initialValue: "Long.MIN_VALUE"} + {inputType: "NullableInterval", outputType: "Interval", runningType: "BigInt", major: "Date", initialValue: "Long.MIN_VALUE"}, + {inputType: "VarChar", outputType: "VarChar", runningType: "VarChar", major: "VarBytes", initialValue: "0x00", bufferEnd: "0"}, + {inputType: "NullableVarChar", outputType: "VarChar", runningType: "VarChar", major: "VarBytes", initialValue: "0x00", bufferEnd: "0"}, + {inputType: "VarBinary", outputType: "VarBinary", runningType: "VarBinary", major: "VarBytes", initialValue: "0x00", bufferEnd: "0"}, + {inputType: "NullableVarBinary", outputType: "VarBinary", runningType: "VarBinary", major: "VarBytes", initialValue: "0x00", bufferEnd: "0"} ] }, {className: "Sum", funcName: "sum", types: [ @@ -113,7 +121,11 @@ {inputType: "IntervalYear", 
outputType: "BigInt", runningType: "BigInt", major: "Date", initialValue: "0"}, {inputType: "NullableIntervalYear", outputType: "BigInt", runningType: "BigInt", major: "Date", initialValue: "0"}, {inputType: "Interval", outputType: "BigInt", runningType: "BigInt", major: "Date", initialValue: "0"}, - {inputType: "NullableInterval", outputType: "BigInt", runningType: "BigInt", major: "Date", initialValue: "0"} + {inputType: "NullableInterval", outputType: "BigInt", runningType: "BigInt", major: "Date", initialValue: "0"}, + {inputType: "VarChar", outputType: "BigInt", runningType: "BigInt", major: "VarBytes"}, + {inputType: "NullableVarChar", outputType: "BigInt", runningType: "BigInt", major: "VarBytes"}, + {inputType: "VarBinary", outputType: "BigInt", runningType: "BigInt", major: "VarBytes"}, + {inputType: "NullableVarBinary", outputType: "BigInt", runningType: "BigInt", major: "VarBytes"} ] } ] http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/68f4bdb4/exec/java-exec/src/main/codegen/templates/VarCharAggrFunctions1.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/templates/VarCharAggrFunctions1.java b/exec/java-exec/src/main/codegen/templates/VarCharAggrFunctions1.java new file mode 100644 index 0000000..b5b5010 --- /dev/null +++ b/exec/java-exec/src/main/codegen/templates/VarCharAggrFunctions1.java @@ -0,0 +1,158 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +<@pp.dropOutputFile /> + + + +<#list aggrtypes1.aggrtypes as aggrtype> +<@pp.changeOutputFile name="/org/apache/drill/exec/expr/fn/impl/gaggr/${aggrtype.className}VarBytesFunctions.java" /> + +<#include "/@includes/license.ftl" /> + +<#-- A utility class that is used to generate java code for aggr functions that maintain a single --> +<#-- running counter to hold the result. This includes: MIN, MAX, COUNT. --> + +/* + * This class is automatically generated from VarCharAggrFunctions1.java using FreeMarker. + */ + +package org.apache.drill.exec.expr.fn.impl.gaggr; + +import org.apache.drill.exec.expr.DrillAggFunc; +import org.apache.drill.exec.expr.annotations.FunctionTemplate; +import org.apache.drill.exec.expr.annotations.FunctionTemplate.FunctionScope; +import org.apache.drill.exec.expr.annotations.Output; +import org.apache.drill.exec.expr.annotations.Param; +import org.apache.drill.exec.expr.annotations.Workspace; +import org.apache.drill.exec.expr.holders.*; +import org.apache.drill.exec.record.RecordBatch; +import io.netty.buffer.ByteBuf; + +@SuppressWarnings("unused") + +public class ${aggrtype.className}VarBytesFunctions { + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(${aggrtype.className}Functions.class); + +<#list aggrtype.types as type> +<#if type.major == "VarBytes"> + +@FunctionTemplate(name = "${aggrtype.funcName}", scope = FunctionTemplate.FunctionScope.POINT_AGGREGATE) +public static class ${type.inputType}${aggrtype.className} implements DrillAggFunc{ + + @Param ${type.inputType}Holder in; + 
@Workspace ${type.runningType}Holder value; + @Output ${type.outputType}Holder out; + + public void setup(RecordBatch b) { + value = new ${type.runningType}Holder(); + <#if aggrtype.funcName == "max" || aggrtype.funcName == "min"> + value.start = 0; + value.end = ${type.bufferEnd}; + io.netty.buffer.ByteBuf buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [65536]); + for (int i = 0; i < 65536; i++) { + buffer.setByte(i, ${type.initialValue}); + } + value.buffer = buffer; + + <#else> + value.value = 0; + </#if> + } + + @Override + public void add() { + <#if type.inputType?starts_with("Nullable")> + sout: { + if (in.isSet == 0) { + // processing nullable input and the value is null, so don't do anything... + break sout; + } + </#if> + <#if aggrtype.funcName == "max" || aggrtype.funcName == "min"> + int cmp = 0; + boolean swap = false; + + // Compare the bytes + for (int l = in.start, r = value.start; l < in.end && r < value.end; l++, r++) { + byte leftByte = in.buffer.getByte(l); + byte rightByte = value.buffer.getByte(r); + if (leftByte != rightByte) { + cmp = ((leftByte & 0xFF) - (rightByte & 0xFF)) > 0 ? 
1 : -1; + break; + } + } + + if (cmp == 0) { + int l = (in.end - in.start) - (value.end - value.start); + if (l > 0) { + cmp = 1; + } else { + cmp = -1; + } + } + + <#if aggrtype.className == "Min"> + swap = (cmp == -1); + <#elseif aggrtype.className == "Max"> + swap = (cmp == 1); + </#if> + + if (swap) { + int length = in.end - in.start; + in.buffer.getBytes(in.start, value.buffer, 0, length); + value.end = length; + } + <#else> + value.value++; + </#if> + <#if type.inputType?starts_with("Nullable")> + } // end of sout block + </#if> + } + + @Override + public void output() { + <#if aggrtype.funcName == "max" || aggrtype.funcName == "min"> + out.start = value.start; + out.end = value.end; + out.buffer = value.buffer; + <#else> + out.value = value.value; + </#if> + } + + @Override + public void reset() { + value = new ${type.runningType}Holder(); + <#if aggrtype.funcName == "max" || aggrtype.funcName == "min"> + value.start = 0; + value.end = ${type.bufferEnd}; + io.netty.buffer.ByteBuf buffer = io.netty.buffer.Unpooled.wrappedBuffer(new byte [65536]); + for (int i = 0; i < 65536; i++) { + buffer.setByte(i, ${type.initialValue}); + } + value.buffer = buffer; + <#else> + value.value = 0; + </#if> + } +} +</#if> +</#list> +} +</#list> http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/68f4bdb4/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestAggregateFunctionsQuery.java ---------------------------------------------------------------------- diff --git a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestAggregateFunctionsQuery.java b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestAggregateFunctionsQuery.java index 4a57ffb..5c5a218 100644 --- a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestAggregateFunctionsQuery.java +++ b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestAggregateFunctionsQuery.java @@ -36,10 +36,11 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TestRule; +import 
org.joda.time.DateTime; + import com.google.common.base.Function; import com.google.common.base.Stopwatch; -@Ignore // until stream agg changing schema is fixed. public class TestAggregateFunctionsQuery { public static final String WORKING_PATH; @@ -53,12 +54,15 @@ public class TestAggregateFunctionsQuery { String query = new String("SELECT max(cast(HIRE_DATE as date)) as MAX_DATE, min(cast(HIRE_DATE as date)) as MIN_DATE" + " FROM `employee.json`"); + + String t = new DateTime(1998, 1, 1, 0, 0, 0, 0).toString(); + String t1 = new DateTime(1993, 5, 1, 0, 0, 0, 0).toString(); + + String result = String.format("MAX_DATE="+ t + "; " + "MIN_DATE=" + t1 + "\n"); + JdbcAssert.withFull("cp") .sql(query) - .returns( - "MAX_DATE=1998-01-01; " + - "MIN_DATE=1993-05-01\n" - ); + .returns(result); } @Test @@ -69,8 +73,8 @@ public class TestAggregateFunctionsQuery { JdbcAssert.withFull("cp") .sql(query) .returns( - "MAX_DAYS=7671 days 0:0:0.0; " + - "MIN_DAYS=5965 days 0:0:0.0\n" + "MAX_DAYS=P7671D; " + + "MIN_DAYS=P5965D\n" ); } @@ -96,4 +100,18 @@ public class TestAggregateFunctionsQuery { "MIN_DEC38=1.00000000000\n " ); } + + + @Test + public void testVarCharAggFunction() throws Exception{ + String query = new String("select max(full_name) as MAX_NAME, min(full_name) as MIN_NAME" + + " FROM `employee.json`"); + + JdbcAssert.withFull("cp") + .sql(query) + .returns( + "MAX_NAME=Zach Lovell; " + + "MIN_NAME=A. Joyce Jarvis\n" + ); + } }
