Author: cws Date: Mon Dec 19 20:25:48 2011 New Revision: 1220933 URL: http://svn.apache.org/viewvc?rev=1220933&view=rev Log: HIVE-2005. Implement BETWEEN operator (Navis via cws)
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java hive/trunk/ql/src/test/queries/clientpositive/udf_between.q hive/trunk/ql/src/test/results/clientpositive/udf_between.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1220933&r1=1220932&r2=1220933&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Mon Dec 19 20:25:48 2011 @@ -152,6 +152,7 @@ import org.apache.hadoop.hive.ql.udf.gen import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAssertTrue; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce; @@ -362,6 +363,7 @@ public final class FunctionRegistry { registerGenericUDF(">=", GenericUDFOPEqualOrGreaterThan.class); registerGenericUDF("not", GenericUDFOPNot.class); registerGenericUDF("!", GenericUDFOPNot.class); + registerGenericUDF("between", GenericUDFBetween.class); registerGenericUDF("ewah_bitmap_and", GenericUDFEWAHBitmapAnd.class); registerGenericUDF("ewah_bitmap_or", GenericUDFEWAHBitmapOr.class); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=1220933&r1=1220932&r2=1220933&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Mon Dec 19 20:25:48 2011 @@ -2003,6 +2003,10 @@ precedenceEqualExpression -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions)) | (KW_IN expressions) -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions) + | ( KW_NOT KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) + -> ^(TOK_FUNCTION Identifier["between"] KW_TRUE $left $min $max) + | ( KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) + -> ^(TOK_FUNCTION Identifier["between"] KW_FALSE $left $min $max) )* ; @@ -2106,6 +2110,7 @@ sysFuncNames | KW_RLIKE | KW_REGEXP | KW_IN + | KW_BETWEEN ; descFuncNames Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java?rev=1220933&view=auto ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java (added) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java Mon Dec 19 20:25:48 2011 @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.BooleanWritable; + +@Description(name = "between", value = "_FUNC_ a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c") +public class GenericUDFBetween extends GenericUDF { + + GenericUDFOPEqualOrGreaterThan egt = new GenericUDFOPEqualOrGreaterThan(); + GenericUDFOPEqualOrLessThan elt = new GenericUDFOPEqualOrLessThan(); + + private ObjectInspector[] argumentOIs; + private final BooleanWritable result = new BooleanWritable(); + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (!arguments[0].getTypeName().equals("boolean")) { + throw new UDFArgumentTypeException(0, "First argument for BETWEEN should be boolean type"); + } + egt.initialize(new ObjectInspector[] {arguments[1], arguments[2]}); + elt.initialize(new ObjectInspector[] {arguments[1], arguments[3]}); + + argumentOIs = arguments; + return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + boolean invert = (Boolean) ((PrimitiveObjectInspector) argumentOIs[0]) + .getPrimitiveJavaObject(arguments[0].get()); + + BooleanWritable left = ((BooleanWritable)egt.evaluate(new DeferredObject[] {arguments[1], arguments[2]})); + if (left == null) { + return null; + } + if (!invert && !left.get()) { + result.set(false); + return result; + } + BooleanWritable right = ((BooleanWritable)elt.evaluate(new DeferredObject[] {arguments[1], arguments[3]})); + if (right == null) { + return null; + } + boolean between = left.get() && right.get(); + result.set(invert ? !between : between); + return result; + } + + @Override + public String getDisplayString(String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append(children[1]); + if (Boolean.valueOf(children[0])) { + sb.append(" NOT"); + } + sb.append(" BETWEEN "); + sb.append(children[2]).append(" AND ").append(children[3]); + return sb.toString(); + } +} Added: hive/trunk/ql/src/test/queries/clientpositive/udf_between.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_between.q?rev=1220933&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/udf_between.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/udf_between.q Mon Dec 19 20:25:48 2011 @@ -0,0 +1,14 @@ +describe function between; +describe function extended between; + +explain SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20; +SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20; + +explain SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20; +SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20; + +explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1; +SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1; + +explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1; +SELECT * FROM src where 2 between 2 AND '3' LIMIT 1; Modified: hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=1220933&r1=1220932&r2=1220933&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Mon Dec 19 20:25:48 2011 @@ -28,6 +28,7 @@ asin assert_true atan avg +between bin case ceil Added: hive/trunk/ql/src/test/results/clientpositive/udf_between.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_between.q.out?rev=1220933&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/udf_between.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/udf_between.q.out Mon Dec 19 20:25:48 2011 @@ -0,0 +1,252 @@ +PREHOOK: query: describe function between +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: describe function between +POSTHOOK: type: DESCFUNCTION +between a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c +PREHOOK: query: describe function extended between +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: describe function extended between +POSTHOOK: type: DESCFUNCTION +between a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c +PREHOOK: query: explain SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE (+ (TOK_TABLE_OR_COL key) 100) (+ 150 (- 50)) (+ 150 50))) (TOK_LIMIT 20))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key + 100) BETWEEN (150 + (- 50)) AND (150 + 50) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 20 + + +PREHOOK: query: SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +86 val_86 +27 val_27 +98 val_98 +66 val_66 +37 val_37 +15 val_15 +82 val_82 +17 val_17 +0 val_0 +57 val_57 +20 val_20 +92 val_92 +47 val_47 +72 val_72 +4 val_4 +35 val_35 +54 val_54 +51 val_51 +65 val_65 +83 val_83 +PREHOOK: query: explain SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_TRUE (+ (TOK_TABLE_OR_COL key) 100) (+ 150 (- 50)) (+ 150 50))) (TOK_LIMIT 20))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key + 100) NOT BETWEEN (150 + (- 50)) AND (150 + 50) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 20 + + +PREHOOK: query: SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 +311 val_311 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +484 val_484 +265 val_265 +193 val_193 +401 val_401 +150 val_150 +273 val_273 +224 val_224 +369 val_369 +128 val_128 +213 val_213 +146 val_146 +406 val_406 +429 val_429 +374 val_374 +PREHOOK: query: explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE 'b' 'a' 'c')) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: 'b' BETWEEN 'a' AND 'c' + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 +PREHOOK: query: explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE 2 2 '3')) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: 2 BETWEEN 2 AND '3' + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT * FROM src where 2 between 2 AND '3' LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src where 2 between 2 AND '3' LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238