[ https://issues.apache.org/jira/browse/HIVE-25230?focusedWorklogId=766991&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-766991 ]
ASF GitHub Bot logged work on HIVE-25230: ----------------------------------------- Author: ASF GitHub Bot Created on: 06/May/22 02:34 Start Date: 06/May/22 02:34 Worklog Time Spent: 10m Work Description: dengzhhu653 commented on code in PR #2378: URL: https://github.com/apache/hive/pull/2378#discussion_r866443789 ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInstr.java: ########## @@ -20,79 +20,158 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; -import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; /** - * Generic UDF for string function <code>INSTR(str,substr)</code>. This mimcs - * the function from MySQL + * Generic UDF for string function <code>INSTR(str,substr[,pos[,occurrence]])</code>. + * This extends the function from MySQL * http://dev.mysql.com/doc/refman/5.1/en/string-functions.html#function_instr + * and mimics the function from Oracle + * https://docs.oracle.com/database/121/SQLRF/functions089.htm#SQLRF00651 * * <pre> * usage: - * INSTR(str, substr) + * INSTR(str, substr[, pos[, occurrence]]) * </pre> * <p> */ @Description(name = "instr", - value = "_FUNC_(str, substr) - Returns the index of the first occurance of substr in str", - extended = "Example:\n" - + " > SELECT _FUNC_('Facebook', 'boo') FROM src LIMIT 1;\n" + " 5") + value = "_FUNC_(str, substr[, pos[, occurrence]]) " + + "- Returns the index of the given occurrence of substr in str after position pos", + extended = "pos is a 1-based index. If pos < 0, the starting position is\n" + + "determined by counting backwards from the end of str and then Hive\n" + + "searches backward from the resulting position.\n" + + "occurrence is also a 1-based index. The value must be positive.\n" + + "If occurrence is greater than the number of matching occurrences,\n" + + "the function returns 0.\n" + + "If either of the optional arguments, pos or occurrence, is NULL,\n" + + "the function also returns NULL.\n" + + "Example:\n" + + " > SELECT _FUNC_('Facebook', 'boo') FROM src LIMIT 1;\n" + + " 5\n" + + " > SELECT _FUNC_('CORPORATE FLOOR','OR', 3, 2) FROM src LIMIT 1;\n" + + " 14\n" + + " > SELECT _FUNC_('CORPORATE FLOOR','OR', -3, 2) FROM src LIMIT 1;\n" + + " 2") public class GenericUDFInstr extends GenericUDF { private transient ObjectInspectorConverters.Converter[] converters; + private transient PrimitiveObjectInspector.PrimitiveCategory[] inputTypes; + private transient Integer posConst; + private transient boolean isPosConst = false; + private transient Integer occurrenceConst; + private transient boolean isOccurrenceConst = false; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 2) { - throw new UDFArgumentLengthException( - "The function INSTR accepts exactly 2 arguments."); - } + checkArgsSize(arguments, 2, 4); + converters = new ObjectInspectorConverters.Converter[arguments.length]; + inputTypes = new PrimitiveObjectInspector.PrimitiveCategory[arguments.length]; for (int i = 0; i < arguments.length; i++) { - Category category = arguments[i].getCategory(); - if (category != Category.PRIMITIVE) { - throw new UDFArgumentTypeException(i, "The " - + GenericUDFUtils.getOrdinal(i + 1) - + " argument of function INSTR is expected to a " - + Category.PRIMITIVE.toString().toLowerCase() + " type, but " - + category.toString().toLowerCase() + " is found"); + checkArgPrimitive(arguments, i); + if (i < 2) { + obtainStringConverter(arguments, i, inputTypes, converters); + } else { + obtainIntConverter(arguments, i, inputTypes, converters); } } - converters = new ObjectInspectorConverters.Converter[arguments.length]; - for (int i = 0; i < arguments.length; i++) { - converters[i] = ObjectInspectorConverters.getConverter(arguments[i], - PrimitiveObjectInspectorFactory.writableStringObjectInspector); + if (arguments.length > 2 && arguments[2] instanceof ConstantObjectInspector) { Review Comment: we may not need to get the pos or occurrence constants here, they will be wrapped in DerferredObject and delivered to the evaluate method. Issue Time Tracking ------------------- Worklog Id: (was: 766991) Time Spent: 1h 10m (was: 1h) > add position and occurrence to instr() > -------------------------------------- > > Key: HIVE-25230 > URL: https://issues.apache.org/jira/browse/HIVE-25230 > Project: Hive > Issue Type: New Feature > Components: UDF > Reporter: Quanlong Huang > Assignee: Quanlong Huang > Priority: Major > Labels: pull-request-available > Time Spent: 1h 10m > Remaining Estimate: 0h > > Current instr() only supports two arguments: > {code:java} > instr(str, substr) - Returns the index of the first occurance of substr in str > {code} > Other systems (Vertica, Oracle, Impala etc) support additional position and > occurrence arguments: > {code:java} > instr(str, substr[, pos[, occurrence]]) > {code} > Oracle doc: > [https://docs.oracle.com/database/121/SQLRF/functions089.htm#SQLRF00651] > It'd be nice to support this as well. Otherwise, it's a SQL difference > between Impala and Hive. > Impala supports this in IMPALA-3973 -- This message was sent by Atlassian Jira (v8.20.7#820007)