[ https://issues.apache.org/jira/browse/HIVE-26754?focusedWorklogId=826917&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-826917 ]
ASF GitHub Bot logged work on HIVE-26754: ----------------------------------------- Author: ASF GitHub Bot Created on: 17/Nov/22 17:25 Start Date: 17/Nov/22 17:25 Worklog Time Spent: 10m Work Description: scarlin-cloudera commented on code in PR #3777: URL: https://github.com/apache/hive/pull/3777#discussion_r1025468514 ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java: ########## @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde.serdeConstants; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +import java.util.ArrayList; +import java.util.List; + +/** + * Abstract GenericUDF for array functions + */ + +public abstract class AbstractGenericUDFArrayBase extends GenericUDF { + + static final int ARRAY_IDX = 0; + static final int ARRAY2_IDX = 1; + static final int START_IDX = 1; + static final int LENGTH_IDX = 2; + static final int SEPARATOR_IDX = 1; + static final int REPLACE_NULL_IDX = 2; + + int MIN_ARG_COUNT; Review Comment: Variables should be lower case and camel case. Also, in cases like this, my first preference is to declare these as private and have derived classes retrieve them with getter methods. I don't oppose allowing child classes to have access, but if I do that, usually I go with "protected"...which doesn't have as much meaning in Java, but it still shows me as a developer that children are gonna use it. ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java: ########## @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde.serdeConstants; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +import java.util.ArrayList; +import java.util.List; + +/** + * Abstract GenericUDF for array functions + */ + +public abstract class AbstractGenericUDFArrayBase extends GenericUDF { + + static final int ARRAY_IDX = 0; + static final int ARRAY2_IDX = 1; + static final int START_IDX = 1; + static final int LENGTH_IDX = 2; + static final int SEPARATOR_IDX = 1; + static final int REPLACE_NULL_IDX = 2; + + int MIN_ARG_COUNT; + int MAX_ARG_COUNT; + + transient ListObjectInspector arrayOI; + transient ObjectInspector[] argumentOIs; + + transient Converter converter; + + enum FUNC_NAMES { + ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN, ARRAY_EXCEPT, ARRAY_INTERSECT + } + + 
FUNC_NAMES FUNC_NAME; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + + // Check if wrong number of arguments were passed + checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT); + + // Check if the argument is of category LIST or not + checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + + if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT + || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT + || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) { + checkArgCategory(arguments, ARRAY2_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + } + + if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) { Review Comment: Perhaps for this one, we can override the initialize, and check these categories in the derived class? ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java: ########## @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde.serdeConstants; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +import java.util.ArrayList; +import java.util.List; + +/** + * Abstract GenericUDF for array functions + */ + +public abstract class AbstractGenericUDFArrayBase extends GenericUDF { + + static final int ARRAY_IDX = 0; + static final int ARRAY2_IDX = 1; + static final int START_IDX = 1; + static final int LENGTH_IDX = 2; + static final int SEPARATOR_IDX = 1; + static final int REPLACE_NULL_IDX = 2; + + int MIN_ARG_COUNT; + int MAX_ARG_COUNT; + + transient ListObjectInspector arrayOI; + transient ObjectInspector[] argumentOIs; + + transient Converter converter; + + enum FUNC_NAMES { + ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN, ARRAY_EXCEPT, ARRAY_INTERSECT + } + + FUNC_NAMES FUNC_NAME; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + + // Check if wrong number of arguments were passed + checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT); + + // Check if the argument is of category LIST or not + checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + + if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT + || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT + || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) { + 
checkArgCategory(arguments, ARRAY2_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + } + + if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) { + PrimitiveObjectInspector startIndexObjectInspector = (PrimitiveObjectInspector) arguments[START_IDX]; + PrimitiveObjectInspector lengthObjectInspector = (PrimitiveObjectInspector) arguments[LENGTH_IDX]; + checkArgIntPrimitiveCategory(startIndexObjectInspector, FUNC_NAME, 2); + checkArgIntPrimitiveCategory(lengthObjectInspector, FUNC_NAME, 3); + } + + arrayOI = (ListObjectInspector) arguments[ARRAY_IDX]; + argumentOIs = arguments; + + //return initialize(arguments); + return initListOI(arguments); + } + + @Override + public String getDisplayString(String[] children) { + assert (children.length == MIN_ARG_COUNT); + return FUNC_NAME.toString().toLowerCase() + "(" + children[ARRAY_IDX] + ")"; + } + + List<Object> convertArray(List objects) { + List<Object> ret = new ArrayList<>(); + for (Object o : objects) { + ret.add(converter.convert(o)); + } + return ret; + } + + void checkArgCategory(ObjectInspector[] arguments, int idx, Enum category, + FUNC_NAMES function_name, String typeName) throws UDFArgumentTypeException { + + if (!arguments[idx].getCategory().equals(category)) { + throw new UDFArgumentTypeException(idx, + "\"" + typeName + "\" " + + "expected at function " + function_name + ", but " + + "\"" + arguments[idx].getTypeName() + "\" " + + "is found"); + } + } + + void checkArgIntPrimitiveCategory(PrimitiveObjectInspector objectInspector, + FUNC_NAMES function_name, int idx) throws UDFArgumentTypeException { Review Comment: functionName should be camel case ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java: ########## @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde.serdeConstants; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +import java.util.ArrayList; +import java.util.List; + +/** + * Abstract GenericUDF for array functions + */ + +public abstract class AbstractGenericUDFArrayBase extends GenericUDF { + + static final int ARRAY_IDX = 0; + static final int ARRAY2_IDX = 1; + static final int START_IDX = 1; + static final int LENGTH_IDX = 2; + static final int SEPARATOR_IDX = 1; + static final int REPLACE_NULL_IDX = 2; + + int MIN_ARG_COUNT; + int MAX_ARG_COUNT; + + transient ListObjectInspector arrayOI; + transient ObjectInspector[] argumentOIs; + + transient Converter converter; + + enum 
FUNC_NAMES { + ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN, ARRAY_EXCEPT, ARRAY_INTERSECT + } + + FUNC_NAMES FUNC_NAME; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + + // Check if wrong number of arguments were passed + checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT); + + // Check if the argument is of category LIST or not + checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + + if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT + || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT + || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) { + checkArgCategory(arguments, ARRAY2_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + } + + if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) { + PrimitiveObjectInspector startIndexObjectInspector = (PrimitiveObjectInspector) arguments[START_IDX]; + PrimitiveObjectInspector lengthObjectInspector = (PrimitiveObjectInspector) arguments[LENGTH_IDX]; + checkArgIntPrimitiveCategory(startIndexObjectInspector, FUNC_NAME, 2); + checkArgIntPrimitiveCategory(lengthObjectInspector, FUNC_NAME, 3); + } + + arrayOI = (ListObjectInspector) arguments[ARRAY_IDX]; + argumentOIs = arguments; + + //return initialize(arguments); + return initListOI(arguments); + } + + @Override + public String getDisplayString(String[] children) { + assert (children.length == MIN_ARG_COUNT); + return FUNC_NAME.toString().toLowerCase() + "(" + children[ARRAY_IDX] + ")"; + } + + List<Object> convertArray(List objects) { + List<Object> ret = new ArrayList<>(); + for (Object o : objects) { + ret.add(converter.convert(o)); + } + return ret; + } + + void checkArgCategory(ObjectInspector[] arguments, int idx, Enum category, + FUNC_NAMES function_name, String typeName) throws UDFArgumentTypeException { + + if (!arguments[idx].getCategory().equals(category)) { + throw new 
UDFArgumentTypeException(idx, + "\"" + typeName + "\" " + + "expected at function " + function_name + ", but " + + "\"" + arguments[idx].getTypeName() + "\" " + + "is found"); + } + } + + void checkArgIntPrimitiveCategory(PrimitiveObjectInspector objectInspector, + FUNC_NAMES function_name, int idx) throws UDFArgumentTypeException { + + switch (objectInspector.getPrimitiveCategory()) { + case SHORT: + case INT: + case LONG: + break; + default: + throw new UDFArgumentTypeException(0, "Argument " + idx + + " of function " + function_name + " must be \"" + + serdeConstants.SMALLINT_TYPE_NAME + "\"" + + " or \"" + serdeConstants.INT_TYPE_NAME + "\"" + + " or \"" + serdeConstants.BIGINT_TYPE_NAME + "\", but \"" + + objectInspector.getTypeName() + "\" was found."); + } + } + + boolean isListEmpty(Object array, ListObjectInspector listObjectInspector) { + + int arrayLength = listObjectInspector.getListLength(array); + + // Check if array is null or empty or value is null + return array == null || arrayLength <= 0; Review Comment: optional nit: I would prefer this to be one line, as in return array == null || listObjectInspector.getListLength(array) <= 0; ...and then get rid of all the blank lines above and below ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayDistinct.java: ########## @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; + +import java.util.List; +import java.util.stream.Collectors; + +/** + * Generic UDF for distinct array + * <code>ARRAY_DISTINCT(array(obj1, obj2, obj3...))</code>. + * + * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF + */ +@Description(name = "array_distinct", + value = "_FUNC_(array(obj1, obj2,...)) - " + + "The function returns an array of the same type as the input argument where all duplicate" + + " values have been removed.", + extended = "Example:\n" + + " > SELECT _FUNC_(array('b', 'd', 'd', 'a')) FROM src LIMIT 1;\n" + + " 'b', 'd', 'a'") +public class GenericUDFArrayDistinct extends AbstractGenericUDFArrayBase { + + public GenericUDFArrayDistinct() { + FUNC_NAME = FUNC_NAMES.ARRAY_DISTINCT; Review Comment: Even if we do make these variables accessible from the child class, we should only set the variables in the parent class. Please pass these up through the constructor. ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java: ########## @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde.serdeConstants; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +import java.util.ArrayList; +import java.util.List; + +/** + * Abstract GenericUDF for array functions + */ + +public abstract class AbstractGenericUDFArrayBase extends GenericUDF { + + static final int ARRAY_IDX = 0; + static final int ARRAY2_IDX = 1; + static final int START_IDX = 1; + static final int LENGTH_IDX = 2; + static final int SEPARATOR_IDX = 1; + static final int REPLACE_NULL_IDX = 2; + + int MIN_ARG_COUNT; + int MAX_ARG_COUNT; + + transient ListObjectInspector arrayOI; + transient ObjectInspector[] argumentOIs; + + transient Converter converter; + + enum FUNC_NAMES { + ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN, ARRAY_EXCEPT, ARRAY_INTERSECT + } + + 
FUNC_NAMES FUNC_NAME; Review Comment: Same as line 47 ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java: ########## @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde.serdeConstants; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +import java.util.ArrayList; +import java.util.List; + +/** + * Abstract GenericUDF for array functions + */ + +public abstract class AbstractGenericUDFArrayBase extends GenericUDF { + + static final int ARRAY_IDX = 0; + static final int ARRAY2_IDX = 1; + static final int START_IDX = 1; + static 
final int LENGTH_IDX = 2; + static final int SEPARATOR_IDX = 1; + static final int REPLACE_NULL_IDX = 2; + + int MIN_ARG_COUNT; + int MAX_ARG_COUNT; + + transient ListObjectInspector arrayOI; + transient ObjectInspector[] argumentOIs; + + transient Converter converter; + + enum FUNC_NAMES { Review Comment: I'm not a big fan of having the parent class being aware of the child classes. Gonna comment below on how these might be replaced... ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java: ########## @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde.serdeConstants; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +import java.util.ArrayList; +import java.util.List; + +/** + * Abstract GenericUDF for array functions + */ + +public abstract class AbstractGenericUDFArrayBase extends GenericUDF { + + static final int ARRAY_IDX = 0; + static final int ARRAY2_IDX = 1; + static final int START_IDX = 1; + static final int LENGTH_IDX = 2; + static final int SEPARATOR_IDX = 1; + static final int REPLACE_NULL_IDX = 2; + + int MIN_ARG_COUNT; + int MAX_ARG_COUNT; + + transient ListObjectInspector arrayOI; + transient ObjectInspector[] argumentOIs; + + transient Converter converter; + + enum FUNC_NAMES { + ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN, ARRAY_EXCEPT, ARRAY_INTERSECT + } + + FUNC_NAMES FUNC_NAME; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + + // Check if wrong number of arguments were passed + checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT); + + // Check if the argument is of category LIST or not + checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + + if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT Review Comment: Can we have an abstract supporter function like if (supportsTwoArgs() (I'm horrible 
with names) or something like that? ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java: ########## @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde.serdeConstants; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +import java.util.ArrayList; +import java.util.List; + +/** + * Abstract GenericUDF for array functions + */ + +public abstract class AbstractGenericUDFArrayBase extends GenericUDF { + + static final int ARRAY_IDX = 0; + static final int ARRAY2_IDX = 1; + static final int START_IDX = 1; + static final int 
LENGTH_IDX = 2; + static final int SEPARATOR_IDX = 1; + static final int REPLACE_NULL_IDX = 2; + + int MIN_ARG_COUNT; + int MAX_ARG_COUNT; + + transient ListObjectInspector arrayOI; + transient ObjectInspector[] argumentOIs; + + transient Converter converter; + + enum FUNC_NAMES { + ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN, ARRAY_EXCEPT, ARRAY_INTERSECT + } + + FUNC_NAMES FUNC_NAME; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + + // Check if wrong number of arguments were passed + checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT); + + // Check if the argument is of category LIST or not + checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + + if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT + || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT + || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) { + checkArgCategory(arguments, ARRAY2_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + } + + if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) { + PrimitiveObjectInspector startIndexObjectInspector = (PrimitiveObjectInspector) arguments[START_IDX]; + PrimitiveObjectInspector lengthObjectInspector = (PrimitiveObjectInspector) arguments[LENGTH_IDX]; + checkArgIntPrimitiveCategory(startIndexObjectInspector, FUNC_NAME, 2); + checkArgIntPrimitiveCategory(lengthObjectInspector, FUNC_NAME, 3); + } + + arrayOI = (ListObjectInspector) arguments[ARRAY_IDX]; + argumentOIs = arguments; + + //return initialize(arguments); + return initListOI(arguments); + } + + @Override + public String getDisplayString(String[] children) { + assert (children.length == MIN_ARG_COUNT); + return FUNC_NAME.toString().toLowerCase() + "(" + children[ARRAY_IDX] + ")"; + } + + List<Object> convertArray(List objects) { + List<Object> ret = new ArrayList<>(); + for (Object o : objects) { + 
ret.add(converter.convert(o)); + } + return ret; + } + + void checkArgCategory(ObjectInspector[] arguments, int idx, Enum category, + FUNC_NAMES function_name, String typeName) throws UDFArgumentTypeException { + + if (!arguments[idx].getCategory().equals(category)) { + throw new UDFArgumentTypeException(idx, + "\"" + typeName + "\" " + + "expected at function " + function_name + ", but " + + "\"" + arguments[idx].getTypeName() + "\" " + + "is found"); + } + } + + void checkArgIntPrimitiveCategory(PrimitiveObjectInspector objectInspector, + FUNC_NAMES function_name, int idx) throws UDFArgumentTypeException { + + switch (objectInspector.getPrimitiveCategory()) { + case SHORT: + case INT: + case LONG: + break; + default: + throw new UDFArgumentTypeException(0, "Argument " + idx + + " of function " + function_name + " must be \"" + + serdeConstants.SMALLINT_TYPE_NAME + "\"" + + " or \"" + serdeConstants.INT_TYPE_NAME + "\"" + + " or \"" + serdeConstants.BIGINT_TYPE_NAME + "\", but \"" + + objectInspector.getTypeName() + "\" was found."); + } + } + + boolean isListEmpty(Object array, ListObjectInspector listObjectInspector) { + + int arrayLength = listObjectInspector.getListLength(array); + + // Check if array is null or empty or value is null + return array == null || arrayLength <= 0; + } + + ObjectInspector initListOI(ObjectInspector[] arguments) { + + GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver = + new GenericUDFUtils.ReturnObjectInspectorResolver(true); + + ObjectInspector elementObjectInspector = + ((ListObjectInspector) (arguments[0])).getListElementObjectInspector(); + + ObjectInspector returnOI = returnOIResolver.get(elementObjectInspector); + converter = ObjectInspectorConverters.getConverter(elementObjectInspector, returnOI); + if(FUNC_NAME == FUNC_NAMES.ARRAY_MAX || FUNC_NAME == FUNC_NAMES.ARRAY_MIN){ Review Comment: Not sure how to handle this one yet, but again, if we can move these down to the derived class to avoid function names in 
the base class, I think that would be good. Will think of a way later. Issue Time Tracking ------------------- Worklog Id: (was: 826917) Time Spent: 20m (was: 10m) > Implement array_distinct UDF to return an array after removing duplicates in > it > ------------------------------------------------------------------------------- > > Key: HIVE-26754 > URL: https://issues.apache.org/jira/browse/HIVE-26754 > Project: Hive > Issue Type: Sub-task > Components: Hive > Reporter: Taraka Rama Rao Lethavadla > Assignee: Taraka Rama Rao Lethavadla > Priority: Major > Labels: pull-request-available > Time Spent: 20m > Remaining Estimate: 0h > > *array_distinct(array(obj1, obj2,...))* - The function returns an array of > the same type as the input argument where all duplicate values have been > removed. > Example: > > SELECT array_distinct(array('b', 'd', 'd', 'a')) FROM src LIMIT 1; > ['b', 'd', 'a'] -- This message was sent by Atlassian Jira (v8.20.10#820010)