[ https://issues.apache.org/jira/browse/HIVE-26754?focusedWorklogId=827569&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-827569 ]
ASF GitHub Bot logged work on HIVE-26754: ----------------------------------------- Author: ASF GitHub Bot Created on: 21/Nov/22 13:01 Start Date: 21/Nov/22 13:01 Worklog Time Spent: 10m Work Description: SourabhBadhya commented on code in PR #3777: URL: https://github.com/apache/hive/pull/3777#discussion_r1028009527 ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java: ########## @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde.serdeConstants; + +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +import java.util.ArrayList; +import java.util.List; + +/** + * Abstract GenericUDF for array functions + */ + +public abstract class AbstractGenericUDFArrayBase extends GenericUDF { + + static final int ARRAY_IDX = 0; + static final int ARRAY2_IDX = 1; + static final int START_IDX = 1; + static final int LENGTH_IDX = 2; + static final int SEPARATOR_IDX = 1; + static final int REPLACE_NULL_IDX = 2; + + int MIN_ARG_COUNT; + int MAX_ARG_COUNT; + + transient ListObjectInspector arrayOI; + transient ObjectInspector[] argumentOIs; + + transient Converter converter; + + enum FUNC_NAMES { + ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN, ARRAY_EXCEPT, ARRAY_INTERSECT + } + + FUNC_NAMES FUNC_NAME; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + + // Check if wrong number of arguments were passed + checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT); + + // Check if the argument is of category LIST or not + checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + + if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT + || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT + || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) { + 
checkArgCategory(arguments, ARRAY2_IDX, ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); + } + + if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) { + PrimitiveObjectInspector startIndexObjectInspector = (PrimitiveObjectInspector) arguments[START_IDX]; + PrimitiveObjectInspector lengthObjectInspector = (PrimitiveObjectInspector) arguments[LENGTH_IDX]; + checkArgIntPrimitiveCategory(startIndexObjectInspector, FUNC_NAME, 2); + checkArgIntPrimitiveCategory(lengthObjectInspector, FUNC_NAME, 3); + } + + arrayOI = (ListObjectInspector) arguments[ARRAY_IDX]; + argumentOIs = arguments; + + //return initialize(arguments); + return initListOI(arguments); + } + + @Override + public String getDisplayString(String[] children) { + assert (children.length == MIN_ARG_COUNT); + return FUNC_NAME.toString().toLowerCase() + "(" + children[ARRAY_IDX] + ")"; + } + + List<Object> convertArray(List objects) { + List<Object> ret = new ArrayList<>(); + for (Object o : objects) { + ret.add(converter.convert(o)); + } + return ret; + } + + void checkArgCategory(ObjectInspector[] arguments, int idx, Enum category, + FUNC_NAMES function_name, String typeName) throws UDFArgumentTypeException { + + if (!arguments[idx].getCategory().equals(category)) { + throw new UDFArgumentTypeException(idx, + "\"" + typeName + "\" " + + "expected at function " + function_name + ", but " + + "\"" + arguments[idx].getTypeName() + "\" " + + "is found"); + } + } + + void checkArgIntPrimitiveCategory(PrimitiveObjectInspector objectInspector, + FUNC_NAMES function_name, int idx) throws UDFArgumentTypeException { + + switch (objectInspector.getPrimitiveCategory()) { + case SHORT: + case INT: + case LONG: + break; + default: + throw new UDFArgumentTypeException(0, "Argument " + idx + + " of function " + function_name + " must be \"" + + serdeConstants.SMALLINT_TYPE_NAME + "\"" + + " or \"" + serdeConstants.INT_TYPE_NAME + "\"" + + " or \"" + serdeConstants.BIGINT_TYPE_NAME 
+ "\", but \"" + + objectInspector.getTypeName() + "\" was found."); + } + } + + boolean isListEmpty(Object array, ListObjectInspector listObjectInspector) { + + int arrayLength = listObjectInspector.getListLength(array); + + // Check if array is null or empty or value is null + return array == null || arrayLength <= 0; Review Comment: This can be simplified to `return listObjectInspector.getListLength(array) <= 0;` because the function `getListLength` already checks for a null value internally and returns -1 in that case. See here - https://github.com/apache/hive/blob/master/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java#L42-L45 Issue Time Tracking ------------------- Worklog Id: (was: 827569) Time Spent: 50m (was: 40m) > Implement array_distinct UDF to return an array after removing duplicates in > it > ------------------------------------------------------------------------------- > > Key: HIVE-26754 > URL: https://issues.apache.org/jira/browse/HIVE-26754 > Project: Hive > Issue Type: Sub-task > Components: Hive > Reporter: Taraka Rama Rao Lethavadla > Assignee: Taraka Rama Rao Lethavadla > Priority: Major > Labels: pull-request-available > Time Spent: 50m > Remaining Estimate: 0h > > *array_distinct(array(obj1, obj2,...))* - The function returns an array of > the same type as the input argument where all duplicate values have been > removed. > Example: > > SELECT array_distinct(array('b', 'd', 'd', 'a')) FROM src LIMIT 1; > ['b', 'd', 'a'] -- This message was sent by Atlassian Jira (v8.20.10#820010)