[ 
https://issues.apache.org/jira/browse/HIVE-26754?focusedWorklogId=827569&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-827569
 ]

ASF GitHub Bot logged work on HIVE-26754:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 21/Nov/22 13:01
            Start Date: 21/Nov/22 13:01
    Worklog Time Spent: 10m 
      Work Description: SourabhBadhya commented on code in PR #3777:
URL: https://github.com/apache/hive/pull/3777#discussion_r1028009527


##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Abstract GenericUDF for array functions
+ */
+
+public abstract class AbstractGenericUDFArrayBase extends GenericUDF {
+
+    static final int ARRAY_IDX = 0;
+    static final int ARRAY2_IDX = 1;
+    static final int START_IDX = 1;
+    static final int LENGTH_IDX = 2;
+    static final int SEPARATOR_IDX = 1;
+    static final int REPLACE_NULL_IDX = 2;
+
+    int MIN_ARG_COUNT;
+    int MAX_ARG_COUNT;
+
+    transient ListObjectInspector arrayOI;
+    transient ObjectInspector[] argumentOIs;
+
+    transient Converter converter;
+
+    enum FUNC_NAMES {
+        ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN, 
ARRAY_EXCEPT, ARRAY_INTERSECT
+    }
+
+    FUNC_NAMES FUNC_NAME;
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] arguments)
+            throws UDFArgumentException {
+
+        // Check if wrong number of arguments were passed
+        checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT);
+
+        // Check if the argument is of category LIST or not
+        checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST, 
FUNC_NAME,
+                org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+
+        if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT
+                || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT
+                || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) {
+            checkArgCategory(arguments, ARRAY2_IDX, 
ObjectInspector.Category.LIST, FUNC_NAME,
+                    
org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+        }
+
+        if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) {
+            PrimitiveObjectInspector startIndexObjectInspector = 
(PrimitiveObjectInspector) arguments[START_IDX];
+            PrimitiveObjectInspector lengthObjectInspector = 
(PrimitiveObjectInspector) arguments[LENGTH_IDX];
+            checkArgIntPrimitiveCategory(startIndexObjectInspector, FUNC_NAME, 
2);
+            checkArgIntPrimitiveCategory(lengthObjectInspector, FUNC_NAME, 3);
+        }
+
+        arrayOI = (ListObjectInspector) arguments[ARRAY_IDX];
+        argumentOIs = arguments;
+
+        //return initialize(arguments);
+        return initListOI(arguments);
+    }
+
+    @Override
+    public String getDisplayString(String[] children) {
+        assert (children.length == MIN_ARG_COUNT);
+        return FUNC_NAME.toString().toLowerCase() + "(" + children[ARRAY_IDX] 
+ ")";
+    }
+
+    List<Object> convertArray(List objects) {
+        List<Object> ret = new ArrayList<>();
+        for (Object o : objects) {
+            ret.add(converter.convert(o));
+        }
+        return ret;
+    }
+
+    void checkArgCategory(ObjectInspector[] arguments, int idx, Enum category,
+                          FUNC_NAMES function_name, String typeName) throws 
UDFArgumentTypeException {
+
+        if (!arguments[idx].getCategory().equals(category)) {
+            throw new UDFArgumentTypeException(idx,
+                    "\"" + typeName + "\" "
+                            + "expected at function " + function_name + ", but 
"
+                            + "\"" + arguments[idx].getTypeName() + "\" "
+                            + "is found");
+        }
+    }
+
+    void checkArgIntPrimitiveCategory(PrimitiveObjectInspector objectInspector,
+                                      FUNC_NAMES function_name, int idx) 
throws UDFArgumentTypeException {
+
+        switch (objectInspector.getPrimitiveCategory()) {
+            case SHORT:
+            case INT:
+            case LONG:
+                break;
+            default:
+                throw new UDFArgumentTypeException(0, "Argument " + idx
+                        + " of function " + function_name + " must be \""
+                        + serdeConstants.SMALLINT_TYPE_NAME + "\""
+                        + " or \"" + serdeConstants.INT_TYPE_NAME + "\""
+                        + " or \"" + serdeConstants.BIGINT_TYPE_NAME + "\", 
but \""
+                        + objectInspector.getTypeName() + "\" was found.");
+        }
+    }
+
+    boolean isListEmpty(Object array, ListObjectInspector listObjectInspector) 
{
+
+        int arrayLength = listObjectInspector.getListLength(array);
+
+        // Check if array is null or empty or value is null
+        return array == null || arrayLength <= 0;

Review Comment:
   This can be simplified to:
   `return listObjectInspector.getListLength(array) <= 0;`
   
   The explicit `array == null` check is redundant, because `getListLength` 
already checks for a null list internally and returns -1 in that case. See here: 
   
https://github.com/apache/hive/blob/master/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java#L42-L45





Issue Time Tracking
-------------------

    Worklog Id:     (was: 827569)
    Time Spent: 50m  (was: 40m)

> Implement array_distinct UDF to return an array after removing duplicates in 
> it
> -------------------------------------------------------------------------------
>
>                 Key: HIVE-26754
>                 URL: https://issues.apache.org/jira/browse/HIVE-26754
>             Project: Hive
>          Issue Type: Sub-task
>          Components: Hive
>            Reporter: Taraka Rama Rao Lethavadla
>            Assignee: Taraka Rama Rao Lethavadla
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 50m
>  Remaining Estimate: 0h
>
> *array_distinct(array(obj1, obj2,...))* - The function returns an array of 
> the same type as the input argument where all duplicate values have been 
> removed.
> Example:
> > SELECT array_distinct(array('b', 'd', 'd', 'a')) FROM src LIMIT 1;
> ['b', 'd', 'a']



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to